(* $Id: netencoding.ml,v 1.6 2001/08/30 19:45:43 gerd Exp $
 * ----------------------------------------------------------------------
 *
 *)


module Str = Netstring_str;;

module Base64 = struct
  let b64_pattern plus slash =
    [| 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; 'G'; 'H'; 'I'; 'J'; 'K'; 'L'; 'M';
       'N'; 'O'; 'P'; 'Q'; 'R'; 'S'; 'T'; 'U'; 'V'; 'W'; 'X'; 'Y'; 'Z';
       'a'; 'b'; 'c'; 'd'; 'e'; 'f'; 'g'; 'h'; 'i'; 'j'; 'k'; 'l'; 'm';
       'n'; 'o'; 'p'; 'q'; 'r'; 's'; 't'; 'u'; 'v'; 'w'; 'x'; 'y'; 'z';
       '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7'; '8'; '9'; plus; slash |];;


  let rfc_pattern = b64_pattern '+' '/';;
  let url_pattern = b64_pattern '-' '/';;

  let encode_with_options b64 equal s pos len linelen crlf =
  (* encode using "base64".
   * 'b64': The encoding table, created by b64_pattern.
   * 'equal': The character that should be used instead of '=' in the original
   *          encoding scheme. Pass '=' to get the original encoding scheme.
   * s, pos, len, linelen: See the interface description of encode_substring.
   *)
    assert (Array.length b64 = 64);
    if len < 0 or pos < 0 or pos > String.length s or linelen < 0 then
      invalid_arg "Netencoding.Base64.encode_with_options";
    if pos + len > String.length s then
      invalid_arg "Netencoding.Base64.encode_with_options";

    let linelen =
      (linelen/4) * 4 in

    let l_t = if len = 0 then 0 else ((len - 1) / 3 + 1) * 4 in
    (* l_t: length of the result without additional line endings *)

    let l_t' = 
      if linelen < 4 then
	l_t
      else
	if l_t = 0 then 0 else 
	  let n_lines = ((l_t - 1) / linelen) + 1 in
	  l_t + n_lines * (if crlf then 2 else 1)
    in
    (* l_t': length of the result with CRLF or LF characters *)
    
    let t = String.make l_t' equal in
    let j = ref 0 in
    let q = ref 0 in
    for k = 0 to len / 3 - 1 do
      let p = pos + 3*k in
      (* p >= pos >= 0: this is evident
       * p+2 < pos+len <= String.length s:
       *   Because k <= len/3-1
       *         3*k <= 3*(len/3-1) = len - 3
       *   pos+3*k+2 <= pos + len - 3 + 2 = pos + len - 1 < pos + len
       * So it is proved that the following unsafe string accesses always
       * work.
       *)
      let bits = (Char.code (String.unsafe_get s (p))   lsl 16) lor
		 (Char.code (String.unsafe_get s (p+1)) lsl  8) lor
		 (Char.code (String.unsafe_get s (p+2))) in
      (* Obviously, 'bits' is a 24 bit entity (i.e. bits < 2**24) *)
      assert(!j + 3 < l_t');
      String.unsafe_set t !j     (Array.unsafe_get b64 ( bits lsr 18));
      String.unsafe_set t (!j+1) (Array.unsafe_get b64 ((bits lsr 12) land 63));
      String.unsafe_set t (!j+2) (Array.unsafe_get b64 ((bits lsr  6) land 63));
      String.unsafe_set t (!j+3) (Array.unsafe_get b64 ( bits         land 63));
      j := !j + 4;
      if linelen > 3 then begin
	q := !q + 4;
	if !q + 4 > linelen then begin
	  (* The next 4 characters won't fit on the current line. So insert
	   * a line ending.
	   *)
	  if crlf then begin
	    t.[ !j ] <- '\013';
	    t.[ !j+1 ] <- '\010';
	    j := !j + 2;
	  end
	  else begin 
	    t.[ !j ] <- '\010';
	    incr j
	  end;
	  q := 0;
	end;
      end;
    done;
    (* padding if needed: *)
    let m = len mod 3 in
    begin
      match m with
	  0 -> ()
	| 1 ->
            let bits = Char.code (s.[pos + len - 1]) in
	    t.[ !j     ] <- b64.( bits lsr 2);
	    t.[ !j + 1 ] <- b64.( (bits land 0x03) lsl 4);
	    j := !j + 4;
	    q := !q + 4;
	| 2 ->
	    let bits = (Char.code (s.[pos + len - 2]) lsl 8) lor
                       (Char.code (s.[pos + len - 1])) in
	    t.[ !j     ] <- b64.( bits lsr 10);
	    t.[ !j + 1 ] <- b64.((bits lsr  4) land 0x3f);
	    t.[ !j + 2 ] <- b64.((bits lsl  2) land 0x3f);
	    j := !j + 4;
	    q := !q + 4;
	| _ -> assert false
    end;

    (* If required, add another line end: *)

    if linelen > 3 & !q > 0 then begin
      if crlf then begin
	t.[ !j ] <- '\013';
	t.[ !j+1 ] <- '\010';
	j := !j + 2;
      end
      else begin 
	t.[ !j ] <- '\010';
	incr j
      end;	
    end;

    t ;;



  let encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
    let l = match len with None -> String.length s - pos | Some x -> x in
    encode_with_options rfc_pattern '=' s pos l linelength crlf;;


  let encode_substring s ~pos ~len ~linelength ~crlf =
    encode_with_options rfc_pattern '=' s pos len linelength crlf;;


  let url_encode ?(pos=0) ?len ?(linelength=0) ?(crlf=false) s =
    let l = match len with None -> String.length s - pos | Some x -> x in
    encode_with_options url_pattern '.' s pos l linelength crlf;;
    

  let decode_substring t ~pos ~len ~url_variant:p_url ~accept_spaces:p_spaces =
    if len < 0 or pos < 0 or pos > String.length t then
      invalid_arg "Netencoding.Base64.decode_substring";
    if pos + len > String.length t then
      invalid_arg "Netencoding.Base64.decode_substring";

    (* Compute the number of effective characters l_t in 't';
     * pad_chars: number of '=' characters at the end of the string.
     *)
    let l_t, pad_chars =
      if p_spaces then begin
	(* Count all non-whitespace characters: *)
	let c = ref 0 in
	let p = ref 0 in
	for i = pos to pos + len - 1 do
	  match String.unsafe_get t i with
	      (' '|'\t'|'\r'|'\n') -> ()
	    | ('='|'.') as ch ->
		if ch = '.' & not p_url then
		  invalid_arg "Netencoding.Base64.decode_substring";
		incr c;
		incr p;
		if !p > 2 then
		  invalid_arg "Netencoding.Base64.decode_substring";
		for j = i+1 to pos + len - 1 do
		  match String.unsafe_get t j with
		      (' '|'\t'|'\r'|'\n'|'.'|'=') -> ()
		    | _ ->
			(* Only another '=' or spaces allowed *)
			invalid_arg "Netencoding.Base64.decode_substring";
		done
	    | _ -> incr c
	done;
	if !c mod 4 <> 0 then
	  invalid_arg "Netencoding.Base64.decode_substring";
	!c, !p
      end
      else
	len,
	( if len mod 4 <> 0 then
	    invalid_arg "Netencoding.Base64.decode_substring";
	  if len > 0 then (
	    if String.sub t (len - 2) 2 = "==" or 
	       (p_url & String.sub t (len - 2) 2 = "..") then 2
	    else 
	      if String.sub t (len - 1) 1 = "=" or 
		 (p_url & String.sub t (len - 1) 1 = ".") then 1
	      else
		0
	  )
	  else 0 
	)
    in

    let l_s = (l_t / 4) * 3 - pad_chars in       (* sic! *)
    let s = String.create l_s in

    let decode_char c =
      match c with
	  'A' .. 'Z'  -> Char.code(c) - 65     (* 65 = Char.code 'A' *)
	| 'a' .. 'z'  -> Char.code(c) - 71     (* 71 = Char.code 'a' - 26 *)
	| '0' .. '9'  -> Char.code(c) + 4      (* -4 = Char.code '0' - 52 *)
	| '+'         -> 62
	| '-'         -> if not p_url then 
	                   invalid_arg "Netencoding.Base64.decode_substring";
	                 62
	| '/'         -> 63
	| _           -> invalid_arg "Netencoding.Base64.decode_substring";
    in

    (* Decode all but the last quartet: *)

    let cursor = ref pos in
    let rec next_char() = 
      match t.[ !cursor ] with
	  (' '|'\t'|'\r'|'\n') -> 
	    if p_spaces then (incr cursor; next_char())
	    else invalid_arg "Netencoding.Base64.decode_substring"
	| c ->
	    incr cursor; c
    in

    if p_spaces then begin
      for k = 0 to l_t / 4 - 2 do
	let q = 3*k in
	let c0 = next_char() in
	let c1 = next_char() in
	let c2 = next_char() in
	let c3 = next_char() in
	let n0 = decode_char c0 in
	let n1 = decode_char c1 in
	let n2 = decode_char c2 in
	let n3 = decode_char c3 in
	let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
	String.unsafe_set s q     (Char.chr x0);
	String.unsafe_set s (q+1) (Char.chr x1);
	String.unsafe_set s (q+2) (Char.chr x2);
      done;
    end
    else begin
      (* Much faster: *)
      for k = 0 to l_t / 4 - 2 do
	let p = pos + 4*k in
	let q = 3*k in
	let c0 = String.unsafe_get t p in
	let c1 = String.unsafe_get t (p + 1) in
	let c2 = String.unsafe_get t (p + 2) in
	let c3 = String.unsafe_get t (p + 3) in
	let n0 = decode_char c0 in
	let n1 = decode_char c1 in
	let n2 = decode_char c2 in
	let n3 = decode_char c3 in
	let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
	String.unsafe_set s q     (Char.chr x0);
	String.unsafe_set s (q+1) (Char.chr x1);
	String.unsafe_set s (q+2) (Char.chr x2);
      done;
      cursor := pos + l_t - 4;
    end;

    (* Decode the last quartet: *)

    if l_t > 0 then begin
      let q = 3*(l_t / 4 - 1) in
      let c0 = next_char() in
      let c1 = next_char() in
      let c2 = next_char() in
      let c3 = next_char() in

      if (c2 = '=' & c3 = '=') or (p_url & c2 = '.' & c3 = '.') then begin
	let n0 = decode_char c0 in
	let n1 = decode_char c1 in
	let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	s.[ q ]   <- Char.chr x0;
      end
      else
	if (c3 = '=') or (p_url & c3 = '.') then begin
	  let n0 = decode_char c0 in
	  let n1 = decode_char c1 in
	  let n2 = decode_char c2 in
	  let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	  let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	  s.[ q ]   <- Char.chr x0;
	  s.[ q+1 ] <- Char.chr x1;
	end
	else begin
	  let n0 = decode_char c0 in
	  let n1 = decode_char c1 in
	  let n2 = decode_char c2 in
	  let n3 = decode_char c3 in
	  let x0 = (n0 lsl 2) lor (n1 lsr 4) in
	  let x1 = ((n1 lsl 4) land 0xf0) lor (n2 lsr 2) in
	  let x2 = ((n2 lsl 6) land 0xc0) lor n3 in
	  s.[ q ]   <- Char.chr x0;
	  s.[ q+1 ] <- Char.chr x1;
	  s.[ q+2 ] <- Char.chr x2;
	end

    end;

    s ;;



  let decode ?(pos=0) ?len ?(url_variant=true) ?(accept_spaces=false) s =
    let l = match len with None -> String.length s - pos | Some x -> x in
    decode_substring s pos l url_variant accept_spaces;;

  let decode_ignore_spaces s =
    decode_substring s 0 (String.length s) true true;;

  
end



module QuotedPrintable = struct

  let encode_substring s ~pos ~len =
    
    if len < 0 or pos < 0 or pos > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.encode_substring";
    if pos + len > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.encode_substring";

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	    ('\r'|'\n') -> 
	      count (n+1) (i+1)
	  | ('\000'..'\031'|'\127'..'\255'|
	     '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') ->
	      count (n+3) (i+1)
	  | ' ' ->
	      (* Protect spaces only if they occur at the end of a line *)
	      if i+1 < len then
		match s.[pos+i+1] with
		    ('\r'|'\n') -> 
		      count (n+3) (i+1)
		  | _ ->
		      count (n+1) (i+1)
	      else
		count (n+3) (i+1)
	  | _ ->
	      count (n+1) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    
    let hexdigit =
      [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
	 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in

    let k = ref 0 in

    let add_quoted c =
      t.[ !k ]   <- '=';
      t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
      t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
    in

    for i = 0 to len - 1 do
      match String.unsafe_get s i with
	  ('\r'|'\n') as c -> 
	    String.unsafe_set t !k c;
	    incr k
	| ('\000'..'\031'|'\127'..'\255'|
	   '!'|'"'|'#'|'$'|'@'|'['|']'|'^'|'\''|'{'|'|'|'}'|'~'|'=') as c ->
	    add_quoted c;
	    k := !k + 3
	| ' ' ->
	    (* Protect spaces only if they occur at the end of a line *)
	    if i+1 < len then
	      match s.[pos+i+1] with
		  ('\r'|'\n') -> 
		    add_quoted ' ';
		    k := !k + 3;
		| _ ->
		    String.unsafe_set t !k ' ';
		    incr k
	    else begin
	      add_quoted ' ';
	      k := !k + 3;
	    end
	| c ->
	    String.unsafe_set t !k c;
	    incr k
    done;

    t ;;


  let encode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    encode_substring s pos l;;



  let decode_substring s ~pos ~len =
    
    if len < 0 or pos < 0 or pos > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.decode_substring";
    if pos + len > String.length s then
      invalid_arg "Netencoding.QuotedPrintable.decode_substring";

    let decode_hex c =
      match c with
	  '0'..'9' -> Char.code c - 48
	| 'A'..'F' -> Char.code c - 55
	| 'a'..'f' -> Char.code c - 87
	| _ ->
	   invalid_arg "Netencoding.QuotedPrintable.decode_substring";
    in 

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	    '=' ->
	      if i+1 = len then
		(* A '=' at EOF is ignored *)
		count n (i+1)
	      else
		if i+1 < len then
		  match s.[pos+i+1] with
		      '\r' ->
			(* Official soft break *)
			if i+2 < len & s.[pos+i+2] = '\n' then
			  count n (i+3)
			else
			  count n (i+2)
		    | '\n' ->
			(* Inofficial soft break *)
			count n (i+2)
		    | _ ->
			if i+2 >= len then
			  invalid_arg 
			    "Netencoding.QuotedPrintable.decode_substring";
			let _ = decode_hex s.[pos+i+1] in
			let _ = decode_hex s.[pos+i+2] in
			count (n+1) (i+3)
		else
		  invalid_arg "Netencoding.QuotedPrintable.decode_substring"
	  | _ ->
	      count (n+1) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    let k = ref pos in
    let e = pos + len in
    let i = ref 0 in

    while !i < l do
      match String.unsafe_get s !k with
	  '=' ->
	    if !k+1 = e then
	      (* A '=' at EOF is ignored *)
	      ()
	    else
	      if !k+1 < e then
		match s.[!k+1] with
		    '\r' ->
		      (* Official soft break *)
		      if !k+2 < e & s.[!k+2] = '\n' then
			k := !k + 3
		      else
			k := !k + 2
		  | '\n' ->
		      (* Inofficial soft break *)
		      k := !k + 2
		  | _ ->
		      if !k+2 >= e then
			invalid_arg 
			  "Netencoding.QuotedPrintable.decode_substring";
		      let x1 = decode_hex s.[!k+1] in
		      let x2 = decode_hex s.[!k+2] in
		      t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
		      k := !k + 3;
		      incr i
	      else
		invalid_arg "Netencoding.QuotedPrintable.decode_substring"
	| c ->
	    String.unsafe_set t !i c;
	    incr k;
	    incr i
    done;

    t ;;


  let decode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    decode_substring s pos l;;

end

	      
module Q = struct

  let encode_substring s ~pos ~len =
    
    if len < 0 or pos < 0 or pos > String.length s then
      invalid_arg "Netencoding.Q.encode_substring";
    if pos + len > String.length s then
      invalid_arg "Netencoding.Q.encode_substring";

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	  | ('A'..'Z'|'a'..'z'|'0'..'9') ->
	      count (n+1) (i+1)
	  | _ ->
	      count (n+3) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    
    let hexdigit =
      [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
	 '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F'; |] in

    let k = ref 0 in

    let add_quoted c =
      t.[ !k ]   <- '=';
      t.[ !k+1 ] <- hexdigit.( Char.code c lsr 4 );
      t.[ !k+2 ] <- hexdigit.( Char.code c land 15 )
    in

    for i = 0 to len - 1 do
      match String.unsafe_get s i with
	| ('A'..'Z'|'a'..'z'|'0'..'9') as c ->
	    String.unsafe_set t !k c;
	    incr k
	| c ->
	    add_quoted c;
	    k := !k + 3
    done;

    t ;;


  let encode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    encode_substring s pos l;;



  let decode_substring s ~pos ~len =
    
    if len < 0 or pos < 0 or pos > String.length s then
      invalid_arg "Netencoding.Q.decode_substring";
    if pos + len > String.length s then
      invalid_arg "Netencoding.Q.decode_substring";

    let decode_hex c =
      match c with
	  '0'..'9' -> Char.code c - 48
	| 'A'..'F' -> Char.code c - 55
	| 'a'..'f' -> Char.code c - 87
	| _ ->
	   invalid_arg "Netencoding.Q.decode_substring";
    in 

    let rec count n i =
      if i < len then
	match String.unsafe_get s (pos+i) with
	    '=' ->
	      if i+2 >= len then
		invalid_arg "Netencoding.Q.decode_substring";
	      let _ = decode_hex s.[pos+i+1] in
	      let _ = decode_hex s.[pos+i+2] in
	      count (n+1) (i+3)
	  | _ ->  (* including '_' *)
	      count (n+1) (i+1)
      else
	n
    in

    let l = count 0 0 in
    let t = String.create l in
    let k = ref pos in
    let e = pos + len in
    let i = ref 0 in

    while !i < l do
      match String.unsafe_get s !k with
	  '=' ->
	    if !k+2 >= e then
	      invalid_arg "Netencoding.Q.decode_substring";
	    let x1 = decode_hex s.[!k+1] in
	    let x2 = decode_hex s.[!k+2] in
	    t.[ !i ] <- Char.chr ((x1 lsl 4) lor x2);
	    k := !k + 3;
	    incr i
	| '_' ->
	    String.unsafe_set t !i ' ';
	    incr k;
	    incr i
	| c ->
	    String.unsafe_set t !i c;
	    incr k;
	    incr i
    done;

    t ;;


  let decode ?(pos=0) ?len s =
    let l = match len with None -> String.length s - pos | Some x -> x in 
    decode_substring s pos l ;;

end


module Url = struct
  let hex_digits =
    [| '0'; '1'; '2'; '3'; '4'; '5'; '6'; '7';
       '8'; '9'; 'A'; 'B'; 'C'; 'D'; 'E'; 'F' |];;

  let to_hex2 k =
    (* Converts k to a 2-digit hex string *)
    let s = String.create 2 in
    s.[0] <- hex_digits.( (k lsr 4) land 15 );
    s.[1] <- hex_digits.( k land 15 );
    s ;;


  let of_hex1 c =
    match c with
	('0'..'9') -> Char.code c - Char.code '0'
      | ('A'..'F') -> Char.code c - Char.code 'A' + 10
      | ('a'..'f') -> Char.code c - Char.code 'a' + 10
      | _ ->
	raise Not_found ;;



  let url_encoding_re =
    Str.regexp "[^A-Za-z0-9$_.!*'(),-]";;

  let url_decoding_re =
    Str.regexp "\\+\\|%..\\|%.\\|%";;


  let encode ?(plus = true) s =
    Str.global_substitute
      url_encoding_re
      (fun r _ ->
	 match Str.matched_string r s with
	     " " when plus -> "+"
	   | x ->
	       let k = Char.code(x.[0]) in
	       "%" ^ to_hex2 k
      )
      s ;;


  let decode ?(plus = true) s =
    let l = String.length s in
    Str.global_substitute
      url_decoding_re
      (fun r _ ->
	 match Str.matched_string r s with
	   | "+" -> if plus then " " else "+"
	   | _ ->
	       let i = Str.match_beginning r in
	       (* Assertion: s.[i] = '%' *)
	       if i+2 >= l then failwith "Cgi.decode";
	       let c1 = s.[i+1] in
	       let c2 = s.[i+2] in
	       begin
		 try
		   let k1 = of_hex1 c1 in
		   let k2 = of_hex1 c2 in
		   String.make 1 (Char.chr((k1 lsl 4) lor k2))
		 with
		     Not_found ->
		       failwith "Cgi.decode"
	       end
      )
      s ;;

end


module Html = struct

  let eref_re = 
    Str.regexp 
      "&\\(#\\([0-9]+\\);\\|\\([a-zA-Z]+\\);\\)" ;;
  let unsafe_re = Str.regexp "[<>&\"\000-\008\011-\012\014-\031\127-\255]" ;;
  
  let etable =
    [ "lt", "<";
      "gt", ">";
      "amp", "&";
      "quot", "\"";     
         (* Note: &quot; is new in HTML-4.0, but it has been widely used
	  * much earlier.
	  *)
      "nbsp", "\160";
      "iexcl", "\161";
      "cent", "\162";
      "pound", "\163";
      "curren", "\164";
      "yen", "\165";
      "brvbar", "\166";
      "sect", "\167";
      "uml", "\168";
      "copy", "\169";
      "ordf", "\170";
      "laquo", "\171";
      "not", "\172";
      "shy", "\173";
      "reg", "\174";
      "macr", "\175";
      "deg", "\176";
      "plusmn", "\177";
      "sup2", "\178";
      "sup3", "\179";
      "acute", "\180";
      "micro", "\181";
      "para", "\182";
      "middot", "\183";
      "cedil", "\184";
      "sup1", "\185";
      "ordm", "\186";
      "raquo", "\187";
      "frac14", "\188";
      "frac12", "\189";
      "frac34", "\190";
      "iquest", "\191";
      "Agrave", "\192";
      "Aacute", "\193";
      "Acirc", "\194";
      "Atilde", "\195";
      "Auml", "\196";
      "Aring", "\197";
      "AElig", "\198";
      "Ccedil", "\199";
      "Egrave", "\200";
      "Eacute", "\201";
      "Ecirc", "\202";
      "Euml", "\203";
      "Igrave", "\204";
      "Iacute", "\205";
      "Icirc", "\206";
      "Iuml", "\207";
      "ETH", "\208";
      "Ntilde", "\209";
      "Ograve", "\210";
      "Oacute", "\211";
      "Ocirc", "\212";
      "Otilde", "\213";
      "Ouml", "\214";
      "times", "\215";
      "Oslash", "\216";
      "Ugrave", "\217";
      "Uacute", "\218";
      "Ucirc", "\219";
      "Uuml", "\220";
      "Yacute", "\221";
      "THORN", "\222";
      "szlig", "\223";
      "agrave", "\224";
      "aacute", "\225";
      "acirc", "\226";
      "atilde", "\227";
      "auml", "\228";
      "aring", "\229";
      "aelig", "\230";
      "ccedil", "\231";
      "egrave", "\232";
      "eacute", "\233";
      "ecirc", "\234";
      "euml", "\235";
      "igrave", "\236";
      "iacute", "\237";
      "icirc", "\238";
      "iuml", "\239";
      "eth", "\240";
      "ntilde", "\241";
      "ograve", "\242";
      "oacute", "\243";
      "ocirc", "\244";
      "otilde", "\245";
      "ouml", "\246";
      "divide", "\247";
      "oslash", "\248";
      "ugrave", "\249";
      "uacute", "\250";
      "ucirc", "\251";
      "uuml", "\252";
      "yacute", "\253";
      "thorn", "\254";
      "yuml", "\255";
    ] ;;

  let quick_etable =
    let ht = Hashtbl.create 50 in
    List.iter (fun (name,value) -> Hashtbl.add ht name value) etable;
    (* Entities to be decoded, but that must not be encoded: *)
    Hashtbl.add ht "apos" "'";        (* used in XML documents *)
    ht ;;

  let rev_etable =
    let a = Array.create 256 "" in
    List.iter (fun (name,value) -> 
		 a.(Char.code(value.[0])) <- "&" ^ name ^ ";") etable;
    for i = 0 to 8 do
      a.(i) <- "&#" ^ string_of_int i ^ ";"
    done;
    for i = 11 to 12 do
      a.(i) <- "&#" ^ string_of_int i ^ ";"
    done;
    for i = 14 to 31 do
      a.(i) <- "&#" ^ string_of_int i ^ ";"
    done;
    for i = 127 to 159 do
      a.(i) <- "&#" ^ string_of_int i ^ ";"
    done;
    a ;;

  let decode_to_latin1 s =
    Str.global_substitute
      eref_re
      (fun r _ ->
	 let t = Str.matched_string r s in
	 try
	   let n = int_of_string(Str.matched_group r 2 s) in
	   if n < 256 then
	     String.make 1 (Char.chr n)
	   else
	     t
	 with
	     Not_found ->
	       try
		 let name = Str.matched_group r 3 s in
		 try
		   Hashtbl.find quick_etable name
		 with
		     Not_found ->
		       t
	       with
		   Not_found -> assert false
      )
      s ;;

  let encode_from_latin1 s =
    Str.global_substitute
      unsafe_re
      (fun r _ ->
	 let t = Str.matched_string r s in
	 let i = Char.code (t.[0]) in
	 rev_etable.(i)
      )
      s ;;
end
	 
	     

(* ======================================================================
 * History:
 * 
 * $Log: netencoding.ml,v $
 * Revision 1.6  2001/08/30 19:45:43  gerd
 * 	Module Url: added the option ~plus
 *
 * Revision 1.5  2000/06/25 22:34:43  gerd
 * 	Added labels to arguments.
 *
 * Revision 1.4  2000/06/25 21:15:48  gerd
 * 	Checked thread-safety.
 *
 * Revision 1.3  2000/03/03 17:03:16  gerd
 * 	Q encoding: CR and LF are quoted.
 *
 * Revision 1.2  2000/03/03 01:08:29  gerd
 * 	Added Netencoding.Html functions.
 *
 * Revision 1.1  2000/03/02 01:14:48  gerd
 * 	Initial revision.
 *
 * 
 *)
