3 (***********************************************************************)
6 (* Maxence Guesdon, projet Cristal, INRIA Rocquencourt *)
8 (* Copyright 2001 Institut National de Recherche en Informatique et *)
9 (* en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the Q Public License version 1.0. *)
12 (***********************************************************************)
16 (** Generation of html code to display OCaml code. *)
22 prerr_string ">> Fatal error: "; prerr_endline msg; raise Fatal_error
25 | Illegal_character of char
26 | Unterminated_comment
28 | Unterminated_string_in_comment
29 | Keyword_as_label of string
32 exception Error of error * int * int
34 let base_escape_strings = [
40 let pre_escape_strings = [
43 ("\t", " ") ;
48 let fmt = ref Format.str_formatter
50 (** Escape the strings which would clash with html syntax,
51 and some other strings if we want to get a PRE style.*)
54 (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc)
56 (if !pre then base_escape_strings @ pre_escape_strings else base_escape_strings)
58 (** Escape the strings which would clash with html syntax. *)
61 (fun acc -> fun (s, s2) -> Str.global_replace (Str.regexp s) s2 acc)
65 (** The output functions *)
67 let print ?(esc=true) s =
68 Format.pp_print_string !fmt (if esc then escape s else s)
71 let print_class ?(esc=true) cl s =
72 print ~esc: false ("<span class=\""^cl^"\">"^
73 (if esc then escape s else s)^
77 (** The table of keywords with colors *)
78 let create_hashtable size init =
79 let tbl = Hashtbl.create size in
80 List.iter (fun (key, data) -> Hashtbl.add tbl key data) init;
83 (** The function used to return html code for the given comment body. *)
84 let html_of_comment = ref
85 (fun (s : string) -> "<b>Odoc_ocamlhtml.html_of_comment not initialized</b>")
88 create_hashtable 149 [
94 "constraint", "keyword" ;
100 "exception", "keyword" ;
101 "external", "keyword" ;
105 "function", "keyword" ;
106 "functor", "keyword" ;
109 "include", "keyword" ;
110 "inherit", "keyword" ;
111 "initializer", "keyword" ;
115 "method", "keyword" ;
116 "module", "keyword" ;
117 "mutable", "keyword" ;
119 "object", "keyword" ;
123 "parser", "keyword" ;
124 "private", "keyword" ;
127 "struct", "keyword" ;
134 "virtual", "keyword" ;
148 let kwsign_class = "keywordsign"
149 let constructor_class = "constructor"
150 let comment_class = "comment"
151 let string_class = "string"
152 let code_class = "code"
155 (** To buffer and print comments *)
160 let comment_buffer = Buffer.create 32
161 let reset_comment_buffer () = Buffer.reset comment_buffer
162 let store_comment_char = Buffer.add_char comment_buffer
163 let add_comment_string = Buffer.add_string comment_buffer
168 else " "^(iter (n-1))
172 let print_comment () =
173 let s = Buffer.contents comment_buffer in
174 let len = String.length s in
177 "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
183 let html = !html_of_comment (String.sub s 1 (len-1)) in
184 "</code><table><tr><td>"^(make_margin ())^"</td><td>"^
185 "<span class=\""^comment_class^"\">"^
187 "</span></td></tr></table><code class=\""^code_class^"\">"
190 prerr_endline (Printexc.to_string e);
191 "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
194 "<span class=\""^comment_class^"\">(*"^(escape s)^"*)</span>"
196 print ~esc: false code
198 (** To buffer string literals *)
200 let string_buffer = Buffer.create 32
201 let reset_string_buffer () = Buffer.reset string_buffer
202 let store_string_char = Buffer.add_char string_buffer
203 let get_stored_string () =
204 let s = Buffer.contents string_buffer in
207 (** To translate escape sequences *)
209 let char_for_backslash = function
216 let char_for_decimal_code lexbuf i =
217 let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
218 10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
219 (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
220 Char.chr(c land 0xFF)
222 (** To store the position of the beginning of a string and comment *)
223 let string_start_pos = ref 0;;
224 let comment_start_pos = ref [];;
225 let in_comment () = !comment_start_pos <> [];;
231 let report_error ppf = function
232 | Illegal_character c ->
233 fprintf ppf "Illegal character (%s)" (Char.escaped c)
234 | Unterminated_comment ->
235 fprintf ppf "Comment not terminated"
236 | Unterminated_string ->
237 fprintf ppf "String literal not terminated"
238 | Unterminated_string_in_comment ->
239 fprintf ppf "This comment contains an unterminated string literal"
240 | Keyword_as_label kwd ->
241 fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
246 let blank = [' ' '\010' '\013' '\009' '\012']
247 let lowercase = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
248 let uppercase = ['A'-'Z' '\192'-'\214' '\216'-'\222']
250 ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
252 ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
253 let decimal_literal = ['0'-'9']+
254 let hex_literal = '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']+
255 let oct_literal = '0' ['o' 'O'] ['0'-'7']+
256 let bin_literal = '0' ['b' 'B'] ['0'-'1']+
258 ['0'-'9']+ ('.' ['0'-'9']* )? (['e' 'E'] ['+' '-']? ['0'-'9']+)?
263 let s = Lexing.lexeme lexbuf in
267 | "\t" -> margin := !margin + 8
268 | "\n" -> margin := 0
275 { print "_" ; token lexbuf }
276 | "~" { print "~" ; token lexbuf }
277 | "~" lowercase identchar * ':'
278 { let s = Lexing.lexeme lexbuf in
279 let name = String.sub s 1 (String.length s - 2) in
280 if Hashtbl.mem keyword_table name then
281 raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
282 Lexing.lexeme_end lexbuf));
283 print s ; token lexbuf }
284 | "?" { print "?" ; token lexbuf }
285 | "?" lowercase identchar * ':'
286 { let s = Lexing.lexeme lexbuf in
287 let name = String.sub s 1 (String.length s - 2) in
288 if Hashtbl.mem keyword_table name then
289 raise (Error(Keyword_as_label name, Lexing.lexeme_start lexbuf,
290 Lexing.lexeme_end lexbuf));
291 print s ; token lexbuf }
292 | lowercase identchar *
293 { let s = Lexing.lexeme lexbuf in
295 let cl = Hashtbl.find keyword_table s in
296 (print_class cl s ; token lexbuf )
298 (print s ; token lexbuf )}
299 | uppercase identchar *
300 { print_class constructor_class (Lexing.lexeme lexbuf) ; token lexbuf } (* No capitalized keywords *)
301 | decimal_literal | hex_literal | oct_literal | bin_literal
302 { print (Lexing.lexeme lexbuf) ; token lexbuf }
304 { print (Lexing.lexeme lexbuf) ; token lexbuf }
306 { reset_string_buffer();
307 let string_start = Lexing.lexeme_start lexbuf in
308 string_start_pos := string_start;
310 lexbuf.Lexing.lex_start_pos <-
311 string_start - lexbuf.Lexing.lex_abs_pos;
312 print_class string_class ("\""^(get_stored_string())^"\"") ;
314 | "'" [^ '\\' '\''] "'"
315 { print_class string_class (Lexing.lexeme lexbuf) ;
317 | "'" '\\' ['\\' '\'' 'n' 't' 'b' 'r'] "'"
318 { print_class string_class (Lexing.lexeme lexbuf ) ;
320 | "'" '\\' ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
321 { print_class string_class (Lexing.lexeme lexbuf ) ;
325 reset_comment_buffer ();
326 comment_start_pos := [Lexing.lexeme_start lexbuf];
331 { reset_comment_buffer ();
332 comment_start_pos := [Lexing.lexeme_start lexbuf];
338 { lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
339 lexbuf.Lexing.lex_curr_p <-
340 { lexbuf.Lexing.lex_curr_p with
341 pos_cnum = lexbuf.Lexing.lex_curr_p.pos_cnum - 1
343 print (Lexing.lexeme lexbuf) ;
346 | "#" [' ' '\t']* ['0'-'9']+ [^ '\n' '\r'] * ('\n' | '\r' | "\r\n")
349 print (Lexing.lexeme lexbuf);
352 | "#" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
353 | "&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
354 | "&&" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
355 | "`" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
356 | "'" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
357 | "(" { print (Lexing.lexeme lexbuf) ; token lexbuf }
358 | ")" { print (Lexing.lexeme lexbuf) ; token lexbuf }
359 | "*" { print (Lexing.lexeme lexbuf) ; token lexbuf }
360 | "," { print (Lexing.lexeme lexbuf) ; token lexbuf }
361 | "??" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
362 | "->" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
363 | "." { print (Lexing.lexeme lexbuf) ; token lexbuf }
364 | ".." { print (Lexing.lexeme lexbuf) ; token lexbuf }
365 | ":" { print (Lexing.lexeme lexbuf) ; token lexbuf }
366 | "::" { print (Lexing.lexeme lexbuf) ; token lexbuf }
367 | ":=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
368 | ":>" { print (Lexing.lexeme lexbuf) ; token lexbuf }
369 | ";" { print (Lexing.lexeme lexbuf) ; token lexbuf }
370 | ";;" { print (Lexing.lexeme lexbuf) ; token lexbuf }
371 | "<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
372 | "<-" { print (Lexing.lexeme lexbuf) ; token lexbuf }
373 | "=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
374 | "[" { print (Lexing.lexeme lexbuf) ; token lexbuf }
375 | "[|" { print (Lexing.lexeme lexbuf) ; token lexbuf }
376 | "[<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
377 | "]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
378 | "{" { print (Lexing.lexeme lexbuf) ; token lexbuf }
379 | "{<" { print (Lexing.lexeme lexbuf) ; token lexbuf }
380 | "|" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
381 | "||" { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
382 | "|]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
383 | ">" { print (Lexing.lexeme lexbuf) ; token lexbuf }
384 | ">]" { print (Lexing.lexeme lexbuf) ; token lexbuf }
385 | "}" { print (Lexing.lexeme lexbuf) ; token lexbuf }
386 | ">}" { print (Lexing.lexeme lexbuf) ; token lexbuf }
388 | "!=" { print (Lexing.lexeme lexbuf) ; token lexbuf }
389 | "+" { print (Lexing.lexeme lexbuf) ; token lexbuf }
390 | "-" { print (Lexing.lexeme lexbuf) ; token lexbuf }
391 | "-." { print (Lexing.lexeme lexbuf) ; token lexbuf }
394 { print (Lexing.lexeme lexbuf) ; token lexbuf }
395 | ['~' '?'] symbolchar +
396 { print_class kwsign_class (Lexing.lexeme lexbuf) ; token lexbuf }
397 | ['=' '<' '>' '|' '&' '$'] symbolchar *
398 { print (Lexing.lexeme lexbuf) ; token lexbuf }
399 | ['@' '^'] symbolchar *
400 { print (Lexing.lexeme lexbuf) ; token lexbuf }
401 | ['+' '-'] symbolchar *
402 { print (Lexing.lexeme lexbuf) ; token lexbuf }
404 { print (Lexing.lexeme lexbuf) ; token lexbuf }
405 | ['*' '/' '%'] symbolchar *
406 { print (Lexing.lexeme lexbuf) ; token lexbuf }
409 { raise (Error(Illegal_character ((Lexing.lexeme lexbuf).[0]),
410 Lexing.lexeme_start lexbuf, Lexing.lexeme_end lexbuf)) }
414 { comment_start_pos := Lexing.lexeme_start lexbuf :: !comment_start_pos;
415 store_comment_char '(';
416 store_comment_char '*';
420 { match !comment_start_pos with
422 | [x] -> comment_start_pos := []
424 store_comment_char '*';
425 store_comment_char ')';
426 comment_start_pos := l;
430 { reset_string_buffer();
431 string_start_pos := Lexing.lexeme_start lexbuf;
432 store_comment_char '"';
434 try string lexbuf; add_comment_string ((get_stored_string()^"\""))
435 with Error (Unterminated_string, _, _) ->
436 let st = List.hd !comment_start_pos in
437 raise (Error (Unterminated_string_in_comment, st, st + 2))
442 store_comment_char '\'';
443 store_comment_char '\'';
445 | "'" [^ '\\' '\''] "'"
447 store_comment_char '\'';
448 store_comment_char (Lexing.lexeme_char lexbuf 1);
449 store_comment_char '\'';
451 | "'\\" ['\\' '\'' 'n' 't' 'b' 'r'] "'"
453 store_comment_char '\'';
454 store_comment_char '\\';
455 store_comment_char(char_for_backslash(Lexing.lexeme_char lexbuf 1)) ;
456 store_comment_char '\'';
458 | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
460 store_comment_char '\'';
461 store_comment_char '\\';
462 store_comment_char(char_for_decimal_code lexbuf 1);
463 store_comment_char '\'';
466 { let st = List.hd !comment_start_pos in
467 raise (Error (Unterminated_comment, st, st + 2));
470 { store_comment_char(Lexing.lexeme_char lexbuf 0);
476 | '\\' ("\010" | "\013" | "\013\010") [' ' '\009'] *
478 | '\\' ['\\' '"' 'n' 't' 'b' 'r']
479 { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
481 | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
482 { store_string_char(char_for_decimal_code lexbuf 1);
485 { raise (Error (Unterminated_string,
486 !string_start_pos, !string_start_pos+1)) }
488 { store_string_char(Lexing.lexeme_char lexbuf 0);
492 let html_of_code b ?(with_pre=true) code =
493 let old_pre = !pre in
494 let old_margin = !margin in
495 let old_comment_buffer = Buffer.contents comment_buffer in
496 let old_string_buffer = Buffer.contents string_buffer in
497 let buf = Buffer.create 256 in
498 let old_fmt = !fmt in
499 fmt := Format.formatter_of_buffer buf ;
503 let start = "<code class=\""^code_class^"\">" in
504 let ending = "</code>" in
508 print ~esc: false start ;
509 let lexbuf = Lexing.from_string code in
510 let _ = token lexbuf in
511 print ~esc: false ending ;
512 Format.pp_print_flush !fmt () ;
516 (* flush str_formatter because we already output
518 Format.pp_print_flush !fmt () ;
523 margin := old_margin ;
524 Buffer.reset comment_buffer;
525 Buffer.add_string comment_buffer old_comment_buffer ;
526 Buffer.reset string_buffer;
527 Buffer.add_string string_buffer old_string_buffer ;
530 Buffer.add_string b html