1 (* Copyright (C) 2009 Mauricio Fernandez <mfp@acm.org> *)
6 (* TYPE_CONV_PATH "Simple_markup" *)
8 type ref = { src
: string; desc
: string }
12 | Pre
of string * string option
13 | Heading
of int * par_text
14 | Quote
of paragraph list
15 | Ulist
of paragraph list
* paragraph list list
16 | Olist
of paragraph list
* paragraph list list
18 and par_text
= text list
30 and href
= { href_target
: string; href_desc
: string; }
32 and img_ref
= { img_src
: string; img_alt
: string; }
34 (* and par_list = paragraph list with sexp *)
36 (* class fold = Camlp4Filters.GenerateFold.generated *)
38 type parse_state
= { max
: int; current
: Buffer.t
; fragments
: text list
; }
40 (* let string_of_paragraph p = Sexplib.Sexp.to_string_hum (sexp_of_paragraph p) *)
41 (* let string_of_paragraphs ps = Sexplib.Sexp.to_string_hum (sexp_of_par_list ps) *)
43 let indentation ?
(ts
=8) s
=
44 let rec loop n indent max
=
45 if n
>= max
then indent
47 ' '
-> loop (n
+ 1) (indent
+ 1) max
48 | '
\t'
-> loop (n
+ 1) (indent
+ 8) max
50 in loop 0 0 (String.length s
)
53 let b = Buffer.create
(String.length s
) in
54 let len = String.length s
in
56 if i
>= len then Buffer.contents
b
58 '
\\'
when i
< len - 1 -> Buffer.add_char
b s
.[i
+1]; loop (i
+ 2)
59 | c
-> Buffer.add_char
b c
; loop (i
+ 1)
62 let unescape_slice s ~first ~last
=
63 unescape (String.strip
(String.slice ~first ~last s
))
65 let snd_is s c
= String.length s
> 1 && s
.[1] = c
66 let snd_is_space s
= snd_is s ' '
|| snd_is s '
\t'
69 let rec loop acc
= match f x
with
71 | Some y
-> loop (y
:: acc
)
74 let push_remainder ?
(first
=2) indent s e
=
75 let s = String.slice ~first
s in
76 let s'
= String.strip
s in
77 Enum.push e
(indent
+ first
+ indentation s, s'
, s'
= "")
79 let adds = Buffer.add_string
81 let addc = Buffer.add_char
83 let new_fragment () = Buffer.create
8
86 if Buffer.length st
.current
> 0 then
87 Text
(Buffer.contents st
.current
) :: st
.fragments
90 let rec read_paragraph ?
(skip_blank
=true) indent e
= match Enum.peek e
with
92 | Some
(indentation, line
, isblank
) -> match isblank
with
95 if skip_blank
then read_paragraph indent e
else None
97 if indentation < indent
then
101 read_nonempty
indentation e line
104 and skip_blank_line e
= match Enum.peek e
with
105 None
| Some
(_
, _
, false) -> ()
106 | Some
(_
, _
, true) -> Enum.junk e
; skip_blank_line e
108 and read_nonempty indent e
s = match s.[0] with
109 '
!'
-> read_heading
s
110 | '
*'
when snd_is_space s -> push_remainder indent
s e
; read_ul indent e
111 | '#'
when snd_is_space s -> push_remainder indent
s e
; read_ol indent e
112 | '
{'
when snd_is s '
{'
-> read_pre
(String.slice
s ~first
:2) e
113 | '
>'
when snd_is_space s || s = ">" ->
114 (* last check needed because "> " becomes ">" *)
115 Enum.push e
(indent
, s, false); read_quote indent e
116 | _
-> Enum.push e
(indent
, s, false); read_normal e
119 let s'
= String.strip ~chars
:"!" s in
120 let level = String.length
s - String.length
s'
in
121 Some
(Heading
(level, parse_text
s'
))
123 and read_ul indent e
=
125 (fun fst others
-> Ulist
(fst
, others
))
126 (fun s -> snd_is_space s && s.[0] = '
*'
)
129 and read_ol indent e
=
131 (fun fst others
-> Olist
(fst
, others
))
132 (fun s -> snd_is_space s && s.[0] = '#'
)
135 and read_list f is_item indent e
=
136 let read_item indent ps
= collect (read_paragraph (indent
+ 1)) e
in
137 let rec read_all fst others
=
139 match Enum.peek e
with
140 | Some
(indentation, s, _
) when indentation >= indent
&& is_item
s ->
142 push_remainder indentation s e
;
143 read_all fst
(read_item indentation [] :: others
)
144 | None
| Some _
-> f fst
(List.rev others
)
145 in Some
(read_all (read_item indent
[]) [])
147 and read_pre kind e
=
148 let kind = match kind with "" -> None
| s -> Some
s in
149 let re = Str.regexp
"^\\\\+}}$" in
150 let unescape = function
151 s when Str.string_match
re s 0 -> String.slice ~first
:1 s
153 (* don't forget the last \n *)
154 let ret ls
= Some
(Pre
(String.concat
"\n" (List.rev
("" :: ls
)), kind)) in
155 let rec read_until_end fstindent ls
= match Enum.get e
with
156 None
| Some
(_
, "}}", _
) -> ret ls
157 | Some
(indentation, s, _
) ->
158 let spaces = String.make
(max
0 (indentation - fstindent
)) ' '
in
159 read_until_end fstindent
((spaces ^
unescape s) :: ls
)
160 in match Enum.get e
with
161 None
| Some
(_
, "}}", _
) -> ret []
162 | Some
(indentation, s, _
) -> read_until_end indentation [s]
164 and read_quote indent e
=
165 let push_and_finish e elm
= Enum.push e elm
; raise
Enum.No_more_elements
in
166 let next_without_lt e
= function
167 | (_
, _
, true) as line
-> push_and_finish e line
168 | (n
, s, false) as line
->
169 if n
< indent
|| s.[0] <> '
>'
then
170 push_and_finish e line
172 let s = String.slice ~first
:1 s in
173 let s'
= String.strip
s in
174 (String.length
s - String.length
s'
, s'
, s'
= "")
176 in match collect (read_paragraph 0) (Enum.map
(next_without_lt e
) e
) with
178 | ps
-> Some
(Quote ps
)
182 let return () = String.concat
" " (List.rev ls
) in
183 match Enum.peek e
with
184 None
| Some
(_
, _
, true) -> return ()
185 | Some
(_
, l
, _
) -> match l
.[0] with
186 '
!'
| '
*'
| '#'
| '
>'
when snd_is_space l
-> return ()
187 | '
{'
when snd_is l '
{'
-> return ()
188 | _
-> Enum.junk e
; gettxt (l
:: ls
) in
189 let txt = gettxt [] in
190 Some
(Normal
(parse_text
txt))
195 { max
= String.length
s;
197 current
= new_fragment (); }
200 (* scan s starting from n, upto max (exclusive) *)
203 if n
>= max then List.rev
(push_current st
)
205 else match s.[n
] with
207 delimited
(fun ~first ~last
-> Code
(unescape_slice s ~first ~last
)) "`"
210 delimited
(fun ~first ~last
-> Bold
(unescape_slice s ~first ~last
)) "*"
213 delimited
(fun ~first ~last
-> Emph
(unescape_slice s ~first ~last
)) "__"
219 { max = last
; fragments
= []; current
= new_fragment (); }
222 | '
!'
when matches_at
s ~
max n
"![" ->
224 "![" (fun ref -> Image
{ img_src
= ref.src
; img_alt
= ref.desc
})
228 (fun ref -> match ref.src
, ref.desc
with
230 | "", desc
-> Link
{ href_target
= desc
; href_desc
= desc
}
231 | src
, "" when src
.[0] = '#'
-> Anchor
(String.slice ~first
:1 src
)
232 | src
, desc
-> Link
{ href_target
= ref.src
; href_desc
= ref.desc
})
234 | '
\\'
when (n
+ 1) < max -> addc st
.current
s.[n
+1]; scan
s st
(n
+ 2)
235 | c
-> addc st
.current c
; scan
s st
(n
+ 1)
237 (* [delimited f delim first] tries to match [delim] starting from [first],
238 * returns Some (offset of char after closing delim) or None *)
239 and delimited f delim
s st first
=
241 let delim_len = String.length delim
in
242 let scan_from_next_char () =
243 addc st
.current
s.[first
];
244 scan
s st
(first
+ 1)
246 if not
(matches_at
s ~
max first delim
) then scan_from_next_char ()
247 else match scan_past ~delim
s ~
max (first
+ String.length delim
) with
249 let chunk = f ~first
:(first
+ delim_len)
250 ~last
:(n
- String.length delim
)
252 { st
with fragments
= chunk :: push_current st
;
253 current
= new_fragment () }
255 | None
-> scan_from_next_char ()
257 and maybe_link delim f
s st n
= match scan_link
s ~
max:st
.max n
with
258 None
-> adds st
.current delim
; scan
s st n
261 { st
with fragments
= f
ref :: push_current st
;
262 current
= (new_fragment ()) }
265 (* return None if delim not found, else Some (offset of char *after* delim) *)
266 and scan_past ~delim
s ~
max n
=
267 let re = Str.regexp
(Str.quote delim
) in
268 let rec loop m ~
max =
269 if m
>= max then None
else
270 match (try Some
(Str.search_forward
re s m
) with Not_found
-> None
) with
271 | Some m
when m
< max && s.[m
-1] <> '
\\'
-> Some
(m
+ String.length delim
)
272 | Some m
when m
< max -> loop (m
+ 1) ~
max
273 | _
-> None
(* no match or >= max *)
276 (* returns None or offset of char after the reference
277 * (i.e. after closing ')'). *)
278 and scan_link
s ~
max n
= match scan_past ~delim
:"]" s ~
max n
with
280 | Some end_of_desc
->
281 if end_of_desc
>= max then None
282 else match s.[end_of_desc
] with
284 begin match scan_past ~delim
:")" s ~
max (end_of_desc
+ 1) with
289 desc
= unescape_slice s ~first
:n ~last
:(end_of_desc
- 1);
290 src
= unescape_slice s
291 ~first
:(end_of_desc
+ 1)
292 ~last
:(end_of_uri
- 1)
294 in Some
(ref, end_of_uri
)
298 and matches_at
s ~
max n delim
=
299 let len = String.length delim
in
300 if n
+ len > max then false
304 else if s.[n
] = delim
.[m
] then loop (n
+ 1) (m
+ 1) (k
- 1)
309 collect (read_paragraph 0)
310 (Enum.map
(fun l
-> let l'
= String.strip
l in (indentation l, l'
, l'
= "")) e
)
312 let parse_lines ls
= parse_enum (List.enum ls
)
313 let parse_text s = parse_lines ((Str.split
(Str.regexp
"\n") s))