extra/parser-combinators/parser-combinators.factor

   1 ! Copyright (C) 2004 Chris Double.
   2 ! See http://factorcode.org/license.txt for BSD license.
   3 USING: lists lists.lazy promises kernel sequences strings math
   4 arrays splitting quotations combinators namespaces
   5 unicode.case unicode.categories sequences.deep accessors ;
   6 IN: parser-combinators
   7
   8 ! Parser combinator protocol
   9 GENERIC: parse ( input parser -- list )
  10
  11 M: promise parse ( input parser -- list )
  12     force parse ;
  13
  14 TUPLE: parse-result parsed unparsed ;
  15
  16 ERROR: cannot-parse input ;
  17
  18 : parse-1 ( input parser -- result )
  19     dupd parse dup nil? [
  20         rot cannot-parse
  21     ] [
  22         nip car parsed>>
  23     ] if ;
  24
  25 C: <parse-result> parse-result
  26
  27 : <parse-results> ( parsed unparsed -- list )
  28     <parse-result> 1list ;
  29
  30 : parse-result-parsed-slice ( parse-result -- slice )
  31     dup parsed>> empty? [
  32         unparsed>> 0 0 rot <slice>
  33     ] [
  34         dup unparsed>>
  35         dup from>> [ rot parsed>> length - ] keep
  36         rot seq>> <slice>
  37     ] if ;
  38
  39 : string= ( str1 str2 ignore-case -- ? )
  40     [ [ >upper ] bi@ ] when sequence= ;
  41
  42 : string-head? ( str head ignore-case -- ? )
  43     2over shorter? [
  44         3drop f
  45     ] [
  46         [ [ length head-slice ] keep ] dip string=
  47     ] if ;
  48
  49 : ?string-head ( str head ignore-case -- newstr ? )
  50     [ 2dup ] dip string-head?
  51     [ length tail-slice t ] [ drop f ] if ;
  52
  53 TUPLE: token-parser string ignore-case? ;
  54
  55 C: <token-parser> token-parser
  56
  57 : token ( string -- parser ) f <token-parser> ;
  58
  59 : case-insensitive-token ( string -- parser ) t <token-parser> ;
  60
  61 M: token-parser parse ( input parser -- list )
  62     [ string>> ] [ ignore-case?>> ] bi
  63     [ tuck ] dip ?string-head
  64     [ <parse-results> ] [ 2drop nil ] if ;
  65
  66 : 1token ( n -- parser ) 1string token ;
  67
  68 TUPLE: satisfy-parser quot ;
  69
  70 C: satisfy satisfy-parser ( quot -- parser )
  71
  72 M: satisfy-parser parse ( input parser -- list )
  73     #! A parser that succeeds if the predicate,
  74     #! when passed the first character in the input, returns
  75     #! true.
  76     over empty? [
  77         2drop nil
  78     ] [
  79         quot>> [ unclip-slice dup ] dip call
  80         [ swap <parse-results> ] [ 2drop nil ] if
  81     ] if ;
  82
  83 LAZY: any-char-parser ( -- parser )
  84     [ drop t ] satisfy ;
  85
  86 TUPLE: epsilon-parser ;
  87
  88 C: epsilon epsilon-parser ( -- parser )
  89
  90 M: epsilon-parser parse ( input parser -- list )
  91     #! A parser that parses the empty string. It
  92     #! does not consume any input and always returns
  93     #! an empty list as the parse tree with the
  94     #! unmodified input.
  95     drop "" swap <parse-results> ;
  96
  97 TUPLE: succeed-parser result ;
  98
  99 C: succeed succeed-parser ( result -- parser )
 100
 101 M: succeed-parser parse ( input parser -- list )
 102     #! A parser that always returns 'result' as a
 103     #! successful parse with no input consumed.
 104     result>> swap <parse-results> ;
 105
 106 TUPLE: fail-parser ;
 107
 108 C: fail fail-parser ( -- parser )
 109
 110 M: fail-parser parse ( input parser -- list )
 111     #! A parser that always fails and returns
 112     #! an empty list of successes.
 113     2drop nil ;
 114
 115 TUPLE: ensure-parser test ;
 116
 117 : ensure ( parser -- ensure )
 118     ensure-parser boa ;
 119
 120 M: ensure-parser parse ( input parser -- list )
 121     2dup test>> parse nil?
 122     [ 2drop nil ] [ drop t swap <parse-results> ] if ;
 123
 124 TUPLE: ensure-not-parser test ;
 125
 126 : ensure-not ( parser -- ensure )
 127     ensure-not-parser boa ;
 128
 129 M: ensure-not-parser parse ( input parser -- list )
 130     2dup test>> parse nil?
 131     [ drop t swap <parse-results> ] [ 2drop nil ] if ;
 132
 133 TUPLE: and-parser parsers ;
 134
 135 : <&> ( parser1 parser2 -- parser )
 136     over and-parser? [
 137         [ parsers>> ] dip suffix
 138     ] [
 139         2array
 140     ] if and-parser boa ;
 141
 142 : <and-parser> ( parsers -- parser )
 143     dup length 1 = [ first ] [ and-parser boa ] if ;
 144
 145 : and-parser-parse ( list p1  -- list )
 146     swap [
 147         dup unparsed>> rot parse
 148         [
 149             [ parsed>> ] dip
 150             [ parsed>> 2array ] keep
 151             unparsed>> <parse-result>
 152         ] lazy-map-with
 153     ] lazy-map-with lconcat ;
 154
 155 M: and-parser parse ( input parser -- list )
 156     #! Parse 'input' by sequentially combining the
 157     #! two parsers. First parser1 is applied to the
 158     #! input then parser2 is applied to the rest of
 159     #! the input strings from the first parser.
 160     parsers>> unclip swapd parse
 161     [ [ and-parser-parse ] reduce ] 2curry promise ;
 162
 163 TUPLE: or-parser parsers ;
 164
 165 : <or-parser> ( parsers -- parser )
 166     dup length 1 = [ first ] [ or-parser boa ] if ;
 167
 168 : <|> ( parser1 parser2 -- parser )
 169     2array <or-parser> ;
 170
 171 M: or-parser parse ( input parser1 -- list )
 172     #! Return the combined list resulting from the parses
 173     #! of parser1 and parser2 being applied to the same
 174     #! input. This implements the choice parsing operator.
 175     parsers>> 0 swap seq>list
 176     [ parse ] lazy-map-with lconcat ;
 177
 178 : trim-left-slice ( string -- string )
 179     #! Return a new string without any leading whitespace
 180     #! from the original string.
 181     dup empty? [
 182         dup first blank? [ rest-slice trim-left-slice ] when
 183     ] unless ;
 184
 185 TUPLE: sp-parser p1 ;
 186
 187 #! Return a parser that first skips all whitespace before
 188 #! calling the original parser.
 189 C: sp sp-parser ( p1 -- parser )
 190
 191 M: sp-parser parse ( input parser -- list )
 192     #! Skip all leading whitespace from the input then call
 193     #! the parser on the remaining input.
 194     [ trim-left-slice ] dip p1>> parse ;
 195
 196 TUPLE: just-parser p1 ;
 197
 198 C: just just-parser ( p1 -- parser )
 199
 200 M: just-parser parse ( input parser -- result )
 201     #! Calls the given parser on the input removes
 202     #! from the results anything where the remaining
 203     #! input to be parsed is not empty. So ensures a
 204     #! fully parsed input string.
 205     p1>> parse [ unparsed>> empty? ] lfilter ;
 206
 207 TUPLE: apply-parser p1 quot ;
 208
 209 C: <@ apply-parser ( parser quot -- parser )
 210
 211 M: apply-parser parse ( input parser -- result )
 212     #! Calls the parser on the input. For each successful
 213     #! parse the quot is call with the parse result on the stack.
 214     #! The result of that quotation then becomes the new parse result.
 215     #! This allows modification of parse tree results (like
 216     #! converting strings to integers, etc).
 217     [ p1>> ] [ quot>> ] bi
 218     -rot parse [
 219         [ parsed>> swap call ] keep
 220         unparsed>> <parse-result>
 221     ] lazy-map-with ;
 222
 223 TUPLE: some-parser p1 ;
 224
 225 C: some some-parser ( p1 -- parser )
 226
 227 M: some-parser parse ( input parser -- result )
 228     #! Calls the parser on the input, guarantees
 229     #! the parse is complete (the remaining input is empty),
 230     #! picks the first solution and only returns the parse
 231     #! tree since the remaining input is empty.
 232     p1>> just parse-1 ;
 233
 234 : <& ( parser1 parser2 -- parser )
 235     #! Same as <&> except discard the results of the second parser.
 236     <&> [ first ] <@ ;
 237
 238 : &> ( parser1 parser2 -- parser )
 239     #! Same as <&> except discard the results of the first parser.
 240     <&> [ second ] <@ ;
 241
 242 : <:&> ( parser1 parser2 -- result )
 243     #! Same as <&> except flatten the result.
 244     <&> [ first2 suffix ] <@ ;
 245
 246 : <&:> ( parser1 parser2 -- result )
 247     #! Same as <&> except flatten the result.
 248     <&> [ first2 swap prefix ] <@ ;
 249
 250 : <:&:> ( parser1 parser2 -- result )
 251     #! Same as <&> except flatten the result.
 252     <&> [ first2 append ] <@ ;
 253
 254 LAZY: <*> ( parser -- parser )
 255     dup <*> <&:> { } succeed <|> ;
 256
 257 : <+> ( parser -- parser )
 258     #! Return a parser that accepts one or more occurences of the original
 259     #! parser.
 260     dup <*> <&:> ;
 261
 262 LAZY: <?> ( parser -- parser )
 263     #! Return a parser that optionally uses the parser
 264     #! if that parser would be successful.
 265     [ 1array ] <@ f succeed <|> ;
 266
 267 TUPLE: only-first-parser p1 ;
 268
 269 LAZY: only-first ( parser -- parser )
 270     only-first-parser boa ;
 271
 272 M: only-first-parser parse ( input parser -- list )
 273     #! Transform a parser into a parser that only yields
 274     #! the first possibility.
 275     p1>> parse 1 swap ltake ;
 276
 277 LAZY: <!*> ( parser -- parser )
 278     #! Like <*> but only return one possible result
 279     #! containing all matching parses. Does not return
 280     #! partial matches. Useful for efficiency since that's
 281     #! usually the effect you want and cuts down on backtracking
 282     #! required.
 283     <*> only-first ;
 284
 285 LAZY: <!+> ( parser -- parser )
 286     #! Like <+> but only return one possible result
 287     #! containing all matching parses. Does not return
 288     #! partial matches. Useful for efficiency since that's
 289     #! usually the effect you want and cuts down on backtracking
 290     #! required.
 291     <+> only-first ;
 292
 293 LAZY: <!?> ( parser -- parser )
 294     #! Like <?> but only return one possible result
 295     #! containing all matching parses. Does not return
 296     #! partial matches. Useful for efficiency since that's
 297     #! usually the effect you want and cuts down on backtracking
 298     #! required.
 299     <?> only-first ;
 300
 301 LAZY: <(?)> ( parser -- parser )
 302     #! Like <?> but take shortest match first.
 303     f succeed swap [ 1array ] <@ <|> ;
 304
 305 LAZY: <(*)> ( parser -- parser )
 306     #! Like <*> but take shortest match first.
 307     #! Implementation by Matthew Willis.
 308     { } succeed swap dup <(*)> <&:> <|> ;
 309
 310 LAZY: <(+)> ( parser -- parser )
 311     #! Like <+> but take shortest match first.
 312     #! Implementation by Matthew Willis.
 313     dup <(*)> <&:> ;
 314
 315 : pack ( close body open -- parser )
 316     #! Parse a construct enclosed by two symbols,
 317     #! given a parser for the opening symbol, the
 318     #! closing symbol, and the body.
 319     <& &> ;
 320
 321 : nonempty-list-of ( items separator -- parser )
 322     [ over &> <*> <&:> ] keep <?> tuck pack ;
 323
 324 : list-of ( items separator -- parser )
 325     #! Given a parser for the separator and for the
 326     #! items themselves, return a parser that parses
 327     #! lists of those items. The parse tree is an
 328     #! array of the parsed items.
 329     nonempty-list-of { } succeed <|> ;
 330
 331 LAZY: surrounded-by ( parser start end -- parser' )
 332     [ token ] bi@ swapd pack ;
 333
 334 : exactly-n ( parser n -- parser' )
 335     swap <repetition> <and-parser> [ flatten ] <@ ;
 336
 337 : at-most-n ( parser n -- parser' )
 338     dup zero? [
 339         2drop epsilon
 340     ] [
 341         2dup exactly-n
 342         -rot 1- at-most-n <|>
 343     ] if ;
 344
 345 : at-least-n ( parser n -- parser' )
 346     dupd exactly-n swap <*> <&> ;
 347
 348 : from-m-to-n ( parser m n -- parser' )
 349     [ [ exactly-n ] 2keep ] dip swap - at-most-n <:&:> ;