Initial snarf.
[shack.git] / libmojave / util / lm_arg.ml
blobe97a57ef537a1313b90a678706c157510f8d09f2
1 (*
2 * Parsing command line arguments, MCC-style. Arguments to options
3 * may be separated from the option by a space, or may be placed
4 * immediately after the option (without space) IF the option is
5 * not ambiguous. Also, options may be abbreviated as long as the
6 * short form is not ambiguous.
8 * ----------------------------------------------------------------
10 * Copyright (C) 2000-2006 Mojave Group, Caltech
12 * This library is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU Lesser General Public
14 * License as published by the Free Software Foundation,
15 * version 2.1 of the License.
17 * This library is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * Lesser General Public License for more details.
22 * You should have received a copy of the GNU Lesser General Public
23 * License along with this library; if not, write to the Free Software
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 * Additional permission is given to link this library with the
27 * OpenSSL project's "OpenSSL" library, and with the OCaml runtime,
28 * and you may distribute the linked executables. See the file
29 * LICENSE.libmojave for more details.
31 * Authors: Jason Hickey <jyh@cs.caltech.edu>
32 * Justin David Smith
33 * Modified By: Aleksey Nogin <nogin@cs.caltech.edu>
35 open Lm_printf
38 (*** Basic Specifications ***)
41 (* spec
42 Argument specification. Each option uses this specification to indicate
43 what type of argument (if any) the option takes. The following option
44 specifications are defined.
45 Unit f: Call an arbitrary function f ()
46 Set b: Set the boolean (reference) value b to true
47 Clear b: Set the boolean (reference) value b to false
48 String f: Takes one argument: call function f <string>
49 Int f: Takes one argument: call function f <integer>
50 Float f: Takes one argument: call function f <float>
51 Rest f: Call function f <arg>, for all remaining arguments
53 section = (name, spec, desc) list
54 Used to define a group of related arguments. (name, spec) indicate the
55 option name and option specification. desc gives a textual description
56 of the option.
58 sections = (desc, section) list
59 Used to define all option groups. Each option group is prefixed by desc
60 which briefly describes the section.
62 type 'a poly_spec =
63 (* Imperative versions *)
64 Unit of (unit -> unit)
65 | Set of bool ref
66 | Clear of bool ref
67 | String of (string -> unit)
68 | Int of (int -> unit)
69 | Float of (float -> unit)
70 | Rest of (string -> unit)
72 (* Functional versions *)
73 | UnitFold of ('a -> 'a)
74 | SetFold of ('a -> bool -> 'a)
75 | ClearFold of ('a -> bool -> 'a)
76 | StringFold of ('a -> string -> 'a)
77 | IntFold of ('a -> int -> 'a)
78 | FloatFold of ('a -> float -> 'a)
79 | RestFold of ('a -> string -> 'a)
81 (* Usage message *)
82 | Usage
84 (* spec_mode
86 StrictOptions: options are processed literally, and may not be collapsed
87 into multi-letter options.
88 MultiLetterMode: single-letter options of the form -x may be collapsed
89 into multi-letter options. *)
90 type spec_mode =
91 StrictOptions
92 | MultiLetterOptions
94 type 'a poly_section = (string * 'a poly_spec * string) list
95 type 'a poly_sections = spec_mode * (string * 'a poly_section) list
97 type spec = unit poly_spec
98 type section = unit poly_section
99 type sections = unit poly_sections
101 (* parsing mode
103 StrictMode: options are processed literally, and may not be collapsed
104 into multi-letter options.
105 MultiLetterMode: single-letter options may be collapsed.
106 MultiLetterPending: processing a multi-letter option *)
107 type mode =
108 StrictMode
109 | MultiLetterMode
110 | MultiLetterPending of string * int
112 (* BogusArg
113 Thrown by option processing when something goes wrong... *)
114 exception BogusArg of string
116 (* UsageError
117 Thrown on --help *)
118 exception UsageError
121 (*** Option Table ***)
124 (* CharCompare, CharTable
125 Defines a table indexed by individual characters. *)
126 module CharCompare = struct
127 type t = char
128 let compare (c1 : char) (c2 : char) =
129 if c1 < c2 then
131 else if c1 > c2 then
133 else
135 end (* CharCompare *)
137 module CharTable = Lm_map.LmMake (CharCompare);;
140 (* options
141 The option table is a tree, where each edge is labelled by a character.
142 To lookup the specification for an option, we walk the tree using the
143 characters of the option until we reach a node that has a specification
144 associated with it. This tree is used to help us identify unambiguous
145 prefixes, and also to determine where an option name ends and its value
146 begins (when the name and value are not space-delimited).
148 option_node
149 The type of a node in the options tree. Each node contains a spec if
150 the node matches an option name, and may contain a subtree if there is
151 at least one longer option that has this prefix.
152 SpecNode spec: Leaf node; this branch corresponds to the spec.
153 NameNode tree: No option corresponds to this branch, but there
154 are options in the subtree.
155 SpecOrName (spec, tree):
156 This branch corresponds to an option with the
157 indicated spec; there are also suboptions in the
158 indicated subtree.
160 type 'a option_node =
161 SpecNode of 'a poly_spec
162 | NameNode of 'a option_node CharTable.t
163 | SpecOrName of 'a poly_spec * 'a option_node CharTable.t
165 type 'a options = 'a option_node CharTable.t
167 (* is_alnum
169 test if a letter is a letter or number *)
170 let is_alnum = function
171 'a'..'z'
172 | 'A'..'Z'
173 | '0'..'9' ->
174 true
175 | _ ->
176 false
179 (* char_table_lookup
180 Lookup an entry in the char table. If no entry exists in the table,
181 then None is returned (instead of raising an exception). *)
182 let char_table_lookup table ch =
184 Some (CharTable.find table ch)
185 with
186 Not_found ->
187 (* If the character is '_', try looking it up as '-'. This is a
188 hack to accomodate both '_' and '-' in option names (proper
189 GCC style uses hyphen, but our old options used underscores). *)
190 if ch = '_' then
192 Some (CharTable.find table '-')
193 with
194 Not_found ->
195 None
196 else
197 None
200 (* lookup_option
201 We also allow --no-* prefixes on Boolean options.
203 JYH: this is perhaps not the simplest way to deal
204 with inversion, but the implementation is simple. *)
205 let is_invert_prefix name =
206 String.length name > 5
207 && String.unsafe_get name 0 = '-'
208 && String.unsafe_get name 1 = '-'
209 && String.unsafe_get name 2 = 'n'
210 && String.unsafe_get name 3 = 'o'
211 && String.unsafe_get name 4 = '-'
213 let strip_invert_prefix name =
214 String.sub name 4 (String.length name - 4)
216 let is_invertable_option opt = function
217 Set _
218 | Clear _
219 | SetFold _
220 | ClearFold _ ->
221 String.length opt > 1 && opt.[0] = '-'
222 | _ ->
223 false
226 (* add_option
227 Add a new option name to the option tree. If the exact option already
228 exists, then an exception is thrown. If a prefix or suffix of this
229 option is already defined, then no error occurs. *)
230 let add_option options name spec =
231 if is_invert_prefix name then
232 raise (BogusArg ("Option contains an invertion prefix: " ^ name));
233 let length = String.length name in
235 (* deconstruct_name
236 Updates the subtree rooted at options, based on the substring
237 of name beginning with offset. *)
238 let rec deconstruct_name options offset =
239 let ch = name.[offset] in
240 let offset = offset + 1 in
241 let entry =
242 if offset < length then
244 (* This is NOT the last character of the option; we
245 need to build a subtree and recurse on ourself. *)
246 match char_table_lookup options ch with
247 None ->
248 NameNode (deconstruct_name CharTable.empty offset)
249 | Some (SpecNode spec') ->
250 SpecOrName (spec', deconstruct_name CharTable.empty offset)
251 | Some (NameNode options) ->
252 NameNode (deconstruct_name options offset)
253 | Some (SpecOrName (spec', options)) ->
254 SpecOrName (spec', deconstruct_name options offset)
255 else
257 (* This is the last character of the option; this is
258 where we might have a duplicate hit, and where we
259 need to drop our specification. *)
260 match char_table_lookup options ch with
261 None ->
262 SpecNode spec
263 | Some (NameNode options) ->
264 SpecOrName (spec, options)
265 | Some _ ->
266 raise (BogusArg ("Duplicate option defined: " ^ name))
268 (* Update this node in the tree *)
269 CharTable.add options ch entry
271 deconstruct_name options 0
274 (* lookup_option_core
275 Lookup the option with the indicated name in the options tree. If there
276 is an exact option match in the tree, we return the option spec and an
277 empty string. If we hit end up at a node without a spec, but we are an
278 UNAMBIGUOUS prefix of an option in the tree, then we return that option's
279 spec, and an empty string.
281 The final case is more interesting: when we end up at a leaf, then we
282 split the ``name'' we were given into a name/value pair at that point,
283 and return the excess characters as the option's value. This is how we
284 determine when the value associated with an option is not delimited by a
285 space. Note that any option that is a prefix of another option cannot
286 take a value in this way.
288 let lookup_option_core options name =
289 let length = String.length name in
291 (* find_branch
292 Checks to see if the subtree rooted at options is a linear branch.
293 If so, return the spec at the end of the branch; otherwise, raise an
294 exception (assuming the option was ambiguous if the branch splits,
295 or that the option is unbound if there is no branch). *)
296 let rec find_branch options =
297 CharTable.fold (fun spec _ options ->
298 match spec, options with
299 None, SpecNode spec ->
300 Some spec
301 | None, NameNode options ->
302 find_branch options
303 | _ ->
304 raise (BogusArg ("Ambiguous option specified: " ^ name))) None options
306 let find_branch options =
307 match find_branch options with
308 None ->
309 raise (BogusArg ("No such option: " ^ name))
310 | Some spec ->
311 spec
314 (* lookup_name
315 Lookup an option in the subtree rooted at options, based on the
316 substring of name beginning at offset. *)
317 let rec lookup_name options offset =
318 let ch = name.[offset] in
319 let offset = offset + 1 in
320 if offset < length then
322 (* We're not at the end of the name we're searching for
323 yet; it is possible that we are looking at a name/value
324 pair. *)
325 match char_table_lookup options ch with
326 None ->
327 (* No option with this prefix was defined *)
328 raise (BogusArg ("No such option: " ^ name))
329 | Some (SpecNode (Unit _ | Set _ | Clear _ | UnitFold _ | SetFold _ | ClearFold _ | Usage )) ->
330 (* Name was too long; can not assume a name/value pair *)
331 raise (BogusArg ("No such option: " ^ name ^ " (option " ^ (String.sub name 0 offset) ^ " does not take arguments)"))
332 | Some (SpecNode spec) ->
333 (* Name was too long; assume it was a name/value pair *)
334 spec, String.sub name offset (length - offset)
335 | Some (NameNode options)
336 | Some (SpecOrName (_, options)) ->
337 (* Still searching... *)
338 lookup_name options offset
339 else
341 (* Last character in the name we were given; this is either
342 an exact match, or (hopefully) an unambiguous prefix of
343 an option in the tree. *)
344 match char_table_lookup options ch with
345 None ->
346 (* Last char of name, not no option matches *)
347 raise (BogusArg ("No such option: " ^ name))
348 | Some (SpecNode spec)
349 | Some (SpecOrName (spec, _)) ->
350 (* Exact match to an option in the tree. *)
351 spec, ""
352 | Some (NameNode options) ->
353 (* Inexact match; try to find a branch. *)
354 find_branch options, ""
356 lookup_name options 0
358 let lookup_option options name =
359 if is_invert_prefix name then
360 let orig_name = strip_invert_prefix name in
362 match lookup_option_core options orig_name with
363 Set f, "" ->
364 Clear f, ""
365 | SetFold f, "" ->
366 ClearFold f, ""
367 | Clear f, "" ->
368 Set f, ""
369 | ClearFold f, "" ->
370 SetFold f, ""
371 | _ ->
372 raise (Failure "invert")
373 with
374 BogusArg _
375 | Not_found ->
376 raise (BogusArg ("No such option: " ^ orig_name ^ " (extracted from inverted: " ^ name ^ ")"))
377 | Failure "invert" ->
378 raise (BogusArg ("Not an invertable option: " ^ orig_name ^ " (extracted from inverted: " ^ name ^ ")"))
379 else
380 lookup_option_core options name
383 (* compute_option_tree
384 Convert a sections spec into an option tree. Can raise an exception
385 if the sections spec contains duplicate options. *)
386 let compute_option_tree spec =
387 let options = CharTable.empty in
388 let options = List.fold_left (fun options (_, spec_block) ->
389 List.fold_left (fun options (name, spec, _) ->
390 add_option options name spec) options spec_block) options spec
392 options
395 (*** Help System ***)
397 (* Wraps at terminal width *)
398 let rec print_doc_string opt_width s =
399 let width = Lm_termsize.stdout_width - opt_width in
400 let margin = String.make (opt_width + 1) ' ' in
401 let () = margin.[0] <- '\n' in
402 let len = String.length s in
403 if len <= width then
404 print_string s
405 else
406 if String.rcontains_from s width ' ' then begin
407 let i = String.rindex_from s width ' ' in
408 print_string (String.sub s 0 i);
409 print_string margin;
410 print_doc_string opt_width (String.sub s (i+1) (len - i - 1))
411 end else begin
412 print_string (String.sub s 0 width);
413 print_string margin;
414 print_doc_string opt_width (String.sub s width (len - width))
417 let usage_arg = function
418 Unit _
419 | Set _
420 | Clear _
421 | UnitFold _
422 | SetFold _
423 | ClearFold _
424 | Usage ->
426 | String _
427 | StringFold _ ->
428 " <string>"
429 | Int _
430 | IntFold _ ->
431 " <number>"
432 | Float _
433 | FloatFold _ ->
434 " <float>"
435 | Rest _
436 | RestFold _ ->
437 " ..."
439 (* usage
440 Display the usage message and help text for the options. *)
441 let usage opt_width spec =
442 List.iter (fun (opt, spec, doc) ->
443 (* Descriptive text for the option argument *)
444 let opt = opt ^ (usage_arg spec) in
446 (* Display information on a single option. *)
447 (if String.length opt > opt_width then
448 (* option name too long to fit on one line *)
449 printf "@ %s@ %*s" opt opt_width ""
450 else
451 printf "@ %-*s" opt_width opt);
452 (if is_invertable_option opt spec then
453 printf "*: "
454 else
455 printf " : ");
456 print_doc_string (opt_width + 7) doc) spec
458 let usage_length (opt, spec, _) =
459 String.length opt + String.length (usage_arg spec)
461 let usage (mode, spec) usage_msg =
462 (* Display help for all sections. *)
463 let opt_max_length =
464 List.fold_left (fun i (_, spec) ->
465 (List.fold_left (fun i opt -> max i (usage_length opt))) i spec)
466 0 spec
468 let opt_width = min opt_max_length ((max 80 Lm_termsize.stdout_width) / 3 - 7) in
469 printf "@[<v 0>%s." usage_msg;
470 List.iter (fun (section, spec) ->
471 printf "@ @ @[<v 3>%s:" section;
472 usage opt_width spec;
473 printf "@]") spec;
474 (match mode with
475 StrictOptions ->
477 | MultiLetterOptions ->
478 printf "@ Single-letter options may be concatenated as part of a single option.");
479 (if List.exists (fun (_, spec) -> List.exists (fun (opt, spec, _) -> is_invertable_option opt spec) spec) spec then
480 printf "@ @ (*) Prefix the option with \"--no\" to disable.");
481 printf "@]@."
484 (*** Option Processing ***)
486 (* pending_arguments
487 Query for pending arguments or options. Advances the parser for
488 the current mode, and returns a pair (mode, found), where found
489 is true iff there are options or arguments left to process. *)
490 let advance_options mode _argv argv_length current =
491 match mode with
492 StrictMode
493 | MultiLetterMode ->
494 mode, current < argv_length
495 | MultiLetterPending (opt, i) when i = String.length opt ->
496 MultiLetterMode, current < argv_length
497 | MultiLetterPending _ ->
498 mode, true
501 (* get_next_arg
502 Get the next argument in the argument stream. Returns
503 the argument string, as well as the new current marker. *)
504 let get_next_arg opt argv argv_length current =
505 if current < argv_length then
506 argv.(current), current + 1
507 else
508 if (opt <> "") then
509 raise (BogusArg ("Option " ^ opt ^ " requires an argument"))
510 else
511 raise (Invalid_argument "Lm_arg: internal error")
513 (* get_next_option
514 In StrictMode, this is the same as get_next_arg.
515 In MultiLetterMode, this walks letter-by-letter through
516 simple options. *)
517 let rec get_next_option mode argv argv_length current =
518 match mode with
519 StrictMode ->
520 let opt, current = get_next_arg "" argv argv_length current in
521 opt, current, mode
522 | MultiLetterMode ->
523 (* See if the next argument is an option *)
524 let opt, current = get_next_arg "" argv argv_length current in
525 if String.length opt >= 2 && opt.[0] = '-' && is_alnum opt.[1] then
526 get_next_option (MultiLetterPending (opt, 1)) argv argv_length current
527 else
528 opt, current, mode
529 | MultiLetterPending (opt, i) ->
530 let s = String.make 2 opt.[i] in
531 let mode = MultiLetterPending (opt, succ i) in
532 s.[0] <- '-';
533 s, current, mode
536 (* parse
537 Parses the program arguments, using a sections specification. Any
538 non-option argument is passed to the default function, in order; if
539 -help or --help is intercepted on the argument stream, then the
540 usage message is displayed. *)
541 let fold_argv argv (mode_info, spec_info) arg default usage_msg =
542 (* Always add the --help flag *)
543 let spec_info = ("Help flags", ["--help", Usage, "Display a help message"]) :: spec_info in
545 (* Set the current mode *)
546 let mode =
547 match mode_info with
548 StrictOptions ->
549 StrictMode
550 | MultiLetterOptions ->
551 MultiLetterMode
555 (* Convert spec into an options tree, for easier parsing *)
556 let options = compute_option_tree spec_info in
557 let argv_length = Array.length argv in
560 * Parse a single option.
561 * arg: the fold value being computed
562 * current: the current index into argv
564 let rec parse_option mode arg current =
565 let mode, pending = advance_options mode argv argv_length current in
566 if pending then
567 (* Get the name of the option *)
568 let opt, current, mode = get_next_option mode argv argv_length current in
569 let current, arg =
570 if String.length opt > 0 && opt.[0] = '-' then
571 (* Get information on the option *)
572 let spec, s = lookup_option options opt in
574 (* If no value was embedded in the option, but the option
575 requires a value, then grab the next argument for its
576 value. *)
577 let s, current, arg =
578 match spec, s with
579 String _, ""
580 | Int _, ""
581 | Float _, ""
582 | StringFold _, ""
583 | IntFold _, ""
584 | FloatFold _, "" ->
585 let s, current = get_next_arg opt argv argv_length current in
586 s, current, arg
588 | Unit _, ""
589 | Set _, ""
590 | Clear _, ""
591 | Usage, ""
592 | UnitFold _, ""
593 | SetFold _, ""
594 | ClearFold _, ""
596 | String _, _
597 | Int _, _
598 | Float _, _
599 | StringFold _, _
600 | IntFold _, _
601 | FloatFold _, _ ->
602 s, current, arg
604 | Rest f, "" ->
605 let rec rest_function current =
606 if current < argv_length then begin
607 f argv.(current);
608 rest_function (current + 1)
610 else
611 "", current, arg
613 rest_function current
614 | RestFold f, "" ->
615 let rec rest_function arg current =
616 if current < argv_length then
617 rest_function (f arg argv.(current)) (current + 1)
618 else
619 "", current, arg
621 rest_function arg current
622 | _ ->
623 raise (Invalid_argument "Lm_arg: internal error")
626 (* Actually process the option. *)
627 let arg =
628 match spec with
629 Unit f ->
630 f ();
632 | UnitFold f ->
633 f arg
634 | Set x ->
635 x := true;
637 | SetFold f ->
638 f arg true;
639 | Clear x ->
640 x := false;
642 | ClearFold f ->
643 f arg false
644 | String f ->
645 f s;
647 | StringFold f ->
648 f arg s
649 | Int f ->
650 f (int_of_string s);
652 | IntFold f ->
653 f arg (int_of_string s)
654 | Float f ->
655 f (float_of_string s);
657 | FloatFold f ->
658 f arg (float_of_string s)
659 | Rest _
660 | RestFold _ ->
662 | Usage ->
663 usage (mode_info, spec_info) usage_msg;
664 raise UsageError
666 current, arg
667 else
668 (* Not an option; pass to the default function *)
669 let arg, rest = default arg opt in
670 if rest then
671 let rec rest_function arg current =
672 if current < argv_length then
673 let arg, _ = default arg argv.(current) in
674 rest_function arg (current + 1)
675 else
676 current, arg
678 rest_function arg current
679 else
680 current, arg
682 (* We're done with this option, advance to next *)
683 parse_option mode arg current
684 else
685 current, arg
687 let _, arg = parse_option mode arg 1 in
690 let fold spec arg default usage_msg =
691 fold_argv Sys.argv spec arg default usage_msg
693 let parse_argv argv spec default usage_msg =
694 fold_argv argv spec () (fun () opt -> default opt, false) usage_msg
696 let parse spec default usage_msg =
697 fold spec () (fun () opt -> default opt, false) usage_msg