No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / groff / src / preproc / refer / label.y
blobb9a1bea6f3d66b1c3b5fea5c85b60f13b148c759
1 /* $NetBSD: label.y,v 1.1.1.4 2006/02/06 18:14:37 wiz Exp $ */
3 /* -*- C++ -*-
4 Copyright (C) 1989, 1990, 1991, 1992, 2000, 2004
5 Free Software Foundation, Inc.
6 Written by James Clark (jjc@jclark.com)
8 This file is part of groff.
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING. If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
26 #include "refer.h"
27 #include "refid.h"
28 #include "ref.h"
29 #include "token.h"
31 int yylex();
32 void yyerror(const char *);
33 int yyparse();
35 static const char *format_serial(char c, int n);
37 struct label_info {
38 int start;
39 int length;
40 int count;
41 int total;
42 label_info(const string &);
45 label_info *lookup_label(const string &label);
47 struct expression {
48 enum {
49 // Does the tentative label depend on the reference?
50 CONTAINS_VARIABLE = 01,
51 CONTAINS_STAR = 02,
52 CONTAINS_FORMAT = 04,
53 CONTAINS_AT = 010
55 virtual ~expression() { }
56 virtual void evaluate(int, const reference &, string &,
57 substring_position &) = 0;
58 virtual unsigned analyze() { return 0; }
61 class at_expr : public expression {
62 public:
63 at_expr() { }
64 void evaluate(int, const reference &, string &, substring_position &);
65 unsigned analyze() { return CONTAINS_VARIABLE|CONTAINS_AT; }
68 class format_expr : public expression {
69 char type;
70 int width;
71 int first_number;
72 public:
73 format_expr(char c, int w = 0, int f = 1)
74 : type(c), width(w), first_number(f) { }
75 void evaluate(int, const reference &, string &, substring_position &);
76 unsigned analyze() { return CONTAINS_FORMAT; }
79 class field_expr : public expression {
80 int number;
81 char name;
82 public:
83 field_expr(char nm, int num) : number(num), name(nm) { }
84 void evaluate(int, const reference &, string &, substring_position &);
85 unsigned analyze() { return CONTAINS_VARIABLE; }
88 class literal_expr : public expression {
89 string s;
90 public:
91 literal_expr(const char *ptr, int len) : s(ptr, len) { }
92 void evaluate(int, const reference &, string &, substring_position &);
95 class unary_expr : public expression {
96 protected:
97 expression *expr;
98 public:
99 unary_expr(expression *e) : expr(e) { }
100 ~unary_expr() { delete expr; }
101 void evaluate(int, const reference &, string &, substring_position &) = 0;
102 unsigned analyze() { return expr ? expr->analyze() : 0; }
105 // This caches the analysis of an expression.
107 class analyzed_expr : public unary_expr {
108 unsigned flags;
109 public:
110 analyzed_expr(expression *);
111 void evaluate(int, const reference &, string &, substring_position &);
112 unsigned analyze() { return flags; }
115 class star_expr : public unary_expr {
116 public:
117 star_expr(expression *e) : unary_expr(e) { }
118 void evaluate(int, const reference &, string &, substring_position &);
119 unsigned analyze() {
120 return ((expr ? (expr->analyze() & ~CONTAINS_VARIABLE) : 0)
121 | CONTAINS_STAR);
125 typedef void map_func(const char *, const char *, string &);
127 class map_expr : public unary_expr {
128 map_func *func;
129 public:
130 map_expr(expression *e, map_func *f) : unary_expr(e), func(f) { }
131 void evaluate(int, const reference &, string &, substring_position &);
134 typedef const char *extractor_func(const char *, const char *, const char **);
136 class extractor_expr : public unary_expr {
137 int part;
138 extractor_func *func;
139 public:
140 enum { BEFORE = +1, MATCH = 0, AFTER = -1 };
141 extractor_expr(expression *e, extractor_func *f, int pt)
142 : unary_expr(e), part(pt), func(f) { }
143 void evaluate(int, const reference &, string &, substring_position &);
146 class truncate_expr : public unary_expr {
147 int n;
148 public:
149 truncate_expr(expression *e, int i) : unary_expr(e), n(i) { }
150 void evaluate(int, const reference &, string &, substring_position &);
153 class separator_expr : public unary_expr {
154 public:
155 separator_expr(expression *e) : unary_expr(e) { }
156 void evaluate(int, const reference &, string &, substring_position &);
159 class binary_expr : public expression {
160 protected:
161 expression *expr1;
162 expression *expr2;
163 public:
164 binary_expr(expression *e1, expression *e2) : expr1(e1), expr2(e2) { }
165 ~binary_expr() { delete expr1; delete expr2; }
166 void evaluate(int, const reference &, string &, substring_position &) = 0;
167 unsigned analyze() {
168 return (expr1 ? expr1->analyze() : 0) | (expr2 ? expr2->analyze() : 0);
172 class alternative_expr : public binary_expr {
173 public:
174 alternative_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
175 void evaluate(int, const reference &, string &, substring_position &);
178 class list_expr : public binary_expr {
179 public:
180 list_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
181 void evaluate(int, const reference &, string &, substring_position &);
184 class substitute_expr : public binary_expr {
185 public:
186 substitute_expr(expression *e1, expression *e2) : binary_expr(e1, e2) { }
187 void evaluate(int, const reference &, string &, substring_position &);
190 class ternary_expr : public expression {
191 protected:
192 expression *expr1;
193 expression *expr2;
194 expression *expr3;
195 public:
196 ternary_expr(expression *e1, expression *e2, expression *e3)
197 : expr1(e1), expr2(e2), expr3(e3) { }
198 ~ternary_expr() { delete expr1; delete expr2; delete expr3; }
199 void evaluate(int, const reference &, string &, substring_position &) = 0;
200 unsigned analyze() {
201 return ((expr1 ? expr1->analyze() : 0)
202 | (expr2 ? expr2->analyze() : 0)
203 | (expr3 ? expr3->analyze() : 0));
207 class conditional_expr : public ternary_expr {
208 public:
209 conditional_expr(expression *e1, expression *e2, expression *e3)
210 : ternary_expr(e1, e2, e3) { }
211 void evaluate(int, const reference &, string &, substring_position &);
214 static expression *parsed_label = 0;
215 static expression *parsed_date_label = 0;
216 static expression *parsed_short_label = 0;
218 static expression *parse_result;
220 string literals;
224 %union {
225 int num;
226 expression *expr;
227 struct { int ndigits; int val; } dig;
228 struct { int start; int len; } str;
231 /* uppercase or lowercase letter */
232 %token <num> TOKEN_LETTER
233 /* literal characters */
234 %token <str> TOKEN_LITERAL
235 /* digit */
236 %token <num> TOKEN_DIGIT
238 %type <expr> conditional
239 %type <expr> alternative
240 %type <expr> list
241 %type <expr> string
242 %type <expr> substitute
243 %type <expr> optional_conditional
244 %type <num> number
245 %type <dig> digits
246 %type <num> optional_number
247 %type <num> flag
251 expr:
252 optional_conditional
253 { parse_result = ($1 ? new analyzed_expr($1) : 0); }
256 conditional:
257 alternative
258 { $$ = $1; }
259 | alternative '?' optional_conditional ':' conditional
260 { $$ = new conditional_expr($1, $3, $5); }
263 optional_conditional:
264 /* empty */
265 { $$ = 0; }
266 | conditional
267 { $$ = $1; }
270 alternative:
271 list
272 { $$ = $1; }
273 | alternative '|' list
274 { $$ = new alternative_expr($1, $3); }
275 | alternative '&' list
276 { $$ = new conditional_expr($1, $3, 0); }
279 list:
280 substitute
281 { $$ = $1; }
282 | list substitute
283 { $$ = new list_expr($1, $2); }
286 substitute:
287 string
288 { $$ = $1; }
289 | substitute '~' string
290 { $$ = new substitute_expr($1, $3); }
293 string:
295 { $$ = new at_expr; }
296 | TOKEN_LITERAL
298 $$ = new literal_expr(literals.contents() + $1.start,
299 $1.len);
301 | TOKEN_LETTER
302 { $$ = new field_expr($1, 0); }
303 | TOKEN_LETTER number
304 { $$ = new field_expr($1, $2 - 1); }
305 | '%' TOKEN_LETTER
307 switch ($2) {
308 case 'I':
309 case 'i':
310 case 'A':
311 case 'a':
312 $$ = new format_expr($2);
313 break;
314 default:
315 command_error("unrecognized format `%1'", char($2));
316 $$ = new format_expr('a');
317 break;
321 | '%' digits
323 $$ = new format_expr('0', $2.ndigits, $2.val);
325 | string '.' flag TOKEN_LETTER optional_number
327 switch ($4) {
328 case 'l':
329 $$ = new map_expr($1, lowercase);
330 break;
331 case 'u':
332 $$ = new map_expr($1, uppercase);
333 break;
334 case 'c':
335 $$ = new map_expr($1, capitalize);
336 break;
337 case 'r':
338 $$ = new map_expr($1, reverse_name);
339 break;
340 case 'a':
341 $$ = new map_expr($1, abbreviate_name);
342 break;
343 case 'y':
344 $$ = new extractor_expr($1, find_year, $3);
345 break;
346 case 'n':
347 $$ = new extractor_expr($1, find_last_name, $3);
348 break;
349 default:
350 $$ = $1;
351 command_error("unknown function `%1'", char($4));
352 break;
356 | string '+' number
357 { $$ = new truncate_expr($1, $3); }
358 | string '-' number
359 { $$ = new truncate_expr($1, -$3); }
360 | string '*'
361 { $$ = new star_expr($1); }
362 | '(' optional_conditional ')'
363 { $$ = $2; }
364 | '<' optional_conditional '>'
365 { $$ = new separator_expr($2); }
368 optional_number:
369 /* empty */
370 { $$ = -1; }
371 | number
372 { $$ = $1; }
375 number:
376 TOKEN_DIGIT
377 { $$ = $1; }
378 | number TOKEN_DIGIT
379 { $$ = $1*10 + $2; }
382 digits:
383 TOKEN_DIGIT
384 { $$.ndigits = 1; $$.val = $1; }
385 | digits TOKEN_DIGIT
386 { $$.ndigits = $1.ndigits + 1; $$.val = $1.val*10 + $2; }
390 flag:
391 /* empty */
392 { $$ = 0; }
393 | '+'
394 { $$ = 1; }
395 | '-'
396 { $$ = -1; }
401 /* bison defines const to be empty unless __STDC__ is defined, which it
402 isn't under cfront */
404 #ifdef const
405 #undef const
406 #endif
408 const char *spec_ptr;
409 const char *spec_end;
410 const char *spec_cur;
412 static char uppercase_array[] = {
413 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
414 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
415 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
416 'Y', 'Z',
419 static char lowercase_array[] = {
420 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
421 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
422 'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
423 'y', 'z',
426 int yylex()
428 while (spec_ptr < spec_end && csspace(*spec_ptr))
429 spec_ptr++;
430 spec_cur = spec_ptr;
431 if (spec_ptr >= spec_end)
432 return 0;
433 unsigned char c = *spec_ptr++;
434 if (csalpha(c)) {
435 yylval.num = c;
436 return TOKEN_LETTER;
438 if (csdigit(c)) {
439 yylval.num = c - '0';
440 return TOKEN_DIGIT;
442 if (c == '\'') {
443 yylval.str.start = literals.length();
444 for (; spec_ptr < spec_end; spec_ptr++) {
445 if (*spec_ptr == '\'') {
446 if (++spec_ptr < spec_end && *spec_ptr == '\'')
447 literals += '\'';
448 else {
449 yylval.str.len = literals.length() - yylval.str.start;
450 return TOKEN_LITERAL;
453 else
454 literals += *spec_ptr;
456 yylval.str.len = literals.length() - yylval.str.start;
457 return TOKEN_LITERAL;
459 return c;
462 int set_label_spec(const char *label_spec)
464 spec_cur = spec_ptr = label_spec;
465 spec_end = strchr(label_spec, '\0');
466 literals.clear();
467 if (yyparse())
468 return 0;
469 delete parsed_label;
470 parsed_label = parse_result;
471 return 1;
474 int set_date_label_spec(const char *label_spec)
476 spec_cur = spec_ptr = label_spec;
477 spec_end = strchr(label_spec, '\0');
478 literals.clear();
479 if (yyparse())
480 return 0;
481 delete parsed_date_label;
482 parsed_date_label = parse_result;
483 return 1;
486 int set_short_label_spec(const char *label_spec)
488 spec_cur = spec_ptr = label_spec;
489 spec_end = strchr(label_spec, '\0');
490 literals.clear();
491 if (yyparse())
492 return 0;
493 delete parsed_short_label;
494 parsed_short_label = parse_result;
495 return 1;
498 void yyerror(const char *message)
500 if (spec_cur < spec_end)
501 command_error("label specification %1 before `%2'", message, spec_cur);
502 else
503 command_error("label specification %1 at end of string",
504 message, spec_cur);
507 void at_expr::evaluate(int tentative, const reference &ref,
508 string &result, substring_position &)
510 if (tentative)
511 ref.canonicalize_authors(result);
512 else {
513 const char *end, *start = ref.get_authors(&end);
514 if (start)
515 result.append(start, end - start);
519 void format_expr::evaluate(int tentative, const reference &ref,
520 string &result, substring_position &)
522 if (tentative)
523 return;
524 const label_info *lp = ref.get_label_ptr();
525 int num = lp == 0 ? ref.get_number() : lp->count;
526 if (type != '0')
527 result += format_serial(type, num + 1);
528 else {
529 const char *ptr = i_to_a(num + first_number);
530 int pad = width - strlen(ptr);
531 while (--pad >= 0)
532 result += '0';
533 result += ptr;
537 static const char *format_serial(char c, int n)
539 assert(n > 0);
540 static char buf[128]; // more than enough.
541 switch (c) {
542 case 'i':
543 case 'I':
545 char *p = buf;
546 // troff uses z and w to represent 10000 and 5000 in Roman
547 // numerals; I can find no historical basis for this usage
548 const char *s = c == 'i' ? "zwmdclxvi" : "ZWMDCLXVI";
549 if (n >= 40000)
550 return i_to_a(n);
551 while (n >= 10000) {
552 *p++ = s[0];
553 n -= 10000;
555 for (int i = 1000; i > 0; i /= 10, s += 2) {
556 int m = n/i;
557 n -= m*i;
558 switch (m) {
559 case 3:
560 *p++ = s[2];
561 /* falls through */
562 case 2:
563 *p++ = s[2];
564 /* falls through */
565 case 1:
566 *p++ = s[2];
567 break;
568 case 4:
569 *p++ = s[2];
570 *p++ = s[1];
571 break;
572 case 8:
573 *p++ = s[1];
574 *p++ = s[2];
575 *p++ = s[2];
576 *p++ = s[2];
577 break;
578 case 7:
579 *p++ = s[1];
580 *p++ = s[2];
581 *p++ = s[2];
582 break;
583 case 6:
584 *p++ = s[1];
585 *p++ = s[2];
586 break;
587 case 5:
588 *p++ = s[1];
589 break;
590 case 9:
591 *p++ = s[2];
592 *p++ = s[0];
595 *p = 0;
596 break;
598 case 'a':
599 case 'A':
601 char *p = buf;
602 // this is derived from troff/reg.c
603 while (n > 0) {
604 int d = n % 26;
605 if (d == 0)
606 d = 26;
607 n -= d;
608 n /= 26;
609 *p++ = c == 'a' ? lowercase_array[d - 1] :
610 uppercase_array[d - 1];
612 *p-- = 0;
613 // Reverse it.
614 char *q = buf;
615 while (q < p) {
616 char temp = *q;
617 *q = *p;
618 *p = temp;
619 --p;
620 ++q;
622 break;
624 default:
625 assert(0);
627 return buf;
630 void field_expr::evaluate(int, const reference &ref,
631 string &result, substring_position &)
633 const char *end;
634 const char *start = ref.get_field(name, &end);
635 if (start) {
636 start = nth_field(number, start, &end);
637 if (start)
638 result.append(start, end - start);
642 void literal_expr::evaluate(int, const reference &,
643 string &result, substring_position &)
645 result += s;
648 analyzed_expr::analyzed_expr(expression *e)
649 : unary_expr(e), flags(e ? e->analyze() : 0)
653 void analyzed_expr::evaluate(int tentative, const reference &ref,
654 string &result, substring_position &pos)
656 if (expr)
657 expr->evaluate(tentative, ref, result, pos);
660 void star_expr::evaluate(int tentative, const reference &ref,
661 string &result, substring_position &pos)
663 const label_info *lp = ref.get_label_ptr();
664 if (!tentative
665 && (lp == 0 || lp->total > 1)
666 && expr)
667 expr->evaluate(tentative, ref, result, pos);
670 void separator_expr::evaluate(int tentative, const reference &ref,
671 string &result, substring_position &pos)
673 int start_length = result.length();
674 int is_first = pos.start < 0;
675 if (expr)
676 expr->evaluate(tentative, ref, result, pos);
677 if (is_first) {
678 pos.start = start_length;
679 pos.length = result.length() - start_length;
683 void map_expr::evaluate(int tentative, const reference &ref,
684 string &result, substring_position &)
686 if (expr) {
687 string temp;
688 substring_position temp_pos;
689 expr->evaluate(tentative, ref, temp, temp_pos);
690 (*func)(temp.contents(), temp.contents() + temp.length(), result);
694 void extractor_expr::evaluate(int tentative, const reference &ref,
695 string &result, substring_position &)
697 if (expr) {
698 string temp;
699 substring_position temp_pos;
700 expr->evaluate(tentative, ref, temp, temp_pos);
701 const char *end, *start = (*func)(temp.contents(),
702 temp.contents() + temp.length(),
703 &end);
704 switch (part) {
705 case BEFORE:
706 if (start)
707 result.append(temp.contents(), start - temp.contents());
708 else
709 result += temp;
710 break;
711 case MATCH:
712 if (start)
713 result.append(start, end - start);
714 break;
715 case AFTER:
716 if (start)
717 result.append(end, temp.contents() + temp.length() - end);
718 break;
719 default:
720 assert(0);
725 static void first_part(int len, const char *ptr, const char *end,
726 string &result)
728 for (;;) {
729 const char *token_start = ptr;
730 if (!get_token(&ptr, end))
731 break;
732 const token_info *ti = lookup_token(token_start, ptr);
733 int counts = ti->sortify_non_empty(token_start, ptr);
734 if (counts && --len < 0)
735 break;
736 if (counts || ti->is_accent())
737 result.append(token_start, ptr - token_start);
741 static void last_part(int len, const char *ptr, const char *end,
742 string &result)
744 const char *start = ptr;
745 int count = 0;
746 for (;;) {
747 const char *token_start = ptr;
748 if (!get_token(&ptr, end))
749 break;
750 const token_info *ti = lookup_token(token_start, ptr);
751 if (ti->sortify_non_empty(token_start, ptr))
752 count++;
754 ptr = start;
755 int skip = count - len;
756 if (skip > 0) {
757 for (;;) {
758 const char *token_start = ptr;
759 if (!get_token(&ptr, end))
760 assert(0);
761 const token_info *ti = lookup_token(token_start, ptr);
762 if (ti->sortify_non_empty(token_start, ptr) && --skip < 0) {
763 ptr = token_start;
764 break;
768 first_part(len, ptr, end, result);
771 void truncate_expr::evaluate(int tentative, const reference &ref,
772 string &result, substring_position &)
774 if (expr) {
775 string temp;
776 substring_position temp_pos;
777 expr->evaluate(tentative, ref, temp, temp_pos);
778 const char *start = temp.contents();
779 const char *end = start + temp.length();
780 if (n > 0)
781 first_part(n, start, end, result);
782 else if (n < 0)
783 last_part(-n, start, end, result);
787 void alternative_expr::evaluate(int tentative, const reference &ref,
788 string &result, substring_position &pos)
790 int start_length = result.length();
791 if (expr1)
792 expr1->evaluate(tentative, ref, result, pos);
793 if (result.length() == start_length && expr2)
794 expr2->evaluate(tentative, ref, result, pos);
797 void list_expr::evaluate(int tentative, const reference &ref,
798 string &result, substring_position &pos)
800 if (expr1)
801 expr1->evaluate(tentative, ref, result, pos);
802 if (expr2)
803 expr2->evaluate(tentative, ref, result, pos);
806 void substitute_expr::evaluate(int tentative, const reference &ref,
807 string &result, substring_position &pos)
809 int start_length = result.length();
810 if (expr1)
811 expr1->evaluate(tentative, ref, result, pos);
812 if (result.length() > start_length && result[result.length() - 1] == '-') {
813 // ought to see if pos covers the -
814 result.set_length(result.length() - 1);
815 if (expr2)
816 expr2->evaluate(tentative, ref, result, pos);
820 void conditional_expr::evaluate(int tentative, const reference &ref,
821 string &result, substring_position &pos)
823 string temp;
824 substring_position temp_pos;
825 if (expr1)
826 expr1->evaluate(tentative, ref, temp, temp_pos);
827 if (temp.length() > 0) {
828 if (expr2)
829 expr2->evaluate(tentative, ref, result, pos);
831 else {
832 if (expr3)
833 expr3->evaluate(tentative, ref, result, pos);
837 void reference::pre_compute_label()
839 if (parsed_label != 0
840 && (parsed_label->analyze() & expression::CONTAINS_VARIABLE)) {
841 label.clear();
842 substring_position temp_pos;
843 parsed_label->evaluate(1, *this, label, temp_pos);
844 label_ptr = lookup_label(label);
848 void reference::compute_label()
850 label.clear();
851 if (parsed_label)
852 parsed_label->evaluate(0, *this, label, separator_pos);
853 if (short_label_flag && parsed_short_label)
854 parsed_short_label->evaluate(0, *this, short_label, short_separator_pos);
855 if (date_as_label) {
856 string new_date;
857 if (parsed_date_label) {
858 substring_position temp_pos;
859 parsed_date_label->evaluate(0, *this, new_date, temp_pos);
861 set_date(new_date);
863 if (label_ptr)
864 label_ptr->count += 1;
867 void reference::immediate_compute_label()
869 if (label_ptr)
870 label_ptr->total = 2; // force use of disambiguator
871 compute_label();
874 int reference::merge_labels(reference **v, int n, label_type type,
875 string &result)
877 if (abbreviate_label_ranges)
878 return merge_labels_by_number(v, n, type, result);
879 else
880 return merge_labels_by_parts(v, n, type, result);
883 int reference::merge_labels_by_number(reference **v, int n, label_type type,
884 string &result)
886 if (n <= 1)
887 return 0;
888 int num = get_number();
889 // Only merge three or more labels.
890 if (v[0]->get_number() != num + 1
891 || v[1]->get_number() != num + 2)
892 return 0;
893 int i;
894 for (i = 2; i < n; i++)
895 if (v[i]->get_number() != num + i + 1)
896 break;
897 result = get_label(type);
898 result += label_range_indicator;
899 result += v[i - 1]->get_label(type);
900 return i;
903 const substring_position &reference::get_separator_pos(label_type type) const
905 if (type == SHORT_LABEL && short_label_flag)
906 return short_separator_pos;
907 else
908 return separator_pos;
911 const string &reference::get_label(label_type type) const
913 if (type == SHORT_LABEL && short_label_flag)
914 return short_label;
915 else
916 return label;
919 int reference::merge_labels_by_parts(reference **v, int n, label_type type,
920 string &result)
922 if (n <= 0)
923 return 0;
924 const string &lb = get_label(type);
925 const substring_position &sp = get_separator_pos(type);
926 if (sp.start < 0
927 || sp.start != v[0]->get_separator_pos(type).start
928 || memcmp(lb.contents(), v[0]->get_label(type).contents(),
929 sp.start) != 0)
930 return 0;
931 result = lb;
932 int i = 0;
933 do {
934 result += separate_label_second_parts;
935 const substring_position &s = v[i]->get_separator_pos(type);
936 int sep_end_pos = s.start + s.length;
937 result.append(v[i]->get_label(type).contents() + sep_end_pos,
938 v[i]->get_label(type).length() - sep_end_pos);
939 } while (++i < n
940 && sp.start == v[i]->get_separator_pos(type).start
941 && memcmp(lb.contents(), v[i]->get_label(type).contents(),
942 sp.start) == 0);
943 return i;
946 string label_pool;
948 label_info::label_info(const string &s)
949 : start(label_pool.length()), length(s.length()), count(0), total(1)
951 label_pool += s;
954 static label_info **label_table = 0;
955 static int label_table_size = 0;
956 static int label_table_used = 0;
958 label_info *lookup_label(const string &label)
960 if (label_table == 0) {
961 label_table = new label_info *[17];
962 label_table_size = 17;
963 for (int i = 0; i < 17; i++)
964 label_table[i] = 0;
966 unsigned h = hash_string(label.contents(), label.length()) % label_table_size;
967 label_info **ptr;
968 for (ptr = label_table + h;
969 *ptr != 0;
970 (ptr == label_table)
971 ? (ptr = label_table + label_table_size - 1)
972 : ptr--)
973 if ((*ptr)->length == label.length()
974 && memcmp(label_pool.contents() + (*ptr)->start, label.contents(),
975 label.length()) == 0) {
976 (*ptr)->total += 1;
977 return *ptr;
979 label_info *result = *ptr = new label_info(label);
980 if (++label_table_used * 2 > label_table_size) {
981 // Rehash the table.
982 label_info **old_table = label_table;
983 int old_size = label_table_size;
984 label_table_size = next_size(label_table_size);
985 label_table = new label_info *[label_table_size];
986 int i;
987 for (i = 0; i < label_table_size; i++)
988 label_table[i] = 0;
989 for (i = 0; i < old_size; i++)
990 if (old_table[i]) {
991 h = hash_string(label_pool.contents() + old_table[i]->start,
992 old_table[i]->length);
993 label_info **p;
994 for (p = label_table + (h % label_table_size);
995 *p != 0;
996 (p == label_table)
997 ? (p = label_table + label_table_size - 1)
998 : --p)
1000 *p = old_table[i];
1002 a_delete old_table;
1004 return result;
1007 void clear_labels()
1009 for (int i = 0; i < label_table_size; i++) {
1010 delete label_table[i];
1011 label_table[i] = 0;
1013 label_table_used = 0;
1014 label_pool.clear();
1017 static void consider_authors(reference **start, reference **end, int i);
1019 void compute_labels(reference **v, int n)
1021 if (parsed_label
1022 && (parsed_label->analyze() & expression::CONTAINS_AT)
1023 && sort_fields.length() >= 2
1024 && sort_fields[0] == 'A'
1025 && sort_fields[1] == '+')
1026 consider_authors(v, v + n, 0);
1027 for (int i = 0; i < n; i++)
1028 v[i]->compute_label();
1032 /* A reference with a list of authors <A0,A1,...,AN> _needs_ author i
1033 where 0 <= i <= N if there exists a reference with a list of authors
1034 <B0,B1,...,BM> such that <A0,A1,...,AN> != <B0,B1,...,BM> and M >= i
1035 and Aj = Bj for 0 <= j < i. In this case if we can't say ``A0,
1036 A1,...,A(i-1) et al'' because this would match both <A0,A1,...,AN> and
1037 <B0,B1,...,BM>. If a reference needs author i we only have to call
1038 need_author(j) for some j >= i such that the reference also needs
1039 author j. */
1041 /* This function handles 2 tasks:
1042 determine which authors are needed (cannot be elided with et al.);
1043 determine which authors can have only last names in the labels.
1045 References >= start and < end have the same first i author names.
1046 Also they're sorted by A+. */
1048 static void consider_authors(reference **start, reference **end, int i)
1050 if (start >= end)
1051 return;
1052 reference **p = start;
1053 if (i >= (*p)->get_nauthors()) {
1054 for (++p; p < end && i >= (*p)->get_nauthors(); p++)
1056 if (p < end && i > 0) {
1057 // If we have an author list <A B C> and an author list <A B C D>,
1058 // then both lists need C.
1059 for (reference **q = start; q < end; q++)
1060 (*q)->need_author(i - 1);
1062 start = p;
1064 while (p < end) {
1065 reference **last_name_start = p;
1066 reference **name_start = p;
1067 for (++p;
1068 p < end && i < (*p)->get_nauthors()
1069 && same_author_last_name(**last_name_start, **p, i);
1070 p++) {
1071 if (!same_author_name(**name_start, **p, i)) {
1072 consider_authors(name_start, p, i + 1);
1073 name_start = p;
1076 consider_authors(name_start, p, i + 1);
1077 if (last_name_start == name_start) {
1078 for (reference **q = last_name_start; q < p; q++)
1079 (*q)->set_last_name_unambiguous(i);
1081 // If we have an author list <A B C D> and <A B C E>, then the lists
1082 // need author D and E respectively.
1083 if (name_start > start || p < end) {
1084 for (reference **q = last_name_start; q < p; q++)
1085 (*q)->need_author(i);
1090 int same_author_last_name(const reference &r1, const reference &r2, int n)
1092 const char *ae1;
1093 const char *as1 = r1.get_sort_field(0, n, 0, &ae1);
1094 const char *ae2;
1095 const char *as2 = r2.get_sort_field(0, n, 0, &ae2);
1096 if (!as1 && !as2) return 1; // they are the same
1097 if (!as1 || !as2) return 0;
1098 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1101 int same_author_name(const reference &r1, const reference &r2, int n)
1103 const char *ae1;
1104 const char *as1 = r1.get_sort_field(0, n, -1, &ae1);
1105 const char *ae2;
1106 const char *as2 = r2.get_sort_field(0, n, -1, &ae2);
1107 if (!as1 && !as2) return 1; // they are the same
1108 if (!as1 || !as2) return 0;
1109 return ae1 - as1 == ae2 - as2 && memcmp(as1, as2, ae1 - as1) == 0;
1113 void int_set::set(int i)
1115 assert(i >= 0);
1116 int bytei = i >> 3;
1117 if (bytei >= v.length()) {
1118 int old_length = v.length();
1119 v.set_length(bytei + 1);
1120 for (int j = old_length; j <= bytei; j++)
1121 v[j] = 0;
1123 v[bytei] |= 1 << (i & 7);
1126 int int_set::get(int i) const
1128 assert(i >= 0);
1129 int bytei = i >> 3;
1130 return bytei >= v.length() ? 0 : (v[bytei] & (1 << (i & 7))) != 0;
1133 void reference::set_last_name_unambiguous(int i)
1135 last_name_unambiguous.set(i);
1138 void reference::need_author(int n)
1140 if (n > last_needed_author)
1141 last_needed_author = n;
1144 const char *reference::get_authors(const char **end) const
1146 if (!computed_authors) {
1147 ((reference *)this)->computed_authors = 1;
1148 string &result = ((reference *)this)->authors;
1149 int na = get_nauthors();
1150 result.clear();
1151 for (int i = 0; i < na; i++) {
1152 if (last_name_unambiguous.get(i)) {
1153 const char *e, *start = get_author_last_name(i, &e);
1154 assert(start != 0);
1155 result.append(start, e - start);
1157 else {
1158 const char *e, *start = get_author(i, &e);
1159 assert(start != 0);
1160 result.append(start, e - start);
1162 if (i == last_needed_author
1163 && et_al.length() > 0
1164 && et_al_min_elide > 0
1165 && last_needed_author + et_al_min_elide < na
1166 && na >= et_al_min_total) {
1167 result += et_al;
1168 break;
1170 if (i < na - 1) {
1171 if (na == 2)
1172 result += join_authors_exactly_two;
1173 else if (i < na - 2)
1174 result += join_authors_default;
1175 else
1176 result += join_authors_last_two;
1180 const char *start = authors.contents();
1181 *end = start + authors.length();
1182 return start;
1185 int reference::get_nauthors() const
1187 if (nauthors < 0) {
1188 const char *dummy;
1189 int na;
1190 for (na = 0; get_author(na, &dummy) != 0; na++)
1192 ((reference *)this)->nauthors = na;
1194 return nauthors;