No empty .Rs/.Re
[netbsd-mini2440.git] / gnu / dist / groff / src / preproc / refer / ref.cpp
blobc3413ee39c169e8d22aad27831bdda59998d897e
1 /* $NetBSD$ */
3 // -*- C++ -*-
4 /* Copyright (C) 1989, 1990, 1991, 1992, 2001, 2003
5 Free Software Foundation, Inc.
6 Written by James Clark (jjc@jclark.com)
8 This file is part of groff.
10 groff is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 2, or (at your option) any later
13 version.
15 groff is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
20 You should have received a copy of the GNU General Public License along
21 with groff; see the file COPYING. If not, write to the Free Software
22 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
24 #include "refer.h"
25 #include "refid.h"
26 #include "ref.h"
27 #include "token.h"
29 static const char *find_day(const char *, const char *, const char **);
30 static int find_month(const char *start, const char *end);
31 static void abbreviate_names(string &);
33 #define DEFAULT_ARTICLES "the\000a\000an"
35 string articles(DEFAULT_ARTICLES, sizeof(DEFAULT_ARTICLES));
37 // Multiple occurrences of fields are separated by FIELD_SEPARATOR.
38 const char FIELD_SEPARATOR = '\0';
40 const char MULTI_FIELD_NAMES[] = "AE";
41 const char *AUTHOR_FIELDS = "AQ";
43 enum { OTHER, JOURNAL_ARTICLE, BOOK, ARTICLE_IN_BOOK, TECH_REPORT, BELL_TM };
45 const char *reference_types[] = {
46 "other",
47 "journal-article",
48 "book",
49 "article-in-book",
50 "tech-report",
51 "bell-tm",
54 static string temp_fields[256];
56 reference::reference(const char *start, int len, reference_id *ridp)
57 : h(0), merged(0), no(-1), field(0), nfields(0), label_ptr(0),
58 computed_authors(0), last_needed_author(-1), nauthors(-1)
60 int i;
61 for (i = 0; i < 256; i++)
62 field_index[i] = NULL_FIELD_INDEX;
63 if (ridp)
64 rid = *ridp;
65 if (start == 0)
66 return;
67 if (len <= 0)
68 return;
69 const char *end = start + len;
70 const char *ptr = start;
71 assert(*ptr == '%');
72 while (ptr < end) {
73 if (ptr + 1 < end && ptr[1] != '\0'
74 && ((ptr[1] != '%' && ptr[1] == annotation_field)
75 || (ptr + 2 < end && ptr[1] == '%' && ptr[2] != '\0'
76 && discard_fields.search(ptr[2]) < 0))) {
77 if (ptr[1] == '%')
78 ptr++;
79 string &f = temp_fields[(unsigned char)ptr[1]];
80 ptr += 2;
81 while (ptr < end && csspace(*ptr))
82 ptr++;
83 for (;;) {
84 for (;;) {
85 if (ptr >= end) {
86 f += '\n';
87 break;
89 f += *ptr;
90 if (*ptr++ == '\n')
91 break;
93 if (ptr >= end || *ptr == '%')
94 break;
97 else if (ptr + 1 < end && ptr[1] != '\0' && ptr[1] != '%'
98 && discard_fields.search(ptr[1]) < 0) {
99 string &f = temp_fields[(unsigned char)ptr[1]];
100 if (f.length() > 0) {
101 if (strchr(MULTI_FIELD_NAMES, ptr[1]) != 0)
102 f += FIELD_SEPARATOR;
103 else
104 f.clear();
106 ptr += 2;
107 if (ptr < end) {
108 if (*ptr == ' ')
109 ptr++;
110 for (;;) {
111 const char *p = ptr;
112 while (ptr < end && *ptr != '\n')
113 ptr++;
114 // strip trailing white space
115 const char *q = ptr;
116 while (q > p && q[-1] != '\n' && csspace(q[-1]))
117 q--;
118 while (p < q)
119 f += *p++;
120 if (ptr >= end)
121 break;
122 ptr++;
123 if (ptr >= end)
124 break;
125 if (*ptr == '%')
126 break;
127 f += ' ';
131 else {
132 // skip this field
133 for (;;) {
134 while (ptr < end && *ptr++ != '\n')
136 if (ptr >= end || *ptr == '%')
137 break;
141 for (i = 0; i < 256; i++)
142 if (temp_fields[i].length() > 0)
143 nfields++;
144 field = new string[nfields];
145 int j = 0;
146 for (i = 0; i < 256; i++)
147 if (temp_fields[i].length() > 0) {
148 field[j].move(temp_fields[i]);
149 if (abbreviate_fields.search(i) >= 0)
150 abbreviate_names(field[j]);
151 field_index[i] = j;
152 j++;
156 reference::~reference()
158 if (nfields > 0)
159 ad_delete(nfields) field;
162 // ref is the inline, this is the database ref
164 void reference::merge(reference &ref)
166 int i;
167 for (i = 0; i < 256; i++)
168 if (field_index[i] != NULL_FIELD_INDEX)
169 temp_fields[i].move(field[field_index[i]]);
170 for (i = 0; i < 256; i++)
171 if (ref.field_index[i] != NULL_FIELD_INDEX)
172 temp_fields[i].move(ref.field[ref.field_index[i]]);
173 for (i = 0; i < 256; i++)
174 field_index[i] = NULL_FIELD_INDEX;
175 int old_nfields = nfields;
176 nfields = 0;
177 for (i = 0; i < 256; i++)
178 if (temp_fields[i].length() > 0)
179 nfields++;
180 if (nfields != old_nfields) {
181 if (old_nfields > 0)
182 ad_delete(old_nfields) field;
183 field = new string[nfields];
185 int j = 0;
186 for (i = 0; i < 256; i++)
187 if (temp_fields[i].length() > 0) {
188 field[j].move(temp_fields[i]);
189 field_index[i] = j;
190 j++;
192 merged = 1;
195 void reference::insert_field(unsigned char c, string &s)
197 assert(s.length() > 0);
198 if (field_index[c] != NULL_FIELD_INDEX) {
199 field[field_index[c]].move(s);
200 return;
202 assert(field_index[c] == NULL_FIELD_INDEX);
203 string *old_field = field;
204 field = new string[nfields + 1];
205 int pos = 0;
206 int i;
207 for (i = 0; i < int(c); i++)
208 if (field_index[i] != NULL_FIELD_INDEX)
209 pos++;
210 for (i = 0; i < pos; i++)
211 field[i].move(old_field[i]);
212 field[pos].move(s);
213 for (i = pos; i < nfields; i++)
214 field[i + 1].move(old_field[i]);
215 if (nfields > 0)
216 ad_delete(nfields) old_field;
217 nfields++;
218 field_index[c] = pos;
219 for (i = c + 1; i < 256; i++)
220 if (field_index[i] != NULL_FIELD_INDEX)
221 field_index[i] += 1;
224 void reference::delete_field(unsigned char c)
226 if (field_index[c] == NULL_FIELD_INDEX)
227 return;
228 string *old_field = field;
229 field = new string[nfields - 1];
230 int i;
231 for (i = 0; i < int(field_index[c]); i++)
232 field[i].move(old_field[i]);
233 for (i = field_index[c]; i < nfields - 1; i++)
234 field[i].move(old_field[i + 1]);
235 if (nfields > 0)
236 ad_delete(nfields) old_field;
237 nfields--;
238 field_index[c] = NULL_FIELD_INDEX;
239 for (i = c + 1; i < 256; i++)
240 if (field_index[i] != NULL_FIELD_INDEX)
241 field_index[i] -= 1;
244 void reference::compute_hash_code()
246 if (!rid.is_null())
247 h = rid.hash();
248 else {
249 h = 0;
250 for (int i = 0; i < nfields; i++)
251 if (field[i].length() > 0) {
252 h <<= 4;
253 h ^= hash_string(field[i].contents(), field[i].length());
258 void reference::set_number(int n)
260 no = n;
// Separators used inside a sort key, from outermost to innermost:
// SORT_SEP precedes each sort field, SORT_SUB_SEP separates the
// occurrences of a multi-valued field, and SORT_SUB_SUB_SEP separates
// the parts of a name.
const char SORT_SEP = '\001';
const char SORT_SUB_SEP = '\002';
const char SORT_SUB_SUB_SEP = '\003';
267 // sep specifies additional word separators
269 void sortify_words(const char *s, const char *end, const char *sep,
270 string &result)
272 int non_empty = 0;
273 int need_separator = 0;
274 for (;;) {
275 const char *token_start = s;
276 if (!get_token(&s, end))
277 break;
278 if ((s - token_start == 1
279 && (*token_start == ' '
280 || *token_start == '\n'
281 || (sep && *token_start != '\0'
282 && strchr(sep, *token_start) != 0)))
283 || (s - token_start == 2
284 && token_start[0] == '\\' && token_start[1] == ' ')) {
285 if (non_empty)
286 need_separator = 1;
288 else {
289 const token_info *ti = lookup_token(token_start, s);
290 if (ti->sortify_non_empty(token_start, s)) {
291 if (need_separator) {
292 result += ' ';
293 need_separator = 0;
295 ti->sortify(token_start, s, result);
296 non_empty = 1;
302 void sortify_word(const char *s, const char *end, string &result)
304 for (;;) {
305 const char *token_start = s;
306 if (!get_token(&s, end))
307 break;
308 const token_info *ti = lookup_token(token_start, s);
309 ti->sortify(token_start, s, result);
313 void sortify_other(const char *s, int len, string &key)
315 sortify_words(s, s + len, 0, key);
318 void sortify_title(const char *s, int len, string &key)
320 const char *end = s + len;
321 for (; s < end && (*s == ' ' || *s == '\n'); s++)
323 const char *ptr = s;
324 for (;;) {
325 const char *token_start = ptr;
326 if (!get_token(&ptr, end))
327 break;
328 if (ptr - token_start == 1
329 && (*token_start == ' ' || *token_start == '\n'))
330 break;
332 if (ptr < end) {
333 unsigned int first_word_len = ptr - s - 1;
334 const char *ae = articles.contents() + articles.length();
335 for (const char *a = articles.contents();
336 a < ae;
337 a = strchr(a, '\0') + 1)
338 if (first_word_len == strlen(a)) {
339 unsigned int j;
340 for (j = 0; j < first_word_len; j++)
341 if (a[j] != cmlower(s[j]))
342 break;
343 if (j >= first_word_len) {
344 s = ptr;
345 for (; s < end && (*s == ' ' || *s == '\n'); s++)
347 break;
351 sortify_words(s, end, 0, key);
354 void sortify_name(const char *s, int len, string &key)
356 const char *last_name_end;
357 const char *last_name = find_last_name(s, s + len, &last_name_end);
358 sortify_word(last_name, last_name_end, key);
359 key += SORT_SUB_SUB_SEP;
360 if (last_name > s)
361 sortify_words(s, last_name, ".", key);
362 key += SORT_SUB_SUB_SEP;
363 if (last_name_end < s + len)
364 sortify_words(last_name_end, s + len, ".,", key);
367 void sortify_date(const char *s, int len, string &key)
369 const char *year_end;
370 const char *year_start = find_year(s, s + len, &year_end);
371 if (!year_start) {
372 // Things without years are often `forthcoming', so it makes sense
373 // that they sort after things with explicit years.
374 key += 'A';
375 sortify_words(s, s + len, 0, key);
376 return;
378 int n = year_end - year_start;
379 while (n < 4) {
380 key += '0';
381 n++;
383 while (year_start < year_end)
384 key += *year_start++;
385 int m = find_month(s, s + len);
386 if (m < 0)
387 return;
388 key += 'A' + m;
389 const char *day_end;
390 const char *day_start = find_day(s, s + len, &day_end);
391 if (!day_start)
392 return;
393 if (day_end - day_start == 1)
394 key += '0';
395 while (day_start < day_end)
396 key += *day_start++;
399 // SORT_{SUB,SUB_SUB}_SEP can creep in from use of @ in label specification.
401 void sortify_label(const char *s, int len, string &key)
403 const char *end = s + len;
404 for (;;) {
405 const char *ptr;
406 for (ptr = s;
407 ptr < end && *ptr != SORT_SUB_SEP && *ptr != SORT_SUB_SUB_SEP;
408 ptr++)
410 if (ptr > s)
411 sortify_words(s, ptr, 0, key);
412 s = ptr;
413 if (s >= end)
414 break;
415 key += *s++;
419 void reference::compute_sort_key()
421 if (sort_fields.length() == 0)
422 return;
423 sort_fields += '\0';
424 const char *sf = sort_fields.contents();
425 while (*sf != '\0') {
426 sort_key += SORT_SEP;
427 char f = *sf++;
428 int n = 1;
429 if (*sf == '+') {
430 n = INT_MAX;
431 sf++;
433 else if (csdigit(*sf)) {
434 char *ptr;
435 long l = strtol(sf, &ptr, 10);
436 if (l == 0 && ptr == sf)
438 else {
439 sf = ptr;
440 if (l < 0) {
441 n = 1;
443 else {
444 n = int(l);
448 if (f == '.')
449 sortify_label(label.contents(), label.length(), sort_key);
450 else if (f == AUTHOR_FIELDS[0])
451 sortify_authors(n, sort_key);
452 else
453 sortify_field(f, n, sort_key);
455 sort_fields.set_length(sort_fields.length() - 1);
458 void reference::sortify_authors(int n, string &result) const
460 for (const char *p = AUTHOR_FIELDS; *p != '\0'; p++)
461 if (contains_field(*p)) {
462 sortify_field(*p, n, result);
463 return;
465 sortify_field(AUTHOR_FIELDS[0], n, result);
468 void reference::canonicalize_authors(string &result) const
470 int len = result.length();
471 sortify_authors(INT_MAX, result);
472 if (result.length() > len)
473 result += SORT_SUB_SEP;
476 void reference::sortify_field(unsigned char f, int n, string &result) const
478 typedef void (*sortify_t)(const char *, int, string &);
479 sortify_t sortifier = sortify_other;
480 switch (f) {
481 case 'A':
482 case 'E':
483 sortifier = sortify_name;
484 break;
485 case 'D':
486 sortifier = sortify_date;
487 break;
488 case 'B':
489 case 'J':
490 case 'T':
491 sortifier = sortify_title;
492 break;
494 int fi = field_index[(unsigned char)f];
495 if (fi != NULL_FIELD_INDEX) {
496 string &str = field[fi];
497 const char *start = str.contents();
498 const char *end = start + str.length();
499 for (int i = 0; i < n && start < end; i++) {
500 const char *p = start;
501 while (start < end && *start != FIELD_SEPARATOR)
502 start++;
503 if (i > 0)
504 result += SORT_SUB_SEP;
505 (*sortifier)(p, start - p, result);
506 if (start < end)
507 start++;
512 int compare_reference(const reference &r1, const reference &r2)
514 assert(r1.no >= 0);
515 assert(r2.no >= 0);
516 const char *s1 = r1.sort_key.contents();
517 int n1 = r1.sort_key.length();
518 const char *s2 = r2.sort_key.contents();
519 int n2 = r2.sort_key.length();
520 for (; n1 > 0 && n2 > 0; --n1, --n2, ++s1, ++s2)
521 if (*s1 != *s2)
522 return (int)(unsigned char)*s1 - (int)(unsigned char)*s2;
523 if (n2 > 0)
524 return -1;
525 if (n1 > 0)
526 return 1;
527 return r1.no - r2.no;
530 int same_reference(const reference &r1, const reference &r2)
532 if (!r1.rid.is_null() && r1.rid == r2.rid)
533 return 1;
534 if (r1.h != r2.h)
535 return 0;
536 if (r1.nfields != r2.nfields)
537 return 0;
538 int i = 0;
539 for (i = 0; i < 256; i++)
540 if (r1.field_index != r2.field_index)
541 return 0;
542 for (i = 0; i < r1.nfields; i++)
543 if (r1.field[i] != r2.field[i])
544 return 0;
545 return 1;
548 const char *find_last_name(const char *start, const char *end,
549 const char **endp)
551 const char *ptr = start;
552 const char *last_word = start;
553 for (;;) {
554 const char *token_start = ptr;
555 if (!get_token(&ptr, end))
556 break;
557 if (ptr - token_start == 1) {
558 if (*token_start == ',') {
559 *endp = token_start;
560 return last_word;
562 else if (*token_start == ' ' || *token_start == '\n') {
563 if (ptr < end && *ptr != ' ' && *ptr != '\n')
564 last_word = ptr;
568 *endp = end;
569 return last_word;
572 void abbreviate_name(const char *ptr, const char *end, string &result)
574 const char *last_name_end;
575 const char *last_name_start = find_last_name(ptr, end, &last_name_end);
576 int need_period = 0;
577 for (;;) {
578 const char *token_start = ptr;
579 if (!get_token(&ptr, last_name_start))
580 break;
581 const token_info *ti = lookup_token(token_start, ptr);
582 if (need_period) {
583 if ((ptr - token_start == 1 && *token_start == ' ')
584 || (ptr - token_start == 2 && token_start[0] == '\\'
585 && token_start[1] == ' '))
586 continue;
587 if (ti->is_upper())
588 result += period_before_initial;
589 else
590 result += period_before_other;
591 need_period = 0;
593 result.append(token_start, ptr - token_start);
594 if (ti->is_upper()) {
595 const char *lower_ptr = ptr;
596 int first_token = 1;
597 for (;;) {
598 token_start = ptr;
599 if (!get_token(&ptr, last_name_start))
600 break;
601 if ((ptr - token_start == 1 && *token_start == ' ')
602 || (ptr - token_start == 2 && token_start[0] == '\\'
603 && token_start[1] == ' '))
604 break;
605 ti = lookup_token(token_start, ptr);
606 if (ti->is_hyphen()) {
607 const char *ptr1 = ptr;
608 if (get_token(&ptr1, last_name_start)) {
609 ti = lookup_token(ptr, ptr1);
610 if (ti->is_upper()) {
611 result += period_before_hyphen;
612 result.append(token_start, ptr1 - token_start);
613 ptr = ptr1;
617 else if (ti->is_upper()) {
618 // MacDougal -> MacD.
619 result.append(lower_ptr, ptr - lower_ptr);
620 lower_ptr = ptr;
621 first_token = 1;
623 else if (first_token && ti->is_accent()) {
624 result.append(token_start, ptr - token_start);
625 lower_ptr = ptr;
627 first_token = 0;
629 need_period = 1;
632 if (need_period)
633 result += period_before_last_name;
634 result.append(last_name_start, end - last_name_start);
637 static void abbreviate_names(string &result)
639 string str;
640 str.move(result);
641 const char *ptr = str.contents();
642 const char *end = ptr + str.length();
643 while (ptr < end) {
644 const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
645 if (name_end == 0)
646 name_end = end;
647 abbreviate_name(ptr, name_end, result);
648 if (name_end >= end)
649 break;
650 ptr = name_end + 1;
651 result += FIELD_SEPARATOR;
655 void reverse_name(const char *ptr, const char *name_end, string &result)
657 const char *last_name_end;
658 const char *last_name_start = find_last_name(ptr, name_end, &last_name_end);
659 result.append(last_name_start, last_name_end - last_name_start);
660 while (last_name_start > ptr
661 && (last_name_start[-1] == ' ' || last_name_start[-1] == '\n'))
662 last_name_start--;
663 if (last_name_start > ptr) {
664 result += ", ";
665 result.append(ptr, last_name_start - ptr);
667 if (last_name_end < name_end)
668 result.append(last_name_end, name_end - last_name_end);
671 void reverse_names(string &result, int n)
673 if (n <= 0)
674 return;
675 string str;
676 str.move(result);
677 const char *ptr = str.contents();
678 const char *end = ptr + str.length();
679 while (ptr < end) {
680 if (--n < 0) {
681 result.append(ptr, end - ptr);
682 break;
684 const char *name_end = (char *)memchr(ptr, FIELD_SEPARATOR, end - ptr);
685 if (name_end == 0)
686 name_end = end;
687 reverse_name(ptr, name_end, result);
688 if (name_end >= end)
689 break;
690 ptr = name_end + 1;
691 result += FIELD_SEPARATOR;
695 // Return number of field separators.
697 int join_fields(string &f)
699 const char *ptr = f.contents();
700 int len = f.length();
701 int nfield_seps = 0;
702 int j;
703 for (j = 0; j < len; j++)
704 if (ptr[j] == FIELD_SEPARATOR)
705 nfield_seps++;
706 if (nfield_seps == 0)
707 return 0;
708 string temp;
709 int field_seps_left = nfield_seps;
710 for (j = 0; j < len; j++) {
711 if (ptr[j] == FIELD_SEPARATOR) {
712 if (nfield_seps == 1)
713 temp += join_authors_exactly_two;
714 else if (--field_seps_left == 0)
715 temp += join_authors_last_two;
716 else
717 temp += join_authors_default;
719 else
720 temp += ptr[j];
722 f = temp;
723 return nfield_seps;
726 void uppercase(const char *start, const char *end, string &result)
728 for (;;) {
729 const char *token_start = start;
730 if (!get_token(&start, end))
731 break;
732 const token_info *ti = lookup_token(token_start, start);
733 ti->upper_case(token_start, start, result);
737 void lowercase(const char *start, const char *end, string &result)
739 for (;;) {
740 const char *token_start = start;
741 if (!get_token(&start, end))
742 break;
743 const token_info *ti = lookup_token(token_start, start);
744 ti->lower_case(token_start, start, result);
748 void capitalize(const char *ptr, const char *end, string &result)
750 int in_small_point_size = 0;
751 for (;;) {
752 const char *start = ptr;
753 if (!get_token(&ptr, end))
754 break;
755 const token_info *ti = lookup_token(start, ptr);
756 const char *char_end = ptr;
757 int is_lower = ti->is_lower();
758 if ((is_lower || ti->is_upper()) && get_token(&ptr, end)) {
759 const token_info *ti2 = lookup_token(char_end, ptr);
760 if (!ti2->is_accent())
761 ptr = char_end;
763 if (is_lower) {
764 if (!in_small_point_size) {
765 result += "\\s-2";
766 in_small_point_size = 1;
768 ti->upper_case(start, char_end, result);
769 result.append(char_end, ptr - char_end);
771 else {
772 if (in_small_point_size) {
773 result += "\\s+2";
774 in_small_point_size = 0;
776 result.append(start, ptr - start);
779 if (in_small_point_size)
780 result += "\\s+2";
783 void capitalize_field(string &str)
785 string temp;
786 capitalize(str.contents(), str.contents() + str.length(), temp);
787 str.move(temp);
790 int is_terminated(const char *ptr, const char *end)
792 const char *last_token = end;
793 for (;;) {
794 const char *p = ptr;
795 if (!get_token(&ptr, end))
796 break;
797 last_token = p;
799 return end - last_token == 1
800 && (*last_token == '.' || *last_token == '!' || *last_token == '?');
803 void reference::output(FILE *fp)
805 fputs(".]-\n", fp);
806 for (int i = 0; i < 256; i++)
807 if (field_index[i] != NULL_FIELD_INDEX && i != annotation_field) {
808 string &f = field[field_index[i]];
809 if (!csdigit(i)) {
810 int j = reverse_fields.search(i);
811 if (j >= 0) {
812 int n;
813 int len = reverse_fields.length();
814 if (++j < len && csdigit(reverse_fields[j])) {
815 n = reverse_fields[j] - '0';
816 for (++j; j < len && csdigit(reverse_fields[j]); j++)
817 // should check for overflow
818 n = n*10 + reverse_fields[j] - '0';
820 else
821 n = INT_MAX;
822 reverse_names(f, n);
825 int is_multiple = join_fields(f) > 0;
826 if (capitalize_fields.search(i) >= 0)
827 capitalize_field(f);
828 if (memchr(f.contents(), '\n', f.length()) == 0) {
829 fprintf(fp, ".ds [%c ", i);
830 if (f[0] == ' ' || f[0] == '\\' || f[0] == '"')
831 putc('"', fp);
832 put_string(f, fp);
833 putc('\n', fp);
835 else {
836 fprintf(fp, ".de [%c\n", i);
837 put_string(f, fp);
838 fputs("..\n", fp);
840 if (i == 'P') {
841 int multiple_pages = 0;
842 const char *s = f.contents();
843 const char *end = f.contents() + f.length();
844 for (;;) {
845 const char *token_start = s;
846 if (!get_token(&s, end))
847 break;
848 const token_info *ti = lookup_token(token_start, s);
849 if (ti->is_hyphen() || ti->is_range_sep()) {
850 multiple_pages = 1;
851 break;
854 fprintf(fp, ".nr [P %d\n", multiple_pages);
856 else if (i == 'E')
857 fprintf(fp, ".nr [E %d\n", is_multiple);
859 for (const char *p = "TAO"; *p; p++) {
860 int fi = field_index[(unsigned char)*p];
861 if (fi != NULL_FIELD_INDEX) {
862 string &f = field[fi];
863 fprintf(fp, ".nr [%c %d\n", *p,
864 is_terminated(f.contents(), f.contents() + f.length()));
867 int t = classify();
868 fprintf(fp, ".][ %d %s\n", t, reference_types[t]);
869 if (annotation_macro.length() > 0 && annotation_field >= 0
870 && field_index[annotation_field] != NULL_FIELD_INDEX) {
871 putc('.', fp);
872 put_string(annotation_macro, fp);
873 putc('\n', fp);
874 put_string(field[field_index[annotation_field]], fp);
878 void reference::print_sort_key_comment(FILE *fp)
880 fputs(".\\\"", fp);
881 put_string(sort_key, fp);
882 putc('\n', fp);
885 const char *find_year(const char *start, const char *end, const char **endp)
887 for (;;) {
888 while (start < end && !csdigit(*start))
889 start++;
890 const char *ptr = start;
891 if (start == end)
892 break;
893 while (ptr < end && csdigit(*ptr))
894 ptr++;
895 if (ptr - start == 4 || ptr - start == 3
896 || (ptr - start == 2
897 && (start[0] >= '4' || (start[0] == '3' && start[1] >= '2')))) {
898 *endp = ptr;
899 return start;
901 start = ptr;
903 return 0;
906 static const char *find_day(const char *start, const char *end,
907 const char **endp)
909 for (;;) {
910 while (start < end && !csdigit(*start))
911 start++;
912 const char *ptr = start;
913 if (start == end)
914 break;
915 while (ptr < end && csdigit(*ptr))
916 ptr++;
917 if ((ptr - start == 1 && start[0] != '0')
918 || (ptr - start == 2 &&
919 (start[0] == '1'
920 || start[0] == '2'
921 || (start[0] == '3' && start[1] <= '1')
922 || (start[0] == '0' && start[1] != '0')))) {
923 *endp = ptr;
924 return start;
926 start = ptr;
928 return 0;
931 static int find_month(const char *start, const char *end)
933 static const char *months[] = {
934 "january",
935 "february",
936 "march",
937 "april",
938 "may",
939 "june",
940 "july",
941 "august",
942 "september",
943 "october",
944 "november",
945 "december",
947 for (;;) {
948 while (start < end && !csalpha(*start))
949 start++;
950 const char *ptr = start;
951 if (start == end)
952 break;
953 while (ptr < end && csalpha(*ptr))
954 ptr++;
955 if (ptr - start >= 3) {
956 for (unsigned int i = 0; i < sizeof(months)/sizeof(months[0]); i++) {
957 const char *q = months[i];
958 const char *p = start;
959 for (; p < ptr; p++, q++)
960 if (cmlower(*p) != *q)
961 break;
962 if (p >= ptr)
963 return i;
966 start = ptr;
968 return -1;
971 int reference::contains_field(char c) const
973 return field_index[(unsigned char)c] != NULL_FIELD_INDEX;
976 int reference::classify()
978 if (contains_field('J'))
979 return JOURNAL_ARTICLE;
980 if (contains_field('B'))
981 return ARTICLE_IN_BOOK;
982 if (contains_field('G'))
983 return TECH_REPORT;
984 if (contains_field('R'))
985 return TECH_REPORT;
986 if (contains_field('I'))
987 return BOOK;
988 if (contains_field('M'))
989 return BELL_TM;
990 return OTHER;
993 const char *reference::get_year(const char **endp) const
995 if (field_index['D'] != NULL_FIELD_INDEX) {
996 string &date = field[field_index['D']];
997 const char *start = date.contents();
998 const char *end = start + date.length();
999 return find_year(start, end, endp);
1001 else
1002 return 0;
1005 const char *reference::get_field(unsigned char c, const char **endp) const
1007 if (field_index[c] != NULL_FIELD_INDEX) {
1008 string &f = field[field_index[c]];
1009 const char *start = f.contents();
1010 *endp = start + f.length();
1011 return start;
1013 else
1014 return 0;
1017 const char *reference::get_date(const char **endp) const
1019 return get_field('D', endp);
1022 const char *nth_field(int i, const char *start, const char **endp)
1024 while (--i >= 0) {
1025 start = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
1026 if (!start)
1027 return 0;
1028 start++;
1030 const char *e = (char *)memchr(start, FIELD_SEPARATOR, *endp - start);
1031 if (e)
1032 *endp = e;
1033 return start;
1036 const char *reference::get_author(int i, const char **endp) const
1038 for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
1039 const char *start = get_field(*f, endp);
1040 if (start) {
1041 if (strchr(MULTI_FIELD_NAMES, *f) != 0)
1042 return nth_field(i, start, endp);
1043 else if (i == 0)
1044 return start;
1045 else
1046 return 0;
1049 return 0;
1052 const char *reference::get_author_last_name(int i, const char **endp) const
1054 for (const char *f = AUTHOR_FIELDS; *f != '\0'; f++) {
1055 const char *start = get_field(*f, endp);
1056 if (start) {
1057 if (strchr(MULTI_FIELD_NAMES, *f) != 0) {
1058 start = nth_field(i, start, endp);
1059 if (!start)
1060 return 0;
1062 if (*f == 'A')
1063 return find_last_name(start, *endp, endp);
1064 else
1065 return start;
1068 return 0;
1071 void reference::set_date(string &d)
1073 if (d.length() == 0)
1074 delete_field('D');
1075 else
1076 insert_field('D', d);
1079 int same_year(const reference &r1, const reference &r2)
1081 const char *ye1;
1082 const char *ys1 = r1.get_year(&ye1);
1083 const char *ye2;
1084 const char *ys2 = r2.get_year(&ye2);
1085 if (ys1 == 0) {
1086 if (ys2 == 0)
1087 return same_date(r1, r2);
1088 else
1089 return 0;
1091 else if (ys2 == 0)
1092 return 0;
1093 else if (ye1 - ys1 != ye2 - ys2)
1094 return 0;
1095 else
1096 return memcmp(ys1, ys2, ye1 - ys1) == 0;
1099 int same_date(const reference &r1, const reference &r2)
1101 const char *e1;
1102 const char *s1 = r1.get_date(&e1);
1103 const char *e2;
1104 const char *s2 = r2.get_date(&e2);
1105 if (s1 == 0)
1106 return s2 == 0;
1107 else if (s2 == 0)
1108 return 0;
1109 else if (e1 - s1 != e2 - s2)
1110 return 0;
1111 else
1112 return memcmp(s1, s2, e1 - s1) == 0;
1115 const char *reference::get_sort_field(int i, int si, int ssi,
1116 const char **endp) const
1118 const char *start = sort_key.contents();
1119 const char *end = start + sort_key.length();
1120 if (i < 0) {
1121 *endp = end;
1122 return start;
1124 while (--i >= 0) {
1125 start = (char *)memchr(start, SORT_SEP, end - start);
1126 if (!start)
1127 return 0;
1128 start++;
1130 const char *e = (char *)memchr(start, SORT_SEP, end - start);
1131 if (e)
1132 end = e;
1133 if (si < 0) {
1134 *endp = end;
1135 return start;
1137 while (--si >= 0) {
1138 start = (char *)memchr(start, SORT_SUB_SEP, end - start);
1139 if (!start)
1140 return 0;
1141 start++;
1143 e = (char *)memchr(start, SORT_SUB_SEP, end - start);
1144 if (e)
1145 end = e;
1146 if (ssi < 0) {
1147 *endp = end;
1148 return start;
1150 while (--ssi >= 0) {
1151 start = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
1152 if (!start)
1153 return 0;
1154 start++;
1156 e = (char *)memchr(start, SORT_SUB_SUB_SEP, end - start);
1157 if (e)
1158 end = e;
1159 *endp = end;
1160 return start;