4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2015 Gary Mills
24 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
25 * Use is subject to license terms.
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
/*
 * Tolower(c): yield the lower-case form of c when c is an upper-case
 * letter, otherwise yield c unchanged.
 *
 * The argument is expanded more than once, so callers should pass an
 * expression without side effects.  Every use of the argument that is not
 * already a function-call argument is parenthesized so that an argument
 * containing low-precedence operators still groups as the caller intended.
 */
#define	Tolower(c)	(isupper(c) ? tolower(c) : (c))
46 static int ily(char *, char *, char *, int);
47 static int s(char *, char *, char *, int);
48 static int es(char *, char *, char *, int);
49 static int subst(char *, char *, char *, int);
51 static int bility(char *, char *, char *, int);
52 static int i_to_y(char *, char *, char *, int);
53 static int CCe(char *, char *, char *, int);
54 static int y_to_e(char *, char *, char *, int);
55 static int strip(char *, char *, char *, int);
56 static int ize(char *, char *, char *, int);
57 static int tion(char *, char *, char *, int);
58 static int an(char *, char *, char *, int);
60 static int tryword(char *, char *, int);
61 static int trypref(char *, char *, int);
62 static int trysuff(char *, int);
63 static int vowel(int);
64 static int dict(char *, char *);
65 static int monosyl(char *, char *);
66 static int VCe(char *, char *, char *, int);
67 static char *skipv(char *);
81 static struct suftab sufa
[] = {
82 {"ssen", ily
, 4, "-y+iness", "+ness" },
83 {"ssel", ily
, 4, "-y+i+less", "+less" },
84 {"se", s
, 1, "", "+s", es
, 2, "-y+ies", "+es" },
85 {"s'", s
, 2, "", "+'s"},
86 {"s", s
, 1, "", "+s"},
87 {"ecn", subst
, 1, "-t+ce", ""},
88 {"ycn", subst
, 1, "-t+cy", ""},
89 {"ytilb", nop
, 0, "", ""},
90 {"ytilib", bility
, 5, "-le+ility", ""},
91 {"elbaif", i_to_y
, 4, "-y+iable", ""},
92 {"elba", CCe
, 4, "-e+able", "+able"},
93 {"yti", CCe
, 3, "-e+ity", "+ity"},
94 {"ylb", y_to_e
, 1, "-e+y", ""},
95 {"yl", ily
, 2, "-y+ily", "+ly"},
96 {"laci", strip
, 2, "", "+al"},
97 {"latnem", strip
, 2, "", "+al"},
98 {"lanoi", strip
, 2, "", "+al"},
99 {"tnem", strip
, 4, "", "+ment"},
100 {"gni", CCe
, 3, "-e+ing", "+ing"},
101 {"reta", nop
, 0, "", ""},
102 {"retc", nop
, 0, "", ""},
103 {"re", strip
, 1, "", "+r", i_to_y
, 2, "-y+ier", "+er"},
104 {"de", strip
, 1, "", "+d", i_to_y
, 2, "-y+ied", "+ed"},
105 {"citsi", strip
, 2, "", "+ic"},
106 {"citi", ize
, 1, "-ic+e", ""},
107 {"cihparg", i_to_y
, 1, "-y+ic", ""},
108 {"tse", strip
, 2, "", "+st", i_to_y
, 3, "-y+iest", "+est"},
109 {"cirtem", i_to_y
, 1, "-y+ic", ""},
110 {"yrtem", subst
, 0, "-er+ry", ""},
111 {"cigol", i_to_y
, 1, "-y+ic", ""},
112 {"tsigol", i_to_y
, 2, "-y+ist", ""},
113 {"tsi", CCe
, 3, "-e+ist", "+ist"},
114 {"msi", CCe
, 3, "-e+ism", "+ist"},
115 {"noitacifi", i_to_y
, 6, "-y+ication", ""},
116 {"noitazi", ize
, 4, "-e+ation", ""},
117 {"rota", tion
, 2, "-e+or", ""},
118 {"rotc", tion
, 2, "", "+or"},
119 {"noit", tion
, 3, "-e+ion", "+ion"},
120 {"naino", an
, 3, "", "+ian"},
121 {"na", an
, 1, "", "+n"},
122 {"evi", subst
, 0, "-ion+ive", ""},
123 {"ezi", CCe
, 3, "-e+ize", "+ize"},
124 {"pihs", strip
, 4, "", "+ship"},
125 {"dooh", ily
, 4, "-y+ihood", "+hood"},
126 {"luf", ily
, 3, "-y+iful", "+ful"},
127 {"ekil", strip
, 4, "", "+like"},
131 static struct suftab sufb
[] = {
132 {"ssen", ily
, 4, "-y+iness", "+ness" },
133 {"ssel", ily
, 4, "-y+i+less", "+less" },
134 {"se", s
, 1, "", "+s", es
, 2, "-y+ies", "+es" },
135 {"s'", s
, 2, "", "+'s"},
136 {"s", s
, 1, "", "+s"},
137 {"ecn", subst
, 1, "-t+ce", ""},
138 {"ycn", subst
, 1, "-t+cy", ""},
139 {"ytilb", nop
, 0, "", ""},
140 {"ytilib", bility
, 5, "-le+ility", ""},
141 {"elbaif", i_to_y
, 4, "-y+iable", ""},
142 {"elba", CCe
, 4, "-e+able", "+able"},
143 {"yti", CCe
, 3, "-e+ity", "+ity"},
144 {"ylb", y_to_e
, 1, "-e+y", ""},
145 {"yl", ily
, 2, "-y+ily", "+ly"},
146 {"laci", strip
, 2, "", "+al"},
147 {"latnem", strip
, 2, "", "+al"},
148 {"lanoi", strip
, 2, "", "+al"},
149 {"tnem", strip
, 4, "", "+ment"},
150 {"gni", CCe
, 3, "-e+ing", "+ing"},
151 {"reta", nop
, 0, "", ""},
152 {"retc", nop
, 0, "", ""},
153 {"re", strip
, 1, "", "+r", i_to_y
, 2, "-y+ier", "+er"},
154 {"de", strip
, 1, "", "+d", i_to_y
, 2, "-y+ied", "+ed"},
155 {"citsi", strip
, 2, "", "+ic"},
156 {"citi", ize
, 1, "-ic+e", ""},
157 {"cihparg", i_to_y
, 1, "-y+ic", ""},
158 {"tse", strip
, 2, "", "+st", i_to_y
, 3, "-y+iest", "+est"},
159 {"cirtem", i_to_y
, 1, "-y+ic", ""},
160 {"yrtem", subst
, 0, "-er+ry", ""},
161 {"cigol", i_to_y
, 1, "-y+ic", ""},
162 {"tsigol", i_to_y
, 2, "-y+ist", ""},
163 {"tsi", CCe
, 3, "-e+ist", "+ist"},
164 {"msi", CCe
, 3, "-e+ism", "+ist"},
165 {"noitacifi", i_to_y
, 6, "-y+ication", ""},
166 {"noitasi", ize
, 4, "-e+ation", ""},
167 {"rota", tion
, 2, "-e+or", ""},
168 {"rotc", tion
, 2, "", "+or"},
169 {"noit", tion
, 3, "-e+ion", "+ion"},
170 {"naino", an
, 3, "", "+ian"},
171 {"na", an
, 1, "", "+n"},
172 {"evi", subst
, 0, "-ion+ive", ""},
173 {"esi", CCe
, 3, "-e+ise", "+ise"},
174 {"pihs", strip
, 4, "", "+ship"},
175 {"dooh", ily
, 4, "-y+ihood", "+hood"},
176 {"luf", ily
, 3, "-y+iful", "+ful"},
177 {"ekil", strip
, 4, "", "+like"},
181 static char *preftab
[] = {
220 "under", /* must precede un */
228 static struct suftab
*suftab
;
230 static char word
[LINE_MAX
];
231 static char original
[LINE_MAX
];
232 static char *deriv
[LINE_MAX
];
233 static char affix
[LINE_MAX
];
234 static FILE *file
, *found
;
236 * deriv is stack of pointers to notes like +micro +ed
237 * affix is concatenated string of notes
238 * the buffer size 141 stems from the sizes of original and affix.
242 * in an attempt to defray future maintenance misunderstandings, here is
243 * an attempt to describe the input/output expectations of the spell
246 * spellprog is intended to be called from the shell file spell.
247 * because of this, there is little error checking (this is historical, not
248 * necessarily advisable).
250 * spellprog options hashed-list pass
252 * the hashed-list is a list of the form made by spellin.
253 * there are 2 types of hashed lists:
254 * 1. a stop list: this specifies words that by the rules embodied
255 * in spellprog would be recognized as correct, BUT are really
257 * 2. a dictionary of correctly spelled words.
258 * the pass number determines how the words found in the specified
259 * hashed-list are treated. If the pass number is 1, the hashed-list is
260 * treated as the stop-list, otherwise, it is treated as the regular
261 * dictionary list. in this case, the value of "pass" is a filename. Found
262 * words are written to this file.
264 * In the normal case, the filename = /dev/null. However, if the v option
265 * is specified, the derivations are written to this file.
266 * The spellprog looks up words in the hashed-list; if a word is found, it
267 * is printed to the stdout. If the hashed-list was the stop-list, the
268 * words found are presumed to be misspellings. in this case,
269 * a control character is printed ( a "-" is appended to the word.
270 * a hyphen will never occur naturally in the input list because deroff
271 * is used in the shell file before calling spellprog.)
272 * If the regular spelling list was used (hlista or hlistb), the words
273 * are correct, and may be ditched. (unless the -v option was used -
274 * see the manual page).
276 * spellprog should be called twice : first with the stop-list, to flag all
277 * a priori incorrectly spelled words; second with the dictionary.
279 * spellprog hstop 1 |\
280 * spellprog hlista /dev/null
282 * for a complete scenario, see the shell file: spell.
287 main(int argc
, char **argv
)
295 /* Set locale environment variables local definitions */
296 (void) setlocale(LC_ALL
, "");
297 #if !defined(TEXT_DOMAIN) /* Should be defined by cc -D */
298 #define TEXT_DOMAIN "SYS_TEST" /* Use this only if it wasn't */
300 (void) textdomain(TEXT_DOMAIN
);
304 while ((c
= getopt(argc
, argv
, "bvx")) != EOF
) {
319 argv
= &argv
[optind
];
321 if ((argc
< 2) || !prime(*argv
)) {
322 (void) fprintf(stderr
,
323 gettext("%s: cannot initialize hash table\n"), prog
);
329 /* Select the correct suffix table */
330 suftab
= (bflag
== 0) ? sufa
: sufb
;
333 * if pass is not 1, it is assumed to be a filename.
334 * found words are written to this file.
338 found
= fopen(*argv
, "w");
343 for (ep
= word
; (*ep
= j
= getchar()) != '\n'; ep
++)
347 * here is the hyphen processing. these words were found in the stop
348 * list. however, if they exist as is, (no derivations tried) in the
349 * dictionary, let them through as correct.
354 if (!tryword(word
, ep
, 0))
355 (void) fprintf(file
, "%s\n", word
);
358 for (cp
= word
, dp
= original
; cp
< ep
; )
362 for (cp
= word
; cp
< ep
; cp
++)
365 if (((ep
- word
) == 1) &&
366 ((word
[0] == 'A') || (word
[0] == 'I')))
368 if (trypref(ep
, ".", 0))
371 for (cp
= original
+1, dp
= word
+1; dp
< ep
; dp
++, cp
++)
374 if (((ep
- word
) == 1) && (word
[0] == 'a'))
376 if (trypref(ep
, ".", 0)||trysuff(ep
, 0))
378 if (isupper(word
[0])) {
379 for (cp
= original
, dp
= word
; *dp
= *cp
++; dp
++)
380 if (fold
) *dp
= Tolower(*dp
);
381 word
[0] = Tolower(word
[0]);
384 (void) fprintf(file
, "%s\n", original
);
389 (void) fprintf(file
, "%s-\n", original
);
390 else if (affix
[0] != 0 && affix
[0] != '.') {
392 (void) fprintf(file
, "%s\t%s\n", affix
,
399 * strip exactly one suffix and do
400 * indicated routine(s), which may recursively
405 trysuff(char *ep
, int lev
)
411 deriv
[lev
] = deriv
[lev
-1] = 0;
412 for (t
= &suftab
[0]; (t
!= 0 && (sp
= t
->suf
) != 0); t
++) {
417 for (sp
= cp
; --sp
>= word
&& !vowel(*sp
); )
421 if ((*t
->p1
)(ep
-t
->n1
, t
->d1
, t
->a1
, lev
+1))
424 deriv
[lev
] = deriv
[lev
+1] = 0;
425 return ((*t
->p2
)(ep
-t
->n2
, t
->d2
, t
->a2
, lev
));
441 strip(char *ep
, char *d
, char *a
, int lev
)
443 return (trypref(ep
, a
, lev
)||trysuff(ep
, lev
));
447 s(char *ep
, char *d
, char *a
, int lev
)
451 if (*ep
== 's' && ep
[-1] == 's')
453 return (strip(ep
, d
, a
, lev
));
458 an(char *ep
, char *d
, char *a
, int lev
)
460 if (!isupper(*word
)) /* must be proper name */
462 return (trypref(ep
, a
, lev
));
467 ize(char *ep
, char *d
, char *a
, int lev
)
470 return (strip(ep
, "", d
, lev
));
475 y_to_e(char *ep
, char *d
, char *a
, int lev
)
478 return (strip(ep
, "", d
, lev
));
482 ily(char *ep
, char *d
, char *a
, int lev
)
485 return (i_to_y(ep
, d
, a
, lev
));
487 return (strip(ep
, d
, a
, lev
));
491 bility(char *ep
, char *d
, char *a
, int lev
)
494 return (y_to_e(ep
, d
, a
, lev
));
498 i_to_y(char *ep
, char *d
, char *a
, int lev
)
504 return (strip(ep
, "", a
, lev
));
508 es(char *ep
, char *d
, char *a
, int lev
)
516 return (i_to_y(ep
, d
, a
, lev
));
521 return (strip(ep
, d
, a
, lev
));
527 subst(char *ep
, char *d
, char *a
, int lev
)
531 if (skipv(skipv(ep
-1)) < word
)
533 for (t
= d
; *t
!= '+'; t
++)
535 for (u
= ep
; *--t
!= '-'; )
537 return (strip(ep
, "", d
, lev
));
542 tion(char *ep
, char *d
, char *a
, int lev
)
547 return (trypref(ep
, a
, lev
));
549 return (y_to_e(ep
, d
, a
, lev
));
554 /* possible consonant-consonant-e ending */
556 CCe(char *ep
, char *d
, char *a
, int lev
)
561 return (y_to_e(ep
, d
, a
, lev
));
572 return (y_to_e(ep
, d
, a
, lev
));
582 if (y_to_e(ep
, d
, a
, lev
))
584 if (!(ep
[-2] == 'n' && ep
[-1] == 'g'))
593 if (y_to_e(ep
, d
, a
, lev
))
595 if (!(ep
[-2] == 'n' && ep
[-1] == 'g'))
602 if (y_to_e(ep
, d
, a
, lev
))
604 if (!(ep
[-2] == 'n' && ep
[-1] == 'g'))
608 if (y_to_e(ep
, d
, a
, lev
))
610 if (!(ep
[-2] == 'n' && ep
[-1] == 'g'))
614 return (VCe(ep
, d
, a
, lev
));
617 /* possible consonant-vowel-consonant-e ending */
619 VCe(char *ep
, char *d
, char *a
, int lev
)
625 if (!vowel(c
) && vowel(ep
[-2])) {
628 if (trypref(ep
, d
, lev
)||trysuff(ep
, lev
))
633 return (strip(ep
, d
, a
, lev
));
637 lookuppref(char **wp
, char *ep
)
642 for (sp
= preftab
; *sp
; sp
++) {
644 for (cp
= *sp
; *cp
; cp
++, bp
++)
645 if (Tolower(*bp
) != *cp
)
647 for (cp
= bp
; cp
< ep
; cp
++)
658 * while word is not in dictionary try stripping
659 * prefixes. Fail if no more prefixes.
662 trypref(char *ep
, char *a
, int lev
)
668 char space
[LINE_MAX
* 2];
670 if (tryword(word
, ep
, lev
))
675 while (cp
= lookuppref(&bp
, ep
)) {
679 if (tryword(bp
, ep
, lev
+1)) {
684 deriv
[lev
+1] = deriv
[lev
+2] = 0;
689 tryword(char *bp
, char *ep
, int lev
)
700 if (i
== 0 && vowel(*ep
) && ep
[-1] == ep
[-2] && monosyl(bp
, ep
-1)) {
702 deriv
[++lev
] = duple
;
708 if (vflag
== 0 || i
== 0)
711 * when derivations are wanted, collect them
717 (void) strcat(affix
, deriv
[j
]);
724 monosyl(char *bp
, char *ep
)
728 if (vowel(*--ep
) || !vowel(*--ep
) || ep
[1] == 'x' || ep
[1] == 'w')
739 if (s
>= word
&&vowel(*s
))
741 while (s
>= word
&& !vowel(*s
))
749 switch (Tolower(c
)) {
762 dict(char *bp
, char *ep
)
766 (void) fprintf(stdout
, "=%.*s\n", ep
-bp
, bp
);
769 result
= hashlook(bp
);