5 /* the 33 Cyrillic letters represented in ASCII characters following the
6 * conventions of the standard Library of Congress transliteration: */
// Each stringdef binds an ASCII transliteration name to one Cyrillic code point,
// so that string literals elsewhere in this file can write the letter as {name}.
10 stringdef v '{U+0432}'  // CYRILLIC SMALL LETTER VE
11 stringdef g '{U+0433}'  // CYRILLIC SMALL LETTER GHE
12 stringdef d '{U+0434}'  // CYRILLIC SMALL LETTER DE
13 stringdef e '{U+0435}'  // CYRILLIC SMALL LETTER IE
14 stringdef e" '{U+0451}'  // CYRILLIC SMALL LETTER IO
15 stringdef zh '{U+0436}'  // CYRILLIC SMALL LETTER ZHE
16 stringdef z '{U+0437}'  // CYRILLIC SMALL LETTER ZE
17 stringdef i '{U+0438}'  // CYRILLIC SMALL LETTER I
18 stringdef i` '{U+0439}'  // CYRILLIC SMALL LETTER SHORT I
19 stringdef k '{U+043A}'  // CYRILLIC SMALL LETTER KA
20 stringdef l '{U+043B}'  // CYRILLIC SMALL LETTER EL
21 stringdef m '{U+043C}'  // CYRILLIC SMALL LETTER EM
22 stringdef n '{U+043D}'  // CYRILLIC SMALL LETTER EN
23 stringdef o '{U+043E}'  // CYRILLIC SMALL LETTER O
24 stringdef p '{U+043F}'  // CYRILLIC SMALL LETTER PE
25 stringdef r '{U+0440}'  // CYRILLIC SMALL LETTER ER
26 stringdef s '{U+0441}'  // CYRILLIC SMALL LETTER ES
27 stringdef t '{U+0442}'  // CYRILLIC SMALL LETTER TE
28 stringdef u '{U+0443}'  // CYRILLIC SMALL LETTER U
29 stringdef f '{U+0444}'  // CYRILLIC SMALL LETTER EF
30 stringdef kh '{U+0445}'  // CYRILLIC SMALL LETTER HA
31 stringdef ts '{U+0446}'  // CYRILLIC SMALL LETTER TSE
32 stringdef ch '{U+0447}'  // CYRILLIC SMALL LETTER CHE
33 stringdef sh '{U+0448}'  // CYRILLIC SMALL LETTER SHA
34 stringdef shch '{U+0449}'  // CYRILLIC SMALL LETTER SHCHA
35 stringdef " '{U+044A}'  // CYRILLIC SMALL LETTER HARD SIGN
36 stringdef y '{U+044B}'  // CYRILLIC SMALL LETTER YERU
37 stringdef ' '{U+044C}'  // CYRILLIC SMALL LETTER SOFT SIGN
38 stringdef e` '{U+044D}'  // CYRILLIC SMALL LETTER E
39 stringdef iu '{U+044E}'  // CYRILLIC SMALL LETTER YU
40 stringdef ia '{U+044F}'  // CYRILLIC SMALL LETTER YA
42 routines ( mark_regions R2
// The character set v: the nine letters a, e, i, o, u, y, e`, iu, ia.
// It is what `gopast v` / `gopast non-v` test against in mark_regions.
// {e"} is not listed here; the main routine rewrites {e"} to {e}.
// NOTE(review): v is presumably declared under `groupings` - declaration not visible here.
59 define v '{a}{e}{i}{o}{u}{y}{e`}{iu}{ia}'
// mark_regions sets the marks pV and p2: pV is the limit used by the backwards
// suffix-stripping pass (setlimit tomark pV) and p2 is the mark tested by R2.
61 define mark_regions as (
66 gopast v setmark pV gopast non-v  // go past the next vowel and mark pV there, then go past the next non-vowel
67 gopast v gopast non-v setmark p2  // go past one further vowel and one further non-vowel, then mark p2
// R2 succeeds only when the cursor is at or after the mark p2 (set in mark_regions).
73 define R2 as $p2 <= cursor
75 define perfective_gerund as (
80 ('{a}' or '{ia}' delete)
93 '{e}{e}' '{i}{e}' '{y}{e}' '{o}{e}' '{i}{m}{i}' '{y}{m}{i}'
94 '{e}{i`}' '{i}{i`}' '{y}{i`}' '{o}{i`}' '{e}{m}' '{i}{m}'
95 '{y}{m}' '{o}{m}' '{e}{g}{o}' '{o}{g}{o}' '{e}{m}{u}'
96 '{o}{m}{u}' '{i}{kh}' '{y}{kh}' '{u}{iu}' '{iu}{iu}' '{a}{ia}'
99 '{o}{iu}' // - which is somewhat archaic
100 '{e}{iu}' // - soft form of {o}{iu}
105 define adjectival as (
108 /* of the participle forms, em, vsh, ivsh, yvsh are readily removable.
109 nn, {iu}shch, shch, u{iu}shch can be removed, with a small proportion of
110 errors. Removing im, uem, enn creates too many errors.
115 '{e}{m}' // present passive participle
116 '{n}{n}' // adjective from past passive participle
117 '{v}{sh}' // past active participle
118 '{iu}{shch}' '{shch}' // present active participle
119 ('{a}' or '{ia}' delete)
121 //but not '{i}{m}' '{u}{e}{m}' // present passive participle
122 //or '{e}{n}{n}' // adjective from past passive participle
124 '{i}{v}{sh}' '{y}{v}{sh}'// past active participle
125 '{u}{iu}{shch}' // present active participle
132 define reflexive as (
142 '{l}{a}' '{n}{a}' '{e}{t}{e}' '{i`}{t}{e}' '{l}{i}' '{i`}'
143 '{l}' '{e}{m}' '{n}' '{l}{o}' '{n}{o}' '{e}{t}' '{iu}{t}'
144 '{n}{y}' '{t}{'}' '{e}{sh}{'}'
147 ('{a}' or '{ia}' delete)
149 '{i}{l}{a}' '{y}{l}{a}' '{e}{n}{a}' '{e}{i`}{t}{e}'
150 '{u}{i`}{t}{e}' '{i}{t}{e}' '{i}{l}{i}' '{y}{l}{i}' '{e}{i`}'
151 '{u}{i`}' '{i}{l}' '{y}{l}' '{i}{m}' '{y}{m}' '{e}{n}'
152 '{i}{l}{o}' '{y}{l}{o}' '{e}{n}{o}' '{ia}{t}' '{u}{e}{t}'
153 '{u}{iu}{t}' '{i}{t}' '{y}{t}' '{e}{n}{y}' '{i}{t}{'}'
154 '{y}{t}{'}' '{i}{sh}{'}' '{u}{iu}' '{iu}'
156 /* note the short passive participle tests:
157 '{n}{a}' '{n}' '{n}{o}' '{n}{y}'
158 '{e}{n}{a}' '{e}{n}' '{e}{n}{o}' '{e}{n}{y}'
165 '{a}' '{e}{v}' '{o}{v}' '{i}{e}' '{'}{e}' '{e}'
166 '{i}{ia}{m}{i}' '{ia}{m}{i}' '{a}{m}{i}' '{e}{i}' '{i}{i}'
167 '{i}' '{i}{e}{i`}' '{e}{i`}' '{o}{i`}' '{i}{i`}' '{i`}'
168 '{i}{ia}{m}' '{ia}{m}' '{i}{e}{m}' '{e}{m}' '{a}{m}' '{o}{m}'
169 '{o}' '{u}' '{a}{kh}' '{i}{ia}{kh}' '{ia}{kh}' '{y}' '{'}'
170 '{i}{iu}' '{'}{iu}' '{iu}' '{i}{ia}' '{'}{ia}' '{ia}'
172 /* the small class of neuter forms '{e}{n}{i}' '{e}{n}{e}{m}'
173 '{e}{n}{a}' '{e}{n}' '{e}{n}{a}{m}' '{e}{n}{a}{m}{i}' '{e}{n}{a}{kh}'
174 omitted - they only occur on 12 words.
179 define derivational as (
180 [substring] R2 among (
191 '{e}{i`}{sh}{e}' // superlative forms
196 ('{n}' delete) // e.g. -nno endings
198 (delete) // with some slight false conflations
205 // Normalise {e"} to {e}. The documentation has long suggested the user
206 // should do this before calling the stemmer - we now do it for them.
207 do repeat ( goto (['{e"}']) <- '{e}' )
210 backwards setlimit tomark pV for (
214 adjectival or verb or noun
217 try([ '{i}' ] delete)
218 // because noun ending -i{iu} is being treated as verb ending -{iu}