Merge branch 'master' of git://repo.or.cz/sbcl
[sbcl/attila.git] / src / code / external-formats / enc-cyr.lisp
bloba0e033e95433070bb9d4ac67c0a83394c7d38884
1 (in-package "SB!IMPL")
3 (define-unibyte-mapper koi8-r->code-mapper code->koi8-r-mapper
4 (#x80 #x2500) ; BOX DRAWINGS LIGHT HORIZONTAL
5 (#x81 #x2502) ; BOX DRAWINGS LIGHT VERTICAL
6 (#x82 #x250C) ; BOX DRAWINGS LIGHT DOWN AND RIGHT
7 (#x83 #x2510) ; BOX DRAWINGS LIGHT DOWN AND LEFT
8 (#x84 #x2514) ; BOX DRAWINGS LIGHT UP AND RIGHT
9 (#x85 #x2518) ; BOX DRAWINGS LIGHT UP AND LEFT
10 (#x86 #x251C) ; BOX DRAWINGS LIGHT VERTICAL AND RIGHT
11 (#x87 #x2524) ; BOX DRAWINGS LIGHT VERTICAL AND LEFT
12 (#x88 #x252C) ; BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
13 (#x89 #x2534) ; BOX DRAWINGS LIGHT UP AND HORIZONTAL
14 (#x8A #x253C) ; BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
15 (#x8B #x2580) ; UPPER HALF BLOCK
16 (#x8C #x2584) ; LOWER HALF BLOCK
17 (#x8D #x2588) ; FULL BLOCK
18 (#x8E #x258C) ; LEFT HALF BLOCK
19 (#x8F #x2590) ; RIGHT HALF BLOCK
20 (#x90 #x2591) ; LIGHT SHADE
21 (#x91 #x2592) ; MEDIUM SHADE
22 (#x92 #x2593) ; DARK SHADE
23 (#x93 #x2320) ; UPPER HALF OF INTEGRAL
24 (#x94 #x25A0) ; BLACK SQUARE
25 (#x95 #x2219) ; BULLET OPERATOR
26 (#x96 #x221A) ; SQUARE ROOT
27 (#x97 #x2248) ; ALMOST EQUAL TO
28 (#x98 #x2264) ; LESS-THAN OR EQUAL TO
29 (#x99 #x2265) ; GREATER-THAN OR EQUAL TO
30 (#x9A #x00A0) ; NO-BREAK SPACE
31 (#x9B #x2321) ; LOWER HALF OF INTEGRAL
32 (#x9C #x00B0) ; DEGREE SIGN
33 (#x9D #x00B2) ; SUPERSCRIPT TWO
34 (#x9E #x00B7) ; MIDDLE DOT
35 (#x9F #x00F7) ; DIVISION SIGN
36 (#xA0 #x2550) ; BOX DRAWINGS DOUBLE HORIZONTAL
37 (#xA1 #x2551) ; BOX DRAWINGS DOUBLE VERTICAL
38 (#xA2 #x2552) ; BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
39 (#xA3 #x0451) ; CYRILLIC SMALL LETTER IO
40 (#xA4 #x2553) ; BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
41 (#xA5 #x2554) ; BOX DRAWINGS DOUBLE DOWN AND RIGHT
42 (#xA6 #x2555) ; BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
43 (#xA7 #x2556) ; BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
44 (#xA8 #x2557) ; BOX DRAWINGS DOUBLE DOWN AND LEFT
45 (#xA9 #x2558) ; BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
46 (#xAA #x2559) ; BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
47 (#xAB #x255A) ; BOX DRAWINGS DOUBLE UP AND RIGHT
48 (#xAC #x255B) ; BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
49 (#xAD #x255C) ; BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
50 (#xAE #x255D) ; BOX DRAWINGS DOUBLE UP AND LEFT
51 (#xAF #x255E) ; BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
52 (#xB0 #x255F) ; BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
53 (#xB1 #x2560) ; BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
54 (#xB2 #x2561) ; BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
55 (#xB3 #x0401) ; CYRILLIC CAPITAL LETTER IO
56 (#xB4 #x2562) ; BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
57 (#xB5 #x2563) ; BOX DRAWINGS DOUBLE VERTICAL AND LEFT
58 (#xB6 #x2564) ; BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
59 (#xB7 #x2565) ; BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
60 (#xB8 #x2566) ; BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
61 (#xB9 #x2567) ; BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
62 (#xBA #x2568) ; BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
63 (#xBB #x2569) ; BOX DRAWINGS DOUBLE UP AND HORIZONTAL
64 (#xBC #x256A) ; BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
65 (#xBD #x256B) ; BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
66 (#xBE #x256C) ; BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
67 (#xBF #x00A9) ; COPYRIGHT SIGN
68 (#xC0 #x044E) ; CYRILLIC SMALL LETTER YU
69 (#xC1 #x0430) ; CYRILLIC SMALL LETTER A
70 (#xC2 #x0431) ; CYRILLIC SMALL LETTER BE
71 (#xC3 #x0446) ; CYRILLIC SMALL LETTER TSE
72 (#xC4 #x0434) ; CYRILLIC SMALL LETTER DE
73 (#xC5 #x0435) ; CYRILLIC SMALL LETTER IE
74 (#xC6 #x0444) ; CYRILLIC SMALL LETTER EF
75 (#xC7 #x0433) ; CYRILLIC SMALL LETTER GHE
76 (#xC8 #x0445) ; CYRILLIC SMALL LETTER HA
77 (#xC9 #x0438) ; CYRILLIC SMALL LETTER I
78 (#xCA #x0439) ; CYRILLIC SMALL LETTER SHORT I
79 (#xCB #x043A) ; CYRILLIC SMALL LETTER KA
80 (#xCC #x043B) ; CYRILLIC SMALL LETTER EL
81 (#xCD #x043C) ; CYRILLIC SMALL LETTER EM
82 (#xCE #x043D) ; CYRILLIC SMALL LETTER EN
83 (#xCF #x043E) ; CYRILLIC SMALL LETTER O
84 (#xD0 #x043F) ; CYRILLIC SMALL LETTER PE
85 (#xD1 #x044F) ; CYRILLIC SMALL LETTER YA
86 (#xD2 #x0440) ; CYRILLIC SMALL LETTER ER
87 (#xD3 #x0441) ; CYRILLIC SMALL LETTER ES
88 (#xD4 #x0442) ; CYRILLIC SMALL LETTER TE
89 (#xD5 #x0443) ; CYRILLIC SMALL LETTER U
90 (#xD6 #x0436) ; CYRILLIC SMALL LETTER ZHE
91 (#xD7 #x0432) ; CYRILLIC SMALL LETTER VE
92 (#xD8 #x044C) ; CYRILLIC SMALL LETTER SOFT SIGN
93 (#xD9 #x044B) ; CYRILLIC SMALL LETTER YERU
94 (#xDA #x0437) ; CYRILLIC SMALL LETTER ZE
95 (#xDB #x0448) ; CYRILLIC SMALL LETTER SHA
96 (#xDC #x044D) ; CYRILLIC SMALL LETTER E
97 (#xDD #x0449) ; CYRILLIC SMALL LETTER SHCHA
98 (#xDE #x0447) ; CYRILLIC SMALL LETTER CHE
99 (#xDF #x044A) ; CYRILLIC SMALL LETTER HARD SIGN
100 (#xE0 #x042E) ; CYRILLIC CAPITAL LETTER YU
101 (#xE1 #x0410) ; CYRILLIC CAPITAL LETTER A
102 (#xE2 #x0411) ; CYRILLIC CAPITAL LETTER BE
103 (#xE3 #x0426) ; CYRILLIC CAPITAL LETTER TSE
104 (#xE4 #x0414) ; CYRILLIC CAPITAL LETTER DE
105 (#xE5 #x0415) ; CYRILLIC CAPITAL LETTER IE
106 (#xE6 #x0424) ; CYRILLIC CAPITAL LETTER EF
107 (#xE7 #x0413) ; CYRILLIC CAPITAL LETTER GHE
108 (#xE8 #x0425) ; CYRILLIC CAPITAL LETTER HA
109 (#xE9 #x0418) ; CYRILLIC CAPITAL LETTER I
110 (#xEA #x0419) ; CYRILLIC CAPITAL LETTER SHORT I
111 (#xEB #x041A) ; CYRILLIC CAPITAL LETTER KA
112 (#xEC #x041B) ; CYRILLIC CAPITAL LETTER EL
113 (#xED #x041C) ; CYRILLIC CAPITAL LETTER EM
114 (#xEE #x041D) ; CYRILLIC CAPITAL LETTER EN
115 (#xEF #x041E) ; CYRILLIC CAPITAL LETTER O
116 (#xF0 #x041F) ; CYRILLIC CAPITAL LETTER PE
117 (#xF1 #x042F) ; CYRILLIC CAPITAL LETTER YA
118 (#xF2 #x0420) ; CYRILLIC CAPITAL LETTER ER
119 (#xF3 #x0421) ; CYRILLIC CAPITAL LETTER ES
120 (#xF4 #x0422) ; CYRILLIC CAPITAL LETTER TE
121 (#xF5 #x0423) ; CYRILLIC CAPITAL LETTER U
122 (#xF6 #x0416) ; CYRILLIC CAPITAL LETTER ZHE
123 (#xF7 #x0412) ; CYRILLIC CAPITAL LETTER VE
124 (#xF8 #x042C) ; CYRILLIC CAPITAL LETTER SOFT SIGN
125 (#xF9 #x042B) ; CYRILLIC CAPITAL LETTER YERU
126 (#xFA #x0417) ; CYRILLIC CAPITAL LETTER ZE
127 (#xFB #x0428) ; CYRILLIC CAPITAL LETTER SHA
128 (#xFC #x042D) ; CYRILLIC CAPITAL LETTER E
129 (#xFD #x0429) ; CYRILLIC CAPITAL LETTER SHCHA
130 (#xFE #x0427) ; CYRILLIC CAPITAL LETTER CHE
131 (#xFF #x042A) ; CYRILLIC CAPITAL LETTER HARD SIGN
134 (declaim (inline get-koi8-r-bytes))
135 (defun get-koi8-r-bytes (string pos)
136 (declare (optimize speed (safety 0))
137 (type simple-string string)
138 (type array-range pos))
139 (get-latin-bytes #'code->koi8-r-mapper :koi8-r string pos))
141 (defun string->koi8-r (string sstart send null-padding)
142 (declare (optimize speed (safety 0))
143 (type simple-string string)
144 (type array-range sstart send))
145 (values (string->latin% string sstart send #'get-koi8-r-bytes null-padding)))
147 (defmacro define-koi8-r->string* (accessor type)
148 (declare (ignore type))
149 (let ((name (make-od-name 'koi8-r->string* accessor)))
150 `(progn
151 (defun ,name (string sstart send array astart aend)
152 (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'koi8-r->code-mapper)))))
154 (instantiate-octets-definition define-koi8-r->string*)
156 (defmacro define-koi8-r->string (accessor type)
157 (declare (ignore type))
158 `(defun ,(make-od-name 'koi8-r->string accessor) (array astart aend)
159 (,(make-od-name 'latin->string accessor) array astart aend #'koi8-r->code-mapper)))
161 (instantiate-octets-definition define-koi8-r->string)
163 (add-external-format-funs '(:koi8-r :|koi8-r|)
164 '(koi8-r->string-aref string->koi8-r))
166 (define-external-format (:koi8-r :|koi8-r|)
168 (let ((koi8-r-byte (code->koi8-r-mapper bits)))
169 (if koi8-r-byte
170 (setf (sap-ref-8 sap tail) koi8-r-byte)
171 (external-format-encoding-error stream bits)))
172 (let ((code (koi8-r->code-mapper byte)))
173 (if code
174 (code-char code)
175 (external-format-decoding-error stream byte)))) ;; TODO -- error check
177 (define-unibyte-mapper koi8-u->code-mapper code->koi8-u-mapper
178 (#x80 #x2500) ; BOX DRAWINGS LIGHT HORIZONTAL
179 (#x81 #x2502) ; BOX DRAWINGS LIGHT VERTICAL
180 (#x82 #x250C) ; BOX DRAWINGS LIGHT DOWN AND RIGHT
181 (#x83 #x2510) ; BOX DRAWINGS LIGHT DOWN AND LEFT
182 (#x84 #x2514) ; BOX DRAWINGS LIGHT UP AND RIGHT
183 (#x85 #x2518) ; BOX DRAWINGS LIGHT UP AND LEFT
184 (#x86 #x251C) ; BOX DRAWINGS LIGHT VERTICAL AND RIGHT
185 (#x87 #x2524) ; BOX DRAWINGS LIGHT VERTICAL AND LEFT
186 (#x88 #x252C) ; BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
187 (#x89 #x2534) ; BOX DRAWINGS LIGHT UP AND HORIZONTAL
188 (#x8A #x253C) ; BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
189 (#x8B #x2580) ; UPPER HALF BLOCK
190 (#x8C #x2584) ; LOWER HALF BLOCK
191 (#x8D #x2588) ; FULL BLOCK
192 (#x8E #x258C) ; LEFT HALF BLOCK
193 (#x8F #x2590) ; RIGHT HALF BLOCK
194 (#x90 #x2591) ; LIGHT SHADE
195 (#x91 #x2592) ; MEDIUM SHADE
196 (#x92 #x2593) ; DARK SHADE
197 (#x93 #x2320) ; TOP HALF INTEGRAL
198 (#x94 #x25A0) ; BLACK SQUARE
199 (#x95 #x2022) ; BULLET
200 (#x96 #x221A) ; SQUARE ROOT
201 (#x97 #x2248) ; ALMOST EQUAL TO
202 (#x98 #x2264) ; LESS-THAN OR EQUAL TO
203 (#x99 #x2265) ; GREATER-THAN OR EQUAL TO
204 (#x9A #x00A0) ; NO-BREAK SPACE
205 (#x9B #x2321) ; BOTTOM HALF INTEGRAL
206 (#x9C #x00B0) ; DEGREE SIGN
207 (#x9D #x00B2) ; SUPERSCRIPT TWO
208 (#x9E #x00B7) ; MIDDLE DOT
209 (#x9F #x00F7) ; DIVISION SIGN
210 (#xA0 #x2550) ; BOX DRAWINGS DOUBLE HORIZONTAL
211 (#xA1 #x2551) ; BOX DRAWINGS DOUBLE VERTICAL
212 (#xA2 #x2552) ; BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
213 (#xA3 #x0451) ; CYRILLIC SMALL LETTER IO
214 (#xA4 #x0454) ; CYRILLIC SMALL LETTER UKRAINIAN IE
215 (#xA5 #x2554) ; BOX DRAWINGS DOUBLE DOWN AND RIGHT
216 (#xA6 #x0456) ; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
217 (#xA7 #x0457) ; CYRILLIC SMALL LETTER YI
218 (#xA8 #x2557) ; BOX DRAWINGS DOUBLE DOWN AND LEFT
219 (#xA9 #x2558) ; BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
220 (#xAA #x2559) ; BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
221 (#xAB #x255A) ; BOX DRAWINGS DOUBLE UP AND RIGHT
222 (#xAC #x255B) ; BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
223 (#xAD #x0491) ; CYRILLIC SMALL LETTER GHE WITH UPTURN
224 (#xAE #x255D) ; BOX DRAWINGS DOUBLE UP AND LEFT
225 (#xAF #x255E) ; BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
226 (#xB0 #x255F) ; BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
227 (#xB1 #x2560) ; BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
228 (#xB2 #x2561) ; BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
229 (#xB3 #x0401) ; CYRILLIC CAPITAL LETTER IO
230 (#xB4 #x0404) ; CYRILLIC CAPITAL LETTER UKRAINIAN IE
231 (#xB5 #x2563) ; BOX DRAWINGS DOUBLE VERTICAL AND LEFT
232 (#xB6 #x0406) ; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
233 (#xB7 #x0407) ; CYRILLIC CAPITAL LETTER YI
234 (#xB8 #x2566) ; BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
235 (#xB9 #x2567) ; BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
236 (#xBA #x2568) ; BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
237 (#xBB #x2569) ; BOX DRAWINGS DOUBLE UP AND HORIZONTAL
238 (#xBC #x256A) ; BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
239 (#xBD #x0490) ; CYRILLIC CAPITAL LETTER GHE WITH UPTURN
240 (#xBE #x256C) ; BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
241 (#xBF #x00A9) ; COPYRIGHT SIGN
242 (#xC0 #x044E) ; CYRILLIC SMALL LETTER YU
243 (#xC1 #x0430) ; CYRILLIC SMALL LETTER A
244 (#xC2 #x0431) ; CYRILLIC SMALL LETTER BE
245 (#xC3 #x0446) ; CYRILLIC SMALL LETTER TSE
246 (#xC4 #x0434) ; CYRILLIC SMALL LETTER DE
247 (#xC5 #x0435) ; CYRILLIC SMALL LETTER IE
248 (#xC6 #x0444) ; CYRILLIC SMALL LETTER EF
249 (#xC7 #x0433) ; CYRILLIC SMALL LETTER GHE
250 (#xC8 #x0445) ; CYRILLIC SMALL LETTER HA
251 (#xC9 #x0438) ; CYRILLIC SMALL LETTER I
252 (#xCA #x0439) ; CYRILLIC SMALL LETTER SHORT I
253 (#xCB #x043A) ; CYRILLIC SMALL LETTER KA
254 (#xCC #x043B) ; CYRILLIC SMALL LETTER EL
255 (#xCD #x043C) ; CYRILLIC SMALL LETTER EM
256 (#xCE #x043D) ; CYRILLIC SMALL LETTER EN
257 (#xCF #x043E) ; CYRILLIC SMALL LETTER O
258 (#xD0 #x043F) ; CYRILLIC SMALL LETTER PE
259 (#xD1 #x044F) ; CYRILLIC SMALL LETTER YA
260 (#xD2 #x0440) ; CYRILLIC SMALL LETTER ER
261 (#xD3 #x0441) ; CYRILLIC SMALL LETTER ES
262 (#xD4 #x0442) ; CYRILLIC SMALL LETTER TE
263 (#xD5 #x0443) ; CYRILLIC SMALL LETTER U
264 (#xD6 #x0436) ; CYRILLIC SMALL LETTER ZHE
265 (#xD7 #x0432) ; CYRILLIC SMALL LETTER VE
266 (#xD8 #x044C) ; CYRILLIC SMALL LETTER SOFT SIGN
267 (#xD9 #x044B) ; CYRILLIC SMALL LETTER YERU
268 (#xDA #x0437) ; CYRILLIC SMALL LETTER ZE
269 (#xDB #x0448) ; CYRILLIC SMALL LETTER SHA
270 (#xDC #x044D) ; CYRILLIC SMALL LETTER E
271 (#xDD #x0449) ; CYRILLIC SMALL LETTER SHCHA
272 (#xDE #x0447) ; CYRILLIC SMALL LETTER CHE
273 (#xDF #x044A) ; CYRILLIC SMALL LETTER HARD SIGN
274 (#xE0 #x042E) ; CYRILLIC CAPITAL LETTER YU
275 (#xE1 #x0410) ; CYRILLIC CAPITAL LETTER A
276 (#xE2 #x0411) ; CYRILLIC CAPITAL LETTER BE
277 (#xE3 #x0426) ; CYRILLIC CAPITAL LETTER TSE
278 (#xE4 #x0414) ; CYRILLIC CAPITAL LETTER DE
279 (#xE5 #x0415) ; CYRILLIC CAPITAL LETTER IE
280 (#xE6 #x0424) ; CYRILLIC CAPITAL LETTER EF
281 (#xE7 #x0413) ; CYRILLIC CAPITAL LETTER GHE
282 (#xE8 #x0425) ; CYRILLIC CAPITAL LETTER HA
283 (#xE9 #x0418) ; CYRILLIC CAPITAL LETTER I
284 (#xEA #x0419) ; CYRILLIC CAPITAL LETTER SHORT I
285 (#xEB #x041A) ; CYRILLIC CAPITAL LETTER KA
286 (#xEC #x041B) ; CYRILLIC CAPITAL LETTER EL
287 (#xED #x041C) ; CYRILLIC CAPITAL LETTER EM
288 (#xEE #x041D) ; CYRILLIC CAPITAL LETTER EN
289 (#xEF #x041E) ; CYRILLIC CAPITAL LETTER O
290 (#xF0 #x041F) ; CYRILLIC CAPITAL LETTER PE
291 (#xF1 #x042F) ; CYRILLIC CAPITAL LETTER YA
292 (#xF2 #x0420) ; CYRILLIC CAPITAL LETTER ER
293 (#xF3 #x0421) ; CYRILLIC CAPITAL LETTER ES
294 (#xF4 #x0422) ; CYRILLIC CAPITAL LETTER TE
295 (#xF5 #x0423) ; CYRILLIC CAPITAL LETTER U
296 (#xF6 #x0416) ; CYRILLIC CAPITAL LETTER ZHE
297 (#xF7 #x0412) ; CYRILLIC CAPITAL LETTER VE
298 (#xF8 #x042C) ; CYRILLIC CAPITAL LETTER SOFT SIGN
299 (#xF9 #x042B) ; CYRILLIC CAPITAL LETTER YERU
300 (#xFA #x0417) ; CYRILLIC CAPITAL LETTER ZE
301 (#xFB #x0428) ; CYRILLIC CAPITAL LETTER SHA
302 (#xFC #x042D) ; CYRILLIC CAPITAL LETTER E
303 (#xFD #x0429) ; CYRILLIC CAPITAL LETTER SHCHA
304 (#xFE #x0427) ; CYRILLIC CAPITAL LETTER CHE
305 (#xFF #x042A) ; CYRILLIC CAPITAL LETTER HARD SIGN
308 (declaim (inline get-koi8-u-bytes))
309 (defun get-koi8-u-bytes (string pos)
310 (declare (optimize speed (safety 0))
311 (type simple-string string)
312 (type array-range pos))
313 (get-latin-bytes #'code->koi8-u-mapper :koi8-u string pos))
315 (defun string->koi8-u (string sstart send null-padding)
316 (declare (optimize speed (safety 0))
317 (type simple-string string)
318 (type array-range sstart send))
319 (values (string->latin% string sstart send #'get-koi8-u-bytes null-padding)))
321 (defmacro define-koi8-u->string* (accessor type)
322 (declare (ignore type))
323 (let ((name (make-od-name 'koi8-u->string* accessor)))
324 `(progn
325 (defun ,name (string sstart send array astart aend)
326 (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'koi8-u->code-mapper)))))
328 (instantiate-octets-definition define-koi8-u->string*)
330 (defmacro define-koi8-u->string (accessor type)
331 (declare (ignore type))
332 `(defun ,(make-od-name 'koi8-u->string accessor) (array astart aend)
333 (,(make-od-name 'latin->string accessor) array astart aend #'koi8-u->code-mapper)))
335 (instantiate-octets-definition define-koi8-u->string)
337 (add-external-format-funs '(:koi8-u :|koi8-u|)
338 '(koi8-u->string-aref string->koi8-u))
340 (define-external-format (:koi8-u :|koi8-u|)
342 (let ((koi8-u-byte (code->koi8-u-mapper bits)))
343 (if koi8-u-byte
344 (setf (sap-ref-8 sap tail) koi8-u-byte)
345 (external-format-encoding-error stream bits)))
346 (let ((code (koi8-u->code-mapper byte)))
347 (if code
348 (code-char code)
349 (external-format-decoding-error stream byte)))) ;; TODO -- error check
351 (define-unibyte-mapper x-mac-cyrillic->code-mapper code->x-mac-cyrillic-mapper
352 (#x80 #x0410) ; CYRILLIC CAPITAL LETTER A
353 (#x81 #x0411) ; CYRILLIC CAPITAL LETTER BE
354 (#x82 #x0412) ; CYRILLIC CAPITAL LETTER VE
355 (#x83 #x0413) ; CYRILLIC CAPITAL LETTER GHE
356 (#x84 #x0414) ; CYRILLIC CAPITAL LETTER DE
357 (#x85 #x0415) ; CYRILLIC CAPITAL LETTER IE
358 (#x86 #x0416) ; CYRILLIC CAPITAL LETTER ZHE
359 (#x87 #x0417) ; CYRILLIC CAPITAL LETTER ZE
360 (#x88 #x0418) ; CYRILLIC CAPITAL LETTER I
361 (#x89 #x0419) ; CYRILLIC CAPITAL LETTER SHORT I
362 (#x8A #x041A) ; CYRILLIC CAPITAL LETTER KA
363 (#x8B #x041B) ; CYRILLIC CAPITAL LETTER EL
364 (#x8C #x041C) ; CYRILLIC CAPITAL LETTER EM
365 (#x8D #x041D) ; CYRILLIC CAPITAL LETTER EN
366 (#x8E #x041E) ; CYRILLIC CAPITAL LETTER O
367 (#x8F #x041F) ; CYRILLIC CAPITAL LETTER PE
368 (#x90 #x0420) ; CYRILLIC CAPITAL LETTER ER
369 (#x91 #x0421) ; CYRILLIC CAPITAL LETTER ES
370 (#x92 #x0422) ; CYRILLIC CAPITAL LETTER TE
371 (#x93 #x0423) ; CYRILLIC CAPITAL LETTER U
372 (#x94 #x0424) ; CYRILLIC CAPITAL LETTER EF
373 (#x95 #x0425) ; CYRILLIC CAPITAL LETTER HA
374 (#x96 #x0426) ; CYRILLIC CAPITAL LETTER TSE
375 (#x97 #x0427) ; CYRILLIC CAPITAL LETTER CHE
376 (#x98 #x0428) ; CYRILLIC CAPITAL LETTER SHA
377 (#x99 #x0429) ; CYRILLIC CAPITAL LETTER SHCHA
378 (#x9A #x042A) ; CYRILLIC CAPITAL LETTER HARD SIGN
379 (#x9B #x042B) ; CYRILLIC CAPITAL LETTER YERU
380 (#x9C #x042C) ; CYRILLIC CAPITAL LETTER SOFT SIGN
381 (#x9D #x042D) ; CYRILLIC CAPITAL LETTER E
382 (#x9E #x042E) ; CYRILLIC CAPITAL LETTER YU
383 (#x9F #x042F) ; CYRILLIC CAPITAL LETTER YA
384 (#xA0 #x2020) ; DAGGER
385 (#xA1 #x00B0) ; DEGREE SIGN
386 (#xA4 #x00A7) ; SECTION SIGN
387 (#xA5 #x2022) ; BULLET
388 (#xA6 #x00B6) ; PILCROW SIGN
389 (#xA7 #x0406) ; CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
390 (#xA8 #x00AE) ; REGISTERED SIGN
391 (#xAA #x2122) ; TRADE MARK SIGN
392 (#xAB #x0402) ; CYRILLIC CAPITAL LETTER DJE
393 (#xAC #x0452) ; CYRILLIC SMALL LETTER DJE
394 (#xAD #x2260) ; NOT EQUAL TO
395 (#xAE #x0403) ; CYRILLIC CAPITAL LETTER GJE
396 (#xAF #x0453) ; CYRILLIC SMALL LETTER GJE
397 (#xB0 #x221E) ; INFINITY
398 (#xB2 #x2264) ; LESS-THAN OR EQUAL TO
399 (#xB3 #x2265) ; GREATER-THAN OR EQUAL TO
400 (#xB4 #x0456) ; CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
401 (#xB6 #x2202) ; PARTIAL DIFFERENTIAL
402 (#xB7 #x0408) ; CYRILLIC CAPITAL LETTER JE
403 (#xB8 #x0404) ; CYRILLIC CAPITAL LETTER UKRAINIAN IE
404 (#xB9 #x0454) ; CYRILLIC SMALL LETTER UKRAINIAN IE
405 (#xBA #x0407) ; CYRILLIC CAPITAL LETTER YI
406 (#xBB #x0457) ; CYRILLIC SMALL LETTER YI
407 (#xBC #x0409) ; CYRILLIC CAPITAL LETTER LJE
408 (#xBD #x0459) ; CYRILLIC SMALL LETTER LJE
409 (#xBE #x040A) ; CYRILLIC CAPITAL LETTER NJE
410 (#xBF #x045A) ; CYRILLIC SMALL LETTER NJE
411 (#xC0 #x0458) ; CYRILLIC SMALL LETTER JE
412 (#xC1 #x0405) ; CYRILLIC CAPITAL LETTER DZE
413 (#xC2 #x00AC) ; NOT SIGN
414 (#xC3 #x221A) ; SQUARE ROOT
415 (#xC4 #x0192) ; LATIN SMALL LETTER F WITH HOOK
416 (#xC5 #x2248) ; ALMOST EQUAL TO
417 (#xC6 #x2206) ; INCREMENT
418 (#xC7 #x00AB) ; LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
419 (#xC8 #x00BB) ; RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
420 (#xC9 #x2026) ; HORIZONTAL ELLIPSIS
421 (#xCA #x00A0) ; NO-BREAK SPACE
422 (#xCB #x040B) ; CYRILLIC CAPITAL LETTER TSHE
423 (#xCC #x045B) ; CYRILLIC SMALL LETTER TSHE
424 (#xCD #x040C) ; CYRILLIC CAPITAL LETTER KJE
425 (#xCE #x045C) ; CYRILLIC SMALL LETTER KJE
426 (#xCF #x0455) ; CYRILLIC SMALL LETTER DZE
427 (#xD0 #x2013) ; EN DASH
428 (#xD1 #x2014) ; EM DASH
429 (#xD2 #x201C) ; LEFT DOUBLE QUOTATION MARK
430 (#xD3 #x201D) ; RIGHT DOUBLE QUOTATION MARK
431 (#xD4 #x2018) ; LEFT SINGLE QUOTATION MARK
432 (#xD5 #x2019) ; RIGHT SINGLE QUOTATION MARK
433 (#xD6 #x00F7) ; DIVISION SIGN
434 (#xD7 #x201E) ; DOUBLE LOW-9 QUOTATION MARK
435 (#xD8 #x040E) ; CYRILLIC CAPITAL LETTER SHORT U
436 (#xD9 #x045E) ; CYRILLIC SMALL LETTER SHORT U
437 (#xDA #x040F) ; CYRILLIC CAPITAL LETTER DZHE
438 (#xDB #x045F) ; CYRILLIC SMALL LETTER DZHE
439 (#xDC #x2116) ; NUMERO SIGN
440 (#xDD #x0401) ; CYRILLIC CAPITAL LETTER IO
441 (#xDE #x0451) ; CYRILLIC SMALL LETTER IO
442 (#xDF #x044F) ; CYRILLIC SMALL LETTER YA
443 (#xE0 #x0430) ; CYRILLIC SMALL LETTER A
444 (#xE1 #x0431) ; CYRILLIC SMALL LETTER BE
445 (#xE2 #x0432) ; CYRILLIC SMALL LETTER VE
446 (#xE3 #x0433) ; CYRILLIC SMALL LETTER GHE
447 (#xE4 #x0434) ; CYRILLIC SMALL LETTER DE
448 (#xE5 #x0435) ; CYRILLIC SMALL LETTER IE
449 (#xE6 #x0436) ; CYRILLIC SMALL LETTER ZHE
450 (#xE7 #x0437) ; CYRILLIC SMALL LETTER ZE
451 (#xE8 #x0438) ; CYRILLIC SMALL LETTER I
452 (#xE9 #x0439) ; CYRILLIC SMALL LETTER SHORT I
453 (#xEA #x043A) ; CYRILLIC SMALL LETTER KA
454 (#xEB #x043B) ; CYRILLIC SMALL LETTER EL
455 (#xEC #x043C) ; CYRILLIC SMALL LETTER EM
456 (#xED #x043D) ; CYRILLIC SMALL LETTER EN
457 (#xEE #x043E) ; CYRILLIC SMALL LETTER O
458 (#xEF #x043F) ; CYRILLIC SMALL LETTER PE
459 (#xF0 #x0440) ; CYRILLIC SMALL LETTER ER
460 (#xF1 #x0441) ; CYRILLIC SMALL LETTER ES
461 (#xF2 #x0442) ; CYRILLIC SMALL LETTER TE
462 (#xF3 #x0443) ; CYRILLIC SMALL LETTER U
463 (#xF4 #x0444) ; CYRILLIC SMALL LETTER EF
464 (#xF5 #x0445) ; CYRILLIC SMALL LETTER HA
465 (#xF6 #x0446) ; CYRILLIC SMALL LETTER TSE
466 (#xF7 #x0447) ; CYRILLIC SMALL LETTER CHE
467 (#xF8 #x0448) ; CYRILLIC SMALL LETTER SHA
468 (#xF9 #x0449) ; CYRILLIC SMALL LETTER SHCHA
469 (#xFA #x044A) ; CYRILLIC SMALL LETTER HARD SIGN
470 (#xFB #x044B) ; CYRILLIC SMALL LETTER YERU
471 (#xFC #x044C) ; CYRILLIC SMALL LETTER SOFT SIGN
472 (#xFD #x044D) ; CYRILLIC SMALL LETTER E
473 (#xFE #x044E) ; CYRILLIC SMALL LETTER YU
474 (#xFF #x00A4) ; CURRENCY SIGN
477 (declaim (inline get-x-mac-cyrillic-bytes))
478 (defun get-x-mac-cyrillic-bytes (string pos)
479 (declare (optimize speed (safety 0))
480 (type simple-string string)
481 (type array-range pos))
482 (get-latin-bytes #'code->x-mac-cyrillic-mapper :x-mac-cyrillic string pos))
484 (defun string->x-mac-cyrillic (string sstart send null-padding)
485 (declare (optimize speed (safety 0))
486 (type simple-string string)
487 (type array-range sstart send))
488 (values (string->latin% string sstart send #'get-x-mac-cyrillic-bytes null-padding)))
490 (defmacro define-x-mac-cyrillic->string* (accessor type)
491 (declare (ignore type))
492 (let ((name (make-od-name 'x-mac-cyrillic->string* accessor)))
493 `(progn
494 (defun ,name (string sstart send array astart aend)
495 (,(make-od-name 'latin->string* accessor) string sstart send array astart aend #'x-mac-cyrillic->code-mapper)))))
497 (instantiate-octets-definition define-x-mac-cyrillic->string*)
499 (defmacro define-x-mac-cyrillic->string (accessor type)
500 (declare (ignore type))
501 `(defun ,(make-od-name 'x-mac-cyrillic->string accessor) (array astart aend)
502 (,(make-od-name 'latin->string accessor) array astart aend #'x-mac-cyrillic->code-mapper)))
504 (instantiate-octets-definition define-x-mac-cyrillic->string)
506 (add-external-format-funs '(:x-mac-cyrillic :|x-mac-cyrillic|)
507 '(x-mac-cyrillic->string-aref string->x-mac-cyrillic))
509 (define-external-format (:x-mac-cyrillic :|x-mac-cyrillic|)
511 (let ((x-mac-cyrillic-byte (code->x-mac-cyrillic-mapper bits)))
512 (if x-mac-cyrillic-byte
513 (setf (sap-ref-8 sap tail) x-mac-cyrillic-byte)
514 (external-format-encoding-error stream bits)))
515 (let ((code (x-mac-cyrillic->code-mapper byte)))
516 (if code
517 (code-char code)
518 (external-format-decoding-error stream byte)))) ;; TODO -- error check