12 /* The abstracted result of a CU12 insn */
14 uint64_t addr1
; // target
16 uint64_t addr2
; // source
21 /* Define various input buffers. */
23 /* 1-byte UTF-8 character */
24 uint8_t pattern1
[] = {
25 0x00, 0x01, 0x02, 0x03
28 /* 2-byte UTF-8 character */
29 uint8_t pattern2
[] = {
36 /* 3-byte UTF-8 character */
37 uint8_t pattern3
[] = {
44 /* 4-byte UTF-8 character */
45 uint8_t pattern4
[] = {
46 0xf4, 0x80, 0x80, 0x80,
47 0xf4, 0x80, 0x80, 0x81,
48 0xf4, 0x80, 0x80, 0x82,
49 0xf4, 0x80, 0x80, 0x83,
56 0xc3, 0x80, // 2 bytes
58 0xe1, 0x90, 0x93, // 3 bytes
60 0xf4, 0x80, 0x90, 0x8a, // 4 bytes
62 0xc4, 0x8c, // 2 bytes
63 0xe1, 0x91, 0x94, // 3 bytes
64 0xc5, 0x8a, // 2 bytes
65 0xf4, 0x80, 0x90, 0x8a, // 4 bytes
66 0xc5, 0x8a, // 2 bytes
67 0xe1, 0x91, 0x94, // 3 bytes
68 0xf4, 0x80, 0x90, 0x8a, // 4 bytes
69 0xe1, 0x91, 0x94, // 3 bytes
72 /* This is the buffer for the converted bytes. */
73 uint16_t buff
[1000]; /* Large so we don't have to worry about it */
77 do_cu12(uint16_t *dst
, uint64_t dst_len
, uint8_t *src
, uint64_t src_len
)
82 /* build up the register pairs */
83 register uint8_t *source
asm("4") = src
;
84 register uint64_t source_len
asm("5") = src_len
;
85 register uint16_t *dest
asm("2") = dst
;
86 register uint64_t dest_len
asm("3") = dst_len
;
92 : "+d"(dest
), "+d"(source
), "=d"(cc
),
93 "+d"(source_len
), "+d"(dest_len
)
97 /* Capture register contents at end of cu12 */
98 regs
.addr1
= (uint64_t)dest
;
100 regs
.addr2
= (uint64_t)source
;
101 regs
.len2
= source_len
;
108 run_test(uint16_t *dst
, uint64_t dst_len
, uint8_t *src
, uint64_t src_len
)
117 for(i
= 0; i
< src_len
; ++i
)
118 printf(" %02x", src
[i
]);
122 result
= do_cu12(dst
, dst_len
, src
, src_len
);
124 // Write out the converted bytes, if any
126 if (dst_len
- result
.len1
== 0)
129 uint64_t num_bytes
= dst_len
- result
.len1
;
131 /* The number of bytes that were written must be divisible by 2 */
132 if (num_bytes
% 2 != 0)
133 fprintf(stderr
, "*** number of bytes is not a multiple of 2\n");
135 for (i
= 0; i
< num_bytes
/ 2; i
++) {
136 printf(" %04x", dst
[i
]);
141 printf(" cc = %d\n", result
.cc
);
143 printf(" dst address difference: %"PRId64
, result
.addr1
- (uint64_t)dst
);
144 printf(" dst len: %"PRId64
"\n", result
.len1
);
147 printf(" src address difference: %"PRId64
, result
.addr2
- (uint64_t)src
);
148 printf(" src len: %"PRId64
"\n", result
.len2
);
151 // Test conversion of a one-byte character
152 void convert_1_byte(void)
156 printf("===== Conversion of a one-byte character =====\n");
158 printf("\n----- Valid characters -----\n");
160 0x00, 0x7f, // corner cases
161 0x01, 0x10, 0x7e, 0x5d // misc
163 run_test(buff
, sizeof buff
, valid
, sizeof valid
);
165 // As conversion stops upon encountering an invalid character, we
166 // need to test each invalid character separately, to make sure it
167 // is recognized as invalid.
169 printf("\n----- Invalid characters -----\n");
170 uint8_t always_invalid
[] = {
171 0x80, 0xbf, // corner cases
172 0xf8, 0xff, // corner cases
173 0x81, 0xbe, 0x95, 0xab // misc
175 for (i
= 0; i
< sizeof always_invalid
; ++i
) {
176 uint8_t invalid_char
[1];
177 invalid_char
[0] = always_invalid
[i
];
178 run_test(buff
, sizeof buff
, invalid_char
, sizeof invalid_char
);
181 // In case of m3 == 0 we get cc=0 indicating exhaustion of source
182 printf("\n----- Invalid characters if m3 == 1 -----\n");
183 uint8_t invalid_if_m3
[] = { // contains all such invalid characters
187 for (i
= 0; i
< sizeof invalid_if_m3
; ++i
) {
188 uint8_t invalid_char
[1];
189 invalid_char
[0] = invalid_if_m3
[i
];
190 run_test(buff
, sizeof buff
, invalid_char
, sizeof invalid_char
);
193 printf("\n----- 1st char valid, 2nd char invalid -----\n");
194 uint8_t valid_invalid
[] = {
198 run_test(buff
, sizeof buff
, valid_invalid
, sizeof valid_invalid
);
201 // Test conversion of a two-byte character
202 void convert_2_bytes(void)
206 printf("\n===== Conversion of a two-byte character =====\n");
208 printf("\n----- Valid characters -----\n");
210 0xc2, 0x80, // corner case
211 0xc2, 0xbf, // corner case
212 0xdf, 0x80, // corner case
213 0xdf, 0xbf, // corner case
214 0xc3, 0xbe, 0xda, 0xbc // misc
216 run_test(buff
, sizeof buff
, valid
, sizeof valid
);
218 printf("\n----- Valid characters if m3 == 0 -----\n");
219 // First char is 0xc0 or 0xc1
220 uint8_t valid_if_not_m3
[] = {
226 run_test(buff
, sizeof buff
, valid_if_not_m3
, sizeof valid_if_not_m3
);
228 // Test for invalid two-byte characters where the 1st byte is valid
229 // The 2nd byte is invalid if not in range 0x80..0xbf, inclusive
231 // As conversion stops upon encountering an invalid character, we
232 // need to test each invalid character separately, to make sure it
233 // is recognized as invalid.
235 printf("\n----- Invalid characters if m3 == 1 -----\n");
236 uint8_t always_invalid
[] = {
242 for (i
= 0; i
< sizeof always_invalid
; i
+= 2) {
243 uint8_t invalid_char
[2];
244 invalid_char
[0] = always_invalid
[i
];
245 invalid_char
[1] = always_invalid
[i
+1];
246 run_test(buff
, sizeof buff
, invalid_char
, sizeof invalid_char
);
249 /* Nb: for a two-byte character we need not test the case where
250 invalidity of the character (cc=2) takes precedence over exhaustion
251 of the 1st operand (cc=1). Invalidity of the character has already
252 been tested when testing the 1st byte. */
254 printf("\n----- 1st char valid, 2nd char invalid -----\n");
255 uint8_t valid_invalid
[] = {
257 0xc4, 0x00 // invalid
259 run_test(buff
, sizeof buff
, valid_invalid
, sizeof valid_invalid
);
262 // Test conversion of a three-byte character
264 convert_3_bytes(void)
268 printf("\n===== Conversion of a three-byte character =====\n");
270 /* Exhaustively test the 1st byte E0 - EF, and the interval boundaries for
271 the 2nd and 3rd bytes */
272 printf("\n----- Valid characters -----\n");
278 0xe0, 0xaa, 0xbb, // random e0 .. ..
280 run_test(buff
, sizeof buff
, e0
, sizeof e0
);
287 0xed, 0x8a, 0xbb, // random ed .. ..
289 run_test(buff
, sizeof buff
, ed
, sizeof ed
);
291 for (i
= 0; i
<= 0xf; ++i
) {
292 uint8_t exxx_1
[3] = { 0x0, 0x80, 0x80 };
293 uint8_t exxx_2
[3] = { 0x0, 0xbf, 0x80 };
294 uint8_t exxx_3
[3] = { 0x0, 0x80, 0xbf };
295 uint8_t exxx_4
[3] = { 0x0, 0xbf, 0xbf };
297 if (i
== 0x00) continue; // special case e0
298 if (i
== 0x0d) continue; // special case ed
300 exxx_1
[0] = 0xe0 | i
;
301 exxx_2
[0] = 0xe0 | i
;
302 exxx_3
[0] = 0xe0 | i
;
303 exxx_4
[0] = 0xe0 | i
;
304 run_test(buff
, sizeof buff
, exxx_1
, sizeof exxx_1
);
305 run_test(buff
, sizeof buff
, exxx_2
, sizeof exxx_2
);
306 run_test(buff
, sizeof buff
, exxx_3
, sizeof exxx_3
);
307 run_test(buff
, sizeof buff
, exxx_4
, sizeof exxx_4
);
310 printf("\n----- Invalid characters (2nd byte is invalid) -----\n");
311 // Test for invalid three-byte characters where the 1st byte is valid
312 // The 2nd byte is invalid.
314 // As conversion stops upon encountering an invalid character, we
315 // need to test each invalid character separately, to make sure it
316 // is recognized as invalid.
318 e0
[0] = 0xe0; // valid
319 e0
[1] = 0x9f; // invalid because outside [0xa0 .. 0xbf]
320 e0
[2] = 0x80; // valid
321 run_test(buff
, sizeof buff
, e0
, sizeof e0
);
322 e0
[1] = 0xc0; // invalid because outside [0xa0 .. 0xbf]
323 run_test(buff
, sizeof buff
, e0
, sizeof e0
);
325 ed
[0] = 0xed; // valid
326 ed
[1] = 0x7f; // invalid because outside [0x80 .. 0x9f]
327 ed
[2] = 0x80; // valid
328 run_test(buff
, sizeof buff
, ed
, sizeof ed
);
329 ed
[1] = 0xa0; // invalid because outside [0x80 .. 0x9f]
330 run_test(buff
, sizeof buff
, ed
, sizeof ed
);
332 for (i
= 0; i
<= 0xf; ++i
) {
333 uint8_t exxx_1
[3] = { 0x0, 0x7f, 0x80 };
334 uint8_t exxx_2
[3] = { 0x0, 0xc0, 0x80 };
336 if (i
== 0x00) continue; // special case e0
337 if (i
== 0x0d) continue; // special case ed
339 exxx_1
[0] = 0xe0 | i
;
340 exxx_2
[0] = 0xe0 | i
;
341 run_test(buff
, sizeof buff
, exxx_1
, sizeof exxx_1
);
342 run_test(buff
, sizeof buff
, exxx_2
, sizeof exxx_2
);
345 printf("\n----- Invalid characters (3rd byte is invalid) -----\n");
346 // For all 1st bytes 0xe0 .. 0xef the 3rd bytes must be in [0x80 .. 0xbf]
347 // No need to special case 0xe0 and 0xed
348 for (i
= 0; i
<= 0xf; ++i
) {
349 uint8_t exxx_1
[3] = { 0x0, 0xab, 0x7f };
350 uint8_t exxx_2
[3] = { 0x0, 0xab, 0xc0 };
352 exxx_1
[0] = 0xe0 | i
;
353 exxx_2
[0] = 0xe0 | i
;
354 run_test(buff
, sizeof buff
, exxx_1
, sizeof exxx_1
);
355 run_test(buff
, sizeof buff
, exxx_2
, sizeof exxx_2
);
358 printf("\n----- Invalid 2nd char AND output exhausted -----\n");
359 /* The character is invalid in its 2nd byte AND the output buffer is
360 exhausted (2 bytes are needed) */
364 run_test(buff
, 1, pat1
, 3);
366 printf("\n----- Invalid 3rd char AND output exhausted -----\n");
367 /* The character is invalid in its 3rd byte AND the output buffer is
368 exhausted (2 bytes are needed) */
372 run_test(buff
, 1, pat2
, 3);
374 printf("\n----- 1st char valid, 2nd char invalid -----\n");
375 uint8_t valid_invalid
[] = {
376 0xe1, 0x90, 0x90, // valid
377 0xe1, 0x00, 0x90 // invalid
379 run_test(buff
, sizeof buff
, valid_invalid
, sizeof valid_invalid
);
382 // Test conversion of a four-byte character
384 convert_4_bytes(void)
388 printf("\n===== Conversion of a four-byte character =====\n");
390 printf("\n----- Valid characters -----\n");
391 for (i
= 0; i
<= 4; ++i
) {
396 for (j
= 0; j
<= 1; ++j
) {
399 valid
[1] = j
== 0 ? 0x90 : 0xbf; // 0xf0
401 valid
[1] = j
== 0 ? 0x80 : 0x8f; // 0xf4
403 valid
[1] = j
== 0 ? 0x80 : 0xbf; // 0xf1 .. 0xf3
405 // Byte 3 and byte 4 have same interval 0x80 .. 0xbf
408 run_test(buff
, sizeof buff
, valid
, sizeof valid
);
411 run_test(buff
, sizeof buff
, valid
, sizeof valid
);
414 run_test(buff
, sizeof buff
, valid
, sizeof valid
);
417 run_test(buff
, sizeof buff
, valid
, sizeof valid
);
421 printf("\n----- Valid characters if m3 == 0 -----\n");
422 // First char is 0xf5 .. 0xf7
423 uint8_t valid_if_not_m3
[] = {
424 0xf5, 0x00, 0x00, 0x00,
425 0xf6, 0x11, 0x22, 0x33,
426 0xf7, 0x44, 0x55, 0x66,
428 run_test(buff
, sizeof buff
, valid_if_not_m3
, sizeof valid_if_not_m3
);
430 // As conversion stops upon encountering an invalid character, we
431 // need to test each invalid character separately, to make sure it
432 // is recognized as invalid.
434 printf("\n----- Invalid characters (2nd byte is invalid) -----\n");
435 // Test for invalid four-byte characters where the 2nd byte is invalid.
436 // All other bytes are valid
437 uint8_t f0
[4], f4
[4];
439 f0
[0] = 0xf0; // valid
440 f0
[1] = 0x8f; // invalid because outside [0x90 .. 0xbf]
441 f0
[2] = 0x80; // valid
442 f0
[3] = 0x80; // valid
443 run_test(buff
, sizeof buff
, f0
, sizeof f0
);
444 f0
[1] = 0xc0; // invalid because outside [0x90 .. 0xbf]
445 run_test(buff
, sizeof buff
, f0
, sizeof f0
);
447 f4
[0] = 0xf4; // valid
448 f4
[1] = 0x7f; // invalid because outside [0x80 .. 0x8f]
449 f4
[2] = 0x80; // valid
450 f4
[3] = 0x80; // valid
451 run_test(buff
, sizeof buff
, f4
, sizeof f4
);
452 f4
[1] = 0x90; // invalid because outside [0x80 .. 0x8f]
453 run_test(buff
, sizeof buff
, f4
, sizeof f4
);
455 for (i
= 0; i
<= 0x4; ++i
) {
456 uint8_t fxxx_1
[4] = { 0x0, 0x7f, 0x80, 0x80 };
457 uint8_t fxxx_2
[4] = { 0x0, 0xc0, 0x80, 0x80 };
459 if (i
== 0) continue; // special case f0
460 if (i
== 4) continue; // special case f4
462 fxxx_1
[0] = 0xf0 | i
;
463 fxxx_2
[0] = 0xf0 | i
;
464 run_test(buff
, sizeof buff
, fxxx_1
, sizeof fxxx_1
);
465 run_test(buff
, sizeof buff
, fxxx_2
, sizeof fxxx_2
);
468 printf("\n----- Invalid characters (3rd byte is invalid) -----\n");
469 // Test for invalid four-byte characters where the 3rd byte is invalid.
470 // All other bytes are valid
471 for (i
= 0; i
<= 0x4; ++i
) {
472 uint8_t fxxx
[4] = { 0x0, 0x0, 0x0, 0x80 };
475 fxxx
[1] = (i
== 0) ? 0x94 : 0x84;
477 run_test(buff
, sizeof buff
, fxxx
, sizeof fxxx
);
479 run_test(buff
, sizeof buff
, fxxx
, sizeof fxxx
);
482 printf("\n----- Invalid characters (4th byte is invalid) -----\n");
483 // Test for invalid four-byte characters where the 4th byte is invalid.
484 // All other bytes are valid
485 for (i
= 0; i
<= 0x4; ++i
) {
486 uint8_t fxxx
[4] = { 0x0, 0x0, 0x80, 0x0 };
489 fxxx
[1] = (i
== 0) ? 0x94 : 0x84;
491 run_test(buff
, sizeof buff
, fxxx
, sizeof fxxx
);
493 run_test(buff
, sizeof buff
, fxxx
, sizeof fxxx
);
496 printf("\n----- Invalid 2nd char AND output exhausted -----\n");
497 /* The character is invalid in its 2nd byte AND the output buffer is
498 exhausted (4 bytes are needed) */
500 0xf0, 0x00, 0x80, 0x80
502 run_test(buff
, 1, pat1
, 4);
504 printf("\n----- Invalid 3rd char AND output exhausted -----\n");
505 /* The character is invalid in its 3rd byte AND the output buffer is
506 exhausted (4 bytes are needed) */
508 0xf0, 0xaa, 0x00, 0x80
510 run_test(buff
, 3, pat2
, 4);
512 printf("\n----- Invalid 4th char AND output exhausted -----\n");
513 /* The character is invalid in its 4th byte AND the output buffer is
514 exhausted (4 bytes are needed) */
516 0xf0, 0xaa, 0xaa, 0x00
518 run_test(buff
, 3, pat3
, 4);
520 printf("\n----- 1st char valid, 2nd char invalid -----\n");
521 uint8_t valid_invalid
[] = {
522 0xf0, 0xaa, 0xaa, 0xaa, // valid
523 0xf0, 0x00, 0x00, 0x00 // invalid
525 run_test(buff
, sizeof buff
, valid_invalid
, sizeof valid_invalid
);
536 /* Length == 0, no memory should be read or written */
537 printf("\n------------- test1 ----------------\n");
538 run_test(NULL
, 0, NULL
, 0);
540 /* Test exhaustion of source length (source bytes are valid) */
541 printf("\n------------- test2.1 ----------------\n");
543 /* No character will be written to BUFF, i.e. loop in jitted code
545 run_test(buff
, sizeof buff
, NULL
, 0);
546 run_test(buff
, sizeof buff
, pattern1
, 0);
547 run_test(buff
, sizeof buff
, pattern2
, 0);
548 run_test(buff
, sizeof buff
, pattern2
, 1);
549 run_test(buff
, sizeof buff
, pattern3
, 0);
550 run_test(buff
, sizeof buff
, pattern3
, 1);
551 run_test(buff
, sizeof buff
, pattern3
, 2);
552 run_test(buff
, sizeof buff
, pattern4
, 0);
553 run_test(buff
, sizeof buff
, pattern4
, 1);
554 run_test(buff
, sizeof buff
, pattern4
, 2);
555 run_test(buff
, sizeof buff
, pattern4
, 3);
557 printf("\n------------- test2.2 ----------------\n");
558 /* At least one character will be written to BUFF, i.e. loop in jitted
560 run_test(buff
, sizeof buff
, pattern1
, 2);
561 run_test(buff
, sizeof buff
, pattern2
, 5);
562 run_test(buff
, sizeof buff
, pattern3
, 6);
563 run_test(buff
, sizeof buff
, pattern4
, 9);
565 /* Test exhaustion of destination length (source bytes are valid) */
566 printf("\n------------- test3.1 ----------------\n");
568 /* No character will be written to BUFF, i.e. loop in jitted code
571 /* Want to write 2 or 4 bytes at a time */
572 run_test(NULL
, 0, pattern1
, sizeof pattern1
); // 2-byte result
573 run_test(NULL
, 0, pattern2
, sizeof pattern2
); // 2-byte result
574 run_test(NULL
, 1, pattern2
, sizeof pattern2
); // 2-byte result
575 run_test(NULL
, 0, pattern3
, sizeof pattern3
); // 2-byte result
576 run_test(NULL
, 1, pattern3
, sizeof pattern3
); // 2-byte result
577 run_test(NULL
, 0, pattern4
, sizeof pattern4
); // 4-byte result
578 run_test(NULL
, 1, pattern4
, sizeof pattern4
); // 4-byte result
579 run_test(NULL
, 2, pattern4
, sizeof pattern4
); // 4-byte result
580 run_test(NULL
, 3, pattern4
, sizeof pattern4
); // 4-byte result
582 printf("\n------------- test3.2 ----------------\n");
583 /* At least one character will be written to BUFF, i.e. loop in jitted
585 run_test(buff
, 4, pattern1
, sizeof pattern1
);
586 run_test(buff
, 5, pattern1
, sizeof pattern1
);
587 run_test(buff
, 6, pattern1
, sizeof pattern1
);
588 run_test(buff
, 7, pattern1
, sizeof pattern1
);
590 /* Convert buffer with mixed characters */
591 printf("\n------------- test4 ----------------\n");
592 run_test(buff
, sizeof buff
, mixed
, sizeof mixed
);