1 /* Test the Unicode character name functions.
2 Copyright (C) 2000-2003 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
any later version.
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
30 /* The names according to the UnicodeData.txt file, modified to contain the
31 Hangul syllable names, as described in the Unicode 3.0 book. */
/* The table has 0x110000 slots. Elsewhere in this file every slot is reset
   to NULL before loading, and a slot is then filled using the index parsed
   from the first field of a data line. */
32 const char * unicode_names
[0x110000];
34 /* Maximum length of a field in the UnicodeData.txt file. */
37 /* Reads the next field from STREAM. The buffer BUFFER has size FIELDLEN.
38 Reads up to (but excluding) DELIM.
39 Returns 1 when a field was successfully read, otherwise 0. */
41 getfield (FILE *stream
, char *buffer
, int delim
)
/* Pull characters from STREAM one at a time; the loop ends at EOF or at the
   first occurrence of DELIM. */
46 for (; (c
= getc (stream
)), (c
!= EOF
&& c
!= delim
); )
48 /* Put c into the buffer. */
/* count is incremented for every character read and compared against
   FIELDLEN - 1; reaching that limit prints "field too long" on stderr.
   NOTE(review): what happens after the diagnostic is not visible in this
   excerpt -- confirm the function stops rather than writing past BUFFER. */
49 if (++count
>= FIELDLEN
- 1)
51 fprintf (stderr
, "field too long\n");
64 /* Stores in unicode_names[] the relevant contents of the UnicodeData.txt file. */
/* Loads the file named UNICODEDATA_FILENAME into unicode_names[]. For each
   record two ';'-terminated fields are read: field0 is parsed as a
   hexadecimal index and a copy of field1 is stored at that index. */
67 fill_names (const char *unicodedata_filename
)
71 char field0
[FIELDLEN
];
72 char field1
[FIELDLEN
];
/* Start with every slot of the table empty. */
75 for (i
= 0; i
< 0x110000; i
++)
76 unicode_names
[i
] = NULL
;
78 stream
= fopen (unicodedata_filename
, "r");
/* Diagnostic for a failed fopen (the guarding condition is not visible in
   this excerpt). */
81 fprintf (stderr
, "error during fopen of '%s'\n", unicodedata_filename
);
/* n accumulates the return values of the two getfield calls. */
91 n
= getfield (stream
, field0
, ';');
92 n
+= getfield (stream
, field1
, ';');
/* Diagnostic for an incomplete record; reports the file name and lineno.
   NOTE(review): the test on n that selects this path is not visible here. */
97 fprintf (stderr
, "short line in '%s':%d\n",
98 unicodedata_filename
, lineno
);
/* Read ahead until the end of the current line or EOF. */
101 for (; (c
= getc (stream
)), (c
!= EOF
&& c
!= '\n'); )
/* Parse field0 as a base-16 number.
   NOTE(review): the end pointer argument is NULL, so trailing non-hex
   characters in field0 are not detected by this call. */
103 i
= strtoul (field0
, NULL
, 16);
/* Diagnostic for an out-of-range index (the bound check itself is not
   visible in this excerpt). */
106 fprintf (stderr
, "index too large\n");
/* Store a duplicate of the name so it survives reuse of field1. */
109 unicode_names
[i
] = xstrdup (field1
);
/* A stream error or a failing fclose is reported as a read error. */
111 if (ferror (stream
) || fclose (stream
))
113 fprintf (stderr
, "error reading from '%s'\n", unicodedata_filename
);
118 /* Perform an exhaustive test of the unicode_character_name function. */
124 char buf
[UNINAME_MAX
];
/* First pass: every index of unicode_names[], i.e. 0 .. 0x10FFFF. The
   bound must be 0x110000 (the table size); a bound of 0x11000 would leave
   0x11000 .. 0x10FFFF unchecked, since the second pass starts at 0x110000. */
126 for (i
= 0; i
< 0x110000; i
++)
128 char *result
= unicode_character_name (i
, buf
);
/* A table entry exists: the lookup must succeed and return the same name. */
130 if (unicode_names
[i
] != NULL
)
134 fprintf (stderr
, "\\u%04X name lookup failed!\n", i
);
137 else if (strcmp (result
, unicode_names
[i
]) != 0)
139 fprintf (stderr
, "\\u%04X name lookup returned wrong name: %s\n",
148 fprintf (stderr
, "\\u%04X name lookup returned wrong name: %s\n",
/* Second pass: values from 0x110000 up to (excluding) 0x1000000, which lie
   beyond the end of the table. */
155 for (i
= 0x110000; i
< 0x1000000; i
++)
157 char *result
= unicode_character_name (i
, buf
);
161 fprintf (stderr
, "\\u%04X name lookup returned wrong name: %s\n",
170 /* Perform a test of the unicode_name_character function. */
172 test_inverse_lookup ()
177 /* First, verify all valid character names are recognized. */
178 for (i
= 0; i
< 0x110000; i
++)
179 if (unicode_names
[i
] != NULL
)
181 unsigned int result
= unicode_name_character (unicode_names
[i
]);
184 if (result
== UNINAME_INVALID
)
185 fprintf (stderr
, "inverse name lookup of \"%s\" failed\n",
189 "inverse name lookup of \"%s\" returned 0x%04X\n",
190 unicode_names
[i
], result
);
195 /* Second, generate random but likely names and verify they are not
196 recognized unless really valid. */
197 for (i
= 0; i
< 10000; i
++)
202 unsigned int l1
, l2
, j1
, j2
;
203 char buf
[2*UNINAME_MAX
];
/* Draw random indices until one with a non-NULL table entry is found. */
206 do i1
= ((rand () % 0x11) << 16)
207 + ((rand () & 0xff) << 8)
209 while (unicode_names
[i1
] == NULL
);
211 do i2
= ((rand () % 0x11) << 16)
212 + ((rand () & 0xff) << 8)
214 while (unicode_names
[i2
] == NULL
);
216 s1
= unicode_names
[i1
];
218 s2
= unicode_names
[i2
];
221 /* Concatenate a starting piece of s1 with an ending piece of s2. */
/* j1 stops at the end of s1 or at a space in s1; j2 starts at the beginning
   of s2 or just after a space in s2. */
222 for (j1
= 1; j1
<= l1
; j1
++)
223 if (j1
== l1
|| s1
[j1
] == ' ')
224 for (j2
= 0; j2
< l2
; j2
++)
225 if (j2
== 0 || s2
[j2
-1] == ' ')
227 memcpy (buf
, s1
, j1
);
/* The copy length l2 - j2 + 1 takes one byte more than the l2 - j2 bytes
   that remain in s2 from offset j2. */
229 memcpy (buf
+ j1
+ 1, s2
+ j2
, l2
- j2
+ 1);
231 result
= unicode_name_character (buf
);
/* A hit is acceptable only when the table entry for the returned code
   has exactly the name that was looked up. */
232 if (result
!= UNINAME_INVALID
233 && !(unicode_names
[result
] != NULL
234 && strcmp (unicode_names
[result
], buf
) == 0))
/* Report the string that was actually looked up (buf). The loop counter i
   of this pass only counts iterations, so unicode_names[i] would name an
   unrelated entry and may even be NULL. */
237 "inverse name lookup of \"%s\" returned 0x%04X\n",
238 buf
, result
);
244 /* Third, some extreme case that used to loop. */
245 if (unicode_name_character ("A A") != UNINAME_INVALID
)
/* Program entry point: loads the name table from the file named by argv[1],
   then ORs the results of test_name_lookup and test_inverse_lookup into
   error.
   NOTE(review): no check of argc is visible in this excerpt before argv[1]
   is used -- confirm one exists. */
252 main (int argc
, char *argv
[])
256 fill_names (argv
[1]);
258 error
|= test_name_lookup ();
259 error
|= test_inverse_lookup ();