2 # $Id: api.txt,v 1.1.1.1 2004/05/31 04:25:25 lukem Exp $
10 NOTE: This library has been customized for use with OpenLDAP. The character
11 data tables are hardcoded into the library and the load/unload/reload
12 functions are no-ops. Also, the MUTT API claimed to be compatible with
13 John Cowan's library but its ucnumber behavior was broken. This has been
14 fixed in the OpenLDAP release.
16 By default, the implementation specific properties in MUTTUCData.txt are
17 not incorporated into the OpenLDAP build. You can supply them to ucgendat
18 and recreate uctable.h if you need them.
23 -----------------------------------------------------------------------------
25 Macros that combine to select data tables for ucdata_load(), ucdata_unload(), and ucdata_reload().
28 #define UCDATA_CASE 0x01
29 #define UCDATA_CTYPE 0x02
30 #define UCDATA_DECOMP 0x04
31 #define UCDATA_CMBCL 0x08
32 #define UCDATA_NUM 0x10
33 #define UCDATA_COMP 0x20
34 #define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
35 UCDATA_CMBCL|UCDATA_NUM|UCDATA_COMP)
36 -----------------------------------------------------------------------------
38 void ucdata_load(char *paths, int masks)
40 This function initializes the UCData library by locating the data files in
41 one of the colon-separated directories in the `paths' parameter. The data
42 files to be loaded are specified in the `masks' parameter as a bitwise
43 combination of the macros listed above.
45 This should be called before using any of the other functions.
47 NOTE: the ucdata_setup(char *paths) function is now a macro that expands
48 into this function at compile time.
50 -----------------------------------------------------------------------------
52 void ucdata_unload(int masks)
54 This function unloads the data tables specified in the `masks' parameter.
56 This function should be called when the application is done using the UCData library.
59 NOTE: the ucdata_cleanup() function is now a macro that expands into this
60 function at compile time.
62 -----------------------------------------------------------------------------
64 void ucdata_reload(char *paths, int masks)
66 This function reloads the data files from one of the colon-separated
67 directories in the `paths' parameter. The data files to be reloaded are
68 specified in the `masks' parameter as a bitwise combination of the macros listed above.
71 If the data files have already been loaded, they are unloaded before the
72 data files are loaded again.
74 -----------------------------------------------------------------------------
76 int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
78 This function determines if a character has a decomposition and returns the
79 decomposition information if it exists.
81 If a zero is returned, there is no decomposition. If a non-zero is
82 returned, then the `num' and `decomp' variables are filled in with the decomposition information.
87 unsigned long i, num, *decomp;
89 if (ucdecomp(0x1d5, &num, &decomp) != 0) {
90 for (i = 0; i < num; i++)
91 printf("0x%08lX,", decomp[i]);
95 int uccanondecomp(const unsigned long *in, int inlen, unsigned long **out,
98 This function decomposes an input string and does canonical reordering of
99 the characters at the same time.
101 If a -1 is returned, memory allocation was not successful. If a zero is
102 returned, no decomposition occurred. Any other value means the output string
103 contains the fully decomposed string in canonical order.
105 If the "outlen" parameter comes back with a value > 0, then the string
106 returned in the "out" parameter needs to be deallocated by the caller.
108 -----------------------------------------------------------------------------
110 int ucdecomp_hangul(unsigned long code, unsigned long *num,
111 unsigned long decomp[])
113 This function determines if a Hangul syllable has a decomposition and
114 returns the decomposition information.
116 An array of at least size 3 should be passed to the function for the
117 decomposition of the syllable.
119 If a zero is returned, the character is not a Hangul syllable. If a
120 non-zero is returned, the `num' field will be 2 or 3 and the syllable will
121 be decomposed into the `decomp' array arithmetically.
125 unsigned long i, num, decomp[3];
127 if (ucdecomp_hangul(0xb1ba, &num, decomp) != 0) {
128 for (i = 0; i < num; i++)
129 printf("0x%08lX,", decomp[i]);
133 -----------------------------------------------------------------------------
135 int uccomp(unsigned long ch1, unsigned long ch2, unsigned long *comp)
137 This function takes a pair of characters and determines if they combine to
138 form another character.
140 If a zero is returned, no composition is formed by the character pair. Any
141 other value indicates the "comp" parameter has a value.
143 int uccomp_hangul(unsigned long *str, int len)
145 This function composes the Hangul Jamo in the string. The composition is done in place.
148 The return value provides the new length of the string. This will be
149 smaller than "len" if compositions occurred.
151 int uccanoncomp(unsigned long *str, int len)
153 This function does a canonical composition of characters in the string.
155 The return value is the new length of the string.
157 -----------------------------------------------------------------------------
164 int ucnumber_lookup(unsigned long code, struct ucnumber *num)
166 This function determines if the code is a number and fills in the `num'
167 field with the numerator and denominator. If the code happens to be a
168 single digit, the denominator field will be 1.
171 The original code would set numerator = denominator for regular digits.
172 However, the Readme also claimed to be compatible with John Cowan's uctype
173 library, but this behavior is both nonsensical and incompatible with the
174 Cowan library. As such, it has been fixed here as described above.
178 If the function returns 0, the code is not a number. Any other return
179 value means the code is a number.
181 int ucdigit_lookup(unsigned long code, int *digit)
183 This function determines if the code is a digit and fills in the `digit'
184 field with the digit value.
186 If the function returns 0, the code is not a digit. Any other return
187 value means the code is a digit.
189 struct ucnumber ucgetnumber(unsigned long code)
191 This is a compatibility function with John Cowan's "uctype" package. It
192 uses ucnumber_lookup().
194 int ucgetdigit(unsigned long code)
196 This is a compatibility function with John Cowan's "uctype" package. It
197 uses ucdigit_lookup().
199 -----------------------------------------------------------------------------
201 unsigned long uctoupper(unsigned long code)
203 This function returns the code unchanged if it is already upper case or has
204 no upper case equivalent. Otherwise the upper case equivalent is returned.
206 -----------------------------------------------------------------------------
208 unsigned long uctolower(unsigned long code)
210 This function returns the code unchanged if it is already lower case or has
211 no lower case equivalent. Otherwise the lower case equivalent is returned.
213 -----------------------------------------------------------------------------
215 unsigned long uctotitle(unsigned long code)
217 This function returns the code unchanged if it is already title case or has
218 no title case equivalent. Otherwise the title case equivalent is returned.
220 -----------------------------------------------------------------------------
222 int ucisalpha(unsigned long code)
223 int ucisalnum(unsigned long code)
224 int ucisdigit(unsigned long code)
225 int uciscntrl(unsigned long code)
226 int ucisspace(unsigned long code)
227 int ucisblank(unsigned long code)
228 int ucispunct(unsigned long code)
229 int ucisgraph(unsigned long code)
230 int ucisprint(unsigned long code)
231 int ucisxdigit(unsigned long code)
233 int ucisupper(unsigned long code)
234 int ucislower(unsigned long code)
235 int ucistitle(unsigned long code)
237 These functions (actually macros) determine if a character has these
238 properties. These behave in a fashion very similar to the venerable ctype functions.
241 -----------------------------------------------------------------------------
243 int ucisisocntrl(unsigned long code)
245 Is the character a C0 control character (< 32)?
247 int ucisfmtcntrl(unsigned long code)
249 Is the character a format control character?
251 int ucissymbol(unsigned long code)
253 Is the character a symbol?
255 int ucisnumber(unsigned long code)
257 Is the character a number or digit?
259 int ucisnonspacing(unsigned long code)
261 Is the character non-spacing?
263 int ucisopenpunct(unsigned long code)
265 Is the character an open/left punctuation (e.g. '[')?
267 int ucisclosepunct(unsigned long code)
269 Is the character a close/right punctuation (e.g. ']')?
271 int ucisinitialpunct(unsigned long code)
273 Is the character an initial punctuation (e.g. U+2018 LEFT SINGLE QUOTATION MARK)?
276 int ucisfinalpunct(unsigned long code)
278 Is the character a final punctuation (e.g. U+2019 RIGHT SINGLE QUOTATION MARK)?
281 int uciscomposite(unsigned long code)
283 Can the character be decomposed into a set of other characters?
285 int ucisquote(unsigned long code)
287 Is the character one of the many quotation marks?
289 int ucissymmetric(unsigned long code)
291 Is the character one that has an opposite form (e.g. '<' and '>')?
293 int ucismirroring(unsigned long code)
295 Is the character mirroring (superset of symmetric)?
297 int ucisnonbreaking(unsigned long code)
299 Is the character non-breaking (e.g. non-breaking space)?
301 int ucisrtl(unsigned long code)
303 Does the character have strong right-to-left directionality (e.g. Arabic letters)?
306 int ucisltr(unsigned long code)
308 Does the character have strong left-to-right directionality (e.g. Latin letters)?
311 int ucisstrong(unsigned long code)
313 Does the character have strong directionality?
315 int ucisweak(unsigned long code)
317 Does the character have weak directionality (e.g. numbers)?
319 int ucisneutral(unsigned long code)
321 Does the character have neutral directionality (e.g. whitespace)?
323 int ucisseparator(unsigned long code)
325 Is the character a block or segment separator?
327 int ucislsep(unsigned long code)
329 Is the character a line separator?
331 int ucispsep(unsigned long code)
333 Is the character a paragraph separator?
335 int ucismark(unsigned long code)
337 Is the character a mark of some kind?
339 int ucisnsmark(unsigned long code)
341 Is the character a non-spacing mark?
343 int ucisspmark(unsigned long code)
345 Is the character a spacing mark?
347 int ucismodif(unsigned long code)
349 Is the character a modifier letter?
351 int ucismodifsymbol(unsigned long code)
353 Is the character a modifier symbol?
355 int ucisletnum(unsigned long code)
357 Is the character a number represented by a letter?
359 int ucisconnect(unsigned long code)
361 Is the character connecting punctuation?
363 int ucisdash(unsigned long code)
365 Is the character dash punctuation?
367 int ucismath(unsigned long code)
369 Is the character a math character?
371 int uciscurrency(unsigned long code)
373 Is the character a currency character?
375 int ucisenclosing(unsigned long code)
377 Is the character enclosing (e.g. enclosing box)?
379 int ucisprivate(unsigned long code)
381 Is the character from the Private Use Area?
383 int ucissurrogate(unsigned long code)
385 Is the character one of the surrogate codes?
387 int ucisdefined(unsigned long code)
389 Is the character defined (appeared in one of the data files)?
391 int ucisundefined(unsigned long code)
393 Is the character not defined (non-Unicode)?
395 int ucishan(unsigned long code)
397 Is the character a Han ideograph?
399 int ucishangul(unsigned long code)
401 Is the character a pre-composed Hangul syllable?