1 /***********************************************************************
3 * This software is part of the ast package *
4 * Copyright (c) 1985-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
13 * Information and Software Systems Research *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * Phong Vo <kpv@research.att.com> *
21 ***********************************************************************/
24 * regex collation symbol support
35 #include "ucs_names.h"
37 typedef struct Ucs_map_s
43 struct Ucs_map_s
* next
;
/*
 * attribute bit-set helpers
 *
 * a is an array of 32-bit-wide unsigned words and i is a bit index:
 * word i>>5 holds bit i&31
 *
 * the mask is built from an unsigned 1 because shifting a signed
 * int 1 left by 31 is undefined behavior
 *
 * setattr(a,i)	set bit i in a
 * tstattr(a,i)	non-zero if bit i is set in a
 * clrattr(a,i)	clear bit i in a
 *
 * NOTE: a and i are evaluated more than once -- no side effects
 */

#define setattr(a,i)	((a)[(i)>>5]|=(1U<<((i)&((1<<5)-1))))
#define tstattr(a,i)	((a)[(i)>>5]&(1U<<((i)&((1<<5)-1))))
#define clrattr(a,i)	((a)[(i)>>5]&=~(1U<<((i)&((1<<5)-1))))
56 #if CC_NATIVE != CC_ASCII
62 * initialize the writeable tables from the readonly data
63 * the tables are big enough to be concerned about text vs. data vs. bss
72 register Ucs_map_t
* a
;
73 register Ucs_map_t
* w
;
77 local
.dtdisc
.link
= offsetof(Ucs_map_t
, link
);
78 local
.dtdisc
.key
= offsetof(Ucs_map_t
, name
);
79 local
.dtdisc
.size
= -1;
80 if (!(w
= (Ucs_map_t
*)malloc(sizeof(Ucs_map_t
) * (elementsof(ucs_attrs
) + elementsof(ucs_names
)))))
85 if (!(local
.attrs
= dtopen(&local
.dtdisc
, Dttree
)))
91 if (!(local
.names
= dtopen(&local
.dtdisc
, Dttree
)))
98 for (i
= 0; i
< elementsof(ucs_attrs
); i
++, w
++)
100 memcpy(w
, &ucs_attrs
[i
], offsetof(Ucs_dat_t
, table
));
101 w
->name
= ucs_strings
[ucs_attrs
[i
].table
] + ucs_attrs
[i
].index
;
103 dtinsert(local
.attrs
, w
);
105 for (i
= 0; i
< elementsof(ucs_names
); i
++, w
++)
107 memcpy(w
, &ucs_names
[i
], offsetof(Ucs_dat_t
, table
));
108 w
->name
= ucs_strings
[ucs_names
[i
].table
] + ucs_names
[i
].index
;
110 if (a
= (Ucs_map_t
*)dtsearch(local
.names
, w
))
117 dtinsert(local
.names
, w
);
119 #if CC_NATIVE != CC_ASCII
120 local
.a2n
= ccmap(CC_ASCII
, CC_NATIVE
);
126 * return the collating symbol delimited by [c c], where c is either '=' or '.'
127 * s points to the first char after the initial [
128 * if e!=0 it is set to point to the next char in s on return
130 * the collating symbol is converted to multibyte in <buf,size>
131 * the return value is:
132 * -1 syntax error or buf not large enough
133 * >=0 size with 0-terminated mb collation element
134 * or ligature value in buf
138 regcollate(register const char* s
, char** e
, char* buf
, int size
)
144 register Ucs_map_t
* a
;
157 else if ((term
= *s
++) != '.' && term
!= '=')
162 else if (*s
== term
&& *(s
+ 1) == ']')
168 if ((n
= (s
- t
)) == 1)
170 if (*s
== term
&& *(s
+ 1) == ']')
177 if (!local
.attrs
&& initialize())
179 attr
[0] = attr
[1] = attr
[2] = 0;
205 if (c
== ' ' || c
== '-' && u
> b
&& *s
!= ' ' && *s
!= '-')
214 if (a
= (Ucs_map_t
*)dtmatch(local
.attrs
, b
))
215 setattr(attr
, a
->code
);
224 else if (islower(*v
))
230 if (b
> buf
&& *(b
- 1) == ' ')
233 attr
[0] &= ~((Ucs_attr_t
)1);
236 if (tstattr(attr
, UCS_UC
) || tstattr(attr
, UCS_LC
))
241 if (z
= (Ucs_map_t
*)dtmatch(local
.names
, buf
))
244 for (a
= z
; a
; a
= a
->next
)
245 if ((attr
[0] & a
->attr
[0]) == attr
[0] && (attr
[1] & a
->attr
[1]) == attr
[1] && (attr
[2] & a
->attr
[2]) == attr
[2])
250 #if CC_NATIVE != CC_ASCII
251 buf
[0] = local
.a2n
[a
->code
];
262 if ((r
= wcstombs(buf
, w
, size
)) > 0)
274 if ((n
= s
- t
- 2) > (size
- 1))
282 for (t
= buf
; isalnum(*t
); t
++);
288 else if (*s
++ != term
|| *s
++ != ']')
293 else if (n
> (size
- 1))