1 /* Transliteration using the locale's data.
2 Copyright (C) 2000-2025 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
26 #include <libc-lock.h>
27 #include "gconv_int.h"
28 #include "../locale/localeinfo.h"
29 #include <pointer_guard.h>
/* Try to transliterate the input at *INBUFP using the transliteration
   data of the current LC_CTYPE locale.

   The input is read as a sequence of uint32_t values.  Three strategies
   are visible below, in this order:
     1. look the input up in the locale's transliteration table (binary
	search over FROM_IDX/FROM_TBL) and feed each listed replacement
	from TO_TBL to the conversion function of STEP;
     2. check whether the character falls into one of the locale's
	"ignore" ranges;
     3. feed the locale's default replacement for missing characters to
	the conversion function of STEP.

   STEP		conversion step; its __fct member is the function used to
		convert the replacement text.
   STEP_DATA	passed through to the conversion function and to
		__gconv_mark_illegal_input.
   INBUFSTART	not referenced in the lines visible here.
   INBUFP	current input position; advanced by the length of the
		matched sequence when a table replacement is accepted.
   INBUFEND	end of the available input.
   OUTBUFSTART	current output position; updated from a scratch pointer
		after a conversion attempt.
   IRREVERSIBLE	not referenced in the lines visible here.

   Visible return values: __GCONV_EMPTY_INPUT or __GCONV_INCOMPLETE_INPUT
   when there is not enough input, __GCONV_INCOMPLETE_INPUT when the
   input is only a prefix of a table entry, and the result of
   __gconv_mark_illegal_input when nothing matched.

   NOTE(review): this listing omits a number of original lines (braces,
   several declarations, assignments and returns); the comments describe
   only what can be seen here.  */
33 __gconv_transliterate (struct __gconv_step
*step
,
34 struct __gconv_step_data
*step_data
,
35 const unsigned char *inbufstart
,
36 const unsigned char **inbufp
,
37 const unsigned char *inbufend
,
38 unsigned char **outbufstart
, size_t *irreversible
)
40 /* Find out about the locale's transliteration. */
42 const uint32_t *from_idx
;
43 const uint32_t *from_tbl
;
44 const uint32_t *to_idx
;
45 const uint32_t *to_tbl
;
46 const uint32_t *winbuf
;
47 const uint32_t *winbufend
;
51 /* The input buffer. These are actually 4-byte values. */
52 winbuf
= (const uint32_t *) *inbufp
;
53 winbufend
= (const uint32_t *) inbufend
;
/* Conversion function of this step; it is called below (via DL_CALL_FCT)
   on every candidate replacement text. */
55 __gconv_fct fct
= step
->__fct
;
/* NOTE(review): the statement guarded by this test is not visible in
   this listing -- confirm against the full source. */
56 if (step
->__shlib_handle
!= NULL
)
59 /* If there is no transliteration information in the locale don't do
60 anything and return the error. */
61 size
= _NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TAB_SIZE
);
65 /* Get the rest of the values. */
/* NOTE(review): the assignment targets of the next four expressions are
   not visible here; presumably from_idx, from_tbl, to_idx and to_tbl in
   that order -- confirm. */
67 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_FROM_IDX
);
69 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_FROM_TBL
);
71 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TO_IDX
);
73 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_TO_TBL
);
75 /* Test whether there is enough input. */
76 if (winbuf
+ 1 > winbufend
)
77 return (winbuf
== winbufend
78 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
80 /* The array starting at FROM_IDX contains indices to the string table
81 in FROM_TBL. The indices are sorted with respect to the strings. I.e., we
82 are doing binary search. */
/* Probe the middle of the current LOW..HIGH search interval. */
87 uint32_t med
= (low
+ high
) / 2;
91 /* Compare the string at this index with the string at the current
92 position in the input buffer. */
97 if (from_tbl
[idx
+ cnt
] != winbuf
[cnt
])
/* The comparison stops at the terminating zero of the table entry or at
   the end of the available input, whichever comes first. */
102 while (from_tbl
[idx
+ cnt
] != L
'\0' && winbuf
+ cnt
< winbufend
);
104 if (cnt
> 0 && from_tbl
[idx
+ cnt
] == L
'\0')
106 /* Found a matching input sequence. Now try to convert the
107 possible replacements. */
108 uint32_t idx2
= to_idx
[med
];
112 /* Determine length of replacement. */
113 unsigned int len
= 0;
115 const unsigned char *toinptr
;
116 unsigned char *outptr
;
118 while (to_tbl
[idx2
+ len
] != L
'\0')
121 /* Try this input text. */
122 toinptr
= (const unsigned char *) &to_tbl
[idx2
];
/* Convert into a scratch copy of the output pointer so that
   *OUTBUFSTART is only changed by the explicit assignment further
   down. */
123 outptr
= *outbufstart
;
124 res
= DL_CALL_FCT (fct
,
125 (step
, step_data
, &toinptr
,
126 (const unsigned char *) &to_tbl
[idx2
+ len
],
127 &outptr
, NULL
, 0, 0));
128 if (res
!= __GCONV_ILLEGAL_INPUT
)
130 /* If the conversion succeeds we have to increment the
132 if (res
== __GCONV_EMPTY_INPUT
)
134 *inbufp
+= cnt
* sizeof (uint32_t);
138 /* Do not increment the output pointer if we could not
139 store the entire output. */
140 if (res
!= __GCONV_FULL_OUTPUT
)
141 *outbufstart
= outptr
;
146 /* Next replacement. */
/* Keep going while another non-empty replacement string follows. */
149 while (to_tbl
[idx2
] != L
'\0');
151 /* Nothing found, continue searching. */
153 else if (cnt
> 0 && winbuf
+ cnt
== winbufend
)
154 /* This means that the input buffer contents matches a prefix of
155 an entry. Since we cannot match it unless we get more input,
156 we will tell the caller about it. */
157 return __GCONV_INCOMPLETE_INPUT
;
/* Choose the half of the index in which to continue the search.
   NOTE(review): the two branches of this test are not visible here. */
159 if (winbuf
+ cnt
>= winbufend
|| from_tbl
[idx
+ cnt
] < winbuf
[cnt
])
166 /* Maybe the character is supposed to be ignored. */
167 if (_NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE_LEN
) != 0)
169 int n
= _NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE_LEN
);
170 const uint32_t *ranges
=
171 (const uint32_t *) _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_IGNORE
);
172 const uint32_t wc
= *(const uint32_t *) (*inbufp
);
175 /* Test whether there is enough input. */
176 if (winbuf
+ 1 > winbufend
)
177 return (winbuf
== winbufend
178 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
/* Each range is a triple of uint32_t values: first character, last
   character and step width; WC matches if it lies inside the bounds
   and is a whole number of steps away from the first character. */
180 for (i
= 0; i
< n
; ranges
+= 3, ++i
)
181 if (ranges
[0] <= wc
&& wc
<= ranges
[1]
182 && (wc
- ranges
[0]) % ranges
[2] == 0)
184 /* Matches the range. Ignore it. */
189 else if (wc
< ranges
[0])
190 /* There cannot be any other matching range since they are
195 /* One last chance: use the default replacement. */
196 if (_NL_CURRENT_WORD (LC_CTYPE
, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
) != 0)
198 const uint32_t *default_missing
= (const uint32_t *)
199 _NL_CURRENT (LC_CTYPE
, _NL_CTYPE_TRANSLIT_DEFAULT_MISSING
);
200 const unsigned char *toinptr
= (const unsigned char *) default_missing
;
/* LEN counts uint32_t elements: it is added to DEFAULT_MISSING before
   the cast to a byte pointer in the call below. */
201 uint32_t len
= _NL_CURRENT_WORD (LC_CTYPE
,
202 _NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN
);
203 unsigned char *outptr
;
206 /* Test whether there is enough input. */
207 if (winbuf
+ 1 > winbufend
)
208 return (winbuf
== winbufend
209 ? __GCONV_EMPTY_INPUT
: __GCONV_INCOMPLETE_INPUT
);
/* Same scheme as for table replacements: convert into a scratch output
   pointer first. */
211 outptr
= *outbufstart
;
212 res
= DL_CALL_FCT (fct
,
213 (step
, step_data
, &toinptr
,
214 (const unsigned char *) (default_missing
+ len
),
215 &outptr
, NULL
, 0, 0));
217 if (res
!= __GCONV_ILLEGAL_INPUT
)
219 /* If the conversion succeeds we have to increment the
221 if (res
== __GCONV_EMPTY_INPUT
)
223 /* This worked but is not reversible. */
228 *outbufstart
= outptr
;
234 /* Haven't found a match. */
235 return __gconv_mark_illegal_input (step_data
);
237 libc_hidden_def (__gconv_transliterate
)