1 /*-------------------------------------------------------------------------
4 * locale compatibility layer for tsearch
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
12 *-------------------------------------------------------------------------
16 #include "storage/fd.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_public.h"
/*
 * Forward declaration of the file-local error context callback.  It is
 * needed ahead of its definition because tsearch_readline_begin() stores
 * its address in stp->cb.callback.
 */
20 static void tsearch_readline_callback(void *arg
);
23 #ifdef USE_WIDE_UPPER_LOWER
/*
 * t_isdigit --- digit test for the character that starts at ptr.
 *
 * clen is obtained from pg_mblen(ptr).  It is compared against 1 and later
 * handed to char2wchar() as the input length, so it is presumably the byte
 * length of that character (confirm against pg_mblen).
 *
 * NOTE(review): the embedded numbering of this listing skips lines, so the
 * return type, the braces and the declaration of character are not shown.
 */
26 t_isdigit(const char *ptr
)
28 int clen
= pg_mblen(ptr
);
/* Narrow path: taken when clen is 1 or lc_ctype_is_c() is true; uses isdigit() on TOUCHAR(ptr). */
31 if (clen
== 1 || lc_ctype_is_c())
32 return isdigit(TOUCHAR(ptr
));
/*
 * Wide path: convert into character (capacity argument 2) and classify
 * element 0 with iswdigit().  The value returned by char2wchar() is not
 * examined here.
 */
34 char2wchar(character
, 2, ptr
, clen
);
36 return iswdigit((wint_t) character
[0]);
/*
 * t_isspace --- whitespace test for the character that starts at ptr.
 *
 * Same two-path shape as the other t_is*() tests in this file: clen comes
 * from pg_mblen(ptr) (presumably the byte length of the character; confirm).
 *
 * NOTE(review): the return type, the braces and the declaration of
 * character are not present in this listing.
 */
40 t_isspace(const char *ptr
)
42 int clen
= pg_mblen(ptr
);
/* Narrow path: clen is 1 or lc_ctype_is_c() is true, so isspace() on TOUCHAR(ptr) is used. */
45 if (clen
== 1 || lc_ctype_is_c())
46 return isspace(TOUCHAR(ptr
));
/*
 * Wide path: char2wchar() fills character (capacity argument 2) and
 * iswspace() is applied to element 0.  The char2wchar() result is ignored.
 */
48 char2wchar(character
, 2, ptr
, clen
);
50 return iswspace((wint_t) character
[0]);
/*
 * t_isalpha --- alphabetic test for the character that starts at ptr.
 *
 * clen comes from pg_mblen(ptr) (presumably the byte length of the
 * character; confirm).  It selects between the narrow and wide paths below.
 *
 * NOTE(review): the return type, the braces and the declaration of
 * character are not present in this listing.
 */
54 t_isalpha(const char *ptr
)
56 int clen
= pg_mblen(ptr
);
/* Narrow path: clen is 1 or lc_ctype_is_c() is true, so isalpha() on TOUCHAR(ptr) is used. */
59 if (clen
== 1 || lc_ctype_is_c())
60 return isalpha(TOUCHAR(ptr
));
/*
 * Wide path: char2wchar() fills character (capacity argument 2) and
 * iswalpha() is applied to element 0.  The char2wchar() result is ignored.
 */
62 char2wchar(character
, 2, ptr
, clen
);
64 return iswalpha((wint_t) character
[0]);
/*
 * t_isprint --- printable test for the character that starts at ptr.
 *
 * clen comes from pg_mblen(ptr) (presumably the byte length of the
 * character; confirm).  It selects between the narrow and wide paths below.
 *
 * NOTE(review): the return type, the braces and the declaration of
 * character are not present in this listing.
 */
68 t_isprint(const char *ptr
)
70 int clen
= pg_mblen(ptr
);
/* Narrow path: clen is 1 or lc_ctype_is_c() is true, so isprint() on TOUCHAR(ptr) is used. */
73 if (clen
== 1 || lc_ctype_is_c())
74 return isprint(TOUCHAR(ptr
));
/*
 * Wide path: char2wchar() fills character (capacity argument 2) and
 * iswprint() is applied to element 0.  The char2wchar() result is ignored.
 */
76 char2wchar(character
, 2, ptr
, clen
);
78 return iswprint((wint_t) character
[0]);
80 #endif /* USE_WIDE_UPPER_LOWER */
84 * Set up to read a file using tsearch_readline(). This facility is
85 * better than just reading the file directly because it provides error
86 * context pointing to the specific line where a problem is detected.
90 * tsearch_readline_state trst;
92 * if (!tsearch_readline_begin(&trst, filename))
94 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
95 * errmsg("could not open stop-word file \"%s\": %m",
97 * while ((line = tsearch_readline(&trst)) != NULL)
99 * tsearch_readline_end(&trst);
101 * Note that the caller supplies the ereport() for file open failure;
102 * this is so that a custom message can be provided. The filename string
103 * passed to tsearch_readline_begin() must remain valid through
104 * tsearch_readline_end().
/*
 * tsearch_readline_begin --- open filename and prepare stp for reading.
 *
 * stp:      caller-owned state; this function fills fp, filename and cb.
 * filename: stored in stp by pointer, not copied.
 *
 * NOTE(review): the return type, the braces, the statement executed when
 * AllocateFile() returns NULL and the final return are not present in this
 * listing.  The usage notes preceding this function test the result with
 * the ! operator and leave the open-failure report to the caller.
 */
107 tsearch_readline_begin(tsearch_readline_state
*stp
,
108 const char *filename
)
/* Open for reading; the handle is kept in stp->fp for later calls. */
110 if ((stp
->fp
= AllocateFile(filename
, "r")) == NULL
)
/* Only the pointer is saved, so the string must outlive the state. */
112 stp
->filename
= filename
;
115 /* Setup error traceback support for ereport() */
116 stp
->cb
.callback
= tsearch_readline_callback
;
/* The callback receives the state itself as its argument. */
117 stp
->cb
.arg
= (void *) stp
;
/* Link in front of the current head: remember the old head, then install &stp->cb. */
118 stp
->cb
.previous
= error_context_stack
;
119 error_context_stack
= &stp
->cb
;
124 * Read the next line from a tsearch data file (expected to be in UTF-8), and
125 * convert it to database encoding if needed. The returned string is palloc'd.
126 * NULL return means EOF.
/*
 * tsearch_readline --- fetch the next line for the reader state stp.
 *
 * The line is obtained from t_readline(stp->fp) and the returned pointer is
 * recorded in stp->curline.
 *
 * NOTE(review): the embedded numbering jumps from 129 to 135 and stops at
 * 136, so the return type, the declaration of result, any statements that
 * run before the t_readline() call and the return are not shown here.
 */
129 tsearch_readline(tsearch_readline_state
*stp
)
135 result
= t_readline(stp
->fp
);
/* Keep the most recent line reachable through the state. */
136 stp
->curline
= result
;
141 * Close down after reading a file with tsearch_readline()
/*
 * tsearch_readline_end --- undo the setup done by tsearch_readline_begin().
 *
 * Restores error_context_stack to the value that tsearch_readline_begin()
 * saved in stp->cb.previous.
 *
 * NOTE(review): no statement releasing stp->fp is visible in this listing
 * (the embedded numbering jumps from 144 to 147).  Confirm that the file
 * obtained from AllocateFile() is closed in this function.
 */
144 tsearch_readline_end(tsearch_readline_state
*stp
)
147 /* Pop the error context stack */
148 error_context_stack
= stp
->cb
.previous
;
152 * Error context callback for errors occurring while reading a tsearch
153 * configuration file.
/*
 * tsearch_readline_callback --- error context callback for the reader.
 *
 * arg is cast back to the tsearch_readline_state pointer that
 * tsearch_readline_begin() stored in cb.arg.
 *
 * Two errcontext() messages are visible: one with a line number, a file
 * name and a third quoted string, and one with only the line number and
 * the file name.
 *
 * NOTE(review): the condition choosing between the two messages and both
 * argument lists are missing from this listing.  Presumably the longer
 * form is used once stp->curline has been set; confirm.
 */
156 tsearch_readline_callback(void *arg
)
158 tsearch_readline_state
*stp
= (tsearch_readline_state
*) arg
;
161 * We can't include the text of the config line for errors that occur
162 * during t_readline() itself. This is only partly a consequence of
163 * our arms-length use of that routine: the major cause of such
164 * errors is encoding violations, and we daren't try to print error
165 * messages containing badly-encoded data.
168 errcontext("line %d of configuration file \"%s\": \"%s\"",
173 errcontext("line %d of configuration file \"%s\"",
180 * Read the next line from a tsearch data file (expected to be in UTF-8), and
181 * convert it to database encoding if needed. The returned string is palloc'd.
182 * NULL return means EOF.
184 * Note: direct use of this function is now deprecated. Go through
185 * tsearch_readline() to provide better error reporting.
/*
 * Body of the line reader described by the comment preceding this block.
 *
 * NOTE(review): the signature line is missing from this listing.  The only
 * visible caller is tsearch_readline(), which calls t_readline(stp->fp),
 * so this is presumably t_readline taking a FILE pointer named fp; confirm.
 * The declarations of len and recoded, the computation of len, the
 * remaining pg_do_encoding_conversion() arguments, the condition guarding
 * the pnstrdup() call and the return statements are also not shown.
 *
 * NOTE(review): fgets() stores at most sizeof(buf) - 1 bytes per call, so
 * an input line longer than the buffer would be returned in pieces by
 * successive calls.  Nothing visible here detects that case; confirm
 * whether it is handled elsewhere.
 */
192 char buf
[4096]; /* lines must not be longer than this */
/* A NULL result from fgets() ends the read. */
194 if (fgets(buf
, sizeof(buf
), fp
) == NULL
)
199 /* Make sure the input is valid UTF-8 */
/* The value returned by pg_verify_mbstr() is discarded by the (void) cast. */
200 (void) pg_verify_mbstr(PG_UTF8
, buf
, len
, false);
/* Convert the buffer; the target encoding comes from GetDatabaseEncoding(). */
203 recoded
= (char *) pg_do_encoding_conversion((unsigned char *) buf
,
206 GetDatabaseEncoding());
210 * conversion didn't pstrdup, so we must. We can use the length of the
211 * original string, because no conversion was done.
213 recoded
= pnstrdup(recoded
, len
);
220 * lowerstr --- fold null-terminated string to lower case
222 * Returned string is palloc'd
/*
 * lowerstr --- convenience wrapper around lowerstr_with_len().
 *
 * The length is measured with strlen(), so str must be a non-NULL,
 * null-terminated string.  The result of lowerstr_with_len() is returned
 * unchanged.
 */
225 lowerstr(const char *str
)
227 return lowerstr_with_len(str
, strlen(str
));
231 * lowerstr_with_len --- fold string to lower case
233 * Input string need not be null-terminated.
235 * Returned string is palloc'd
238 lowerstr_with_len(const char *str
, int len
)
245 #ifdef USE_WIDE_UPPER_LOWER
248 * Use wide char code only when max encoding length > 1 and ctype != C.
249 * Some operating systems fail with multi-byte encodings and a C locale.
250 * Also, for a C locale there is no need to process as multibyte.
251 * (Rule taken from backend/utils/adt/oracle_compat.c. -- Teodor)
253 if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
260 * Allocate wchar_t space for the worst case: len is a byte count,
261 * which is >= the number of characters. Allocate one extra wchar_t
262 * for the terminator, because wchar2char wants a zero-terminated string.
264 wptr
= wstr
= (wchar_t *) palloc(sizeof(wchar_t) * (len
+ 1));
266 wlen
= char2wchar(wstr
, len
+ 1, str
, len
);
271 *wptr
= towlower((wint_t) *wptr
);
276 * Alloc result string for worst case + '\0'
278 len
= pg_database_encoding_max_length() * wlen
+ 1;
279 out
= (char *) palloc(len
);
281 wlen
= wchar2char(out
, wstr
, len
);
287 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE
),
288 errmsg("conversion from wchar_t to server encoding failed: %m")));
292 #endif /* USE_WIDE_UPPER_LOWER */
294 const char *ptr
= str
;
297 outptr
= out
= (char *) palloc(sizeof(char) * (len
+ 1));
298 while ((ptr
- str
) < len
&& *ptr
)
300 *outptr
++ = tolower(TOUCHAR(ptr
));