Add support for user-defined I/O conversion casts.
[PostgreSQL.git] / src / backend / tsearch / ts_locale.c
blob80e31d951198888e355930b880331dd633587254
/*-------------------------------------------------------------------------
 *
 * ts_locale.c
 *		locale compatibility layer for tsearch
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
14 #include "postgres.h"
16 #include "storage/fd.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_public.h"
20 static void tsearch_readline_callback(void *arg);
23 #ifdef USE_WIDE_UPPER_LOWER
/*
 * t_isdigit --- is the character starting at ptr a digit?
 *
 * When pg_mblen() reports a length of 1, or LC_CTYPE is C, the first byte
 * is classified with isdigit().  Otherwise the character is converted with
 * char2wchar() and classified with iswdigit().
 */
int
t_isdigit(const char *ptr)
{
	wchar_t		wbuf[2];
	int			nbytes = pg_mblen(ptr);

	if (nbytes != 1 && !lc_ctype_is_c())
	{
		/* multibyte character in a non-C locale: use the wide-char test */
		char2wchar(wbuf, 2, ptr, nbytes);
		return iswdigit((wint_t) wbuf[0]);
	}

	return isdigit(TOUCHAR(ptr));
}
/*
 * t_isspace --- is the character starting at ptr whitespace?
 *
 * When pg_mblen() reports a length of 1, or LC_CTYPE is C, the first byte
 * is classified with isspace().  Otherwise the character is converted with
 * char2wchar() and classified with iswspace().
 */
int
t_isspace(const char *ptr)
{
	wchar_t		wbuf[2];
	int			nbytes = pg_mblen(ptr);

	if (nbytes != 1 && !lc_ctype_is_c())
	{
		/* multibyte character in a non-C locale: use the wide-char test */
		char2wchar(wbuf, 2, ptr, nbytes);
		return iswspace((wint_t) wbuf[0]);
	}

	return isspace(TOUCHAR(ptr));
}
/*
 * t_isalpha --- is the character starting at ptr alphabetic?
 *
 * When pg_mblen() reports a length of 1, or LC_CTYPE is C, the first byte
 * is classified with isalpha().  Otherwise the character is converted with
 * char2wchar() and classified with iswalpha().
 */
int
t_isalpha(const char *ptr)
{
	wchar_t		wbuf[2];
	int			nbytes = pg_mblen(ptr);

	if (nbytes != 1 && !lc_ctype_is_c())
	{
		/* multibyte character in a non-C locale: use the wide-char test */
		char2wchar(wbuf, 2, ptr, nbytes);
		return iswalpha((wint_t) wbuf[0]);
	}

	return isalpha(TOUCHAR(ptr));
}
/*
 * t_isprint --- is the character starting at ptr printable?
 *
 * When pg_mblen() reports a length of 1, or LC_CTYPE is C, the first byte
 * is classified with isprint().  Otherwise the character is converted with
 * char2wchar() and classified with iswprint().
 */
int
t_isprint(const char *ptr)
{
	wchar_t		wbuf[2];
	int			nbytes = pg_mblen(ptr);

	if (nbytes != 1 && !lc_ctype_is_c())
	{
		/* multibyte character in a non-C locale: use the wide-char test */
		char2wchar(wbuf, 2, ptr, nbytes);
		return iswprint((wint_t) wbuf[0]);
	}

	return isprint(TOUCHAR(ptr));
}
80 #endif /* USE_WIDE_UPPER_LOWER */
84 * Set up to read a file using tsearch_readline(). This facility is
85 * better than just reading the file directly because it provides error
86 * context pointing to the specific line where a problem is detected.
88 * Expected usage is:
90 * tsearch_readline_state trst;
92 * if (!tsearch_readline_begin(&trst, filename))
93 * ereport(ERROR,
94 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
95 * errmsg("could not open stop-word file \"%s\": %m",
96 * filename)));
97 * while ((line = tsearch_readline(&trst)) != NULL)
98 * process line;
99 * tsearch_readline_end(&trst);
101 * Note that the caller supplies the ereport() for file open failure;
102 * this is so that a custom message can be provided. The filename string
103 * passed to tsearch_readline_begin() must remain valid through
104 * tsearch_readline_end().
106 bool
107 tsearch_readline_begin(tsearch_readline_state *stp,
108 const char *filename)
110 if ((stp->fp = AllocateFile(filename, "r")) == NULL)
111 return false;
112 stp->filename = filename;
113 stp->lineno = 0;
114 stp->curline = NULL;
115 /* Setup error traceback support for ereport() */
116 stp->cb.callback = tsearch_readline_callback;
117 stp->cb.arg = (void *) stp;
118 stp->cb.previous = error_context_stack;
119 error_context_stack = &stp->cb;
120 return true;
124 * Read the next line from a tsearch data file (expected to be in UTF-8), and
125 * convert it to database encoding if needed. The returned string is palloc'd.
126 * NULL return means EOF.
128 char *
129 tsearch_readline(tsearch_readline_state *stp)
131 char *result;
133 stp->lineno++;
134 stp->curline = NULL;
135 result = t_readline(stp->fp);
136 stp->curline = result;
137 return result;
141 * Close down after reading a file with tsearch_readline()
143 void
144 tsearch_readline_end(tsearch_readline_state *stp)
146 FreeFile(stp->fp);
147 /* Pop the error context stack */
148 error_context_stack = stp->cb.previous;
152 * Error context callback for errors occurring while reading a tsearch
153 * configuration file.
155 static void
156 tsearch_readline_callback(void *arg)
158 tsearch_readline_state *stp = (tsearch_readline_state *) arg;
161 * We can't include the text of the config line for errors that occur
162 * during t_readline() itself. This is only partly a consequence of
163 * our arms-length use of that routine: the major cause of such
164 * errors is encoding violations, and we daren't try to print error
165 * messages containing badly-encoded data.
167 if (stp->curline)
168 errcontext("line %d of configuration file \"%s\": \"%s\"",
169 stp->lineno,
170 stp->filename,
171 stp->curline);
172 else
173 errcontext("line %d of configuration file \"%s\"",
174 stp->lineno,
175 stp->filename);
180 * Read the next line from a tsearch data file (expected to be in UTF-8), and
181 * convert it to database encoding if needed. The returned string is palloc'd.
182 * NULL return means EOF.
184 * Note: direct use of this function is now deprecated. Go through
185 * tsearch_readline() to provide better error reporting.
187 char *
188 t_readline(FILE *fp)
190 int len;
191 char *recoded;
192 char buf[4096]; /* lines must not be longer than this */
194 if (fgets(buf, sizeof(buf), fp) == NULL)
195 return NULL;
197 len = strlen(buf);
199 /* Make sure the input is valid UTF-8 */
200 (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
202 /* And convert */
203 recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
204 len,
205 PG_UTF8,
206 GetDatabaseEncoding());
208 if (recoded == NULL) /* should not happen */
209 elog(ERROR, "encoding conversion failed");
211 if (recoded == buf)
214 * conversion didn't pstrdup, so we must. We can use the length of the
215 * original string, because no conversion was done.
217 recoded = pnstrdup(recoded, len);
220 return recoded;
/*
 * lowerstr --- fold a null-terminated string to lower case
 *
 * Convenience wrapper: measures the string with strlen() and hands it to
 * lowerstr_with_len().
 *
 * Returned string is palloc'd
 */
char *
lowerstr(const char *str)
{
	int			nbytes = strlen(str);

	return lowerstr_with_len(str, nbytes);
}
/*
 * lowerstr_with_len --- fold string to lower case
 *
 * str is the input and len its length in bytes.  Input string need not be
 * null-terminated.
 *
 * Returned string is palloc'd and null-terminated.  An empty input yields an
 * empty string.  Raises an error if the lower-cased wide string cannot be
 * converted back to the server encoding.
 */
char *
lowerstr_with_len(const char *str, int len)
{
	char	   *out;

	if (len == 0)
		return pstrdup("");

#ifdef USE_WIDE_UPPER_LOWER

	/*
	 * Use wide char code only when max encoding length > 1 and ctype != C.
	 * Some operating systems fail with multi-byte encodings and a C locale.
	 * Also, for a C locale there is no need to process as multibyte. From
	 * backend/utils/adt/oracle_compat.c Teodor
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
	{
		wchar_t    *wstr,
				   *wptr;
		int			wlen;

		/*
		 * alloc number of wchar_t for worst case, len contains number of
		 * bytes >= number of characters and alloc 1 wchar_t for 0, because
		 * wchar2char wants zero-terminated string
		 */
		wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));

		wlen = char2wchar(wstr, len + 1, str, len);
		Assert(wlen <= len);

		/* fold each wide character in place, up to the terminator */
		while (*wptr)
		{
			*wptr = towlower((wint_t) *wptr);
			wptr++;
		}

		/*
		 * Alloc result string for worst case + '\0'
		 */
		len = pg_database_encoding_max_length() * wlen + 1;
		out = (char *) palloc(len);

		wlen = wchar2char(out, wstr, len);

		/*
		 * Report a conversion failure before doing anything else: the
		 * message uses %m, and an intervening pfree() could disturb errno.
		 * Not freeing wstr on this path is harmless, since the error
		 * cleanup releases the memory context anyway.
		 */
		if (wlen < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("conversion from wchar_t to server encoding failed: %m")));
		Assert(wlen < len);

		pfree(wstr);
	}
	else
#endif   /* USE_WIDE_UPPER_LOWER */
	{
		const char *ptr = str;
		char	   *outptr;

		/* byte-at-a-time folding; stops early at an embedded '\0' */
		outptr = out = (char *) palloc(sizeof(char) * (len + 1));
		while ((ptr - str) < len && *ptr)
		{
			*outptr++ = tolower(TOUCHAR(ptr));
			ptr++;
		}
		*outptr = '\0';
	}

	return out;
}