Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / tsearch / ts_locale.c
blobc534ac1ff77e52bd932e66eb7780f17888bf8617
1 /*-------------------------------------------------------------------------
3 * ts_locale.c
4 * locale compatibility layer for tsearch
6 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
9 * IDENTIFICATION
10 * $PostgreSQL$
12 *-------------------------------------------------------------------------
14 #include "postgres.h"
16 #include "storage/fd.h"
17 #include "tsearch/ts_locale.h"
18 #include "tsearch/ts_public.h"
20 static void tsearch_readline_callback(void *arg);
23 #ifdef USE_WIDE_UPPER_LOWER
/*
 * t_isdigit --- test whether the character starting at ptr is a digit
 *
 * When pg_mblen() reports a one-byte character, or lc_ctype_is_c() is true,
 * the first byte is classified with isdigit().  Otherwise the character is
 * converted with char2wchar() and classified with iswdigit().  The return
 * value is whatever the classifier returned.
 */
int
t_isdigit(const char *ptr)
{
	wchar_t		wch[2];
	int			charlen = pg_mblen(ptr);

	/* Multibyte character outside the C locale: use wide-char rules */
	if (charlen != 1 && !lc_ctype_is_c())
	{
		char2wchar(wch, 2, ptr, charlen);
		return iswdigit((wint_t) wch[0]);
	}

	return isdigit(TOUCHAR(ptr));
}
/*
 * t_isspace --- test whether the character starting at ptr is whitespace
 *
 * When pg_mblen() reports a one-byte character, or lc_ctype_is_c() is true,
 * the first byte is classified with isspace().  Otherwise the character is
 * converted with char2wchar() and classified with iswspace().  The return
 * value is whatever the classifier returned.
 */
int
t_isspace(const char *ptr)
{
	wchar_t		wch[2];
	int			charlen = pg_mblen(ptr);

	/* Multibyte character outside the C locale: use wide-char rules */
	if (charlen != 1 && !lc_ctype_is_c())
	{
		char2wchar(wch, 2, ptr, charlen);
		return iswspace((wint_t) wch[0]);
	}

	return isspace(TOUCHAR(ptr));
}
/*
 * t_isalpha --- test whether the character starting at ptr is alphabetic
 *
 * When pg_mblen() reports a one-byte character, or lc_ctype_is_c() is true,
 * the first byte is classified with isalpha().  Otherwise the character is
 * converted with char2wchar() and classified with iswalpha().  The return
 * value is whatever the classifier returned.
 */
int
t_isalpha(const char *ptr)
{
	wchar_t		wch[2];
	int			charlen = pg_mblen(ptr);

	/* Multibyte character outside the C locale: use wide-char rules */
	if (charlen != 1 && !lc_ctype_is_c())
	{
		char2wchar(wch, 2, ptr, charlen);
		return iswalpha((wint_t) wch[0]);
	}

	return isalpha(TOUCHAR(ptr));
}
/*
 * t_isprint --- test whether the character starting at ptr is printable
 *
 * When pg_mblen() reports a one-byte character, or lc_ctype_is_c() is true,
 * the first byte is classified with isprint().  Otherwise the character is
 * converted with char2wchar() and classified with iswprint().  The return
 * value is whatever the classifier returned.
 */
int
t_isprint(const char *ptr)
{
	wchar_t		wch[2];
	int			charlen = pg_mblen(ptr);

	/* Multibyte character outside the C locale: use wide-char rules */
	if (charlen != 1 && !lc_ctype_is_c())
	{
		char2wchar(wch, 2, ptr, charlen);
		return iswprint((wint_t) wch[0]);
	}

	return isprint(TOUCHAR(ptr));
}
80 #endif /* USE_WIDE_UPPER_LOWER */
84 * Set up to read a file using tsearch_readline(). This facility is
85 * better than just reading the file directly because it provides error
86 * context pointing to the specific line where a problem is detected.
88 * Expected usage is:
90 * tsearch_readline_state trst;
92 * if (!tsearch_readline_begin(&trst, filename))
93 * ereport(ERROR,
94 * (errcode(ERRCODE_CONFIG_FILE_ERROR),
95 * errmsg("could not open stop-word file \"%s\": %m",
96 * filename)));
97 * while ((line = tsearch_readline(&trst)) != NULL)
98 * process line;
99 * tsearch_readline_end(&trst);
101 * Note that the caller supplies the ereport() for file open failure;
102 * this is so that a custom message can be provided. The filename string
103 * passed to tsearch_readline_begin() must remain valid through
104 * tsearch_readline_end().
106 bool
107 tsearch_readline_begin(tsearch_readline_state *stp,
108 const char *filename)
110 if ((stp->fp = AllocateFile(filename, "r")) == NULL)
111 return false;
112 stp->filename = filename;
113 stp->lineno = 0;
114 stp->curline = NULL;
115 /* Setup error traceback support for ereport() */
116 stp->cb.callback = tsearch_readline_callback;
117 stp->cb.arg = (void *) stp;
118 stp->cb.previous = error_context_stack;
119 error_context_stack = &stp->cb;
120 return true;
124 * Read the next line from a tsearch data file (expected to be in UTF-8), and
125 * convert it to database encoding if needed. The returned string is palloc'd.
126 * NULL return means EOF.
128 char *
129 tsearch_readline(tsearch_readline_state *stp)
131 char *result;
133 stp->lineno++;
134 stp->curline = NULL;
135 result = t_readline(stp->fp);
136 stp->curline = result;
137 return result;
141 * Close down after reading a file with tsearch_readline()
143 void
144 tsearch_readline_end(tsearch_readline_state *stp)
146 FreeFile(stp->fp);
147 /* Pop the error context stack */
148 error_context_stack = stp->cb.previous;
152 * Error context callback for errors occurring while reading a tsearch
153 * configuration file.
155 static void
156 tsearch_readline_callback(void *arg)
158 tsearch_readline_state *stp = (tsearch_readline_state *) arg;
161 * We can't include the text of the config line for errors that occur
162 * during t_readline() itself. This is only partly a consequence of
163 * our arms-length use of that routine: the major cause of such
164 * errors is encoding violations, and we daren't try to print error
165 * messages containing badly-encoded data.
167 if (stp->curline)
168 errcontext("line %d of configuration file \"%s\": \"%s\"",
169 stp->lineno,
170 stp->filename,
171 stp->curline);
172 else
173 errcontext("line %d of configuration file \"%s\"",
174 stp->lineno,
175 stp->filename);
180 * Read the next line from a tsearch data file (expected to be in UTF-8), and
181 * convert it to database encoding if needed. The returned string is palloc'd.
182 * NULL return means EOF.
184 * Note: direct use of this function is now deprecated. Go through
185 * tsearch_readline() to provide better error reporting.
187 char *
188 t_readline(FILE *fp)
190 int len;
191 char *recoded;
192 char buf[4096]; /* lines must not be longer than this */
194 if (fgets(buf, sizeof(buf), fp) == NULL)
195 return NULL;
197 len = strlen(buf);
199 /* Make sure the input is valid UTF-8 */
200 (void) pg_verify_mbstr(PG_UTF8, buf, len, false);
202 /* And convert */
203 recoded = (char *) pg_do_encoding_conversion((unsigned char *) buf,
204 len,
205 PG_UTF8,
206 GetDatabaseEncoding());
207 if (recoded == buf)
210 * conversion didn't pstrdup, so we must. We can use the length of the
211 * original string, because no conversion was done.
213 recoded = pnstrdup(recoded, len);
216 return recoded;
/*
 * lowerstr --- produce a lower-cased copy of a null-terminated string
 *
 * Measures the string with strlen() and hands it to lowerstr_with_len(),
 * whose result is returned unchanged.
 */
char *
lowerstr(const char *str)
{
	int			nbytes = strlen(str);

	return lowerstr_with_len(str, nbytes);
}
/*
 * lowerstr_with_len --- produce a lower-cased copy of a string
 *
 * str need not be null-terminated; at most len bytes of it are examined.
 * The result is a freshly allocated (palloc/pstrdup), null-terminated
 * string.  A zero len yields an empty string.
 */
char *
lowerstr_with_len(const char *str, int len)
{
	char	   *result;
	int			i;

	if (len == 0)
		return pstrdup("");

#ifdef USE_WIDE_UPPER_LOWER

	/*
	 * Take the wide-character route only for a multibyte encoding combined
	 * with a non-C ctype.  Some operating systems misbehave with a multibyte
	 * encoding and the C locale, and the C locale needs no multibyte
	 * processing anyway.  (Same approach as
	 * backend/utils/adt/oracle_compat.c.)
	 */
	if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
	{
		wchar_t    *wide;
		wchar_t    *cursor;
		int			nconv;
		int			outsize;

		/*
		 * Worst case is one wchar_t per input byte, since the byte count is
		 * at least the character count; one more slot holds the terminating
		 * zero that wchar2char() requires.
		 */
		wide = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));

		nconv = char2wchar(wide, len + 1, str, len);
		Assert(nconv <= len);

		for (cursor = wide; *cursor; cursor++)
			*cursor = towlower((wint_t) *cursor);

		/* Worst-case size of the converted result, plus the trailing '\0' */
		outsize = pg_database_encoding_max_length() * nconv + 1;
		result = (char *) palloc(outsize);

		nconv = wchar2char(result, wide, outsize);

		pfree(wide);

		if (nconv < 0)
			ereport(ERROR,
					(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
					 errmsg("conversion from wchar_t to server encoding failed: %m")));
		Assert(nconv < outsize);

		return result;
	}
#endif   /* USE_WIDE_UPPER_LOWER */

	/* Byte-at-a-time case: stop at len bytes or at an embedded '\0' */
	result = (char *) palloc(sizeof(char) * (len + 1));
	for (i = 0; i < len && str[i]; i++)
		result[i] = tolower(TOUCHAR(str + i));
	result[i] = '\0';

	return result;
}