Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / utils / adt / like.c
blob3f2d48bf711e5a215ab2a08846a670bb5e1c2b3e
1 /*-------------------------------------------------------------------------
3 * like.c
4 * like expression handling code.
6 * NOTES
7 * A big hack of the regexp.c code!! Contributed by
8 * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
10 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
13 * IDENTIFICATION
14 * $PostgreSQL$
16 *-------------------------------------------------------------------------
18 #include "postgres.h"
20 #include <ctype.h>
22 #include "mb/pg_wchar.h"
23 #include "utils/builtins.h"
26 #define LIKE_TRUE 1
27 #define LIKE_FALSE 0
28 #define LIKE_ABORT (-1)
31 static int SB_MatchText(char *t, int tlen, char *p, int plen);
32 static text *SB_do_like_escape(text *, text *);
34 static int MB_MatchText(char *t, int tlen, char *p, int plen);
35 static text *MB_do_like_escape(text *, text *);
37 static int UTF8_MatchText(char *t, int tlen, char *p, int plen);
39 static int SB_IMatchText(char *t, int tlen, char *p, int plen);
41 static int GenericMatchText(char *s, int slen, char *p, int plen);
42 static int Generic_Text_IC_like(text *str, text *pat);
/*--------------------
 * wchareq --- support routine for MatchText.
 *
 * Compares the single multibyte characters starting at p1 and p2.
 * Returns 1 when both characters have the same byte length (as reported
 * by pg_mblen) and identical bytes, otherwise 0.
 *--------------------
 */
static inline int
wchareq(char *p1, char *p2)
{
	int			nbytes;

	/* Cheap rejection: differing lead bytes can never be equal characters */
	if (*p1 != *p2)
		return 0;

	/* Characters of different byte length cannot match either */
	nbytes = pg_mblen(p1);
	if (nbytes != pg_mblen(p2))
		return 0;

	/* Same length, so compare byte for byte */
	for (; nbytes != 0; nbytes--, p1++, p2++)
	{
		if (*p1 != *p2)
			return 0;
	}

	return 1;
}
/*
 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
 * comparison of multibyte characters. It did not work at all, however,
 * because it relied on tolower() which has a single-byte API ... and
 * towlower() wouldn't be much better since we have no suitably cheap way
 * of getting a single character transformed to the system's wchar_t format.
 * So now, we just downcase the strings using lower() and apply regular LIKE
 * comparison. This should be revisited when we install better locale support.
 */
81 #define NextByte(p, plen) ((p)++, (plen)--)
83 /* Set up to compile like_match.c for multibyte characters */
84 #define CHAREQ(p1, p2) wchareq((p1), (p2))
85 #define NextChar(p, plen) \
86 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
87 #define CopyAdvChar(dst, src, srclen) \
88 do { int __l = pg_mblen(src); \
89 (srclen) -= __l; \
90 while (__l-- > 0) \
91 *(dst)++ = *(src)++; \
92 } while (0)
94 #define MatchText MB_MatchText
95 #define do_like_escape MB_do_like_escape
97 #include "like_match.c"
99 /* Set up to compile like_match.c for single-byte characters */
100 #define CHAREQ(p1, p2) (*(p1) == *(p2))
101 #define NextChar(p, plen) NextByte((p), (plen))
102 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
104 #define MatchText SB_MatchText
105 #define do_like_escape SB_do_like_escape
107 #include "like_match.c"
109 /* setup to compile like_match.c for single byte case insensitive matches */
110 #define MATCH_LOWER
111 #define NextChar(p, plen) NextByte((p), (plen))
112 #define MatchText SB_IMatchText
114 #include "like_match.c"
116 /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */
118 #define NextChar(p, plen) \
119 do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
120 #define MatchText UTF8_MatchText
122 #include "like_match.c"
124 static inline int
125 GenericMatchText(char *s, int slen, char *p, int plen)
127 if (pg_database_encoding_max_length() == 1)
128 return SB_MatchText(s, slen, p, plen);
129 else if (GetDatabaseEncoding() == PG_UTF8)
130 return UTF8_MatchText(s, slen, p, plen);
131 else
132 return MB_MatchText(s, slen, p, plen);
135 static inline int
136 Generic_Text_IC_like(text *str, text *pat)
138 char *s,
140 int slen,
141 plen;
144 * For efficiency reasons, in the single byte case we don't call lower()
145 * on the pattern and text, but instead call to_lower on each character.
146 * In the multi-byte case we don't have much choice :-(
149 if (pg_database_encoding_max_length() > 1)
151 /* lower's result is never packed, so OK to use old macros here */
152 pat = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(pat)));
153 p = VARDATA(pat);
154 plen = (VARSIZE(pat) - VARHDRSZ);
155 str = DatumGetTextP(DirectFunctionCall1(lower, PointerGetDatum(str)));
156 s = VARDATA(str);
157 slen = (VARSIZE(str) - VARHDRSZ);
158 if (GetDatabaseEncoding() == PG_UTF8)
159 return UTF8_MatchText(s, slen, p, plen);
160 else
161 return MB_MatchText(s, slen, p, plen);
163 else
165 p = VARDATA_ANY(pat);
166 plen = VARSIZE_ANY_EXHDR(pat);
167 s = VARDATA_ANY(str);
168 slen = VARSIZE_ANY_EXHDR(str);
169 return SB_IMatchText(s, slen, p, plen);
174 * interface routines called by the function manager
177 Datum
178 namelike(PG_FUNCTION_ARGS)
180 Name str = PG_GETARG_NAME(0);
181 text *pat = PG_GETARG_TEXT_PP(1);
182 bool result;
183 char *s,
185 int slen,
186 plen;
188 s = NameStr(*str);
189 slen = strlen(s);
190 p = VARDATA_ANY(pat);
191 plen = VARSIZE_ANY_EXHDR(pat);
193 result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
195 PG_RETURN_BOOL(result);
198 Datum
199 namenlike(PG_FUNCTION_ARGS)
201 Name str = PG_GETARG_NAME(0);
202 text *pat = PG_GETARG_TEXT_PP(1);
203 bool result;
204 char *s,
206 int slen,
207 plen;
209 s = NameStr(*str);
210 slen = strlen(s);
211 p = VARDATA_ANY(pat);
212 plen = VARSIZE_ANY_EXHDR(pat);
214 result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
216 PG_RETURN_BOOL(result);
219 Datum
220 textlike(PG_FUNCTION_ARGS)
222 text *str = PG_GETARG_TEXT_PP(0);
223 text *pat = PG_GETARG_TEXT_PP(1);
224 bool result;
225 char *s,
227 int slen,
228 plen;
230 s = VARDATA_ANY(str);
231 slen = VARSIZE_ANY_EXHDR(str);
232 p = VARDATA_ANY(pat);
233 plen = VARSIZE_ANY_EXHDR(pat);
235 result = (GenericMatchText(s, slen, p, plen) == LIKE_TRUE);
237 PG_RETURN_BOOL(result);
240 Datum
241 textnlike(PG_FUNCTION_ARGS)
243 text *str = PG_GETARG_TEXT_PP(0);
244 text *pat = PG_GETARG_TEXT_PP(1);
245 bool result;
246 char *s,
248 int slen,
249 plen;
251 s = VARDATA_ANY(str);
252 slen = VARSIZE_ANY_EXHDR(str);
253 p = VARDATA_ANY(pat);
254 plen = VARSIZE_ANY_EXHDR(pat);
256 result = (GenericMatchText(s, slen, p, plen) != LIKE_TRUE);
258 PG_RETURN_BOOL(result);
261 Datum
262 bytealike(PG_FUNCTION_ARGS)
264 bytea *str = PG_GETARG_BYTEA_PP(0);
265 bytea *pat = PG_GETARG_BYTEA_PP(1);
266 bool result;
267 char *s,
269 int slen,
270 plen;
272 s = VARDATA_ANY(str);
273 slen = VARSIZE_ANY_EXHDR(str);
274 p = VARDATA_ANY(pat);
275 plen = VARSIZE_ANY_EXHDR(pat);
277 result = (SB_MatchText(s, slen, p, plen) == LIKE_TRUE);
279 PG_RETURN_BOOL(result);
282 Datum
283 byteanlike(PG_FUNCTION_ARGS)
285 bytea *str = PG_GETARG_BYTEA_PP(0);
286 bytea *pat = PG_GETARG_BYTEA_PP(1);
287 bool result;
288 char *s,
290 int slen,
291 plen;
293 s = VARDATA_ANY(str);
294 slen = VARSIZE_ANY_EXHDR(str);
295 p = VARDATA_ANY(pat);
296 plen = VARSIZE_ANY_EXHDR(pat);
298 result = (SB_MatchText(s, slen, p, plen) != LIKE_TRUE);
300 PG_RETURN_BOOL(result);
304 * Case-insensitive versions
307 Datum
308 nameiclike(PG_FUNCTION_ARGS)
310 Name str = PG_GETARG_NAME(0);
311 text *pat = PG_GETARG_TEXT_PP(1);
312 bool result;
313 text *strtext;
315 strtext = DatumGetTextP(DirectFunctionCall1(name_text,
316 NameGetDatum(str)));
317 result = (Generic_Text_IC_like(strtext, pat) == LIKE_TRUE);
319 PG_RETURN_BOOL(result);
322 Datum
323 nameicnlike(PG_FUNCTION_ARGS)
325 Name str = PG_GETARG_NAME(0);
326 text *pat = PG_GETARG_TEXT_PP(1);
327 bool result;
328 text *strtext;
330 strtext = DatumGetTextP(DirectFunctionCall1(name_text,
331 NameGetDatum(str)));
332 result = (Generic_Text_IC_like(strtext, pat) != LIKE_TRUE);
334 PG_RETURN_BOOL(result);
337 Datum
338 texticlike(PG_FUNCTION_ARGS)
340 text *str = PG_GETARG_TEXT_PP(0);
341 text *pat = PG_GETARG_TEXT_PP(1);
342 bool result;
344 result = (Generic_Text_IC_like(str, pat) == LIKE_TRUE);
346 PG_RETURN_BOOL(result);
349 Datum
350 texticnlike(PG_FUNCTION_ARGS)
352 text *str = PG_GETARG_TEXT_PP(0);
353 text *pat = PG_GETARG_TEXT_PP(1);
354 bool result;
356 result = (Generic_Text_IC_like(str, pat) != LIKE_TRUE);
358 PG_RETURN_BOOL(result);
362 * like_escape() --- given a pattern and an ESCAPE string,
363 * convert the pattern to use Postgres' standard backslash escape convention.
365 Datum
366 like_escape(PG_FUNCTION_ARGS)
368 text *pat = PG_GETARG_TEXT_PP(0);
369 text *esc = PG_GETARG_TEXT_PP(1);
370 text *result;
372 if (pg_database_encoding_max_length() == 1)
373 result = SB_do_like_escape(pat, esc);
374 else
375 result = MB_do_like_escape(pat, esc);
377 PG_RETURN_TEXT_P(result);
381 * like_escape_bytea() --- given a pattern and an ESCAPE string,
382 * convert the pattern to use Postgres' standard backslash escape convention.
384 Datum
385 like_escape_bytea(PG_FUNCTION_ARGS)
387 bytea *pat = PG_GETARG_BYTEA_PP(0);
388 bytea *esc = PG_GETARG_BYTEA_PP(1);
389 bytea *result = SB_do_like_escape((text *) pat, (text *) esc);
391 PG_RETURN_BYTEA_P((bytea *) result);