1 /*-------------------------------------------------------------------------
4 * like expression handling code.
7 * A big hack of the regexp.c code!! Contributed by
8 * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95).
10 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
11 * Portions Copyright (c) 1994, Regents of the University of California
16 *-------------------------------------------------------------------------
22 #include "mb/pg_wchar.h"
23 #include "utils/builtins.h"
/*
 * LIKE_ABORT: match result code with the value -1.
 * NOTE(review): the interface routines below compare against LIKE_TRUE,
 * whose definition is not visible in this listing -- confirm the full set
 * of result codes in the real file.
 */
28 #define LIKE_ABORT (-1)
/*
 * Forward declarations for the file-local routines.
 *
 * The four *MatchText variants all take (text pointer, text length,
 * pattern pointer, pattern length) and return an int result code.  Going
 * by the template setup comments further down, SB_ is the single-byte
 * build, MB_ the multibyte build, UTF8_ the UTF8 build and SB_I the
 * single-byte case-insensitive build of like_match.c.
 *
 * NOTE(review): the leading numbers and the unusual line breaks look like
 * residue from a source listing; confirm against the real file.
 */
31 static int SB_MatchText(char *t
, int tlen
, char *p
, int plen
);
32 static text
*SB_do_like_escape(text
*, text
*);
34 static int MB_MatchText(char *t
, int tlen
, char *p
, int plen
);
35 static text
*MB_do_like_escape(text
*, text
*);
37 static int UTF8_MatchText(char *t
, int tlen
, char *p
, int plen
);
39 static int SB_IMatchText(char *t
, int tlen
, char *p
, int plen
);
/* Dispatch helpers that choose one of the matchers above. */
41 static int GenericMatchText(char *s
, int slen
, char *p
, int plen
);
42 static int Generic_Text_IC_like(text
*str
, text
*pat
);
44 /*--------------------
45 * Support routine for MatchText. Compares given multibyte streams
46 * as wide characters. If they match, returns 1 otherwise returns 0.
/*
 * wchareq: visible part of the multibyte character comparison helper.
 *
 * The statements shown obtain the length of the character at p1 with
 * pg_mblen() and compare it with the length of the character at p2.
 *
 * NOTE(review): the return type, braces, the first-byte comparison, the
 * return statements and the per-byte comparison that the inline comments
 * refer to are not present in this listing.  Recover them from the real
 * file before relying on this block.
 */
50 wchareq(char *p1
, char *p2
)
54 /* Optimization: quickly compare the first byte. */
58 p1_len
= pg_mblen(p1
);
59 if (pg_mblen(p2
) != p1_len
)
62 /* They are the same length */
72 * Formerly we had a routine iwchareq() here that tried to do case-insensitive
73 * comparison of multibyte characters. It did not work at all, however,
74 * because it relied on tolower() which has a single-byte API ... and
75 * towlower() wouldn't be much better since we have no suitably cheap way
76 * of getting a single character transformed to the system's wchar_t format.
77 * So now, we just downcase the strings using lower() and apply regular LIKE
78 * comparison. This should be revisited when we install better locale support.
/*
 * NextByte: advance pointer p by one element and decrement the remaining
 * length plen.  Both arguments are modified in place.  The expansion is a
 * parenthesized comma expression, so it is usable wherever an expression
 * is allowed; each argument is evaluated exactly once.
 */
81 #define NextByte(p, plen) ((p)++, (plen)--)
/*
 * Multibyte instantiation of like_match.c.  The macros are defined first
 * and the file is then included; presumably the included code is written
 * in terms of CHAREQ, NextChar, CopyAdvChar, MatchText and do_like_escape,
 * so that this inclusion yields MB_MatchText and MB_do_like_escape
 * (TODO confirm against like_match.c).
 */
83 /* Set up to compile like_match.c for multibyte characters */
/* CHAREQ: compare the characters at p1 and p2 by calling wchareq(). */
84 #define CHAREQ(p1, p2) wchareq((p1), (p2))
/*
 * NextChar: move p forward by the length pg_mblen() reports for the
 * character at p, and reduce plen by the same amount.  Statement-form
 * macro (do/while(0)), so it cannot be used inside an expression.
 */
85 #define NextChar(p, plen) \
86 do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
/*
 * CopyAdvChar: copy from src to dst while advancing both pointers.
 * NOTE(review): only part of the macro body is visible here (the listing
 * numbers jump from 88 to 91 and then to 94), and the last visible line
 * still ends in a continuation backslash.  The handling of srclen and the
 * closing of the do block must be taken from the real file.
 */
87 #define CopyAdvChar(dst, src, srclen) \
88 do { int __l = pg_mblen(src); \
91 *(dst)++ = *(src)++; \
94 #define MatchText MB_MatchText
95 #define do_like_escape MB_do_like_escape
97 #include "like_match.c"
/*
 * Single-byte instantiation of like_match.c: characters are compared and
 * advanced one byte at a time, and the inclusion is compiled under the
 * names SB_MatchText and SB_do_like_escape.
 *
 * NOTE(review): these macros were already defined for the multibyte build
 * and no #undef is visible in between; presumably like_match.c undefines
 * them at its end -- confirm.
 */
99 /* Set up to compile like_match.c for single-byte characters */
/* CHAREQ: plain comparison of the bytes at p1 and p2. */
100 #define CHAREQ(p1, p2) (*(p1) == *(p2))
/* NextChar: one character is one byte here, so defer to NextByte. */
101 #define NextChar(p, plen) NextByte((p), (plen))
/* CopyAdvChar: copy one byte, advance dst and src, decrement srclen. */
102 #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
104 #define MatchText SB_MatchText
105 #define do_like_escape SB_do_like_escape
107 #include "like_match.c"
/*
 * Third inclusion of like_match.c, compiled under the name SB_IMatchText
 * with byte-wise NextChar.  No do_like_escape name is defined for this
 * inclusion in the visible lines.
 *
 * NOTE(review): nothing visible here turns on case folding or defines
 * CHAREQ (the listing numbers skip 110 and 113) -- confirm how the
 * case-insensitive comparison is selected in the real file.
 */
109 /* Set up to compile like_match.c for single-byte, case-insensitive matches */
111 #define NextChar(p, plen) NextByte((p), (plen))
112 #define MatchText SB_IMatchText
114 #include "like_match.c"
/*
 * UTF8 instantiation of like_match.c, compiled as UTF8_MatchText.
 *
 * NextChar here avoids calling pg_mblen(): it always steps one byte, then
 * keeps stepping while input remains (plen > 0) and the byte at p has the
 * bit pattern 10xxxxxx, which is how UTF-8 marks continuation bytes.
 * The plen test comes first, so p is not dereferenced once plen reaches 0.
 *
 * NOTE(review): no CHAREQ definition is visible for this inclusion (the
 * listing number 117 is skipped) -- confirm in the real file.
 */
116 /* Set up to compile like_match.c for UTF8 encoding, using fast NextChar */
118 #define NextChar(p, plen) \
119 do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 )
120 #define MatchText UTF8_MatchText
122 #include "like_match.c"
/*
 * GenericMatchText: choose the matcher that fits the database encoding.
 *
 *   s, slen  - text to test and its length
 *   p, plen  - pattern and its length
 *
 * Dispatch order: SB_MatchText when pg_database_encoding_max_length() is
 * 1, UTF8_MatchText when the database encoding is PG_UTF8, otherwise
 * MB_MatchText.  The chosen matcher result is returned unchanged.
 *
 * NOTE(review): the return type line and the braces are missing from this
 * listing; the forward declaration gives the type as static int.
 */
125 GenericMatchText(char *s
, int slen
, char *p
, int plen
)
127 if (pg_database_encoding_max_length() == 1)
128 return SB_MatchText(s
, slen
, p
, plen
);
129 else if (GetDatabaseEncoding() == PG_UTF8
)
130 return UTF8_MatchText(s
, slen
, p
, plen
);
132 return MB_MatchText(s
, slen
, p
, plen
);
/*
 * Generic_Text_IC_like: case-insensitive match of str against pat.
 *
 * Multibyte encodings (max length > 1): both pattern and text are passed
 * through lower() and the lowered copies are matched with UTF8_MatchText
 * (PG_UTF8) or MB_MatchText (any other encoding).  Lengths are computed
 * as VARSIZE minus VARHDRSZ.
 *
 * Single-byte encodings: the inputs are used as they are, read through
 * VARDATA_ANY / VARSIZE_ANY_EXHDR, and matched with SB_IMatchText.
 *
 * Returns the result code of whichever matcher was called.
 *
 * NOTE(review): in the multibyte branch the assignments to s and p are
 * not visible (listing numbers 153 and 156 are skipped), and the local
 * declarations, braces and the else keywords are missing as well.  Take
 * them from the real file.
 */
136 Generic_Text_IC_like(text
*str
, text
*pat
)
144 * For efficiency reasons, in the single byte case we don't call lower()
145 * on the pattern and text, but instead call to_lower on each character.
146 * In the multi-byte case we don't have much choice :-(
149 if (pg_database_encoding_max_length() > 1)
151 /* lower's result is never packed, so OK to use old macros here */
152 pat
= DatumGetTextP(DirectFunctionCall1(lower
, PointerGetDatum(pat
)));
154 plen
= (VARSIZE(pat
) - VARHDRSZ
);
155 str
= DatumGetTextP(DirectFunctionCall1(lower
, PointerGetDatum(str
)));
157 slen
= (VARSIZE(str
) - VARHDRSZ
);
158 if (GetDatabaseEncoding() == PG_UTF8
)
159 return UTF8_MatchText(s
, slen
, p
, plen
);
161 return MB_MatchText(s
, slen
, p
, plen
);
165 p
= VARDATA_ANY(pat
);
166 plen
= VARSIZE_ANY_EXHDR(pat
);
167 s
= VARDATA_ANY(str
);
168 slen
= VARSIZE_ANY_EXHDR(str
);
169 return SB_IMatchText(s
, slen
, p
, plen
);
174 * interface routines called by the function manager
/*
 * namelike: function-manager entry point; LIKE with a Name value
 * (argument 0) and a text pattern (argument 1).
 *
 * The pattern bytes and length are read with VARDATA_ANY and
 * VARSIZE_ANY_EXHDR.  The boolean result is true exactly when
 * GenericMatchText() returns LIKE_TRUE.
 *
 * NOTE(review): the statements that derive s and slen from str are not
 * visible in this listing, nor are the return type, local declarations
 * and braces.
 */
178 namelike(PG_FUNCTION_ARGS
)
180 Name str
= PG_GETARG_NAME(0);
181 text
*pat
= PG_GETARG_TEXT_PP(1);
190 p
= VARDATA_ANY(pat
);
191 plen
= VARSIZE_ANY_EXHDR(pat
);
193 result
= (GenericMatchText(s
, slen
, p
, plen
) == LIKE_TRUE
);
195 PG_RETURN_BOOL(result
);
/*
 * namenlike: function-manager entry point; NOT LIKE with a Name value
 * (argument 0) and a text pattern (argument 1).
 *
 * Same steps as the LIKE form, but the boolean result is true for every
 * GenericMatchText() result other than LIKE_TRUE.
 *
 * NOTE(review): the statements that derive s and slen from str are not
 * visible in this listing, nor are the return type, local declarations
 * and braces.
 */
199 namenlike(PG_FUNCTION_ARGS
)
201 Name str
= PG_GETARG_NAME(0);
202 text
*pat
= PG_GETARG_TEXT_PP(1);
211 p
= VARDATA_ANY(pat
);
212 plen
= VARSIZE_ANY_EXHDR(pat
);
214 result
= (GenericMatchText(s
, slen
, p
, plen
) != LIKE_TRUE
);
216 PG_RETURN_BOOL(result
);
/*
 * textlike: function-manager entry point; LIKE for two text arguments
 * (argument 0 is the string, argument 1 the pattern).
 *
 * Data pointers and lengths of both arguments are read with VARDATA_ANY
 * and VARSIZE_ANY_EXHDR.  The boolean result is true exactly when
 * GenericMatchText() returns LIKE_TRUE.
 *
 * NOTE(review): return type, local declarations and braces are missing
 * from this listing.
 */
220 textlike(PG_FUNCTION_ARGS
)
222 text
*str
= PG_GETARG_TEXT_PP(0);
223 text
*pat
= PG_GETARG_TEXT_PP(1);
230 s
= VARDATA_ANY(str
);
231 slen
= VARSIZE_ANY_EXHDR(str
);
232 p
= VARDATA_ANY(pat
);
233 plen
= VARSIZE_ANY_EXHDR(pat
);
235 result
= (GenericMatchText(s
, slen
, p
, plen
) == LIKE_TRUE
);
237 PG_RETURN_BOOL(result
);
/*
 * textnlike: function-manager entry point; NOT LIKE for two text
 * arguments (argument 0 is the string, argument 1 the pattern).
 *
 * The boolean result is true for every GenericMatchText() result other
 * than LIKE_TRUE.
 *
 * NOTE(review): return type, local declarations and braces are missing
 * from this listing.
 */
241 textnlike(PG_FUNCTION_ARGS
)
243 text
*str
= PG_GETARG_TEXT_PP(0);
244 text
*pat
= PG_GETARG_TEXT_PP(1);
251 s
= VARDATA_ANY(str
);
252 slen
= VARSIZE_ANY_EXHDR(str
);
253 p
= VARDATA_ANY(pat
);
254 plen
= VARSIZE_ANY_EXHDR(pat
);
256 result
= (GenericMatchText(s
, slen
, p
, plen
) != LIKE_TRUE
);
258 PG_RETURN_BOOL(result
);
/*
 * bytealike: function-manager entry point; LIKE for two bytea arguments
 * (argument 0 is the value, argument 1 the pattern).
 *
 * Calls SB_MatchText directly, so matching is byte by byte and no
 * database-encoding check is made here.  The boolean result is true
 * exactly when SB_MatchText() returns LIKE_TRUE.
 *
 * NOTE(review): return type, local declarations and braces are missing
 * from this listing.
 */
262 bytealike(PG_FUNCTION_ARGS
)
264 bytea
*str
= PG_GETARG_BYTEA_PP(0);
265 bytea
*pat
= PG_GETARG_BYTEA_PP(1);
272 s
= VARDATA_ANY(str
);
273 slen
= VARSIZE_ANY_EXHDR(str
);
274 p
= VARDATA_ANY(pat
);
275 plen
= VARSIZE_ANY_EXHDR(pat
);
277 result
= (SB_MatchText(s
, slen
, p
, plen
) == LIKE_TRUE
);
279 PG_RETURN_BOOL(result
);
/*
 * byteanlike: function-manager entry point; NOT LIKE for two bytea
 * arguments (argument 0 is the value, argument 1 the pattern).
 *
 * Calls SB_MatchText directly (byte-by-byte matching, no encoding check
 * here).  The boolean result is true for every SB_MatchText() result
 * other than LIKE_TRUE.
 *
 * NOTE(review): return type, local declarations and braces are missing
 * from this listing.
 */
283 byteanlike(PG_FUNCTION_ARGS
)
285 bytea
*str
= PG_GETARG_BYTEA_PP(0);
286 bytea
*pat
= PG_GETARG_BYTEA_PP(1);
293 s
= VARDATA_ANY(str
);
294 slen
= VARSIZE_ANY_EXHDR(str
);
295 p
= VARDATA_ANY(pat
);
296 plen
= VARSIZE_ANY_EXHDR(pat
);
298 result
= (SB_MatchText(s
, slen
, p
, plen
) != LIKE_TRUE
);
300 PG_RETURN_BOOL(result
);
304 * Case-insensitive versions
/*
 * nameiclike: function-manager entry point; case-insensitive LIKE with a
 * Name value (argument 0) and a text pattern (argument 1).
 *
 * The Name is converted to text through name_text and the pair is handed
 * to Generic_Text_IC_like(); the boolean result is true exactly when that
 * returns LIKE_TRUE.
 *
 * NOTE(review): the DirectFunctionCall1(name_text, ...) call is cut off
 * in this listing (its argument and closing parentheses are missing), as
 * are the return type, local declarations and braces.
 */
308 nameiclike(PG_FUNCTION_ARGS
)
310 Name str
= PG_GETARG_NAME(0);
311 text
*pat
= PG_GETARG_TEXT_PP(1);
315 strtext
= DatumGetTextP(DirectFunctionCall1(name_text
,
317 result
= (Generic_Text_IC_like(strtext
, pat
) == LIKE_TRUE
);
319 PG_RETURN_BOOL(result
);
/*
 * nameicnlike: function-manager entry point; case-insensitive NOT LIKE
 * with a Name value (argument 0) and a text pattern (argument 1).
 *
 * The Name is converted to text through name_text and the pair is handed
 * to Generic_Text_IC_like(); the boolean result is true for every result
 * other than LIKE_TRUE.
 *
 * NOTE(review): the DirectFunctionCall1(name_text, ...) call is cut off
 * in this listing (its argument and closing parentheses are missing), as
 * are the return type, local declarations and braces.
 */
323 nameicnlike(PG_FUNCTION_ARGS
)
325 Name str
= PG_GETARG_NAME(0);
326 text
*pat
= PG_GETARG_TEXT_PP(1);
330 strtext
= DatumGetTextP(DirectFunctionCall1(name_text
,
332 result
= (Generic_Text_IC_like(strtext
, pat
) != LIKE_TRUE
);
334 PG_RETURN_BOOL(result
);
/*
 * texticlike: function-manager entry point; case-insensitive LIKE for two
 * text arguments (argument 0 is the string, argument 1 the pattern).
 *
 * Delegates to Generic_Text_IC_like(); the boolean result is true exactly
 * when that returns LIKE_TRUE.
 *
 * NOTE(review): return type, the declaration of result and the braces
 * are missing from this listing.
 */
338 texticlike(PG_FUNCTION_ARGS
)
340 text
*str
= PG_GETARG_TEXT_PP(0);
341 text
*pat
= PG_GETARG_TEXT_PP(1);
344 result
= (Generic_Text_IC_like(str
, pat
) == LIKE_TRUE
);
346 PG_RETURN_BOOL(result
);
/*
 * texticnlike: function-manager entry point; case-insensitive NOT LIKE
 * for two text arguments (argument 0 is the string, argument 1 the
 * pattern).
 *
 * Delegates to Generic_Text_IC_like(); the boolean result is true for
 * every result other than LIKE_TRUE.
 *
 * NOTE(review): return type, the declaration of result and the braces
 * are missing from this listing.
 */
350 texticnlike(PG_FUNCTION_ARGS
)
352 text
*str
= PG_GETARG_TEXT_PP(0);
353 text
*pat
= PG_GETARG_TEXT_PP(1);
356 result
= (Generic_Text_IC_like(str
, pat
) != LIKE_TRUE
);
358 PG_RETURN_BOOL(result
);
362 * like_escape() --- given a pattern and an ESCAPE string,
363 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * like_escape: function-manager entry point taking a text pattern
 * (argument 0) and a text ESCAPE string (argument 1) and returning the
 * converted pattern as text.
 *
 * The conversion is done by SB_do_like_escape for single-byte encodings
 * (max length 1) and by MB_do_like_escape otherwise.
 *
 * NOTE(review): no else is visible between the two assignments to result
 * (the listing numbers jump from 373 to 375).  As literally shown, the
 * second assignment would always overwrite the first, so make sure the
 * else is present in the real file.  Return type, the declaration of
 * result and the braces are also missing here.
 */
366 like_escape(PG_FUNCTION_ARGS
)
368 text
*pat
= PG_GETARG_TEXT_PP(0);
369 text
*esc
= PG_GETARG_TEXT_PP(1);
372 if (pg_database_encoding_max_length() == 1)
373 result
= SB_do_like_escape(pat
, esc
);
375 result
= MB_do_like_escape(pat
, esc
);
377 PG_RETURN_TEXT_P(result
);
381 * like_escape_bytea() --- given a pattern and an ESCAPE string,
382 * convert the pattern to use Postgres' standard backslash escape convention.
/*
 * like_escape_bytea: function-manager entry point; bytea counterpart of
 * the text escape conversion.
 *
 * Both bytea arguments are cast to text pointers so that the single-byte
 * routine SB_do_like_escape can be reused, and its result is cast back
 * to bytea for the return.  No encoding check is made here.
 *
 * NOTE(review): the casts presume bytea and text share the same in-memory
 * layout -- confirm against the type definitions.  Return type and braces
 * are missing from this listing.
 */
385 like_escape_bytea(PG_FUNCTION_ARGS
)
387 bytea
*pat
= PG_GETARG_BYTEA_PP(0);
388 bytea
*esc
= PG_GETARG_BYTEA_PP(1);
389 bytea
*result
= SB_do_like_escape((text
*) pat
, (text
*) esc
);
391 PG_RETURN_BYTEA_P((bytea
*) result
);