Fix oversight in previous error-reporting patch; mustn't pfree path string
[PostgreSQL.git] / src / backend / utils / adt / varlena.c
blobc5dd4476bb152d421c35016c06bb27bf87822fb5
/*-------------------------------------------------------------------------
 *
 * varlena.c
 *	  Functions for the variable-length built-in types.
 *
 * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  $PostgreSQL$
 *
 *-------------------------------------------------------------------------
 */
15 #include "postgres.h"
17 #include <ctype.h>
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "libpq/md5.h"
22 #include "libpq/pqformat.h"
23 #include "miscadmin.h"
24 #include "parser/scansup.h"
25 #include "regex/regex.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
31 typedef struct varlena unknown;
33 typedef struct
35 bool use_wchar; /* T if multibyte encoding */
36 char *str1; /* use these if not use_wchar */
37 char *str2; /* note: these point to original texts */
38 pg_wchar *wstr1; /* use these if use_wchar */
39 pg_wchar *wstr2; /* note: these are palloc'd */
40 int len1; /* string lengths in logical characters */
41 int len2;
42 /* Skip table for Boyer-Moore-Horspool search algorithm: */
43 int skiptablemask; /* mask for ANDing with skiptable subscripts */
44 int skiptable[256]; /* skip distance for given mismatched char */
45 } TextPositionState;
47 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
48 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
49 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
50 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
51 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
53 static int text_cmp(text *arg1, text *arg2);
54 static int32 text_length(Datum str);
55 static int text_position(text *t1, text *t2);
56 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
57 static int text_position_next(int start_pos, TextPositionState *state);
58 static void text_position_cleanup(TextPositionState *state);
59 static text *text_substring(Datum str,
60 int32 start,
61 int32 length,
62 bool length_not_specified);
63 static void appendStringInfoText(StringInfo str, const text *t);
66 /*****************************************************************************
67 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
68 *****************************************************************************/
71 * cstring_to_text
73 * Create a text value from a null-terminated C string.
75 * The new text value is freshly palloc'd with a full-size VARHDR.
77 text *
78 cstring_to_text(const char *s)
80 return cstring_to_text_with_len(s, strlen(s));
84 * cstring_to_text_with_len
86 * Same as cstring_to_text except the caller specifies the string length;
87 * the string need not be null_terminated.
89 text *
90 cstring_to_text_with_len(const char *s, int len)
92 text *result = (text *) palloc(len + VARHDRSZ);
94 SET_VARSIZE(result, len + VARHDRSZ);
95 memcpy(VARDATA(result), s, len);
97 return result;
101 * text_to_cstring
103 * Create a palloc'd, null-terminated C string from a text value.
105 * We support being passed a compressed or toasted text value.
106 * This is a bit bogus since such values shouldn't really be referred to as
107 * "text *", but it seems useful for robustness. If we didn't handle that
108 * case here, we'd need another routine that did, anyway.
110 char *
111 text_to_cstring(const text *t)
113 /* must cast away the const, unfortunately */
114 text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
115 int len = VARSIZE_ANY_EXHDR(tunpacked);
116 char *result;
118 result = (char *) palloc(len + 1);
119 memcpy(result, VARDATA_ANY(tunpacked), len);
120 result[len] = '\0';
122 if (tunpacked != t)
123 pfree(tunpacked);
125 return result;
129 * text_to_cstring_buffer
131 * Copy a text value into a caller-supplied buffer of size dst_len.
133 * The text string is truncated if necessary to fit. The result is
134 * guaranteed null-terminated (unless dst_len == 0).
136 * We support being passed a compressed or toasted text value.
137 * This is a bit bogus since such values shouldn't really be referred to as
138 * "text *", but it seems useful for robustness. If we didn't handle that
139 * case here, we'd need another routine that did, anyway.
141 void
142 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
144 /* must cast away the const, unfortunately */
145 text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
146 size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
148 if (dst_len > 0)
150 dst_len--;
151 if (dst_len >= src_len)
152 dst_len = src_len;
153 else /* ensure truncation is encoding-safe */
154 dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
155 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
156 dst[dst_len] = '\0';
159 if (srcunpacked != src)
160 pfree(srcunpacked);
164 /*****************************************************************************
165 * USER I/O ROUTINES *
166 *****************************************************************************/
169 #define VAL(CH) ((CH) - '0')
170 #define DIG(VAL) ((VAL) + '0')
173 * byteain - converts from printable representation of byte array
175 * Non-printable characters must be passed as '\nnn' (octal) and are
176 * converted to internal form. '\' must be passed as '\\'.
177 * ereport(ERROR, ...) if bad form.
179 * BUGS:
180 * The input is scanned twice.
181 * The error checking of input is minimal.
183 Datum
184 byteain(PG_FUNCTION_ARGS)
186 char *inputText = PG_GETARG_CSTRING(0);
187 char *tp;
188 char *rp;
189 int byte;
190 bytea *result;
192 for (byte = 0, tp = inputText; *tp != '\0'; byte++)
194 if (tp[0] != '\\')
195 tp++;
196 else if ((tp[0] == '\\') &&
197 (tp[1] >= '0' && tp[1] <= '3') &&
198 (tp[2] >= '0' && tp[2] <= '7') &&
199 (tp[3] >= '0' && tp[3] <= '7'))
200 tp += 4;
201 else if ((tp[0] == '\\') &&
202 (tp[1] == '\\'))
203 tp += 2;
204 else
207 * one backslash, not followed by 0 or ### valid octal
209 ereport(ERROR,
210 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
211 errmsg("invalid input syntax for type bytea")));
215 byte += VARHDRSZ;
216 result = (bytea *) palloc(byte);
217 SET_VARSIZE(result, byte);
219 tp = inputText;
220 rp = VARDATA(result);
221 while (*tp != '\0')
223 if (tp[0] != '\\')
224 *rp++ = *tp++;
225 else if ((tp[0] == '\\') &&
226 (tp[1] >= '0' && tp[1] <= '3') &&
227 (tp[2] >= '0' && tp[2] <= '7') &&
228 (tp[3] >= '0' && tp[3] <= '7'))
230 byte = VAL(tp[1]);
231 byte <<= 3;
232 byte += VAL(tp[2]);
233 byte <<= 3;
234 *rp++ = byte + VAL(tp[3]);
235 tp += 4;
237 else if ((tp[0] == '\\') &&
238 (tp[1] == '\\'))
240 *rp++ = '\\';
241 tp += 2;
243 else
246 * We should never get here. The first pass should not allow it.
248 ereport(ERROR,
249 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
250 errmsg("invalid input syntax for type bytea")));
254 PG_RETURN_BYTEA_P(result);
258 * byteaout - converts to printable representation of byte array
260 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
261 * '\\'.
263 * NULL vlena should be an error--returning string with NULL for now.
265 Datum
266 byteaout(PG_FUNCTION_ARGS)
268 bytea *vlena = PG_GETARG_BYTEA_PP(0);
269 char *result;
270 char *vp;
271 char *rp;
272 int val; /* holds unprintable chars */
273 int i;
274 int len;
276 len = 1; /* empty string has 1 char */
277 vp = VARDATA_ANY(vlena);
278 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
280 if (*vp == '\\')
281 len += 2;
282 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
283 len += 4;
284 else
285 len++;
287 rp = result = (char *) palloc(len);
288 vp = VARDATA_ANY(vlena);
289 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
291 if (*vp == '\\')
293 *rp++ = '\\';
294 *rp++ = '\\';
296 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
298 val = *vp;
299 rp[0] = '\\';
300 rp[3] = DIG(val & 07);
301 val >>= 3;
302 rp[2] = DIG(val & 07);
303 val >>= 3;
304 rp[1] = DIG(val & 03);
305 rp += 4;
307 else
308 *rp++ = *vp;
310 *rp = '\0';
311 PG_RETURN_CSTRING(result);
315 * bytearecv - converts external binary format to bytea
317 Datum
318 bytearecv(PG_FUNCTION_ARGS)
320 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
321 bytea *result;
322 int nbytes;
324 nbytes = buf->len - buf->cursor;
325 result = (bytea *) palloc(nbytes + VARHDRSZ);
326 SET_VARSIZE(result, nbytes + VARHDRSZ);
327 pq_copymsgbytes(buf, VARDATA(result), nbytes);
328 PG_RETURN_BYTEA_P(result);
332 * byteasend - converts bytea to binary format
334 * This is a special case: just copy the input...
336 Datum
337 byteasend(PG_FUNCTION_ARGS)
339 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
341 PG_RETURN_BYTEA_P(vlena);
346 * textin - converts "..." to internal representation
348 Datum
349 textin(PG_FUNCTION_ARGS)
351 char *inputText = PG_GETARG_CSTRING(0);
353 PG_RETURN_TEXT_P(cstring_to_text(inputText));
357 * textout - converts internal representation to "..."
359 Datum
360 textout(PG_FUNCTION_ARGS)
362 Datum txt = PG_GETARG_DATUM(0);
364 PG_RETURN_CSTRING(TextDatumGetCString(txt));
368 * textrecv - converts external binary format to text
370 Datum
371 textrecv(PG_FUNCTION_ARGS)
373 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
374 text *result;
375 char *str;
376 int nbytes;
378 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
380 result = cstring_to_text_with_len(str, nbytes);
381 pfree(str);
382 PG_RETURN_TEXT_P(result);
386 * textsend - converts text to binary format
388 Datum
389 textsend(PG_FUNCTION_ARGS)
391 text *t = PG_GETARG_TEXT_PP(0);
392 StringInfoData buf;
394 pq_begintypsend(&buf);
395 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
396 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
401 * unknownin - converts "..." to internal representation
403 Datum
404 unknownin(PG_FUNCTION_ARGS)
406 char *str = PG_GETARG_CSTRING(0);
408 /* representation is same as cstring */
409 PG_RETURN_CSTRING(pstrdup(str));
413 * unknownout - converts internal representation to "..."
415 Datum
416 unknownout(PG_FUNCTION_ARGS)
418 /* representation is same as cstring */
419 char *str = PG_GETARG_CSTRING(0);
421 PG_RETURN_CSTRING(pstrdup(str));
425 * unknownrecv - converts external binary format to unknown
427 Datum
428 unknownrecv(PG_FUNCTION_ARGS)
430 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
431 char *str;
432 int nbytes;
434 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
435 /* representation is same as cstring */
436 PG_RETURN_CSTRING(str);
440 * unknownsend - converts unknown to binary format
442 Datum
443 unknownsend(PG_FUNCTION_ARGS)
445 /* representation is same as cstring */
446 char *str = PG_GETARG_CSTRING(0);
447 StringInfoData buf;
449 pq_begintypsend(&buf);
450 pq_sendtext(&buf, str, strlen(str));
451 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
455 /* ========== PUBLIC ROUTINES ========== */
458 * textlen -
459 * returns the logical length of a text*
460 * (which is less than the VARSIZE of the text*)
462 Datum
463 textlen(PG_FUNCTION_ARGS)
465 Datum str = PG_GETARG_DATUM(0);
467 /* try to avoid decompressing argument */
468 PG_RETURN_INT32(text_length(str));
472 * text_length -
473 * Does the real work for textlen()
475 * This is broken out so it can be called directly by other string processing
476 * functions. Note that the argument is passed as a Datum, to indicate that
477 * it may still be in compressed form. We can avoid decompressing it at all
478 * in some cases.
480 static int32
481 text_length(Datum str)
483 /* fastpath when max encoding length is one */
484 if (pg_database_encoding_max_length() == 1)
485 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
486 else
488 text *t = DatumGetTextPP(str);
490 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
491 VARSIZE_ANY_EXHDR(t)));
496 * textoctetlen -
497 * returns the physical length of a text*
498 * (which is less than the VARSIZE of the text*)
500 Datum
501 textoctetlen(PG_FUNCTION_ARGS)
503 Datum str = PG_GETARG_DATUM(0);
505 /* We need not detoast the input at all */
506 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
510 * textcat -
511 * takes two text* and returns a text* that is the concatenation of
512 * the two.
514 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
515 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
516 * Allocate space for output in all cases.
517 * XXX - thomas 1997-07-10
519 Datum
520 textcat(PG_FUNCTION_ARGS)
522 text *t1 = PG_GETARG_TEXT_PP(0);
523 text *t2 = PG_GETARG_TEXT_PP(1);
524 int len1,
525 len2,
526 len;
527 text *result;
528 char *ptr;
530 len1 = VARSIZE_ANY_EXHDR(t1);
531 if (len1 < 0)
532 len1 = 0;
534 len2 = VARSIZE_ANY_EXHDR(t2);
535 if (len2 < 0)
536 len2 = 0;
538 len = len1 + len2 + VARHDRSZ;
539 result = (text *) palloc(len);
541 /* Set size of result string... */
542 SET_VARSIZE(result, len);
544 /* Fill data field of result string... */
545 ptr = VARDATA(result);
546 if (len1 > 0)
547 memcpy(ptr, VARDATA_ANY(t1), len1);
548 if (len2 > 0)
549 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
551 PG_RETURN_TEXT_P(result);
555 * charlen_to_bytelen()
556 * Compute the number of bytes occupied by n characters starting at *p
558 * It is caller's responsibility that there actually are n characters;
559 * the string need not be null-terminated.
561 static int
562 charlen_to_bytelen(const char *p, int n)
564 if (pg_database_encoding_max_length() == 1)
566 /* Optimization for single-byte encodings */
567 return n;
569 else
571 const char *s;
573 for (s = p; n > 0; n--)
574 s += pg_mblen(s);
576 return s - p;
581 * text_substr()
582 * Return a substring starting at the specified position.
583 * - thomas 1997-12-31
585 * Input:
586 * - string
587 * - starting position (is one-based)
588 * - string length
590 * If the starting position is zero or less, then return from the start of the string
591 * adjusting the length to be consistent with the "negative start" per SQL92.
592 * If the length is less than zero, return the remaining string.
594 * Added multibyte support.
595 * - Tatsuo Ishii 1998-4-21
596 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
597 * Formerly returned the entire string; now returns a portion.
598 * - Thomas Lockhart 1998-12-10
599 * Now uses faster TOAST-slicing interface
600 * - John Gray 2002-02-22
601 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
602 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
603 * error; if E < 1, return '', not entire string). Fixed MB related bug when
604 * S > LC and < LC + 4 sometimes garbage characters are returned.
605 * - Joe Conway 2002-08-10
607 Datum
608 text_substr(PG_FUNCTION_ARGS)
610 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
611 PG_GETARG_INT32(1),
612 PG_GETARG_INT32(2),
613 false));
617 * text_substr_no_len -
618 * Wrapper to avoid opr_sanity failure due to
619 * one function accepting a different number of args.
621 Datum
622 text_substr_no_len(PG_FUNCTION_ARGS)
624 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
625 PG_GETARG_INT32(1),
626 -1, true));
630 * text_substring -
631 * Does the real work for text_substr() and text_substr_no_len()
633 * This is broken out so it can be called directly by other string processing
634 * functions. Note that the argument is passed as a Datum, to indicate that
635 * it may still be in compressed/toasted form. We can avoid detoasting all
636 * of it in some cases.
638 * The result is always a freshly palloc'd datum.
640 static text *
641 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
643 int32 eml = pg_database_encoding_max_length();
644 int32 S = start; /* start position */
645 int32 S1; /* adjusted start position */
646 int32 L1; /* adjusted substring length */
648 /* life is easy if the encoding max length is 1 */
649 if (eml == 1)
651 S1 = Max(S, 1);
653 if (length_not_specified) /* special case - get length to end of
654 * string */
655 L1 = -1;
656 else
658 /* end position */
659 int E = S + length;
662 * A negative value for L is the only way for the end position to
663 * be before the start. SQL99 says to throw an error.
665 if (E < S)
666 ereport(ERROR,
667 (errcode(ERRCODE_SUBSTRING_ERROR),
668 errmsg("negative substring length not allowed")));
671 * A zero or negative value for the end position can happen if the
672 * start was negative or one. SQL99 says to return a zero-length
673 * string.
675 if (E < 1)
676 return cstring_to_text("");
678 L1 = E - S1;
682 * If the start position is past the end of the string, SQL99 says to
683 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
684 * that for us. Convert to zero-based starting position
686 return DatumGetTextPSlice(str, S1 - 1, L1);
688 else if (eml > 1)
691 * When encoding max length is > 1, we can't get LC without
692 * detoasting, so we'll grab a conservatively large slice now and go
693 * back later to do the right thing
695 int32 slice_start;
696 int32 slice_size;
697 int32 slice_strlen;
698 text *slice;
699 int32 E1;
700 int32 i;
701 char *p;
702 char *s;
703 text *ret;
706 * if S is past the end of the string, the tuple toaster will return a
707 * zero-length string to us
709 S1 = Max(S, 1);
712 * We need to start at position zero because there is no way to know
713 * in advance which byte offset corresponds to the supplied start
714 * position.
716 slice_start = 0;
718 if (length_not_specified) /* special case - get length to end of
719 * string */
720 slice_size = L1 = -1;
721 else
723 int E = S + length;
726 * A negative value for L is the only way for the end position to
727 * be before the start. SQL99 says to throw an error.
729 if (E < S)
730 ereport(ERROR,
731 (errcode(ERRCODE_SUBSTRING_ERROR),
732 errmsg("negative substring length not allowed")));
735 * A zero or negative value for the end position can happen if the
736 * start was negative or one. SQL99 says to return a zero-length
737 * string.
739 if (E < 1)
740 return cstring_to_text("");
743 * if E is past the end of the string, the tuple toaster will
744 * truncate the length for us
746 L1 = E - S1;
749 * Total slice size in bytes can't be any longer than the start
750 * position plus substring length times the encoding max length.
752 slice_size = (S1 + L1) * eml;
756 * If we're working with an untoasted source, no need to do an extra
757 * copying step.
759 if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
760 VARATT_IS_EXTERNAL(DatumGetPointer(str)))
761 slice = DatumGetTextPSlice(str, slice_start, slice_size);
762 else
763 slice = (text *) DatumGetPointer(str);
765 /* see if we got back an empty string */
766 if (VARSIZE_ANY_EXHDR(slice) == 0)
768 if (slice != (text *) DatumGetPointer(str))
769 pfree(slice);
770 return cstring_to_text("");
773 /* Now we can get the actual length of the slice in MB characters */
774 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
775 VARSIZE_ANY_EXHDR(slice));
778 * Check that the start position wasn't > slice_strlen. If so, SQL99
779 * says to return a zero-length string.
781 if (S1 > slice_strlen)
783 if (slice != (text *) DatumGetPointer(str))
784 pfree(slice);
785 return cstring_to_text("");
789 * Adjust L1 and E1 now that we know the slice string length. Again
790 * remember that S1 is one based, and slice_start is zero based.
792 if (L1 > -1)
793 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
794 else
795 E1 = slice_start + 1 + slice_strlen;
798 * Find the start position in the slice; remember S1 is not zero based
800 p = VARDATA_ANY(slice);
801 for (i = 0; i < S1 - 1; i++)
802 p += pg_mblen(p);
804 /* hang onto a pointer to our start position */
805 s = p;
808 * Count the actual bytes used by the substring of the requested
809 * length.
811 for (i = S1; i < E1; i++)
812 p += pg_mblen(p);
814 ret = (text *) palloc(VARHDRSZ + (p - s));
815 SET_VARSIZE(ret, VARHDRSZ + (p - s));
816 memcpy(VARDATA(ret), s, (p - s));
818 if (slice != (text *) DatumGetPointer(str))
819 pfree(slice);
821 return ret;
823 else
824 elog(ERROR, "invalid backend encoding: encoding max length < 1");
826 /* not reached: suppress compiler warning */
827 return NULL;
831 * textpos -
832 * Return the position of the specified substring.
833 * Implements the SQL92 POSITION() function.
834 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
835 * - thomas 1997-07-27
837 Datum
838 textpos(PG_FUNCTION_ARGS)
840 text *str = PG_GETARG_TEXT_PP(0);
841 text *search_str = PG_GETARG_TEXT_PP(1);
843 PG_RETURN_INT32((int32) text_position(str, search_str));
847 * text_position -
848 * Does the real work for textpos()
850 * Inputs:
851 * t1 - string to be searched
852 * t2 - pattern to match within t1
853 * Result:
854 * Character index of the first matched char, starting from 1,
855 * or 0 if no match.
857 * This is broken out so it can be called directly by other string processing
858 * functions.
860 static int
861 text_position(text *t1, text *t2)
863 TextPositionState state;
864 int result;
866 text_position_setup(t1, t2, &state);
867 result = text_position_next(1, &state);
868 text_position_cleanup(&state);
869 return result;
874 * text_position_setup, text_position_next, text_position_cleanup -
875 * Component steps of text_position()
877 * These are broken out so that a string can be efficiently searched for
878 * multiple occurrences of the same pattern. text_position_next may be
879 * called multiple times with increasing values of start_pos, which is
880 * the 1-based character position to start the search from. The "state"
881 * variable is normally just a local variable in the caller.
884 static void
885 text_position_setup(text *t1, text *t2, TextPositionState *state)
887 int len1 = VARSIZE_ANY_EXHDR(t1);
888 int len2 = VARSIZE_ANY_EXHDR(t2);
890 if (pg_database_encoding_max_length() == 1)
892 /* simple case - single byte encoding */
893 state->use_wchar = false;
894 state->str1 = VARDATA_ANY(t1);
895 state->str2 = VARDATA_ANY(t2);
896 state->len1 = len1;
897 state->len2 = len2;
899 else
901 /* not as simple - multibyte encoding */
902 pg_wchar *p1,
903 *p2;
905 p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
906 len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
907 p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
908 len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
910 state->use_wchar = true;
911 state->wstr1 = p1;
912 state->wstr2 = p2;
913 state->len1 = len1;
914 state->len2 = len2;
918 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
919 * notes we use the terminology that the "haystack" is the string to be
920 * searched (t1) and the "needle" is the pattern being sought (t2).
922 * If the needle is empty or bigger than the haystack then there is no
923 * point in wasting cycles initializing the table. We also choose not
924 * to use B-M-H for needles of length 1, since the skip table can't
925 * possibly save anything in that case.
927 if (len1 >= len2 && len2 > 1)
929 int searchlength = len1 - len2;
930 int skiptablemask;
931 int last;
932 int i;
935 * First we must determine how much of the skip table to use. The
936 * declaration of TextPositionState allows up to 256 elements, but for
937 * short search problems we don't really want to have to initialize so
938 * many elements --- it would take too long in comparison to the
939 * actual search time. So we choose a useful skip table size based on
940 * the haystack length minus the needle length. The closer the needle
941 * length is to the haystack length the less useful skipping becomes.
943 * Note: since we use bit-masking to select table elements, the skip
944 * table size MUST be a power of 2, and so the mask must be 2^N-1.
946 if (searchlength < 16)
947 skiptablemask = 3;
948 else if (searchlength < 64)
949 skiptablemask = 7;
950 else if (searchlength < 128)
951 skiptablemask = 15;
952 else if (searchlength < 512)
953 skiptablemask = 31;
954 else if (searchlength < 2048)
955 skiptablemask = 63;
956 else if (searchlength < 4096)
957 skiptablemask = 127;
958 else
959 skiptablemask = 255;
960 state->skiptablemask = skiptablemask;
963 * Initialize the skip table. We set all elements to the needle
964 * length, since this is the correct skip distance for any character
965 * not found in the needle.
967 for (i = 0; i <= skiptablemask; i++)
968 state->skiptable[i] = len2;
971 * Now examine the needle. For each character except the last one,
972 * set the corresponding table element to the appropriate skip
973 * distance. Note that when two characters share the same skip table
974 * entry, the one later in the needle must determine the skip distance.
976 last = len2 - 1;
978 if (!state->use_wchar)
980 const char *str2 = state->str2;
982 for (i = 0; i < last; i++)
983 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
985 else
987 const pg_wchar *wstr2 = state->wstr2;
989 for (i = 0; i < last; i++)
990 state->skiptable[wstr2[i] & skiptablemask] = last - i;
995 static int
996 text_position_next(int start_pos, TextPositionState *state)
998 int haystack_len = state->len1;
999 int needle_len = state->len2;
1000 int skiptablemask = state->skiptablemask;
1002 Assert(start_pos > 0); /* else caller error */
1004 if (needle_len <= 0)
1005 return start_pos; /* result for empty pattern */
1007 start_pos--; /* adjust for zero based arrays */
1009 /* Done if the needle can't possibly fit */
1010 if (haystack_len < start_pos + needle_len)
1011 return 0;
1013 if (!state->use_wchar)
1015 /* simple case - single byte encoding */
1016 const char *haystack = state->str1;
1017 const char *needle = state->str2;
1018 const char *haystack_end = &haystack[haystack_len];
1019 const char *hptr;
1021 if (needle_len == 1)
1023 /* No point in using B-M-H for a one-character needle */
1024 char nchar = *needle;
1026 hptr = &haystack[start_pos];
1027 while (hptr < haystack_end)
1029 if (*hptr == nchar)
1030 return hptr - haystack + 1;
1031 hptr++;
1034 else
1036 const char *needle_last = &needle[needle_len - 1];
1038 /* Start at startpos plus the length of the needle */
1039 hptr = &haystack[start_pos + needle_len - 1];
1040 while (hptr < haystack_end)
1042 /* Match the needle scanning *backward* */
1043 const char *nptr;
1044 const char *p;
1046 nptr = needle_last;
1047 p = hptr;
1048 while (*nptr == *p)
1050 /* Matched it all? If so, return 1-based position */
1051 if (nptr == needle)
1052 return p - haystack + 1;
1053 nptr--, p--;
1056 * No match, so use the haystack char at hptr to decide how
1057 * far to advance. If the needle had any occurrence of that
1058 * character (or more precisely, one sharing the same
1059 * skiptable entry) before its last character, then we advance
1060 * far enough to align the last such needle character with
1061 * that haystack position. Otherwise we can advance by the
1062 * whole needle length.
1064 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1068 else
1070 /* The multibyte char version. This works exactly the same way. */
1071 const pg_wchar *haystack = state->wstr1;
1072 const pg_wchar *needle = state->wstr2;
1073 const pg_wchar *haystack_end = &haystack[haystack_len];
1074 const pg_wchar *hptr;
1076 if (needle_len == 1)
1078 /* No point in using B-M-H for a one-character needle */
1079 pg_wchar nchar = *needle;
1081 hptr = &haystack[start_pos];
1082 while (hptr < haystack_end)
1084 if (*hptr == nchar)
1085 return hptr - haystack + 1;
1086 hptr++;
1089 else
1091 const pg_wchar *needle_last = &needle[needle_len - 1];
1093 /* Start at startpos plus the length of the needle */
1094 hptr = &haystack[start_pos + needle_len - 1];
1095 while (hptr < haystack_end)
1097 /* Match the needle scanning *backward* */
1098 const pg_wchar *nptr;
1099 const pg_wchar *p;
1101 nptr = needle_last;
1102 p = hptr;
1103 while (*nptr == *p)
1105 /* Matched it all? If so, return 1-based position */
1106 if (nptr == needle)
1107 return p - haystack + 1;
1108 nptr--, p--;
1111 * No match, so use the haystack char at hptr to decide how
1112 * far to advance. If the needle had any occurrence of that
1113 * character (or more precisely, one sharing the same
1114 * skiptable entry) before its last character, then we advance
1115 * far enough to align the last such needle character with
1116 * that haystack position. Otherwise we can advance by the
1117 * whole needle length.
1119 hptr += state->skiptable[*hptr & skiptablemask];
1124 return 0; /* not found */
1127 static void
1128 text_position_cleanup(TextPositionState *state)
1130 if (state->use_wchar)
1132 pfree(state->wstr1);
1133 pfree(state->wstr2);
1137 /* varstr_cmp()
1138 * Comparison function for text strings with given lengths.
1139 * Includes locale support, but must copy strings to temporary memory
1140 * to allow null-termination for inputs to strcoll().
1141 * Returns -1, 0 or 1
1144 varstr_cmp(char *arg1, int len1, char *arg2, int len2)
1146 int result;
1149 * Unfortunately, there is no strncoll(), so in the non-C locale case we
1150 * have to do some memory copying. This turns out to be significantly
1151 * slower, so we optimize the case where LC_COLLATE is C. We also try to
1152 * optimize relatively-short strings by avoiding palloc/pfree overhead.
1154 if (lc_collate_is_c())
1156 result = strncmp(arg1, arg2, Min(len1, len2));
1157 if ((result == 0) && (len1 != len2))
1158 result = (len1 < len2) ? -1 : 1;
1160 else
1162 #define STACKBUFLEN 1024
1164 char a1buf[STACKBUFLEN];
1165 char a2buf[STACKBUFLEN];
1166 char *a1p,
1167 *a2p;
1169 #ifdef WIN32
1170 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1171 if (GetDatabaseEncoding() == PG_UTF8)
1173 int a1len;
1174 int a2len;
1175 int r;
1177 if (len1 >= STACKBUFLEN / 2)
1179 a1len = len1 * 2 + 2;
1180 a1p = palloc(a1len);
1182 else
1184 a1len = STACKBUFLEN;
1185 a1p = a1buf;
1187 if (len2 >= STACKBUFLEN / 2)
1189 a2len = len2 * 2 + 2;
1190 a2p = palloc(a2len);
1192 else
1194 a2len = STACKBUFLEN;
1195 a2p = a2buf;
1198 /* stupid Microsloth API does not work for zero-length input */
1199 if (len1 == 0)
1200 r = 0;
1201 else
1203 r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
1204 (LPWSTR) a1p, a1len / 2);
1205 if (!r)
1206 ereport(ERROR,
1207 (errmsg("could not convert string to UTF-16: error %lu",
1208 GetLastError())));
1210 ((LPWSTR) a1p)[r] = 0;
1212 if (len2 == 0)
1213 r = 0;
1214 else
1216 r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
1217 (LPWSTR) a2p, a2len / 2);
1218 if (!r)
1219 ereport(ERROR,
1220 (errmsg("could not convert string to UTF-16: error %lu",
1221 GetLastError())));
1223 ((LPWSTR) a2p)[r] = 0;
1225 errno = 0;
1226 result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
1227 if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
1228 * headers */
1229 ereport(ERROR,
1230 (errmsg("could not compare Unicode strings: %m")));
1233 * In some locales wcscoll() can claim that nonidentical strings
1234 * are equal. Believing that would be bad news for a number of
1235 * reasons, so we follow Perl's lead and sort "equal" strings
1236 * according to strcmp (on the UTF-8 representation).
1238 if (result == 0)
1240 result = strncmp(arg1, arg2, Min(len1, len2));
1241 if ((result == 0) && (len1 != len2))
1242 result = (len1 < len2) ? -1 : 1;
1245 if (a1p != a1buf)
1246 pfree(a1p);
1247 if (a2p != a2buf)
1248 pfree(a2p);
1250 return result;
1252 #endif /* WIN32 */
1254 if (len1 >= STACKBUFLEN)
1255 a1p = (char *) palloc(len1 + 1);
1256 else
1257 a1p = a1buf;
1258 if (len2 >= STACKBUFLEN)
1259 a2p = (char *) palloc(len2 + 1);
1260 else
1261 a2p = a2buf;
1263 memcpy(a1p, arg1, len1);
1264 a1p[len1] = '\0';
1265 memcpy(a2p, arg2, len2);
1266 a2p[len2] = '\0';
1268 result = strcoll(a1p, a2p);
1271 * In some locales strcoll() can claim that nonidentical strings are
1272 * equal. Believing that would be bad news for a number of reasons,
1273 * so we follow Perl's lead and sort "equal" strings according to
1274 * strcmp().
1276 if (result == 0)
1277 result = strcmp(a1p, a2p);
1279 if (a1p != a1buf)
1280 pfree(a1p);
1281 if (a2p != a2buf)
1282 pfree(a2p);
1285 return result;
1289 /* text_cmp()
1290 * Internal comparison function for text strings.
1291 * Returns -1, 0 or 1
1293 static int
1294 text_cmp(text *arg1, text *arg2)
1296 char *a1p,
1297 *a2p;
1298 int len1,
1299 len2;
1301 a1p = VARDATA_ANY(arg1);
1302 a2p = VARDATA_ANY(arg2);
1304 len1 = VARSIZE_ANY_EXHDR(arg1);
1305 len2 = VARSIZE_ANY_EXHDR(arg2);
1307 return varstr_cmp(a1p, len1, a2p, len2);
1311 * Comparison functions for text strings.
1313 * Note: btree indexes need these routines not to leak memory; therefore,
1314 * be careful to free working copies of toasted datums. Most places don't
1315 * need to be so careful.
1318 Datum
1319 texteq(PG_FUNCTION_ARGS)
1321 text *arg1 = PG_GETARG_TEXT_PP(0);
1322 text *arg2 = PG_GETARG_TEXT_PP(1);
1323 bool result;
1326 * Since we only care about equality or not-equality, we can avoid all the
1327 * expense of strcoll() here, and just do bitwise comparison.
1329 if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1330 result = false;
1331 else
1332 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1333 VARSIZE_ANY_EXHDR(arg1)) == 0);
1335 PG_FREE_IF_COPY(arg1, 0);
1336 PG_FREE_IF_COPY(arg2, 1);
1338 PG_RETURN_BOOL(result);
1341 Datum
1342 textne(PG_FUNCTION_ARGS)
1344 text *arg1 = PG_GETARG_TEXT_PP(0);
1345 text *arg2 = PG_GETARG_TEXT_PP(1);
1346 bool result;
1349 * Since we only care about equality or not-equality, we can avoid all the
1350 * expense of strcoll() here, and just do bitwise comparison.
1352 if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1353 result = true;
1354 else
1355 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1356 VARSIZE_ANY_EXHDR(arg1)) != 0);
1358 PG_FREE_IF_COPY(arg1, 0);
1359 PG_FREE_IF_COPY(arg2, 1);
1361 PG_RETURN_BOOL(result);
1364 Datum
1365 text_lt(PG_FUNCTION_ARGS)
1367 text *arg1 = PG_GETARG_TEXT_PP(0);
1368 text *arg2 = PG_GETARG_TEXT_PP(1);
1369 bool result;
1371 result = (text_cmp(arg1, arg2) < 0);
1373 PG_FREE_IF_COPY(arg1, 0);
1374 PG_FREE_IF_COPY(arg2, 1);
1376 PG_RETURN_BOOL(result);
1379 Datum
1380 text_le(PG_FUNCTION_ARGS)
1382 text *arg1 = PG_GETARG_TEXT_PP(0);
1383 text *arg2 = PG_GETARG_TEXT_PP(1);
1384 bool result;
1386 result = (text_cmp(arg1, arg2) <= 0);
1388 PG_FREE_IF_COPY(arg1, 0);
1389 PG_FREE_IF_COPY(arg2, 1);
1391 PG_RETURN_BOOL(result);
1394 Datum
1395 text_gt(PG_FUNCTION_ARGS)
1397 text *arg1 = PG_GETARG_TEXT_PP(0);
1398 text *arg2 = PG_GETARG_TEXT_PP(1);
1399 bool result;
1401 result = (text_cmp(arg1, arg2) > 0);
1403 PG_FREE_IF_COPY(arg1, 0);
1404 PG_FREE_IF_COPY(arg2, 1);
1406 PG_RETURN_BOOL(result);
1409 Datum
1410 text_ge(PG_FUNCTION_ARGS)
1412 text *arg1 = PG_GETARG_TEXT_PP(0);
1413 text *arg2 = PG_GETARG_TEXT_PP(1);
1414 bool result;
1416 result = (text_cmp(arg1, arg2) >= 0);
1418 PG_FREE_IF_COPY(arg1, 0);
1419 PG_FREE_IF_COPY(arg2, 1);
1421 PG_RETURN_BOOL(result);
1424 Datum
1425 bttextcmp(PG_FUNCTION_ARGS)
1427 text *arg1 = PG_GETARG_TEXT_PP(0);
1428 text *arg2 = PG_GETARG_TEXT_PP(1);
1429 int32 result;
1431 result = text_cmp(arg1, arg2);
1433 PG_FREE_IF_COPY(arg1, 0);
1434 PG_FREE_IF_COPY(arg2, 1);
1436 PG_RETURN_INT32(result);
1440 Datum
1441 text_larger(PG_FUNCTION_ARGS)
1443 text *arg1 = PG_GETARG_TEXT_PP(0);
1444 text *arg2 = PG_GETARG_TEXT_PP(1);
1445 text *result;
1447 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1449 PG_RETURN_TEXT_P(result);
1452 Datum
1453 text_smaller(PG_FUNCTION_ARGS)
1455 text *arg1 = PG_GETARG_TEXT_PP(0);
1456 text *arg2 = PG_GETARG_TEXT_PP(1);
1457 text *result;
1459 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1461 PG_RETURN_TEXT_P(result);
1466 * The following operators support character-by-character comparison
1467 * of text datums, to allow building indexes suitable for LIKE clauses.
1468 * Note that the regular texteq/textne comparison operators are assumed
1469 * to be compatible with these!
1472 static int
1473 internal_text_pattern_compare(text *arg1, text *arg2)
1475 int result;
1476 int len1,
1477 len2;
1479 len1 = VARSIZE_ANY_EXHDR(arg1);
1480 len2 = VARSIZE_ANY_EXHDR(arg2);
1482 result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1483 if (result != 0)
1484 return result;
1485 else if (len1 < len2)
1486 return -1;
1487 else if (len1 > len2)
1488 return 1;
1489 else
1490 return 0;
1494 Datum
1495 text_pattern_lt(PG_FUNCTION_ARGS)
1497 text *arg1 = PG_GETARG_TEXT_PP(0);
1498 text *arg2 = PG_GETARG_TEXT_PP(1);
1499 int result;
1501 result = internal_text_pattern_compare(arg1, arg2);
1503 PG_FREE_IF_COPY(arg1, 0);
1504 PG_FREE_IF_COPY(arg2, 1);
1506 PG_RETURN_BOOL(result < 0);
1510 Datum
1511 text_pattern_le(PG_FUNCTION_ARGS)
1513 text *arg1 = PG_GETARG_TEXT_PP(0);
1514 text *arg2 = PG_GETARG_TEXT_PP(1);
1515 int result;
1517 result = internal_text_pattern_compare(arg1, arg2);
1519 PG_FREE_IF_COPY(arg1, 0);
1520 PG_FREE_IF_COPY(arg2, 1);
1522 PG_RETURN_BOOL(result <= 0);
1526 Datum
1527 text_pattern_ge(PG_FUNCTION_ARGS)
1529 text *arg1 = PG_GETARG_TEXT_PP(0);
1530 text *arg2 = PG_GETARG_TEXT_PP(1);
1531 int result;
1533 result = internal_text_pattern_compare(arg1, arg2);
1535 PG_FREE_IF_COPY(arg1, 0);
1536 PG_FREE_IF_COPY(arg2, 1);
1538 PG_RETURN_BOOL(result >= 0);
1542 Datum
1543 text_pattern_gt(PG_FUNCTION_ARGS)
1545 text *arg1 = PG_GETARG_TEXT_PP(0);
1546 text *arg2 = PG_GETARG_TEXT_PP(1);
1547 int result;
1549 result = internal_text_pattern_compare(arg1, arg2);
1551 PG_FREE_IF_COPY(arg1, 0);
1552 PG_FREE_IF_COPY(arg2, 1);
1554 PG_RETURN_BOOL(result > 0);
1558 Datum
1559 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1561 text *arg1 = PG_GETARG_TEXT_PP(0);
1562 text *arg2 = PG_GETARG_TEXT_PP(1);
1563 int result;
1565 result = internal_text_pattern_compare(arg1, arg2);
1567 PG_FREE_IF_COPY(arg1, 0);
1568 PG_FREE_IF_COPY(arg2, 1);
1570 PG_RETURN_INT32(result);
1574 /*-------------------------------------------------------------
1575 * byteaoctetlen
1577 * get the number of bytes contained in an instance of type 'bytea'
1578 *-------------------------------------------------------------
1580 Datum
1581 byteaoctetlen(PG_FUNCTION_ARGS)
1583 Datum str = PG_GETARG_DATUM(0);
1585 /* We need not detoast the input at all */
1586 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1590 * byteacat -
1591 * takes two bytea* and returns a bytea* that is the concatenation of
1592 * the two.
1594 * Cloned from textcat and modified as required.
1596 Datum
1597 byteacat(PG_FUNCTION_ARGS)
1599 bytea *t1 = PG_GETARG_BYTEA_PP(0);
1600 bytea *t2 = PG_GETARG_BYTEA_PP(1);
1601 int len1,
1602 len2,
1603 len;
1604 bytea *result;
1605 char *ptr;
1607 len1 = VARSIZE_ANY_EXHDR(t1);
1608 if (len1 < 0)
1609 len1 = 0;
1611 len2 = VARSIZE_ANY_EXHDR(t2);
1612 if (len2 < 0)
1613 len2 = 0;
1615 len = len1 + len2 + VARHDRSZ;
1616 result = (bytea *) palloc(len);
1618 /* Set size of result string... */
1619 SET_VARSIZE(result, len);
1621 /* Fill data field of result string... */
1622 ptr = VARDATA(result);
1623 if (len1 > 0)
1624 memcpy(ptr, VARDATA_ANY(t1), len1);
1625 if (len2 > 0)
1626 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
1628 PG_RETURN_BYTEA_P(result);
1631 #define PG_STR_GET_BYTEA(str_) \
1632 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1635 * bytea_substr()
1636 * Return a substring starting at the specified position.
1637 * Cloned from text_substr and modified as required.
1639 * Input:
1640 * - string
1641 * - starting position (is one-based)
1642 * - string length (optional)
1644 * If the starting position is zero or less, then return from the start of the string
1645 * adjusting the length to be consistent with the "negative start" per SQL92.
1646 * If the length is less than zero, an ERROR is thrown. If no third argument
1647 * (length) is provided, the length to the end of the string is assumed.
1649 Datum
1650 bytea_substr(PG_FUNCTION_ARGS)
1652 int S = PG_GETARG_INT32(1); /* start position */
1653 int S1; /* adjusted start position */
1654 int L1; /* adjusted substring length */
1656 S1 = Max(S, 1);
1658 if (fcinfo->nargs == 2)
1661 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1662 * the end of the string if we pass it a negative value for length.
1664 L1 = -1;
1666 else
1668 /* end position */
1669 int E = S + PG_GETARG_INT32(2);
1672 * A negative value for L is the only way for the end position to be
1673 * before the start. SQL99 says to throw an error.
1675 if (E < S)
1676 ereport(ERROR,
1677 (errcode(ERRCODE_SUBSTRING_ERROR),
1678 errmsg("negative substring length not allowed")));
1681 * A zero or negative value for the end position can happen if the
1682 * start was negative or one. SQL99 says to return a zero-length
1683 * string.
1685 if (E < 1)
1686 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1688 L1 = E - S1;
1692 * If the start position is past the end of the string, SQL99 says to
1693 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1694 * for us. Convert to zero-based starting position
1696 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1700 * bytea_substr_no_len -
1701 * Wrapper to avoid opr_sanity failure due to
1702 * one function accepting a different number of args.
1704 Datum
1705 bytea_substr_no_len(PG_FUNCTION_ARGS)
1707 return bytea_substr(fcinfo);
1711 * byteapos -
1712 * Return the position of the specified substring.
1713 * Implements the SQL92 POSITION() function.
1714 * Cloned from textpos and modified as required.
1716 Datum
1717 byteapos(PG_FUNCTION_ARGS)
1719 bytea *t1 = PG_GETARG_BYTEA_PP(0);
1720 bytea *t2 = PG_GETARG_BYTEA_PP(1);
1721 int pos;
1722 int px,
1724 int len1,
1725 len2;
1726 char *p1,
1727 *p2;
1729 len1 = VARSIZE_ANY_EXHDR(t1);
1730 len2 = VARSIZE_ANY_EXHDR(t2);
1732 if (len2 <= 0)
1733 PG_RETURN_INT32(1); /* result for empty pattern */
1735 p1 = VARDATA_ANY(t1);
1736 p2 = VARDATA_ANY(t2);
1738 pos = 0;
1739 px = (len1 - len2);
1740 for (p = 0; p <= px; p++)
1742 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1744 pos = p + 1;
1745 break;
1747 p1++;
1750 PG_RETURN_INT32(pos);
1753 /*-------------------------------------------------------------
1754 * byteaGetByte
1756 * this routine treats "bytea" as an array of bytes.
1757 * It returns the Nth byte (a number between 0 and 255).
1758 *-------------------------------------------------------------
1760 Datum
1761 byteaGetByte(PG_FUNCTION_ARGS)
1763 bytea *v = PG_GETARG_BYTEA_PP(0);
1764 int32 n = PG_GETARG_INT32(1);
1765 int len;
1766 int byte;
1768 len = VARSIZE_ANY_EXHDR(v);
1770 if (n < 0 || n >= len)
1771 ereport(ERROR,
1772 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1773 errmsg("index %d out of valid range, 0..%d",
1774 n, len - 1)));
1776 byte = ((unsigned char *) VARDATA_ANY(v))[n];
1778 PG_RETURN_INT32(byte);
1781 /*-------------------------------------------------------------
1782 * byteaGetBit
1784 * This routine treats a "bytea" type like an array of bits.
1785 * It returns the value of the Nth bit (0 or 1).
1787 *-------------------------------------------------------------
1789 Datum
1790 byteaGetBit(PG_FUNCTION_ARGS)
1792 bytea *v = PG_GETARG_BYTEA_PP(0);
1793 int32 n = PG_GETARG_INT32(1);
1794 int byteNo,
1795 bitNo;
1796 int len;
1797 int byte;
1799 len = VARSIZE_ANY_EXHDR(v);
1801 if (n < 0 || n >= len * 8)
1802 ereport(ERROR,
1803 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1804 errmsg("index %d out of valid range, 0..%d",
1805 n, len * 8 - 1)));
1807 byteNo = n / 8;
1808 bitNo = n % 8;
1810 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
1812 if (byte & (1 << bitNo))
1813 PG_RETURN_INT32(1);
1814 else
1815 PG_RETURN_INT32(0);
1818 /*-------------------------------------------------------------
1819 * byteaSetByte
1821 * Given an instance of type 'bytea' creates a new one with
1822 * the Nth byte set to the given value.
1824 *-------------------------------------------------------------
1826 Datum
1827 byteaSetByte(PG_FUNCTION_ARGS)
1829 bytea *v = PG_GETARG_BYTEA_P(0);
1830 int32 n = PG_GETARG_INT32(1);
1831 int32 newByte = PG_GETARG_INT32(2);
1832 int len;
1833 bytea *res;
1835 len = VARSIZE(v) - VARHDRSZ;
1837 if (n < 0 || n >= len)
1838 ereport(ERROR,
1839 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1840 errmsg("index %d out of valid range, 0..%d",
1841 n, len - 1)));
1844 * Make a copy of the original varlena.
1846 res = (bytea *) palloc(VARSIZE(v));
1847 memcpy((char *) res, (char *) v, VARSIZE(v));
1850 * Now set the byte.
1852 ((unsigned char *) VARDATA(res))[n] = newByte;
1854 PG_RETURN_BYTEA_P(res);
1857 /*-------------------------------------------------------------
1858 * byteaSetBit
1860 * Given an instance of type 'bytea' creates a new one with
1861 * the Nth bit set to the given value.
1863 *-------------------------------------------------------------
1865 Datum
1866 byteaSetBit(PG_FUNCTION_ARGS)
1868 bytea *v = PG_GETARG_BYTEA_P(0);
1869 int32 n = PG_GETARG_INT32(1);
1870 int32 newBit = PG_GETARG_INT32(2);
1871 bytea *res;
1872 int len;
1873 int oldByte,
1874 newByte;
1875 int byteNo,
1876 bitNo;
1878 len = VARSIZE(v) - VARHDRSZ;
1880 if (n < 0 || n >= len * 8)
1881 ereport(ERROR,
1882 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1883 errmsg("index %d out of valid range, 0..%d",
1884 n, len * 8 - 1)));
1886 byteNo = n / 8;
1887 bitNo = n % 8;
1890 * sanity check!
1892 if (newBit != 0 && newBit != 1)
1893 ereport(ERROR,
1894 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1895 errmsg("new bit must be 0 or 1")));
1898 * Make a copy of the original varlena.
1900 res = (bytea *) palloc(VARSIZE(v));
1901 memcpy((char *) res, (char *) v, VARSIZE(v));
1904 * Update the byte.
1906 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1908 if (newBit == 0)
1909 newByte = oldByte & (~(1 << bitNo));
1910 else
1911 newByte = oldByte | (1 << bitNo);
1913 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1915 PG_RETURN_BYTEA_P(res);
1919 /* text_name()
1920 * Converts a text type to a Name type.
1922 Datum
1923 text_name(PG_FUNCTION_ARGS)
1925 text *s = PG_GETARG_TEXT_PP(0);
1926 Name result;
1927 int len;
1929 len = VARSIZE_ANY_EXHDR(s);
1931 /* Truncate oversize input */
1932 if (len >= NAMEDATALEN)
1933 len = NAMEDATALEN - 1;
1935 result = (Name) palloc(NAMEDATALEN);
1936 memcpy(NameStr(*result), VARDATA_ANY(s), len);
1938 /* now null pad to full length... */
1939 while (len < NAMEDATALEN)
1941 *(NameStr(*result) + len) = '\0';
1942 len++;
1945 PG_RETURN_NAME(result);
1948 /* name_text()
1949 * Converts a Name type to a text type.
1951 Datum
1952 name_text(PG_FUNCTION_ARGS)
1954 Name s = PG_GETARG_NAME(0);
1956 PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
1961 * textToQualifiedNameList - convert a text object to list of names
1963 * This implements the input parsing needed by nextval() and other
1964 * functions that take a text parameter representing a qualified name.
1965 * We split the name at dots, downcase if not double-quoted, and
1966 * truncate names if they're too long.
1968 List *
1969 textToQualifiedNameList(text *textval)
1971 char *rawname;
1972 List *result = NIL;
1973 List *namelist;
1974 ListCell *l;
1976 /* Convert to C string (handles possible detoasting). */
1977 /* Note we rely on being able to modify rawname below. */
1978 rawname = text_to_cstring(textval);
1980 if (!SplitIdentifierString(rawname, '.', &namelist))
1981 ereport(ERROR,
1982 (errcode(ERRCODE_INVALID_NAME),
1983 errmsg("invalid name syntax")));
1985 if (namelist == NIL)
1986 ereport(ERROR,
1987 (errcode(ERRCODE_INVALID_NAME),
1988 errmsg("invalid name syntax")));
1990 foreach(l, namelist)
1992 char *curname = (char *) lfirst(l);
1994 result = lappend(result, makeString(pstrdup(curname)));
1997 pfree(rawname);
1998 list_free(namelist);
2000 return result;
2004 * SplitIdentifierString --- parse a string containing identifiers
2006 * This is the guts of textToQualifiedNameList, and is exported for use in
2007 * other situations such as parsing GUC variables. In the GUC case, it's
2008 * important to avoid memory leaks, so the API is designed to minimize the
2009 * amount of stuff that needs to be allocated and freed.
2011 * Inputs:
2012 * rawstring: the input string; must be overwritable! On return, it's
2013 * been modified to contain the separated identifiers.
2014 * separator: the separator punctuation expected between identifiers
2015 * (typically '.' or ','). Whitespace may also appear around
2016 * identifiers.
2017 * Outputs:
2018 * namelist: filled with a palloc'd list of pointers to identifiers within
2019 * rawstring. Caller should list_free() this even on error return.
2021 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2023 * Note that an empty string is considered okay here, though not in
2024 * textToQualifiedNameList.
2026 bool
2027 SplitIdentifierString(char *rawstring, char separator,
2028 List **namelist)
2030 char *nextp = rawstring;
2031 bool done = false;
2033 *namelist = NIL;
2035 while (isspace((unsigned char) *nextp))
2036 nextp++; /* skip leading whitespace */
2038 if (*nextp == '\0')
2039 return true; /* allow empty string */
2041 /* At the top of the loop, we are at start of a new identifier. */
2044 char *curname;
2045 char *endp;
2047 if (*nextp == '\"')
2049 /* Quoted name --- collapse quote-quote pairs, no downcasing */
2050 curname = nextp + 1;
2051 for (;;)
2053 endp = strchr(nextp + 1, '\"');
2054 if (endp == NULL)
2055 return false; /* mismatched quotes */
2056 if (endp[1] != '\"')
2057 break; /* found end of quoted name */
2058 /* Collapse adjacent quotes into one quote, and look again */
2059 memmove(endp, endp + 1, strlen(endp));
2060 nextp = endp;
2062 /* endp now points at the terminating quote */
2063 nextp = endp + 1;
2065 else
2067 /* Unquoted name --- extends to separator or whitespace */
2068 char *downname;
2069 int len;
2071 curname = nextp;
2072 while (*nextp && *nextp != separator &&
2073 !isspace((unsigned char) *nextp))
2074 nextp++;
2075 endp = nextp;
2076 if (curname == nextp)
2077 return false; /* empty unquoted name not allowed */
2080 * Downcase the identifier, using same code as main lexer does.
2082 * XXX because we want to overwrite the input in-place, we cannot
2083 * support a downcasing transformation that increases the string
2084 * length. This is not a problem given the current implementation
2085 * of downcase_truncate_identifier, but we'll probably have to do
2086 * something about this someday.
2088 len = endp - curname;
2089 downname = downcase_truncate_identifier(curname, len, false);
2090 Assert(strlen(downname) <= len);
2091 strncpy(curname, downname, len);
2092 pfree(downname);
2095 while (isspace((unsigned char) *nextp))
2096 nextp++; /* skip trailing whitespace */
2098 if (*nextp == separator)
2100 nextp++;
2101 while (isspace((unsigned char) *nextp))
2102 nextp++; /* skip leading whitespace for next */
2103 /* we expect another name, so done remains false */
2105 else if (*nextp == '\0')
2106 done = true;
2107 else
2108 return false; /* invalid syntax */
2110 /* Now safe to overwrite separator with a null */
2111 *endp = '\0';
2113 /* Truncate name if it's overlength */
2114 truncate_identifier(curname, strlen(curname), false);
2117 * Finished isolating current name --- add it to list
2119 *namelist = lappend(*namelist, curname);
2121 /* Loop back if we didn't reach end of string */
2122 } while (!done);
2124 return true;
2128 /*****************************************************************************
2129 * Comparison Functions used for bytea
2131 * Note: btree indexes need these routines not to leak memory; therefore,
2132 * be careful to free working copies of toasted datums. Most places don't
2133 * need to be so careful.
2134 *****************************************************************************/
2136 Datum
2137 byteaeq(PG_FUNCTION_ARGS)
2139 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2140 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2141 int len1,
2142 len2;
2143 bool result;
2145 len1 = VARSIZE_ANY_EXHDR(arg1);
2146 len2 = VARSIZE_ANY_EXHDR(arg2);
2148 /* fast path for different-length inputs */
2149 if (len1 != len2)
2150 result = false;
2151 else
2152 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
2154 PG_FREE_IF_COPY(arg1, 0);
2155 PG_FREE_IF_COPY(arg2, 1);
2157 PG_RETURN_BOOL(result);
2160 Datum
2161 byteane(PG_FUNCTION_ARGS)
2163 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2164 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2165 int len1,
2166 len2;
2167 bool result;
2169 len1 = VARSIZE_ANY_EXHDR(arg1);
2170 len2 = VARSIZE_ANY_EXHDR(arg2);
2172 /* fast path for different-length inputs */
2173 if (len1 != len2)
2174 result = true;
2175 else
2176 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
2178 PG_FREE_IF_COPY(arg1, 0);
2179 PG_FREE_IF_COPY(arg2, 1);
2181 PG_RETURN_BOOL(result);
2184 Datum
2185 bytealt(PG_FUNCTION_ARGS)
2187 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2188 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2189 int len1,
2190 len2;
2191 int cmp;
2193 len1 = VARSIZE_ANY_EXHDR(arg1);
2194 len2 = VARSIZE_ANY_EXHDR(arg2);
2196 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2198 PG_FREE_IF_COPY(arg1, 0);
2199 PG_FREE_IF_COPY(arg2, 1);
2201 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2204 Datum
2205 byteale(PG_FUNCTION_ARGS)
2207 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2208 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2209 int len1,
2210 len2;
2211 int cmp;
2213 len1 = VARSIZE_ANY_EXHDR(arg1);
2214 len2 = VARSIZE_ANY_EXHDR(arg2);
2216 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2218 PG_FREE_IF_COPY(arg1, 0);
2219 PG_FREE_IF_COPY(arg2, 1);
2221 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2224 Datum
2225 byteagt(PG_FUNCTION_ARGS)
2227 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2228 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2229 int len1,
2230 len2;
2231 int cmp;
2233 len1 = VARSIZE_ANY_EXHDR(arg1);
2234 len2 = VARSIZE_ANY_EXHDR(arg2);
2236 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2238 PG_FREE_IF_COPY(arg1, 0);
2239 PG_FREE_IF_COPY(arg2, 1);
2241 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2244 Datum
2245 byteage(PG_FUNCTION_ARGS)
2247 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2248 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2249 int len1,
2250 len2;
2251 int cmp;
2253 len1 = VARSIZE_ANY_EXHDR(arg1);
2254 len2 = VARSIZE_ANY_EXHDR(arg2);
2256 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2258 PG_FREE_IF_COPY(arg1, 0);
2259 PG_FREE_IF_COPY(arg2, 1);
2261 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2264 Datum
2265 byteacmp(PG_FUNCTION_ARGS)
2267 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2268 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2269 int len1,
2270 len2;
2271 int cmp;
2273 len1 = VARSIZE_ANY_EXHDR(arg1);
2274 len2 = VARSIZE_ANY_EXHDR(arg2);
2276 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2277 if ((cmp == 0) && (len1 != len2))
2278 cmp = (len1 < len2) ? -1 : 1;
2280 PG_FREE_IF_COPY(arg1, 0);
2281 PG_FREE_IF_COPY(arg2, 1);
2283 PG_RETURN_INT32(cmp);
2287 * appendStringInfoText
2289 * Append a text to str.
2290 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2292 static void
2293 appendStringInfoText(StringInfo str, const text *t)
2295 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
2299 * replace_text
2300 * replace all occurrences of 'old_sub_str' in 'orig_str'
2301 * with 'new_sub_str' to form 'new_str'
2303 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2304 * otherwise returns 'new_str'
2306 Datum
2307 replace_text(PG_FUNCTION_ARGS)
2309 text *src_text = PG_GETARG_TEXT_PP(0);
2310 text *from_sub_text = PG_GETARG_TEXT_PP(1);
2311 text *to_sub_text = PG_GETARG_TEXT_PP(2);
2312 int src_text_len;
2313 int from_sub_text_len;
2314 TextPositionState state;
2315 text *ret_text;
2316 int start_posn;
2317 int curr_posn;
2318 int chunk_len;
2319 char *start_ptr;
2320 StringInfoData str;
2322 text_position_setup(src_text, from_sub_text, &state);
2325 * Note: we check the converted string length, not the original, because
2326 * they could be different if the input contained invalid encoding.
2328 src_text_len = state.len1;
2329 from_sub_text_len = state.len2;
2331 /* Return unmodified source string if empty source or pattern */
2332 if (src_text_len < 1 || from_sub_text_len < 1)
2334 text_position_cleanup(&state);
2335 PG_RETURN_TEXT_P(src_text);
2338 start_posn = 1;
2339 curr_posn = text_position_next(1, &state);
2341 /* When the from_sub_text is not found, there is nothing to do. */
2342 if (curr_posn == 0)
2344 text_position_cleanup(&state);
2345 PG_RETURN_TEXT_P(src_text);
2348 /* start_ptr points to the start_posn'th character of src_text */
2349 start_ptr = VARDATA_ANY(src_text);
2351 initStringInfo(&str);
2355 CHECK_FOR_INTERRUPTS();
2357 /* copy the data skipped over by last text_position_next() */
2358 chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
2359 appendBinaryStringInfo(&str, start_ptr, chunk_len);
2361 appendStringInfoText(&str, to_sub_text);
2363 start_posn = curr_posn;
2364 start_ptr += chunk_len;
2365 start_posn += from_sub_text_len;
2366 start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
2368 curr_posn = text_position_next(start_posn, &state);
2370 while (curr_posn > 0);
2372 /* copy trailing data */
2373 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2374 appendBinaryStringInfo(&str, start_ptr, chunk_len);
2376 text_position_cleanup(&state);
2378 ret_text = cstring_to_text_with_len(str.data, str.len);
2379 pfree(str.data);
2381 PG_RETURN_TEXT_P(ret_text);
2385 * check_replace_text_has_escape_char
2387 * check whether replace_text contains escape char.
2389 static bool
2390 check_replace_text_has_escape_char(const text *replace_text)
2392 const char *p = VARDATA_ANY(replace_text);
2393 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2395 if (pg_database_encoding_max_length() == 1)
2397 for (; p < p_end; p++)
2399 if (*p == '\\')
2400 return true;
2403 else
2405 for (; p < p_end; p += pg_mblen(p))
2407 if (*p == '\\')
2408 return true;
2412 return false;
2416 * appendStringInfoRegexpSubstr
2418 * Append replace_text to str, substituting regexp back references for
2419 * \n escapes. start_ptr is the start of the match in the source string,
2420 * at logical character position data_pos.
2422 static void
2423 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2424 regmatch_t *pmatch,
2425 char *start_ptr, int data_pos)
2427 const char *p = VARDATA_ANY(replace_text);
2428 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2429 int eml = pg_database_encoding_max_length();
2431 for (;;)
2433 const char *chunk_start = p;
2434 int so;
2435 int eo;
2437 /* Find next escape char. */
2438 if (eml == 1)
2440 for (; p < p_end && *p != '\\'; p++)
2441 /* nothing */ ;
2443 else
2445 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2446 /* nothing */ ;
2449 /* Copy the text we just scanned over, if any. */
2450 if (p > chunk_start)
2451 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2453 /* Done if at end of string, else advance over escape char. */
2454 if (p >= p_end)
2455 break;
2456 p++;
2458 if (p >= p_end)
2460 /* Escape at very end of input. Treat same as unexpected char */
2461 appendStringInfoChar(str, '\\');
2462 break;
2465 if (*p >= '1' && *p <= '9')
2467 /* Use the back reference of regexp. */
2468 int idx = *p - '0';
2470 so = pmatch[idx].rm_so;
2471 eo = pmatch[idx].rm_eo;
2472 p++;
2474 else if (*p == '&')
2476 /* Use the entire matched string. */
2477 so = pmatch[0].rm_so;
2478 eo = pmatch[0].rm_eo;
2479 p++;
2481 else if (*p == '\\')
2483 /* \\ means transfer one \ to output. */
2484 appendStringInfoChar(str, '\\');
2485 p++;
2486 continue;
2488 else
2491 * If escape char is not followed by any expected char, just treat
2492 * it as ordinary data to copy. (XXX would it be better to throw
2493 * an error?)
2495 appendStringInfoChar(str, '\\');
2496 continue;
2499 if (so != -1 && eo != -1)
2502 * Copy the text that is back reference of regexp. Note so and eo
2503 * are counted in characters not bytes.
2505 char *chunk_start;
2506 int chunk_len;
2508 Assert(so >= data_pos);
2509 chunk_start = start_ptr;
2510 chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
2511 chunk_len = charlen_to_bytelen(chunk_start, eo - so);
2512 appendBinaryStringInfo(str, chunk_start, chunk_len);
2517 #define REGEXP_REPLACE_BACKREF_CNT 10
2520 * replace_text_regexp
2522 * replace text that matches to regexp in src_text to replace_text.
2524 * Note: to avoid having to include regex.h in builtins.h, we declare
2525 * the regexp argument as void *, but really it's regex_t *.
2527 text *
2528 replace_text_regexp(text *src_text, void *regexp,
2529 text *replace_text, bool glob)
2531 text *ret_text;
2532 regex_t *re = (regex_t *) regexp;
2533 int src_text_len = VARSIZE_ANY_EXHDR(src_text);
2534 StringInfoData buf;
2535 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
2536 pg_wchar *data;
2537 size_t data_len;
2538 int search_start;
2539 int data_pos;
2540 char *start_ptr;
2541 bool have_escape;
2543 initStringInfo(&buf);
2545 /* Convert data string to wide characters. */
2546 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2547 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
2549 /* Check whether replace_text has escape char. */
2550 have_escape = check_replace_text_has_escape_char(replace_text);
2552 /* start_ptr points to the data_pos'th character of src_text */
2553 start_ptr = (char *) VARDATA_ANY(src_text);
2554 data_pos = 0;
2556 search_start = 0;
2557 while (search_start <= data_len)
2559 int regexec_result;
2561 CHECK_FOR_INTERRUPTS();
2563 regexec_result = pg_regexec(re,
2564 data,
2565 data_len,
2566 search_start,
2567 NULL, /* no details */
2568 REGEXP_REPLACE_BACKREF_CNT,
2569 pmatch,
2572 if (regexec_result == REG_NOMATCH)
2573 break;
2575 if (regexec_result != REG_OKAY)
2577 char errMsg[100];
2579 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2580 ereport(ERROR,
2581 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2582 errmsg("regular expression failed: %s", errMsg)));
2586 * Copy the text to the left of the match position. Note we are given
2587 * character not byte indexes.
2589 if (pmatch[0].rm_so - data_pos > 0)
2591 int chunk_len;
2593 chunk_len = charlen_to_bytelen(start_ptr,
2594 pmatch[0].rm_so - data_pos);
2595 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2598 * Advance start_ptr over that text, to avoid multiple rescans of
2599 * it if the replace_text contains multiple back-references.
2601 start_ptr += chunk_len;
2602 data_pos = pmatch[0].rm_so;
2606 * Copy the replace_text. Process back references when the
2607 * replace_text has escape characters.
2609 if (have_escape)
2610 appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
2611 start_ptr, data_pos);
2612 else
2613 appendStringInfoText(&buf, replace_text);
2615 /* Advance start_ptr and data_pos over the matched text. */
2616 start_ptr += charlen_to_bytelen(start_ptr,
2617 pmatch[0].rm_eo - data_pos);
2618 data_pos = pmatch[0].rm_eo;
2621 * When global option is off, replace the first instance only.
2623 if (!glob)
2624 break;
2627 * Search from next character when the matching text is zero width.
2629 search_start = data_pos;
2630 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2631 search_start++;
2635 * Copy the text to the right of the last match.
2637 if (data_pos < data_len)
2639 int chunk_len;
2641 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2642 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2645 ret_text = cstring_to_text_with_len(buf.data, buf.len);
2646 pfree(buf.data);
2647 pfree(data);
2649 return ret_text;
2653 * split_text
2654 * parse input string
2655 * return ord item (1 based)
2656 * based on provided field separator
2658 Datum
2659 split_text(PG_FUNCTION_ARGS)
2661 text *inputstring = PG_GETARG_TEXT_PP(0);
2662 text *fldsep = PG_GETARG_TEXT_PP(1);
2663 int fldnum = PG_GETARG_INT32(2);
2664 int inputstring_len;
2665 int fldsep_len;
2666 TextPositionState state;
2667 int start_posn;
2668 int end_posn;
2669 text *result_text;
2671 /* field number is 1 based */
2672 if (fldnum < 1)
2673 ereport(ERROR,
2674 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2675 errmsg("field position must be greater than zero")));
2677 text_position_setup(inputstring, fldsep, &state);
2680 * Note: we check the converted string length, not the original, because
2681 * they could be different if the input contained invalid encoding.
2683 inputstring_len = state.len1;
2684 fldsep_len = state.len2;
2686 /* return empty string for empty input string */
2687 if (inputstring_len < 1)
2689 text_position_cleanup(&state);
2690 PG_RETURN_TEXT_P(cstring_to_text(""));
2693 /* empty field separator */
2694 if (fldsep_len < 1)
2696 text_position_cleanup(&state);
2697 /* if first field, return input string, else empty string */
2698 if (fldnum == 1)
2699 PG_RETURN_TEXT_P(inputstring);
2700 else
2701 PG_RETURN_TEXT_P(cstring_to_text(""));
2704 /* identify bounds of first field */
2705 start_posn = 1;
2706 end_posn = text_position_next(1, &state);
2708 /* special case if fldsep not found at all */
2709 if (end_posn == 0)
2711 text_position_cleanup(&state);
2712 /* if field 1 requested, return input string, else empty string */
2713 if (fldnum == 1)
2714 PG_RETURN_TEXT_P(inputstring);
2715 else
2716 PG_RETURN_TEXT_P(cstring_to_text(""));
2719 while (end_posn > 0 && --fldnum > 0)
2721 /* identify bounds of next field */
2722 start_posn = end_posn + fldsep_len;
2723 end_posn = text_position_next(start_posn, &state);
2726 text_position_cleanup(&state);
2728 if (fldnum > 0)
2730 /* N'th field separator not found */
2731 /* if last field requested, return it, else empty string */
2732 if (fldnum == 1)
2733 result_text = text_substring(PointerGetDatum(inputstring),
2734 start_posn,
2736 true);
2737 else
2738 result_text = cstring_to_text("");
2740 else
2742 /* non-last field requested */
2743 result_text = text_substring(PointerGetDatum(inputstring),
2744 start_posn,
2745 end_posn - start_posn,
2746 false);
2749 PG_RETURN_TEXT_P(result_text);
2753 * text_to_array
2754 * parse input string
2755 * return text array of elements
2756 * based on provided field separator
2758 Datum
2759 text_to_array(PG_FUNCTION_ARGS)
2761 text *inputstring = PG_GETARG_TEXT_PP(0);
2762 text *fldsep = PG_GETARG_TEXT_PP(1);
2763 int inputstring_len;
2764 int fldsep_len;
2765 TextPositionState state;
2766 int fldnum;
2767 int start_posn;
2768 int end_posn;
2769 int chunk_len;
2770 char *start_ptr;
2771 text *result_text;
2772 ArrayBuildState *astate = NULL;
2774 text_position_setup(inputstring, fldsep, &state);
2777 * Note: we check the converted string length, not the original, because
2778 * they could be different if the input contained invalid encoding.
2780 inputstring_len = state.len1;
2781 fldsep_len = state.len2;
2783 /* return NULL for empty input string */
2784 if (inputstring_len < 1)
2786 text_position_cleanup(&state);
2787 PG_RETURN_NULL();
2791 * empty field separator return one element, 1D, array using the input
2792 * string
2794 if (fldsep_len < 1)
2796 text_position_cleanup(&state);
2797 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2798 PointerGetDatum(inputstring), 1));
2801 start_posn = 1;
2802 /* start_ptr points to the start_posn'th character of inputstring */
2803 start_ptr = VARDATA_ANY(inputstring);
2805 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2807 CHECK_FOR_INTERRUPTS();
2809 end_posn = text_position_next(start_posn, &state);
2811 if (end_posn == 0)
2813 /* fetch last field */
2814 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
2816 else
2818 /* fetch non-last field */
2819 chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
2822 /* must build a temp text datum to pass to accumArrayResult */
2823 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
2825 /* stash away this field */
2826 astate = accumArrayResult(astate,
2827 PointerGetDatum(result_text),
2828 false,
2829 TEXTOID,
2830 CurrentMemoryContext);
2832 pfree(result_text);
2834 if (end_posn == 0)
2835 break;
2837 start_posn = end_posn;
2838 start_ptr += chunk_len;
2839 start_posn += fldsep_len;
2840 start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
2843 text_position_cleanup(&state);
2845 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2846 CurrentMemoryContext));
2850 * array_to_text
2851 * concatenate Cstring representation of input array elements
2852 * using provided field separator
2854 Datum
2855 array_to_text(PG_FUNCTION_ARGS)
2857 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2858 char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
2859 int nitems,
2860 *dims,
2861 ndims;
2862 Oid element_type;
2863 int typlen;
2864 bool typbyval;
2865 char typalign;
2866 StringInfoData buf;
2867 bool printed = false;
2868 char *p;
2869 bits8 *bitmap;
2870 int bitmask;
2871 int i;
2872 ArrayMetaState *my_extra;
2874 ndims = ARR_NDIM(v);
2875 dims = ARR_DIMS(v);
2876 nitems = ArrayGetNItems(ndims, dims);
2878 /* if there are no elements, return an empty string */
2879 if (nitems == 0)
2880 PG_RETURN_TEXT_P(cstring_to_text(""));
2882 element_type = ARR_ELEMTYPE(v);
2883 initStringInfo(&buf);
2886 * We arrange to look up info about element type, including its output
2887 * conversion proc, only once per series of calls, assuming the element
2888 * type doesn't change underneath us.
2890 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2891 if (my_extra == NULL)
2893 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2894 sizeof(ArrayMetaState));
2895 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2896 my_extra->element_type = ~element_type;
2899 if (my_extra->element_type != element_type)
2902 * Get info about element type, including its output conversion proc
2904 get_type_io_data(element_type, IOFunc_output,
2905 &my_extra->typlen, &my_extra->typbyval,
2906 &my_extra->typalign, &my_extra->typdelim,
2907 &my_extra->typioparam, &my_extra->typiofunc);
2908 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2909 fcinfo->flinfo->fn_mcxt);
2910 my_extra->element_type = element_type;
2912 typlen = my_extra->typlen;
2913 typbyval = my_extra->typbyval;
2914 typalign = my_extra->typalign;
2916 p = ARR_DATA_PTR(v);
2917 bitmap = ARR_NULLBITMAP(v);
2918 bitmask = 1;
2920 for (i = 0; i < nitems; i++)
2922 Datum itemvalue;
2923 char *value;
2925 /* Get source element, checking for NULL */
2926 if (bitmap && (*bitmap & bitmask) == 0)
2928 /* we ignore nulls */
2930 else
2932 itemvalue = fetch_att(p, typbyval, typlen);
2934 value = OutputFunctionCall(&my_extra->proc, itemvalue);
2936 if (printed)
2937 appendStringInfo(&buf, "%s%s", fldsep, value);
2938 else
2939 appendStringInfoString(&buf, value);
2940 printed = true;
2942 p = att_addlength_pointer(p, typlen, p);
2943 p = (char *) att_align_nominal(p, typalign);
2946 /* advance bitmap pointer if any */
2947 if (bitmap)
2949 bitmask <<= 1;
2950 if (bitmask == 0x100)
2952 bitmap++;
2953 bitmask = 1;
2958 PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len));
2961 #define HEXBASE 16
2963 * Convert a int32 to a string containing a base 16 (hex) representation of
2964 * the number.
2966 Datum
2967 to_hex32(PG_FUNCTION_ARGS)
2969 uint32 value = (uint32) PG_GETARG_INT32(0);
2970 char *ptr;
2971 const char *digits = "0123456789abcdef";
2972 char buf[32]; /* bigger than needed, but reasonable */
2974 ptr = buf + sizeof(buf) - 1;
2975 *ptr = '\0';
2979 *--ptr = digits[value % HEXBASE];
2980 value /= HEXBASE;
2981 } while (ptr > buf && value);
2983 PG_RETURN_TEXT_P(cstring_to_text(ptr));
2987 * Convert a int64 to a string containing a base 16 (hex) representation of
2988 * the number.
2990 Datum
2991 to_hex64(PG_FUNCTION_ARGS)
2993 uint64 value = (uint64) PG_GETARG_INT64(0);
2994 char *ptr;
2995 const char *digits = "0123456789abcdef";
2996 char buf[32]; /* bigger than needed, but reasonable */
2998 ptr = buf + sizeof(buf) - 1;
2999 *ptr = '\0';
3003 *--ptr = digits[value % HEXBASE];
3004 value /= HEXBASE;
3005 } while (ptr > buf && value);
3007 PG_RETURN_TEXT_P(cstring_to_text(ptr));
3011 * Create an md5 hash of a text string and return it as hex
3013 * md5 produces a 16 byte (128 bit) hash; double it for hex
3015 #define MD5_HASH_LEN 32
3017 Datum
3018 md5_text(PG_FUNCTION_ARGS)
3020 text *in_text = PG_GETARG_TEXT_PP(0);
3021 size_t len;
3022 char hexsum[MD5_HASH_LEN + 1];
3024 /* Calculate the length of the buffer using varlena metadata */
3025 len = VARSIZE_ANY_EXHDR(in_text);
3027 /* get the hash result */
3028 if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3029 ereport(ERROR,
3030 (errcode(ERRCODE_OUT_OF_MEMORY),
3031 errmsg("out of memory")));
3033 /* convert to text and return it */
3034 PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3038 * Create an md5 hash of a bytea field and return it as a hex string:
3039 * 16-byte md5 digest is represented in 32 hex characters.
3041 Datum
3042 md5_bytea(PG_FUNCTION_ARGS)
3044 bytea *in = PG_GETARG_BYTEA_PP(0);
3045 size_t len;
3046 char hexsum[MD5_HASH_LEN + 1];
3048 len = VARSIZE_ANY_EXHDR(in);
3049 if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3050 ereport(ERROR,
3051 (errcode(ERRCODE_OUT_OF_MEMORY),
3052 errmsg("out of memory")));
3054 PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3058 * Return the size of a datum, possibly compressed
3060 * Works on any data type
3062 Datum
3063 pg_column_size(PG_FUNCTION_ARGS)
3065 Datum value = PG_GETARG_DATUM(0);
3066 int32 result;
3067 int typlen;
3069 /* On first call, get the input type's typlen, and save at *fn_extra */
3070 if (fcinfo->flinfo->fn_extra == NULL)
3072 /* Lookup the datatype of the supplied argument */
3073 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3075 typlen = get_typlen(argtypeid);
3076 if (typlen == 0) /* should not happen */
3077 elog(ERROR, "cache lookup failed for type %u", argtypeid);
3079 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3080 sizeof(int));
3081 *((int *) fcinfo->flinfo->fn_extra) = typlen;
3083 else
3084 typlen = *((int *) fcinfo->flinfo->fn_extra);
3086 if (typlen == -1)
3088 /* varlena type, possibly toasted */
3089 result = toast_datum_size(value);
3091 else if (typlen == -2)
3093 /* cstring */
3094 result = strlen(DatumGetCString(value)) + 1;
3096 else
3098 /* ordinary fixed-width type */
3099 result = typlen;
3102 PG_RETURN_INT32(result);