Don't use 'return' where you should use 'PG_RETURN_xxx'.
[PostgreSQL.git] / src / backend / utils / adt / varlena.c
blob4434c97b694b79f4e49f1b23b65f7f0e36b0ab9b
1 /*-------------------------------------------------------------------------
3 * varlena.c
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
10 * IDENTIFICATION
11 * $PostgreSQL$
13 *-------------------------------------------------------------------------
15 #include "postgres.h"
17 #include <ctype.h>
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "libpq/md5.h"
22 #include "libpq/pqformat.h"
23 #include "miscadmin.h"
24 #include "parser/scansup.h"
25 #include "regex/regex.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
31 typedef struct varlena unknown;
33 typedef struct
35 bool use_wchar; /* T if multibyte encoding */
36 char *str1; /* use these if not use_wchar */
37 char *str2; /* note: these point to original texts */
38 pg_wchar *wstr1; /* use these if use_wchar */
39 pg_wchar *wstr2; /* note: these are palloc'd */
40 int len1; /* string lengths in logical characters */
41 int len2;
42 /* Skip table for Boyer-Moore-Horspool search algorithm: */
43 int skiptablemask; /* mask for ANDing with skiptable subscripts */
44 int skiptable[256]; /* skip distance for given mismatched char */
45 } TextPositionState;
47 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
48 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
49 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
50 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
51 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
53 static int text_cmp(text *arg1, text *arg2);
54 static int32 text_length(Datum str);
55 static int text_position(text *t1, text *t2);
56 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
57 static int text_position_next(int start_pos, TextPositionState *state);
58 static void text_position_cleanup(TextPositionState *state);
59 static text *text_substring(Datum str,
60 int32 start,
61 int32 length,
62 bool length_not_specified);
63 static void appendStringInfoText(StringInfo str, const text *t);
66 /*****************************************************************************
67 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
68 *****************************************************************************/
71 * cstring_to_text
73 * Create a text value from a null-terminated C string.
75 * The new text value is freshly palloc'd with a full-size VARHDR.
77 text *
78 cstring_to_text(const char *s)
80 return cstring_to_text_with_len(s, strlen(s));
84 * cstring_to_text_with_len
86 * Same as cstring_to_text except the caller specifies the string length;
87 * the string need not be null_terminated.
89 text *
90 cstring_to_text_with_len(const char *s, int len)
92 text *result = (text *) palloc(len + VARHDRSZ);
94 SET_VARSIZE(result, len + VARHDRSZ);
95 memcpy(VARDATA(result), s, len);
97 return result;
101 * text_to_cstring
103 * Create a palloc'd, null-terminated C string from a text value.
105 * We support being passed a compressed or toasted text value.
106 * This is a bit bogus since such values shouldn't really be referred to as
107 * "text *", but it seems useful for robustness. If we didn't handle that
108 * case here, we'd need another routine that did, anyway.
110 char *
111 text_to_cstring(const text *t)
113 /* must cast away the const, unfortunately */
114 text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
115 int len = VARSIZE_ANY_EXHDR(tunpacked);
116 char *result;
118 result = (char *) palloc(len + 1);
119 memcpy(result, VARDATA_ANY(tunpacked), len);
120 result[len] = '\0';
122 if (tunpacked != t)
123 pfree(tunpacked);
125 return result;
129 * text_to_cstring_buffer
131 * Copy a text value into a caller-supplied buffer of size dst_len.
133 * The text string is truncated if necessary to fit. The result is
134 * guaranteed null-terminated (unless dst_len == 0).
136 * We support being passed a compressed or toasted text value.
137 * This is a bit bogus since such values shouldn't really be referred to as
138 * "text *", but it seems useful for robustness. If we didn't handle that
139 * case here, we'd need another routine that did, anyway.
141 void
142 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
144 /* must cast away the const, unfortunately */
145 text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
146 size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
148 if (dst_len > 0)
150 dst_len--;
151 if (dst_len >= src_len)
152 dst_len = src_len;
153 else /* ensure truncation is encoding-safe */
154 dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
155 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
156 dst[dst_len] = '\0';
159 if (srcunpacked != src)
160 pfree(srcunpacked);
164 /*****************************************************************************
165 * USER I/O ROUTINES *
166 *****************************************************************************/
/* Convert between an ASCII digit character and its numeric value */
#define VAL(CH)			((CH) - '0')
#define DIG(N)			((N) + '0')
173 * byteain - converts from printable representation of byte array
175 * Non-printable characters must be passed as '\nnn' (octal) and are
176 * converted to internal form. '\' must be passed as '\\'.
177 * ereport(ERROR, ...) if bad form.
179 * BUGS:
180 * The input is scanned twice.
181 * The error checking of input is minimal.
183 Datum
184 byteain(PG_FUNCTION_ARGS)
186 char *inputText = PG_GETARG_CSTRING(0);
187 char *tp;
188 char *rp;
189 int byte;
190 bytea *result;
192 for (byte = 0, tp = inputText; *tp != '\0'; byte ++)
194 if (tp[0] != '\\')
195 tp++;
196 else if ((tp[0] == '\\') &&
197 (tp[1] >= '0' && tp[1] <= '3') &&
198 (tp[2] >= '0' && tp[2] <= '7') &&
199 (tp[3] >= '0' && tp[3] <= '7'))
200 tp += 4;
201 else if ((tp[0] == '\\') &&
202 (tp[1] == '\\'))
203 tp += 2;
204 else
207 * one backslash, not followed by 0 or ### valid octal
209 ereport(ERROR,
210 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
211 errmsg("invalid input syntax for type bytea")));
215 byte +=VARHDRSZ;
217 result = (bytea *) palloc(byte);
218 SET_VARSIZE(result, byte);
220 tp = inputText;
221 rp = VARDATA(result);
222 while (*tp != '\0')
224 if (tp[0] != '\\')
225 *rp++ = *tp++;
226 else if ((tp[0] == '\\') &&
227 (tp[1] >= '0' && tp[1] <= '3') &&
228 (tp[2] >= '0' && tp[2] <= '7') &&
229 (tp[3] >= '0' && tp[3] <= '7'))
231 byte = VAL(tp[1]);
232 byte <<=3;
233 byte +=VAL(tp[2]);
234 byte <<=3;
235 *rp++ = byte +VAL(tp[3]);
237 tp += 4;
239 else if ((tp[0] == '\\') &&
240 (tp[1] == '\\'))
242 *rp++ = '\\';
243 tp += 2;
245 else
248 * We should never get here. The first pass should not allow it.
250 ereport(ERROR,
251 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
252 errmsg("invalid input syntax for type bytea")));
256 PG_RETURN_BYTEA_P(result);
260 * byteaout - converts to printable representation of byte array
262 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
263 * '\\'.
265 * NULL vlena should be an error--returning string with NULL for now.
267 Datum
268 byteaout(PG_FUNCTION_ARGS)
270 bytea *vlena = PG_GETARG_BYTEA_PP(0);
271 char *result;
272 char *vp;
273 char *rp;
274 int val; /* holds unprintable chars */
275 int i;
276 int len;
278 len = 1; /* empty string has 1 char */
279 vp = VARDATA_ANY(vlena);
280 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
282 if (*vp == '\\')
283 len += 2;
284 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
285 len += 4;
286 else
287 len++;
289 rp = result = (char *) palloc(len);
290 vp = VARDATA_ANY(vlena);
291 for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
293 if (*vp == '\\')
295 *rp++ = '\\';
296 *rp++ = '\\';
298 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
300 val = *vp;
301 rp[0] = '\\';
302 rp[3] = DIG(val & 07);
303 val >>= 3;
304 rp[2] = DIG(val & 07);
305 val >>= 3;
306 rp[1] = DIG(val & 03);
307 rp += 4;
309 else
310 *rp++ = *vp;
312 *rp = '\0';
313 PG_RETURN_CSTRING(result);
317 * bytearecv - converts external binary format to bytea
319 Datum
320 bytearecv(PG_FUNCTION_ARGS)
322 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
323 bytea *result;
324 int nbytes;
326 nbytes = buf->len - buf->cursor;
327 result = (bytea *) palloc(nbytes + VARHDRSZ);
328 SET_VARSIZE(result, nbytes + VARHDRSZ);
329 pq_copymsgbytes(buf, VARDATA(result), nbytes);
330 PG_RETURN_BYTEA_P(result);
334 * byteasend - converts bytea to binary format
336 * This is a special case: just copy the input...
338 Datum
339 byteasend(PG_FUNCTION_ARGS)
341 bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
343 PG_RETURN_BYTEA_P(vlena);
348 * textin - converts "..." to internal representation
350 Datum
351 textin(PG_FUNCTION_ARGS)
353 char *inputText = PG_GETARG_CSTRING(0);
355 PG_RETURN_TEXT_P(cstring_to_text(inputText));
359 * textout - converts internal representation to "..."
361 Datum
362 textout(PG_FUNCTION_ARGS)
364 Datum txt = PG_GETARG_DATUM(0);
366 PG_RETURN_CSTRING(TextDatumGetCString(txt));
370 * textrecv - converts external binary format to text
372 Datum
373 textrecv(PG_FUNCTION_ARGS)
375 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
376 text *result;
377 char *str;
378 int nbytes;
380 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
382 result = cstring_to_text_with_len(str, nbytes);
383 pfree(str);
384 PG_RETURN_TEXT_P(result);
388 * textsend - converts text to binary format
390 Datum
391 textsend(PG_FUNCTION_ARGS)
393 text *t = PG_GETARG_TEXT_PP(0);
394 StringInfoData buf;
396 pq_begintypsend(&buf);
397 pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
398 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
403 * unknownin - converts "..." to internal representation
405 Datum
406 unknownin(PG_FUNCTION_ARGS)
408 char *str = PG_GETARG_CSTRING(0);
410 /* representation is same as cstring */
411 PG_RETURN_CSTRING(pstrdup(str));
415 * unknownout - converts internal representation to "..."
417 Datum
418 unknownout(PG_FUNCTION_ARGS)
420 /* representation is same as cstring */
421 char *str = PG_GETARG_CSTRING(0);
423 PG_RETURN_CSTRING(pstrdup(str));
427 * unknownrecv - converts external binary format to unknown
429 Datum
430 unknownrecv(PG_FUNCTION_ARGS)
432 StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
433 char *str;
434 int nbytes;
436 str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
437 /* representation is same as cstring */
438 PG_RETURN_CSTRING(str);
442 * unknownsend - converts unknown to binary format
444 Datum
445 unknownsend(PG_FUNCTION_ARGS)
447 /* representation is same as cstring */
448 char *str = PG_GETARG_CSTRING(0);
449 StringInfoData buf;
451 pq_begintypsend(&buf);
452 pq_sendtext(&buf, str, strlen(str));
453 PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
457 /* ========== PUBLIC ROUTINES ========== */
460 * textlen -
461 * returns the logical length of a text*
462 * (which is less than the VARSIZE of the text*)
464 Datum
465 textlen(PG_FUNCTION_ARGS)
467 Datum str = PG_GETARG_DATUM(0);
469 /* try to avoid decompressing argument */
470 PG_RETURN_INT32(text_length(str));
474 * text_length -
475 * Does the real work for textlen()
477 * This is broken out so it can be called directly by other string processing
478 * functions. Note that the argument is passed as a Datum, to indicate that
479 * it may still be in compressed form. We can avoid decompressing it at all
480 * in some cases.
482 static int32
483 text_length(Datum str)
485 /* fastpath when max encoding length is one */
486 if (pg_database_encoding_max_length() == 1)
487 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
488 else
490 text *t = DatumGetTextPP(str);
492 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
493 VARSIZE_ANY_EXHDR(t)));
498 * textoctetlen -
499 * returns the physical length of a text*
500 * (which is less than the VARSIZE of the text*)
502 Datum
503 textoctetlen(PG_FUNCTION_ARGS)
505 Datum str = PG_GETARG_DATUM(0);
507 /* We need not detoast the input at all */
508 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
512 * textcat -
513 * takes two text* and returns a text* that is the concatenation of
514 * the two.
516 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
517 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
518 * Allocate space for output in all cases.
519 * XXX - thomas 1997-07-10
521 Datum
522 textcat(PG_FUNCTION_ARGS)
524 text *t1 = PG_GETARG_TEXT_PP(0);
525 text *t2 = PG_GETARG_TEXT_PP(1);
526 int len1,
527 len2,
528 len;
529 text *result;
530 char *ptr;
532 len1 = VARSIZE_ANY_EXHDR(t1);
533 if (len1 < 0)
534 len1 = 0;
536 len2 = VARSIZE_ANY_EXHDR(t2);
537 if (len2 < 0)
538 len2 = 0;
540 len = len1 + len2 + VARHDRSZ;
541 result = (text *) palloc(len);
543 /* Set size of result string... */
544 SET_VARSIZE(result, len);
546 /* Fill data field of result string... */
547 ptr = VARDATA(result);
548 if (len1 > 0)
549 memcpy(ptr, VARDATA_ANY(t1), len1);
550 if (len2 > 0)
551 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
553 PG_RETURN_TEXT_P(result);
/*
 * charlen_to_bytelen()
 *	Compute the number of bytes occupied by n characters starting at *p
 *
 * It is caller's responsibility that there actually are n characters;
 * the string need not be null-terminated.
 */
static int
charlen_to_bytelen(const char *p, int n)
{
	const char *end = p;

	/* Optimization for single-byte encodings */
	if (pg_database_encoding_max_length() == 1)
		return n;

	/* otherwise step over the characters one at a time */
	while (n > 0)
	{
		end += pg_mblen(end);
		n--;
	}

	return end - p;
}
583 * text_substr()
584 * Return a substring starting at the specified position.
585 * - thomas 1997-12-31
587 * Input:
588 * - string
589 * - starting position (is one-based)
590 * - string length
592 * If the starting position is zero or less, then return from the start of the string
593 * adjusting the length to be consistent with the "negative start" per SQL92.
594 * If the length is less than zero, return the remaining string.
596 * Added multibyte support.
597 * - Tatsuo Ishii 1998-4-21
598 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
599 * Formerly returned the entire string; now returns a portion.
600 * - Thomas Lockhart 1998-12-10
601 * Now uses faster TOAST-slicing interface
602 * - John Gray 2002-02-22
603 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
604 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
605 * error; if E < 1, return '', not entire string). Fixed MB related bug when
606 * S > LC and < LC + 4 sometimes garbage characters are returned.
607 * - Joe Conway 2002-08-10
609 Datum
610 text_substr(PG_FUNCTION_ARGS)
612 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
613 PG_GETARG_INT32(1),
614 PG_GETARG_INT32(2),
615 false));
619 * text_substr_no_len -
620 * Wrapper to avoid opr_sanity failure due to
621 * one function accepting a different number of args.
623 Datum
624 text_substr_no_len(PG_FUNCTION_ARGS)
626 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
627 PG_GETARG_INT32(1),
628 -1, true));
632 * text_substring -
633 * Does the real work for text_substr() and text_substr_no_len()
635 * This is broken out so it can be called directly by other string processing
636 * functions. Note that the argument is passed as a Datum, to indicate that
637 * it may still be in compressed/toasted form. We can avoid detoasting all
638 * of it in some cases.
640 * The result is always a freshly palloc'd datum.
642 static text *
643 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
645 int32 eml = pg_database_encoding_max_length();
646 int32 S = start; /* start position */
647 int32 S1; /* adjusted start position */
648 int32 L1; /* adjusted substring length */
650 /* life is easy if the encoding max length is 1 */
651 if (eml == 1)
653 S1 = Max(S, 1);
655 if (length_not_specified) /* special case - get length to end of
656 * string */
657 L1 = -1;
658 else
660 /* end position */
661 int E = S + length;
664 * A negative value for L is the only way for the end position to
665 * be before the start. SQL99 says to throw an error.
667 if (E < S)
668 ereport(ERROR,
669 (errcode(ERRCODE_SUBSTRING_ERROR),
670 errmsg("negative substring length not allowed")));
673 * A zero or negative value for the end position can happen if the
674 * start was negative or one. SQL99 says to return a zero-length
675 * string.
677 if (E < 1)
678 return cstring_to_text("");
680 L1 = E - S1;
684 * If the start position is past the end of the string, SQL99 says to
685 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
686 * that for us. Convert to zero-based starting position
688 return DatumGetTextPSlice(str, S1 - 1, L1);
690 else if (eml > 1)
693 * When encoding max length is > 1, we can't get LC without
694 * detoasting, so we'll grab a conservatively large slice now and go
695 * back later to do the right thing
697 int32 slice_start;
698 int32 slice_size;
699 int32 slice_strlen;
700 text *slice;
701 int32 E1;
702 int32 i;
703 char *p;
704 char *s;
705 text *ret;
708 * if S is past the end of the string, the tuple toaster will return a
709 * zero-length string to us
711 S1 = Max(S, 1);
714 * We need to start at position zero because there is no way to know
715 * in advance which byte offset corresponds to the supplied start
716 * position.
718 slice_start = 0;
720 if (length_not_specified) /* special case - get length to end of
721 * string */
722 slice_size = L1 = -1;
723 else
725 int E = S + length;
728 * A negative value for L is the only way for the end position to
729 * be before the start. SQL99 says to throw an error.
731 if (E < S)
732 ereport(ERROR,
733 (errcode(ERRCODE_SUBSTRING_ERROR),
734 errmsg("negative substring length not allowed")));
737 * A zero or negative value for the end position can happen if the
738 * start was negative or one. SQL99 says to return a zero-length
739 * string.
741 if (E < 1)
742 return cstring_to_text("");
745 * if E is past the end of the string, the tuple toaster will
746 * truncate the length for us
748 L1 = E - S1;
751 * Total slice size in bytes can't be any longer than the start
752 * position plus substring length times the encoding max length.
754 slice_size = (S1 + L1) * eml;
758 * If we're working with an untoasted source, no need to do an extra
759 * copying step.
761 if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
762 VARATT_IS_EXTERNAL(DatumGetPointer(str)))
763 slice = DatumGetTextPSlice(str, slice_start, slice_size);
764 else
765 slice = (text *) DatumGetPointer(str);
767 /* see if we got back an empty string */
768 if (VARSIZE_ANY_EXHDR(slice) == 0)
770 if (slice != (text *) DatumGetPointer(str))
771 pfree(slice);
772 return cstring_to_text("");
775 /* Now we can get the actual length of the slice in MB characters */
776 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
777 VARSIZE_ANY_EXHDR(slice));
780 * Check that the start position wasn't > slice_strlen. If so, SQL99
781 * says to return a zero-length string.
783 if (S1 > slice_strlen)
785 if (slice != (text *) DatumGetPointer(str))
786 pfree(slice);
787 return cstring_to_text("");
791 * Adjust L1 and E1 now that we know the slice string length. Again
792 * remember that S1 is one based, and slice_start is zero based.
794 if (L1 > -1)
795 E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
796 else
797 E1 = slice_start + 1 + slice_strlen;
800 * Find the start position in the slice; remember S1 is not zero based
802 p = VARDATA_ANY(slice);
803 for (i = 0; i < S1 - 1; i++)
804 p += pg_mblen(p);
806 /* hang onto a pointer to our start position */
807 s = p;
810 * Count the actual bytes used by the substring of the requested
811 * length.
813 for (i = S1; i < E1; i++)
814 p += pg_mblen(p);
816 ret = (text *) palloc(VARHDRSZ + (p - s));
817 SET_VARSIZE(ret, VARHDRSZ + (p - s));
818 memcpy(VARDATA(ret), s, (p - s));
820 if (slice != (text *) DatumGetPointer(str))
821 pfree(slice);
823 return ret;
825 else
826 elog(ERROR, "invalid backend encoding: encoding max length < 1");
828 /* not reached: suppress compiler warning */
829 return NULL;
833 * textpos -
834 * Return the position of the specified substring.
835 * Implements the SQL92 POSITION() function.
836 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
837 * - thomas 1997-07-27
839 Datum
840 textpos(PG_FUNCTION_ARGS)
842 text *str = PG_GETARG_TEXT_PP(0);
843 text *search_str = PG_GETARG_TEXT_PP(1);
845 PG_RETURN_INT32((int32) text_position(str, search_str));
849 * text_position -
850 * Does the real work for textpos()
852 * Inputs:
853 * t1 - string to be searched
854 * t2 - pattern to match within t1
855 * Result:
856 * Character index of the first matched char, starting from 1,
857 * or 0 if no match.
859 * This is broken out so it can be called directly by other string processing
860 * functions.
862 static int
863 text_position(text *t1, text *t2)
865 TextPositionState state;
866 int result;
868 text_position_setup(t1, t2, &state);
869 result = text_position_next(1, &state);
870 text_position_cleanup(&state);
871 return result;
876 * text_position_setup, text_position_next, text_position_cleanup -
877 * Component steps of text_position()
879 * These are broken out so that a string can be efficiently searched for
880 * multiple occurrences of the same pattern. text_position_next may be
881 * called multiple times with increasing values of start_pos, which is
882 * the 1-based character position to start the search from. The "state"
883 * variable is normally just a local variable in the caller.
886 static void
887 text_position_setup(text *t1, text *t2, TextPositionState *state)
889 int len1 = VARSIZE_ANY_EXHDR(t1);
890 int len2 = VARSIZE_ANY_EXHDR(t2);
892 if (pg_database_encoding_max_length() == 1)
894 /* simple case - single byte encoding */
895 state->use_wchar = false;
896 state->str1 = VARDATA_ANY(t1);
897 state->str2 = VARDATA_ANY(t2);
898 state->len1 = len1;
899 state->len2 = len2;
901 else
903 /* not as simple - multibyte encoding */
904 pg_wchar *p1,
905 *p2;
907 p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
908 len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
909 p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
910 len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
912 state->use_wchar = true;
913 state->wstr1 = p1;
914 state->wstr2 = p2;
915 state->len1 = len1;
916 state->len2 = len2;
920 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
921 * notes we use the terminology that the "haystack" is the string to be
922 * searched (t1) and the "needle" is the pattern being sought (t2).
924 * If the needle is empty or bigger than the haystack then there is no
925 * point in wasting cycles initializing the table. We also choose not to
926 * use B-M-H for needles of length 1, since the skip table can't possibly
927 * save anything in that case.
929 if (len1 >= len2 && len2 > 1)
931 int searchlength = len1 - len2;
932 int skiptablemask;
933 int last;
934 int i;
937 * First we must determine how much of the skip table to use. The
938 * declaration of TextPositionState allows up to 256 elements, but for
939 * short search problems we don't really want to have to initialize so
940 * many elements --- it would take too long in comparison to the
941 * actual search time. So we choose a useful skip table size based on
942 * the haystack length minus the needle length. The closer the needle
943 * length is to the haystack length the less useful skipping becomes.
945 * Note: since we use bit-masking to select table elements, the skip
946 * table size MUST be a power of 2, and so the mask must be 2^N-1.
948 if (searchlength < 16)
949 skiptablemask = 3;
950 else if (searchlength < 64)
951 skiptablemask = 7;
952 else if (searchlength < 128)
953 skiptablemask = 15;
954 else if (searchlength < 512)
955 skiptablemask = 31;
956 else if (searchlength < 2048)
957 skiptablemask = 63;
958 else if (searchlength < 4096)
959 skiptablemask = 127;
960 else
961 skiptablemask = 255;
962 state->skiptablemask = skiptablemask;
965 * Initialize the skip table. We set all elements to the needle
966 * length, since this is the correct skip distance for any character
967 * not found in the needle.
969 for (i = 0; i <= skiptablemask; i++)
970 state->skiptable[i] = len2;
973 * Now examine the needle. For each character except the last one,
974 * set the corresponding table element to the appropriate skip
975 * distance. Note that when two characters share the same skip table
976 * entry, the one later in the needle must determine the skip
977 * distance.
979 last = len2 - 1;
981 if (!state->use_wchar)
983 const char *str2 = state->str2;
985 for (i = 0; i < last; i++)
986 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
988 else
990 const pg_wchar *wstr2 = state->wstr2;
992 for (i = 0; i < last; i++)
993 state->skiptable[wstr2[i] & skiptablemask] = last - i;
998 static int
999 text_position_next(int start_pos, TextPositionState *state)
1001 int haystack_len = state->len1;
1002 int needle_len = state->len2;
1003 int skiptablemask = state->skiptablemask;
1005 Assert(start_pos > 0); /* else caller error */
1007 if (needle_len <= 0)
1008 return start_pos; /* result for empty pattern */
1010 start_pos--; /* adjust for zero based arrays */
1012 /* Done if the needle can't possibly fit */
1013 if (haystack_len < start_pos + needle_len)
1014 return 0;
1016 if (!state->use_wchar)
1018 /* simple case - single byte encoding */
1019 const char *haystack = state->str1;
1020 const char *needle = state->str2;
1021 const char *haystack_end = &haystack[haystack_len];
1022 const char *hptr;
1024 if (needle_len == 1)
1026 /* No point in using B-M-H for a one-character needle */
1027 char nchar = *needle;
1029 hptr = &haystack[start_pos];
1030 while (hptr < haystack_end)
1032 if (*hptr == nchar)
1033 return hptr - haystack + 1;
1034 hptr++;
1037 else
1039 const char *needle_last = &needle[needle_len - 1];
1041 /* Start at startpos plus the length of the needle */
1042 hptr = &haystack[start_pos + needle_len - 1];
1043 while (hptr < haystack_end)
1045 /* Match the needle scanning *backward* */
1046 const char *nptr;
1047 const char *p;
1049 nptr = needle_last;
1050 p = hptr;
1051 while (*nptr == *p)
1053 /* Matched it all? If so, return 1-based position */
1054 if (nptr == needle)
1055 return p - haystack + 1;
1056 nptr--, p--;
1060 * No match, so use the haystack char at hptr to decide how
1061 * far to advance. If the needle had any occurrence of that
1062 * character (or more precisely, one sharing the same
1063 * skiptable entry) before its last character, then we advance
1064 * far enough to align the last such needle character with
1065 * that haystack position. Otherwise we can advance by the
1066 * whole needle length.
1068 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1072 else
1074 /* The multibyte char version. This works exactly the same way. */
1075 const pg_wchar *haystack = state->wstr1;
1076 const pg_wchar *needle = state->wstr2;
1077 const pg_wchar *haystack_end = &haystack[haystack_len];
1078 const pg_wchar *hptr;
1080 if (needle_len == 1)
1082 /* No point in using B-M-H for a one-character needle */
1083 pg_wchar nchar = *needle;
1085 hptr = &haystack[start_pos];
1086 while (hptr < haystack_end)
1088 if (*hptr == nchar)
1089 return hptr - haystack + 1;
1090 hptr++;
1093 else
1095 const pg_wchar *needle_last = &needle[needle_len - 1];
1097 /* Start at startpos plus the length of the needle */
1098 hptr = &haystack[start_pos + needle_len - 1];
1099 while (hptr < haystack_end)
1101 /* Match the needle scanning *backward* */
1102 const pg_wchar *nptr;
1103 const pg_wchar *p;
1105 nptr = needle_last;
1106 p = hptr;
1107 while (*nptr == *p)
1109 /* Matched it all? If so, return 1-based position */
1110 if (nptr == needle)
1111 return p - haystack + 1;
1112 nptr--, p--;
1116 * No match, so use the haystack char at hptr to decide how
1117 * far to advance. If the needle had any occurrence of that
1118 * character (or more precisely, one sharing the same
1119 * skiptable entry) before its last character, then we advance
1120 * far enough to align the last such needle character with
1121 * that haystack position. Otherwise we can advance by the
1122 * whole needle length.
1124 hptr += state->skiptable[*hptr & skiptablemask];
1129 return 0; /* not found */
1132 static void
1133 text_position_cleanup(TextPositionState *state)
1135 if (state->use_wchar)
1137 pfree(state->wstr1);
1138 pfree(state->wstr2);
/* varstr_cmp()
 * Comparison function for text strings with given lengths.
 * Includes locale support, but must copy strings to temporary memory
 *	to allow null-termination for inputs to strcoll().
 * Returns an integer less than, equal to, or greater than zero, indicating
 * whether arg1 is less than, equal to, or greater than arg2.
 */
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
{
	int			cmp;

	/*
	 * Unfortunately, there is no strncoll(), so in the non-C locale case we
	 * have to do some memory copying.	This turns out to be significantly
	 * slower, so we optimize the case where LC_COLLATE is C.  We also try to
	 * optimize relatively-short strings by avoiding palloc/pfree overhead.
	 */
	if (lc_collate_is_c())
	{
		cmp = strncmp(arg1, arg2, Min(len1, len2));
		if (cmp == 0 && len1 != len2)
			cmp = (len1 < len2) ? -1 : 1;
		return cmp;
	}
	else
	{
#define STACKBUFLEN		1024

		char		buf1[STACKBUFLEN];
		char		buf2[STACKBUFLEN];
		char	   *s1;
		char	   *s2;

#ifdef WIN32
		/* Win32 does not have UTF-8, so we need to map to UTF-16 */
		if (GetDatabaseEncoding() == PG_UTF8)
		{
			int			s1len;
			int			s2len;
			int			nconv;

			/* use the stack buffers when the UTF-16 form is sure to fit */
			if (len1 >= STACKBUFLEN / 2)
			{
				s1len = len1 * 2 + 2;
				s1 = palloc(s1len);
			}
			else
			{
				s1len = STACKBUFLEN;
				s1 = buf1;
			}
			if (len2 >= STACKBUFLEN / 2)
			{
				s2len = len2 * 2 + 2;
				s2 = palloc(s2len);
			}
			else
			{
				s2len = STACKBUFLEN;
				s2 = buf2;
			}

			/* stupid Microsloth API does not work for zero-length input */
			if (len1 == 0)
				nconv = 0;
			else
			{
				nconv = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
											(LPWSTR) s1, s1len / 2);
				if (!nconv)
					ereport(ERROR,
					 (errmsg("could not convert string to UTF-16: error %lu",
							 GetLastError())));
			}
			((LPWSTR) s1)[nconv] = 0;

			if (len2 == 0)
				nconv = 0;
			else
			{
				nconv = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
											(LPWSTR) s2, s2len / 2);
				if (!nconv)
					ereport(ERROR,
					 (errmsg("could not convert string to UTF-16: error %lu",
							 GetLastError())));
			}
			((LPWSTR) s2)[nconv] = 0;

			errno = 0;
			cmp = wcscoll((LPWSTR) s1, (LPWSTR) s2);
			if (cmp == 2147483647)		/* _NLSCMPERROR; missing from mingw
										 * headers */
				ereport(ERROR,
						(errmsg("could not compare Unicode strings: %m")));

			/*
			 * In some locales wcscoll() can claim that nonidentical strings
			 * are equal.  Believing that would be bad news for a number of
			 * reasons, so we follow Perl's lead and sort "equal" strings
			 * according to strcmp (on the UTF-8 representation).
			 */
			if (cmp == 0)
			{
				cmp = strncmp(arg1, arg2, Min(len1, len2));
				if (cmp == 0 && len1 != len2)
					cmp = (len1 < len2) ? -1 : 1;
			}

			if (s1 != buf1)
				pfree(s1);
			if (s2 != buf2)
				pfree(s2);

			return cmp;
		}
#endif   /* WIN32 */

		/* make null-terminated copies, on the stack if they are short */
		s1 = (len1 >= STACKBUFLEN) ? (char *) palloc(len1 + 1) : buf1;
		s2 = (len2 >= STACKBUFLEN) ? (char *) palloc(len2 + 1) : buf2;

		memcpy(s1, arg1, len1);
		s1[len1] = '\0';
		memcpy(s2, arg2, len2);
		s2[len2] = '\0';

		cmp = strcoll(s1, s2);

		/*
		 * In some locales strcoll() can claim that nonidentical strings are
		 * equal.  Believing that would be bad news for a number of reasons,
		 * so we follow Perl's lead and sort "equal" strings according to
		 * strcmp().
		 */
		if (cmp == 0)
			cmp = strcmp(s1, s2);

		if (s1 != buf1)
			pfree(s1);
		if (s2 != buf2)
			pfree(s2);
	}

	return cmp;
}
1295 /* text_cmp()
1296 * Internal comparison function for text strings.
1297 * Returns -1, 0 or 1
1299 static int
1300 text_cmp(text *arg1, text *arg2)
1302 char *a1p,
1303 *a2p;
1304 int len1,
1305 len2;
1307 a1p = VARDATA_ANY(arg1);
1308 a2p = VARDATA_ANY(arg2);
1310 len1 = VARSIZE_ANY_EXHDR(arg1);
1311 len2 = VARSIZE_ANY_EXHDR(arg2);
1313 return varstr_cmp(a1p, len1, a2p, len2);
/*
 * Comparison functions for text strings.
 *
 * Note: btree indexes need these routines not to leak memory; therefore,
 * be careful to free working copies of toasted datums.  Most places don't
 * need to be so careful.
 */
1324 Datum
1325 texteq(PG_FUNCTION_ARGS)
1327 text *arg1 = PG_GETARG_TEXT_PP(0);
1328 text *arg2 = PG_GETARG_TEXT_PP(1);
1329 bool result;
1332 * Since we only care about equality or not-equality, we can avoid all the
1333 * expense of strcoll() here, and just do bitwise comparison.
1335 if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1336 result = false;
1337 else
1338 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1339 VARSIZE_ANY_EXHDR(arg1)) == 0);
1341 PG_FREE_IF_COPY(arg1, 0);
1342 PG_FREE_IF_COPY(arg2, 1);
1344 PG_RETURN_BOOL(result);
1347 Datum
1348 textne(PG_FUNCTION_ARGS)
1350 text *arg1 = PG_GETARG_TEXT_PP(0);
1351 text *arg2 = PG_GETARG_TEXT_PP(1);
1352 bool result;
1355 * Since we only care about equality or not-equality, we can avoid all the
1356 * expense of strcoll() here, and just do bitwise comparison.
1358 if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1359 result = true;
1360 else
1361 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1362 VARSIZE_ANY_EXHDR(arg1)) != 0);
1364 PG_FREE_IF_COPY(arg1, 0);
1365 PG_FREE_IF_COPY(arg2, 1);
1367 PG_RETURN_BOOL(result);
1370 Datum
1371 text_lt(PG_FUNCTION_ARGS)
1373 text *arg1 = PG_GETARG_TEXT_PP(0);
1374 text *arg2 = PG_GETARG_TEXT_PP(1);
1375 bool result;
1377 result = (text_cmp(arg1, arg2) < 0);
1379 PG_FREE_IF_COPY(arg1, 0);
1380 PG_FREE_IF_COPY(arg2, 1);
1382 PG_RETURN_BOOL(result);
1385 Datum
1386 text_le(PG_FUNCTION_ARGS)
1388 text *arg1 = PG_GETARG_TEXT_PP(0);
1389 text *arg2 = PG_GETARG_TEXT_PP(1);
1390 bool result;
1392 result = (text_cmp(arg1, arg2) <= 0);
1394 PG_FREE_IF_COPY(arg1, 0);
1395 PG_FREE_IF_COPY(arg2, 1);
1397 PG_RETURN_BOOL(result);
1400 Datum
1401 text_gt(PG_FUNCTION_ARGS)
1403 text *arg1 = PG_GETARG_TEXT_PP(0);
1404 text *arg2 = PG_GETARG_TEXT_PP(1);
1405 bool result;
1407 result = (text_cmp(arg1, arg2) > 0);
1409 PG_FREE_IF_COPY(arg1, 0);
1410 PG_FREE_IF_COPY(arg2, 1);
1412 PG_RETURN_BOOL(result);
1415 Datum
1416 text_ge(PG_FUNCTION_ARGS)
1418 text *arg1 = PG_GETARG_TEXT_PP(0);
1419 text *arg2 = PG_GETARG_TEXT_PP(1);
1420 bool result;
1422 result = (text_cmp(arg1, arg2) >= 0);
1424 PG_FREE_IF_COPY(arg1, 0);
1425 PG_FREE_IF_COPY(arg2, 1);
1427 PG_RETURN_BOOL(result);
1430 Datum
1431 bttextcmp(PG_FUNCTION_ARGS)
1433 text *arg1 = PG_GETARG_TEXT_PP(0);
1434 text *arg2 = PG_GETARG_TEXT_PP(1);
1435 int32 result;
1437 result = text_cmp(arg1, arg2);
1439 PG_FREE_IF_COPY(arg1, 0);
1440 PG_FREE_IF_COPY(arg2, 1);
1442 PG_RETURN_INT32(result);
1446 Datum
1447 text_larger(PG_FUNCTION_ARGS)
1449 text *arg1 = PG_GETARG_TEXT_PP(0);
1450 text *arg2 = PG_GETARG_TEXT_PP(1);
1451 text *result;
1453 result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1455 PG_RETURN_TEXT_P(result);
1458 Datum
1459 text_smaller(PG_FUNCTION_ARGS)
1461 text *arg1 = PG_GETARG_TEXT_PP(0);
1462 text *arg2 = PG_GETARG_TEXT_PP(1);
1463 text *result;
1465 result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1467 PG_RETURN_TEXT_P(result);
/*
 * The following operators support character-by-character comparison
 * of text datums, to allow building indexes suitable for LIKE clauses.
 * Note that the regular texteq/textne comparison operators are assumed
 * to be compatible with these!
 */
1478 static int
1479 internal_text_pattern_compare(text *arg1, text *arg2)
1481 int result;
1482 int len1,
1483 len2;
1485 len1 = VARSIZE_ANY_EXHDR(arg1);
1486 len2 = VARSIZE_ANY_EXHDR(arg2);
1488 result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1489 if (result != 0)
1490 return result;
1491 else if (len1 < len2)
1492 return -1;
1493 else if (len1 > len2)
1494 return 1;
1495 else
1496 return 0;
1500 Datum
1501 text_pattern_lt(PG_FUNCTION_ARGS)
1503 text *arg1 = PG_GETARG_TEXT_PP(0);
1504 text *arg2 = PG_GETARG_TEXT_PP(1);
1505 int result;
1507 result = internal_text_pattern_compare(arg1, arg2);
1509 PG_FREE_IF_COPY(arg1, 0);
1510 PG_FREE_IF_COPY(arg2, 1);
1512 PG_RETURN_BOOL(result < 0);
1516 Datum
1517 text_pattern_le(PG_FUNCTION_ARGS)
1519 text *arg1 = PG_GETARG_TEXT_PP(0);
1520 text *arg2 = PG_GETARG_TEXT_PP(1);
1521 int result;
1523 result = internal_text_pattern_compare(arg1, arg2);
1525 PG_FREE_IF_COPY(arg1, 0);
1526 PG_FREE_IF_COPY(arg2, 1);
1528 PG_RETURN_BOOL(result <= 0);
1532 Datum
1533 text_pattern_ge(PG_FUNCTION_ARGS)
1535 text *arg1 = PG_GETARG_TEXT_PP(0);
1536 text *arg2 = PG_GETARG_TEXT_PP(1);
1537 int result;
1539 result = internal_text_pattern_compare(arg1, arg2);
1541 PG_FREE_IF_COPY(arg1, 0);
1542 PG_FREE_IF_COPY(arg2, 1);
1544 PG_RETURN_BOOL(result >= 0);
1548 Datum
1549 text_pattern_gt(PG_FUNCTION_ARGS)
1551 text *arg1 = PG_GETARG_TEXT_PP(0);
1552 text *arg2 = PG_GETARG_TEXT_PP(1);
1553 int result;
1555 result = internal_text_pattern_compare(arg1, arg2);
1557 PG_FREE_IF_COPY(arg1, 0);
1558 PG_FREE_IF_COPY(arg2, 1);
1560 PG_RETURN_BOOL(result > 0);
1564 Datum
1565 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1567 text *arg1 = PG_GETARG_TEXT_PP(0);
1568 text *arg2 = PG_GETARG_TEXT_PP(1);
1569 int result;
1571 result = internal_text_pattern_compare(arg1, arg2);
1573 PG_FREE_IF_COPY(arg1, 0);
1574 PG_FREE_IF_COPY(arg2, 1);
1576 PG_RETURN_INT32(result);
1580 /*-------------------------------------------------------------
1581 * byteaoctetlen
1583 * get the number of bytes contained in an instance of type 'bytea'
1584 *-------------------------------------------------------------
1586 Datum
1587 byteaoctetlen(PG_FUNCTION_ARGS)
1589 Datum str = PG_GETARG_DATUM(0);
1591 /* We need not detoast the input at all */
1592 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1596 * byteacat -
1597 * takes two bytea* and returns a bytea* that is the concatenation of
1598 * the two.
1600 * Cloned from textcat and modified as required.
1602 Datum
1603 byteacat(PG_FUNCTION_ARGS)
1605 bytea *t1 = PG_GETARG_BYTEA_PP(0);
1606 bytea *t2 = PG_GETARG_BYTEA_PP(1);
1607 int len1,
1608 len2,
1609 len;
1610 bytea *result;
1611 char *ptr;
1613 len1 = VARSIZE_ANY_EXHDR(t1);
1614 if (len1 < 0)
1615 len1 = 0;
1617 len2 = VARSIZE_ANY_EXHDR(t2);
1618 if (len2 < 0)
1619 len2 = 0;
1621 len = len1 + len2 + VARHDRSZ;
1622 result = (bytea *) palloc(len);
1624 /* Set size of result string... */
1625 SET_VARSIZE(result, len);
1627 /* Fill data field of result string... */
1628 ptr = VARDATA(result);
1629 if (len1 > 0)
1630 memcpy(ptr, VARDATA_ANY(t1), len1);
1631 if (len2 > 0)
1632 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
1634 PG_RETURN_BYTEA_P(result);
1637 #define PG_STR_GET_BYTEA(str_) \
1638 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1641 * bytea_substr()
1642 * Return a substring starting at the specified position.
1643 * Cloned from text_substr and modified as required.
1645 * Input:
1646 * - string
1647 * - starting position (is one-based)
1648 * - string length (optional)
1650 * If the starting position is zero or less, then return from the start of the string
1651 * adjusting the length to be consistent with the "negative start" per SQL92.
1652 * If the length is less than zero, an ERROR is thrown. If no third argument
1653 * (length) is provided, the length to the end of the string is assumed.
1655 Datum
1656 bytea_substr(PG_FUNCTION_ARGS)
1658 int S = PG_GETARG_INT32(1); /* start position */
1659 int S1; /* adjusted start position */
1660 int L1; /* adjusted substring length */
1662 S1 = Max(S, 1);
1664 if (fcinfo->nargs == 2)
1667 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1668 * the end of the string if we pass it a negative value for length.
1670 L1 = -1;
1672 else
1674 /* end position */
1675 int E = S + PG_GETARG_INT32(2);
1678 * A negative value for L is the only way for the end position to be
1679 * before the start. SQL99 says to throw an error.
1681 if (E < S)
1682 ereport(ERROR,
1683 (errcode(ERRCODE_SUBSTRING_ERROR),
1684 errmsg("negative substring length not allowed")));
1687 * A zero or negative value for the end position can happen if the
1688 * start was negative or one. SQL99 says to return a zero-length
1689 * string.
1691 if (E < 1)
1692 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1694 L1 = E - S1;
1698 * If the start position is past the end of the string, SQL99 says to
1699 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1700 * for us. Convert to zero-based starting position
1702 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1706 * bytea_substr_no_len -
1707 * Wrapper to avoid opr_sanity failure due to
1708 * one function accepting a different number of args.
1710 Datum
1711 bytea_substr_no_len(PG_FUNCTION_ARGS)
1713 return bytea_substr(fcinfo);
1717 * byteapos -
1718 * Return the position of the specified substring.
1719 * Implements the SQL92 POSITION() function.
1720 * Cloned from textpos and modified as required.
1722 Datum
1723 byteapos(PG_FUNCTION_ARGS)
1725 bytea *t1 = PG_GETARG_BYTEA_PP(0);
1726 bytea *t2 = PG_GETARG_BYTEA_PP(1);
1727 int pos;
1728 int px,
1730 int len1,
1731 len2;
1732 char *p1,
1733 *p2;
1735 len1 = VARSIZE_ANY_EXHDR(t1);
1736 len2 = VARSIZE_ANY_EXHDR(t2);
1738 if (len2 <= 0)
1739 PG_RETURN_INT32(1); /* result for empty pattern */
1741 p1 = VARDATA_ANY(t1);
1742 p2 = VARDATA_ANY(t2);
1744 pos = 0;
1745 px = (len1 - len2);
1746 for (p = 0; p <= px; p++)
1748 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1750 pos = p + 1;
1751 break;
1753 p1++;
1756 PG_RETURN_INT32(pos);
1759 /*-------------------------------------------------------------
1760 * byteaGetByte
1762 * this routine treats "bytea" as an array of bytes.
1763 * It returns the Nth byte (a number between 0 and 255).
1764 *-------------------------------------------------------------
1766 Datum
1767 byteaGetByte(PG_FUNCTION_ARGS)
1769 bytea *v = PG_GETARG_BYTEA_PP(0);
1770 int32 n = PG_GETARG_INT32(1);
1771 int len;
1772 int byte;
1774 len = VARSIZE_ANY_EXHDR(v);
1776 if (n < 0 || n >= len)
1777 ereport(ERROR,
1778 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1779 errmsg("index %d out of valid range, 0..%d",
1780 n, len - 1)));
1782 byte = ((unsigned char *) VARDATA_ANY(v))[n];
1784 PG_RETURN_INT32(byte);
1787 /*-------------------------------------------------------------
1788 * byteaGetBit
1790 * This routine treats a "bytea" type like an array of bits.
1791 * It returns the value of the Nth bit (0 or 1).
1793 *-------------------------------------------------------------
1795 Datum
1796 byteaGetBit(PG_FUNCTION_ARGS)
1798 bytea *v = PG_GETARG_BYTEA_PP(0);
1799 int32 n = PG_GETARG_INT32(1);
1800 int byteNo,
1801 bitNo;
1802 int len;
1803 int byte;
1805 len = VARSIZE_ANY_EXHDR(v);
1807 if (n < 0 || n >= len * 8)
1808 ereport(ERROR,
1809 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1810 errmsg("index %d out of valid range, 0..%d",
1811 n, len * 8 - 1)));
1813 byteNo = n / 8;
1814 bitNo = n % 8;
1816 byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
1818 if (byte &(1 << bitNo))
1819 PG_RETURN_INT32(1);
1820 else
1821 PG_RETURN_INT32(0);
1824 /*-------------------------------------------------------------
1825 * byteaSetByte
1827 * Given an instance of type 'bytea' creates a new one with
1828 * the Nth byte set to the given value.
1830 *-------------------------------------------------------------
1832 Datum
1833 byteaSetByte(PG_FUNCTION_ARGS)
1835 bytea *v = PG_GETARG_BYTEA_P(0);
1836 int32 n = PG_GETARG_INT32(1);
1837 int32 newByte = PG_GETARG_INT32(2);
1838 int len;
1839 bytea *res;
1841 len = VARSIZE(v) - VARHDRSZ;
1843 if (n < 0 || n >= len)
1844 ereport(ERROR,
1845 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1846 errmsg("index %d out of valid range, 0..%d",
1847 n, len - 1)));
1850 * Make a copy of the original varlena.
1852 res = (bytea *) palloc(VARSIZE(v));
1853 memcpy((char *) res, (char *) v, VARSIZE(v));
1856 * Now set the byte.
1858 ((unsigned char *) VARDATA(res))[n] = newByte;
1860 PG_RETURN_BYTEA_P(res);
1863 /*-------------------------------------------------------------
1864 * byteaSetBit
1866 * Given an instance of type 'bytea' creates a new one with
1867 * the Nth bit set to the given value.
1869 *-------------------------------------------------------------
1871 Datum
1872 byteaSetBit(PG_FUNCTION_ARGS)
1874 bytea *v = PG_GETARG_BYTEA_P(0);
1875 int32 n = PG_GETARG_INT32(1);
1876 int32 newBit = PG_GETARG_INT32(2);
1877 bytea *res;
1878 int len;
1879 int oldByte,
1880 newByte;
1881 int byteNo,
1882 bitNo;
1884 len = VARSIZE(v) - VARHDRSZ;
1886 if (n < 0 || n >= len * 8)
1887 ereport(ERROR,
1888 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1889 errmsg("index %d out of valid range, 0..%d",
1890 n, len * 8 - 1)));
1892 byteNo = n / 8;
1893 bitNo = n % 8;
1896 * sanity check!
1898 if (newBit != 0 && newBit != 1)
1899 ereport(ERROR,
1900 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1901 errmsg("new bit must be 0 or 1")));
1904 * Make a copy of the original varlena.
1906 res = (bytea *) palloc(VARSIZE(v));
1907 memcpy((char *) res, (char *) v, VARSIZE(v));
1910 * Update the byte.
1912 oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1914 if (newBit == 0)
1915 newByte = oldByte & (~(1 << bitNo));
1916 else
1917 newByte = oldByte | (1 << bitNo);
1919 ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1921 PG_RETURN_BYTEA_P(res);
1925 /* text_name()
1926 * Converts a text type to a Name type.
1928 Datum
1929 text_name(PG_FUNCTION_ARGS)
1931 text *s = PG_GETARG_TEXT_PP(0);
1932 Name result;
1933 int len;
1935 len = VARSIZE_ANY_EXHDR(s);
1937 /* Truncate oversize input */
1938 if (len >= NAMEDATALEN)
1939 len = NAMEDATALEN - 1;
1941 result = (Name) palloc(NAMEDATALEN);
1942 memcpy(NameStr(*result), VARDATA_ANY(s), len);
1944 /* now null pad to full length... */
1945 while (len < NAMEDATALEN)
1947 *(NameStr(*result) + len) = '\0';
1948 len++;
1951 PG_RETURN_NAME(result);
1954 /* name_text()
1955 * Converts a Name type to a text type.
1957 Datum
1958 name_text(PG_FUNCTION_ARGS)
1960 Name s = PG_GETARG_NAME(0);
1962 PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
1967 * textToQualifiedNameList - convert a text object to list of names
1969 * This implements the input parsing needed by nextval() and other
1970 * functions that take a text parameter representing a qualified name.
1971 * We split the name at dots, downcase if not double-quoted, and
1972 * truncate names if they're too long.
1974 List *
1975 textToQualifiedNameList(text *textval)
1977 char *rawname;
1978 List *result = NIL;
1979 List *namelist;
1980 ListCell *l;
1982 /* Convert to C string (handles possible detoasting). */
1983 /* Note we rely on being able to modify rawname below. */
1984 rawname = text_to_cstring(textval);
1986 if (!SplitIdentifierString(rawname, '.', &namelist))
1987 ereport(ERROR,
1988 (errcode(ERRCODE_INVALID_NAME),
1989 errmsg("invalid name syntax")));
1991 if (namelist == NIL)
1992 ereport(ERROR,
1993 (errcode(ERRCODE_INVALID_NAME),
1994 errmsg("invalid name syntax")));
1996 foreach(l, namelist)
1998 char *curname = (char *) lfirst(l);
2000 result = lappend(result, makeString(pstrdup(curname)));
2003 pfree(rawname);
2004 list_free(namelist);
2006 return result;
2010 * SplitIdentifierString --- parse a string containing identifiers
2012 * This is the guts of textToQualifiedNameList, and is exported for use in
2013 * other situations such as parsing GUC variables. In the GUC case, it's
2014 * important to avoid memory leaks, so the API is designed to minimize the
2015 * amount of stuff that needs to be allocated and freed.
2017 * Inputs:
2018 * rawstring: the input string; must be overwritable! On return, it's
2019 * been modified to contain the separated identifiers.
2020 * separator: the separator punctuation expected between identifiers
2021 * (typically '.' or ','). Whitespace may also appear around
2022 * identifiers.
2023 * Outputs:
2024 * namelist: filled with a palloc'd list of pointers to identifiers within
2025 * rawstring. Caller should list_free() this even on error return.
2027 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2029 * Note that an empty string is considered okay here, though not in
2030 * textToQualifiedNameList.
2032 bool
2033 SplitIdentifierString(char *rawstring, char separator,
2034 List **namelist)
2036 char *nextp = rawstring;
2037 bool done = false;
2039 *namelist = NIL;
2041 while (isspace((unsigned char) *nextp))
2042 nextp++; /* skip leading whitespace */
2044 if (*nextp == '\0')
2045 return true; /* allow empty string */
2047 /* At the top of the loop, we are at start of a new identifier. */
2050 char *curname;
2051 char *endp;
2053 if (*nextp == '\"')
2055 /* Quoted name --- collapse quote-quote pairs, no downcasing */
2056 curname = nextp + 1;
2057 for (;;)
2059 endp = strchr(nextp + 1, '\"');
2060 if (endp == NULL)
2061 return false; /* mismatched quotes */
2062 if (endp[1] != '\"')
2063 break; /* found end of quoted name */
2064 /* Collapse adjacent quotes into one quote, and look again */
2065 memmove(endp, endp + 1, strlen(endp));
2066 nextp = endp;
2068 /* endp now points at the terminating quote */
2069 nextp = endp + 1;
2071 else
2073 /* Unquoted name --- extends to separator or whitespace */
2074 char *downname;
2075 int len;
2077 curname = nextp;
2078 while (*nextp && *nextp != separator &&
2079 !isspace((unsigned char) *nextp))
2080 nextp++;
2081 endp = nextp;
2082 if (curname == nextp)
2083 return false; /* empty unquoted name not allowed */
2086 * Downcase the identifier, using same code as main lexer does.
2088 * XXX because we want to overwrite the input in-place, we cannot
2089 * support a downcasing transformation that increases the string
2090 * length. This is not a problem given the current implementation
2091 * of downcase_truncate_identifier, but we'll probably have to do
2092 * something about this someday.
2094 len = endp - curname;
2095 downname = downcase_truncate_identifier(curname, len, false);
2096 Assert(strlen(downname) <= len);
2097 strncpy(curname, downname, len);
2098 pfree(downname);
2101 while (isspace((unsigned char) *nextp))
2102 nextp++; /* skip trailing whitespace */
2104 if (*nextp == separator)
2106 nextp++;
2107 while (isspace((unsigned char) *nextp))
2108 nextp++; /* skip leading whitespace for next */
2109 /* we expect another name, so done remains false */
2111 else if (*nextp == '\0')
2112 done = true;
2113 else
2114 return false; /* invalid syntax */
2116 /* Now safe to overwrite separator with a null */
2117 *endp = '\0';
2119 /* Truncate name if it's overlength */
2120 truncate_identifier(curname, strlen(curname), false);
2123 * Finished isolating current name --- add it to list
2125 *namelist = lappend(*namelist, curname);
2127 /* Loop back if we didn't reach end of string */
2128 } while (!done);
2130 return true;
/*****************************************************************************
 *	Comparison Functions used for bytea
 *
 * Note: btree indexes need these routines not to leak memory; therefore,
 * be careful to free working copies of toasted datums.  Most places don't
 * need to be so careful.
 *****************************************************************************/
2142 Datum
2143 byteaeq(PG_FUNCTION_ARGS)
2145 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2146 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2147 int len1,
2148 len2;
2149 bool result;
2151 len1 = VARSIZE_ANY_EXHDR(arg1);
2152 len2 = VARSIZE_ANY_EXHDR(arg2);
2154 /* fast path for different-length inputs */
2155 if (len1 != len2)
2156 result = false;
2157 else
2158 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
2160 PG_FREE_IF_COPY(arg1, 0);
2161 PG_FREE_IF_COPY(arg2, 1);
2163 PG_RETURN_BOOL(result);
2166 Datum
2167 byteane(PG_FUNCTION_ARGS)
2169 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2170 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2171 int len1,
2172 len2;
2173 bool result;
2175 len1 = VARSIZE_ANY_EXHDR(arg1);
2176 len2 = VARSIZE_ANY_EXHDR(arg2);
2178 /* fast path for different-length inputs */
2179 if (len1 != len2)
2180 result = true;
2181 else
2182 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
2184 PG_FREE_IF_COPY(arg1, 0);
2185 PG_FREE_IF_COPY(arg2, 1);
2187 PG_RETURN_BOOL(result);
2190 Datum
2191 bytealt(PG_FUNCTION_ARGS)
2193 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2194 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2195 int len1,
2196 len2;
2197 int cmp;
2199 len1 = VARSIZE_ANY_EXHDR(arg1);
2200 len2 = VARSIZE_ANY_EXHDR(arg2);
2202 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2204 PG_FREE_IF_COPY(arg1, 0);
2205 PG_FREE_IF_COPY(arg2, 1);
2207 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2210 Datum
2211 byteale(PG_FUNCTION_ARGS)
2213 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2214 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2215 int len1,
2216 len2;
2217 int cmp;
2219 len1 = VARSIZE_ANY_EXHDR(arg1);
2220 len2 = VARSIZE_ANY_EXHDR(arg2);
2222 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2224 PG_FREE_IF_COPY(arg1, 0);
2225 PG_FREE_IF_COPY(arg2, 1);
2227 PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2230 Datum
2231 byteagt(PG_FUNCTION_ARGS)
2233 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2234 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2235 int len1,
2236 len2;
2237 int cmp;
2239 len1 = VARSIZE_ANY_EXHDR(arg1);
2240 len2 = VARSIZE_ANY_EXHDR(arg2);
2242 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2244 PG_FREE_IF_COPY(arg1, 0);
2245 PG_FREE_IF_COPY(arg2, 1);
2247 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2250 Datum
2251 byteage(PG_FUNCTION_ARGS)
2253 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2254 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2255 int len1,
2256 len2;
2257 int cmp;
2259 len1 = VARSIZE_ANY_EXHDR(arg1);
2260 len2 = VARSIZE_ANY_EXHDR(arg2);
2262 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2264 PG_FREE_IF_COPY(arg1, 0);
2265 PG_FREE_IF_COPY(arg2, 1);
2267 PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2270 Datum
2271 byteacmp(PG_FUNCTION_ARGS)
2273 bytea *arg1 = PG_GETARG_BYTEA_PP(0);
2274 bytea *arg2 = PG_GETARG_BYTEA_PP(1);
2275 int len1,
2276 len2;
2277 int cmp;
2279 len1 = VARSIZE_ANY_EXHDR(arg1);
2280 len2 = VARSIZE_ANY_EXHDR(arg2);
2282 cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2283 if ((cmp == 0) && (len1 != len2))
2284 cmp = (len1 < len2) ? -1 : 1;
2286 PG_FREE_IF_COPY(arg1, 0);
2287 PG_FREE_IF_COPY(arg2, 1);
2289 PG_RETURN_INT32(cmp);
2293 * appendStringInfoText
2295 * Append a text to str.
2296 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2298 static void
2299 appendStringInfoText(StringInfo str, const text *t)
2301 appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
2305 * replace_text
2306 * replace all occurrences of 'old_sub_str' in 'orig_str'
2307 * with 'new_sub_str' to form 'new_str'
2309 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2310 * otherwise returns 'new_str'
2312 Datum
2313 replace_text(PG_FUNCTION_ARGS)
2315 text *src_text = PG_GETARG_TEXT_PP(0);
2316 text *from_sub_text = PG_GETARG_TEXT_PP(1);
2317 text *to_sub_text = PG_GETARG_TEXT_PP(2);
2318 int src_text_len;
2319 int from_sub_text_len;
2320 TextPositionState state;
2321 text *ret_text;
2322 int start_posn;
2323 int curr_posn;
2324 int chunk_len;
2325 char *start_ptr;
2326 StringInfoData str;
2328 text_position_setup(src_text, from_sub_text, &state);
2331 * Note: we check the converted string length, not the original, because
2332 * they could be different if the input contained invalid encoding.
2334 src_text_len = state.len1;
2335 from_sub_text_len = state.len2;
2337 /* Return unmodified source string if empty source or pattern */
2338 if (src_text_len < 1 || from_sub_text_len < 1)
2340 text_position_cleanup(&state);
2341 PG_RETURN_TEXT_P(src_text);
2344 start_posn = 1;
2345 curr_posn = text_position_next(1, &state);
2347 /* When the from_sub_text is not found, there is nothing to do. */
2348 if (curr_posn == 0)
2350 text_position_cleanup(&state);
2351 PG_RETURN_TEXT_P(src_text);
2354 /* start_ptr points to the start_posn'th character of src_text */
2355 start_ptr = VARDATA_ANY(src_text);
2357 initStringInfo(&str);
2361 CHECK_FOR_INTERRUPTS();
2363 /* copy the data skipped over by last text_position_next() */
2364 chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
2365 appendBinaryStringInfo(&str, start_ptr, chunk_len);
2367 appendStringInfoText(&str, to_sub_text);
2369 start_posn = curr_posn;
2370 start_ptr += chunk_len;
2371 start_posn += from_sub_text_len;
2372 start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
2374 curr_posn = text_position_next(start_posn, &state);
2376 while (curr_posn > 0);
2378 /* copy trailing data */
2379 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2380 appendBinaryStringInfo(&str, start_ptr, chunk_len);
2382 text_position_cleanup(&state);
2384 ret_text = cstring_to_text_with_len(str.data, str.len);
2385 pfree(str.data);
2387 PG_RETURN_TEXT_P(ret_text);
2391 * check_replace_text_has_escape_char
2393 * check whether replace_text contains escape char.
2395 static bool
2396 check_replace_text_has_escape_char(const text *replace_text)
2398 const char *p = VARDATA_ANY(replace_text);
2399 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2401 if (pg_database_encoding_max_length() == 1)
2403 for (; p < p_end; p++)
2405 if (*p == '\\')
2406 return true;
2409 else
2411 for (; p < p_end; p += pg_mblen(p))
2413 if (*p == '\\')
2414 return true;
2418 return false;
2422 * appendStringInfoRegexpSubstr
2424 * Append replace_text to str, substituting regexp back references for
2425 * \n escapes. start_ptr is the start of the match in the source string,
2426 * at logical character position data_pos.
2428 static void
2429 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2430 regmatch_t *pmatch,
2431 char *start_ptr, int data_pos)
2433 const char *p = VARDATA_ANY(replace_text);
2434 const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2435 int eml = pg_database_encoding_max_length();
2437 for (;;)
2439 const char *chunk_start = p;
2440 int so;
2441 int eo;
2443 /* Find next escape char. */
2444 if (eml == 1)
2446 for (; p < p_end && *p != '\\'; p++)
2447 /* nothing */ ;
2449 else
2451 for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2452 /* nothing */ ;
2455 /* Copy the text we just scanned over, if any. */
2456 if (p > chunk_start)
2457 appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2459 /* Done if at end of string, else advance over escape char. */
2460 if (p >= p_end)
2461 break;
2462 p++;
2464 if (p >= p_end)
2466 /* Escape at very end of input. Treat same as unexpected char */
2467 appendStringInfoChar(str, '\\');
2468 break;
2471 if (*p >= '1' && *p <= '9')
2473 /* Use the back reference of regexp. */
2474 int idx = *p - '0';
2476 so = pmatch[idx].rm_so;
2477 eo = pmatch[idx].rm_eo;
2478 p++;
2480 else if (*p == '&')
2482 /* Use the entire matched string. */
2483 so = pmatch[0].rm_so;
2484 eo = pmatch[0].rm_eo;
2485 p++;
2487 else if (*p == '\\')
2489 /* \\ means transfer one \ to output. */
2490 appendStringInfoChar(str, '\\');
2491 p++;
2492 continue;
2494 else
2497 * If escape char is not followed by any expected char, just treat
2498 * it as ordinary data to copy. (XXX would it be better to throw
2499 * an error?)
2501 appendStringInfoChar(str, '\\');
2502 continue;
2505 if (so != -1 && eo != -1)
2508 * Copy the text that is back reference of regexp. Note so and eo
2509 * are counted in characters not bytes.
2511 char *chunk_start;
2512 int chunk_len;
2514 Assert(so >= data_pos);
2515 chunk_start = start_ptr;
2516 chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
2517 chunk_len = charlen_to_bytelen(chunk_start, eo - so);
2518 appendBinaryStringInfo(str, chunk_start, chunk_len);
2523 #define REGEXP_REPLACE_BACKREF_CNT 10
2526 * replace_text_regexp
2528 * replace text that matches to regexp in src_text to replace_text.
2530 * Note: to avoid having to include regex.h in builtins.h, we declare
2531 * the regexp argument as void *, but really it's regex_t *.
2533 text *
2534 replace_text_regexp(text *src_text, void *regexp,
2535 text *replace_text, bool glob)
2537 text *ret_text;
2538 regex_t *re = (regex_t *) regexp;
2539 int src_text_len = VARSIZE_ANY_EXHDR(src_text);
2540 StringInfoData buf;
2541 regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
2542 pg_wchar *data;
2543 size_t data_len;
2544 int search_start;
2545 int data_pos;
2546 char *start_ptr;
2547 bool have_escape;
2549 initStringInfo(&buf);
2551 /* Convert data string to wide characters. */
2552 data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2553 data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
2555 /* Check whether replace_text has escape char. */
2556 have_escape = check_replace_text_has_escape_char(replace_text);
2558 /* start_ptr points to the data_pos'th character of src_text */
2559 start_ptr = (char *) VARDATA_ANY(src_text);
2560 data_pos = 0;
2562 search_start = 0;
2563 while (search_start <= data_len)
2565 int regexec_result;
2567 CHECK_FOR_INTERRUPTS();
2569 regexec_result = pg_regexec(re,
2570 data,
2571 data_len,
2572 search_start,
2573 NULL, /* no details */
2574 REGEXP_REPLACE_BACKREF_CNT,
2575 pmatch,
2578 if (regexec_result == REG_NOMATCH)
2579 break;
2581 if (regexec_result != REG_OKAY)
2583 char errMsg[100];
2585 pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2586 ereport(ERROR,
2587 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2588 errmsg("regular expression failed: %s", errMsg)));
2592 * Copy the text to the left of the match position. Note we are given
2593 * character not byte indexes.
2595 if (pmatch[0].rm_so - data_pos > 0)
2597 int chunk_len;
2599 chunk_len = charlen_to_bytelen(start_ptr,
2600 pmatch[0].rm_so - data_pos);
2601 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2604 * Advance start_ptr over that text, to avoid multiple rescans of
2605 * it if the replace_text contains multiple back-references.
2607 start_ptr += chunk_len;
2608 data_pos = pmatch[0].rm_so;
2612 * Copy the replace_text. Process back references when the
2613 * replace_text has escape characters.
2615 if (have_escape)
2616 appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
2617 start_ptr, data_pos);
2618 else
2619 appendStringInfoText(&buf, replace_text);
2621 /* Advance start_ptr and data_pos over the matched text. */
2622 start_ptr += charlen_to_bytelen(start_ptr,
2623 pmatch[0].rm_eo - data_pos);
2624 data_pos = pmatch[0].rm_eo;
2627 * When global option is off, replace the first instance only.
2629 if (!glob)
2630 break;
2633 * Search from next character when the matching text is zero width.
2635 search_start = data_pos;
2636 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2637 search_start++;
2641 * Copy the text to the right of the last match.
2643 if (data_pos < data_len)
2645 int chunk_len;
2647 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2648 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2651 ret_text = cstring_to_text_with_len(buf.data, buf.len);
2652 pfree(buf.data);
2653 pfree(data);
2655 return ret_text;
2659 * split_text
2660 * parse input string
2661 * return ord item (1 based)
2662 * based on provided field separator
2664 Datum
2665 split_text(PG_FUNCTION_ARGS)
2667 text *inputstring = PG_GETARG_TEXT_PP(0);
2668 text *fldsep = PG_GETARG_TEXT_PP(1);
2669 int fldnum = PG_GETARG_INT32(2);
2670 int inputstring_len;
2671 int fldsep_len;
2672 TextPositionState state;
2673 int start_posn;
2674 int end_posn;
2675 text *result_text;
2677 /* field number is 1 based */
2678 if (fldnum < 1)
2679 ereport(ERROR,
2680 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2681 errmsg("field position must be greater than zero")));
2683 text_position_setup(inputstring, fldsep, &state);
2686 * Note: we check the converted string length, not the original, because
2687 * they could be different if the input contained invalid encoding.
2689 inputstring_len = state.len1;
2690 fldsep_len = state.len2;
2692 /* return empty string for empty input string */
2693 if (inputstring_len < 1)
2695 text_position_cleanup(&state);
2696 PG_RETURN_TEXT_P(cstring_to_text(""));
2699 /* empty field separator */
2700 if (fldsep_len < 1)
2702 text_position_cleanup(&state);
2703 /* if first field, return input string, else empty string */
2704 if (fldnum == 1)
2705 PG_RETURN_TEXT_P(inputstring);
2706 else
2707 PG_RETURN_TEXT_P(cstring_to_text(""));
2710 /* identify bounds of first field */
2711 start_posn = 1;
2712 end_posn = text_position_next(1, &state);
2714 /* special case if fldsep not found at all */
2715 if (end_posn == 0)
2717 text_position_cleanup(&state);
2718 /* if field 1 requested, return input string, else empty string */
2719 if (fldnum == 1)
2720 PG_RETURN_TEXT_P(inputstring);
2721 else
2722 PG_RETURN_TEXT_P(cstring_to_text(""));
2725 while (end_posn > 0 && --fldnum > 0)
2727 /* identify bounds of next field */
2728 start_posn = end_posn + fldsep_len;
2729 end_posn = text_position_next(start_posn, &state);
2732 text_position_cleanup(&state);
2734 if (fldnum > 0)
2736 /* N'th field separator not found */
2737 /* if last field requested, return it, else empty string */
2738 if (fldnum == 1)
2739 result_text = text_substring(PointerGetDatum(inputstring),
2740 start_posn,
2742 true);
2743 else
2744 result_text = cstring_to_text("");
2746 else
2748 /* non-last field requested */
2749 result_text = text_substring(PointerGetDatum(inputstring),
2750 start_posn,
2751 end_posn - start_posn,
2752 false);
2755 PG_RETURN_TEXT_P(result_text);
2759 * text_to_array
2760 * parse input string
2761 * return text array of elements
2762 * based on provided field separator
2764 Datum
2765 text_to_array(PG_FUNCTION_ARGS)
2767 text *inputstring = PG_GETARG_TEXT_PP(0);
2768 text *fldsep = PG_GETARG_TEXT_PP(1);
2769 int inputstring_len;
2770 int fldsep_len;
2771 TextPositionState state;
2772 int fldnum;
2773 int start_posn;
2774 int end_posn;
2775 int chunk_len;
2776 char *start_ptr;
2777 text *result_text;
2778 ArrayBuildState *astate = NULL;
2780 text_position_setup(inputstring, fldsep, &state);
2783 * Note: we check the converted string length, not the original, because
2784 * they could be different if the input contained invalid encoding.
2786 inputstring_len = state.len1;
2787 fldsep_len = state.len2;
2789 /* return NULL for empty input string */
2790 if (inputstring_len < 1)
2792 text_position_cleanup(&state);
2793 PG_RETURN_NULL();
2797 * empty field separator return one element, 1D, array using the input
2798 * string
2800 if (fldsep_len < 1)
2802 text_position_cleanup(&state);
2803 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2804 PointerGetDatum(inputstring), 1));
2807 start_posn = 1;
2808 /* start_ptr points to the start_posn'th character of inputstring */
2809 start_ptr = VARDATA_ANY(inputstring);
2811 for (fldnum = 1;; fldnum++) /* field number is 1 based */
2813 CHECK_FOR_INTERRUPTS();
2815 end_posn = text_position_next(start_posn, &state);
2817 if (end_posn == 0)
2819 /* fetch last field */
2820 chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
2822 else
2824 /* fetch non-last field */
2825 chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
2828 /* must build a temp text datum to pass to accumArrayResult */
2829 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
2831 /* stash away this field */
2832 astate = accumArrayResult(astate,
2833 PointerGetDatum(result_text),
2834 false,
2835 TEXTOID,
2836 CurrentMemoryContext);
2838 pfree(result_text);
2840 if (end_posn == 0)
2841 break;
2843 start_posn = end_posn;
2844 start_ptr += chunk_len;
2845 start_posn += fldsep_len;
2846 start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
2849 text_position_cleanup(&state);
2851 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2852 CurrentMemoryContext));
2856 * array_to_text
2857 * concatenate Cstring representation of input array elements
2858 * using provided field separator
2860 Datum
2861 array_to_text(PG_FUNCTION_ARGS)
2863 ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
2864 char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
2865 int nitems,
2866 *dims,
2867 ndims;
2868 Oid element_type;
2869 int typlen;
2870 bool typbyval;
2871 char typalign;
2872 StringInfoData buf;
2873 bool printed = false;
2874 char *p;
2875 bits8 *bitmap;
2876 int bitmask;
2877 int i;
2878 ArrayMetaState *my_extra;
2880 ndims = ARR_NDIM(v);
2881 dims = ARR_DIMS(v);
2882 nitems = ArrayGetNItems(ndims, dims);
2884 /* if there are no elements, return an empty string */
2885 if (nitems == 0)
2886 PG_RETURN_TEXT_P(cstring_to_text(""));
2888 element_type = ARR_ELEMTYPE(v);
2889 initStringInfo(&buf);
2892 * We arrange to look up info about element type, including its output
2893 * conversion proc, only once per series of calls, assuming the element
2894 * type doesn't change underneath us.
2896 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2897 if (my_extra == NULL)
2899 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2900 sizeof(ArrayMetaState));
2901 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2902 my_extra->element_type = ~element_type;
2905 if (my_extra->element_type != element_type)
2908 * Get info about element type, including its output conversion proc
2910 get_type_io_data(element_type, IOFunc_output,
2911 &my_extra->typlen, &my_extra->typbyval,
2912 &my_extra->typalign, &my_extra->typdelim,
2913 &my_extra->typioparam, &my_extra->typiofunc);
2914 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2915 fcinfo->flinfo->fn_mcxt);
2916 my_extra->element_type = element_type;
2918 typlen = my_extra->typlen;
2919 typbyval = my_extra->typbyval;
2920 typalign = my_extra->typalign;
2922 p = ARR_DATA_PTR(v);
2923 bitmap = ARR_NULLBITMAP(v);
2924 bitmask = 1;
2926 for (i = 0; i < nitems; i++)
2928 Datum itemvalue;
2929 char *value;
2931 /* Get source element, checking for NULL */
2932 if (bitmap && (*bitmap & bitmask) == 0)
2934 /* we ignore nulls */
2936 else
2938 itemvalue = fetch_att(p, typbyval, typlen);
2940 value = OutputFunctionCall(&my_extra->proc, itemvalue);
2942 if (printed)
2943 appendStringInfo(&buf, "%s%s", fldsep, value);
2944 else
2945 appendStringInfoString(&buf, value);
2946 printed = true;
2948 p = att_addlength_pointer(p, typlen, p);
2949 p = (char *) att_align_nominal(p, typalign);
2952 /* advance bitmap pointer if any */
2953 if (bitmap)
2955 bitmask <<= 1;
2956 if (bitmask == 0x100)
2958 bitmap++;
2959 bitmask = 1;
2964 PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len));
2967 #define HEXBASE 16
2969 * Convert a int32 to a string containing a base 16 (hex) representation of
2970 * the number.
2972 Datum
2973 to_hex32(PG_FUNCTION_ARGS)
2975 uint32 value = (uint32) PG_GETARG_INT32(0);
2976 char *ptr;
2977 const char *digits = "0123456789abcdef";
2978 char buf[32]; /* bigger than needed, but reasonable */
2980 ptr = buf + sizeof(buf) - 1;
2981 *ptr = '\0';
2985 *--ptr = digits[value % HEXBASE];
2986 value /= HEXBASE;
2987 } while (ptr > buf && value);
2989 PG_RETURN_TEXT_P(cstring_to_text(ptr));
2993 * Convert a int64 to a string containing a base 16 (hex) representation of
2994 * the number.
2996 Datum
2997 to_hex64(PG_FUNCTION_ARGS)
2999 uint64 value = (uint64) PG_GETARG_INT64(0);
3000 char *ptr;
3001 const char *digits = "0123456789abcdef";
3002 char buf[32]; /* bigger than needed, but reasonable */
3004 ptr = buf + sizeof(buf) - 1;
3005 *ptr = '\0';
3009 *--ptr = digits[value % HEXBASE];
3010 value /= HEXBASE;
3011 } while (ptr > buf && value);
3013 PG_RETURN_TEXT_P(cstring_to_text(ptr));
3017 * Create an md5 hash of a text string and return it as hex
3019 * md5 produces a 16 byte (128 bit) hash; double it for hex
3021 #define MD5_HASH_LEN 32
3023 Datum
3024 md5_text(PG_FUNCTION_ARGS)
3026 text *in_text = PG_GETARG_TEXT_PP(0);
3027 size_t len;
3028 char hexsum[MD5_HASH_LEN + 1];
3030 /* Calculate the length of the buffer using varlena metadata */
3031 len = VARSIZE_ANY_EXHDR(in_text);
3033 /* get the hash result */
3034 if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3035 ereport(ERROR,
3036 (errcode(ERRCODE_OUT_OF_MEMORY),
3037 errmsg("out of memory")));
3039 /* convert to text and return it */
3040 PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3044 * Create an md5 hash of a bytea field and return it as a hex string:
3045 * 16-byte md5 digest is represented in 32 hex characters.
3047 Datum
3048 md5_bytea(PG_FUNCTION_ARGS)
3050 bytea *in = PG_GETARG_BYTEA_PP(0);
3051 size_t len;
3052 char hexsum[MD5_HASH_LEN + 1];
3054 len = VARSIZE_ANY_EXHDR(in);
3055 if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3056 ereport(ERROR,
3057 (errcode(ERRCODE_OUT_OF_MEMORY),
3058 errmsg("out of memory")));
3060 PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3064 * Return the size of a datum, possibly compressed
3066 * Works on any data type
3068 Datum
3069 pg_column_size(PG_FUNCTION_ARGS)
3071 Datum value = PG_GETARG_DATUM(0);
3072 int32 result;
3073 int typlen;
3075 /* On first call, get the input type's typlen, and save at *fn_extra */
3076 if (fcinfo->flinfo->fn_extra == NULL)
3078 /* Lookup the datatype of the supplied argument */
3079 Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3081 typlen = get_typlen(argtypeid);
3082 if (typlen == 0) /* should not happen */
3083 elog(ERROR, "cache lookup failed for type %u", argtypeid);
3085 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3086 sizeof(int));
3087 *((int *) fcinfo->flinfo->fn_extra) = typlen;
3089 else
3090 typlen = *((int *) fcinfo->flinfo->fn_extra);
3092 if (typlen == -1)
3094 /* varlena type, possibly toasted */
3095 result = toast_datum_size(value);
3097 else if (typlen == -2)
3099 /* cstring */
3100 result = strlen(DatumGetCString(value)) + 1;
3102 else
3104 /* ordinary fixed-width type */
3105 result = typlen;
3108 PG_RETURN_INT32(result);