1 /*-------------------------------------------------------------------------
4 * Functions for the variable-length built-in types.
6 * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
13 *-------------------------------------------------------------------------
19 #include "access/tuptoaster.h"
20 #include "catalog/pg_type.h"
21 #include "libpq/md5.h"
22 #include "libpq/pqformat.h"
23 #include "miscadmin.h"
24 #include "parser/scansup.h"
25 #include "regex/regex.h"
26 #include "utils/builtins.h"
27 #include "utils/lsyscache.h"
28 #include "utils/pg_locale.h"
31 typedef struct varlena unknown
;
35 bool use_wchar
; /* T if multibyte encoding */
36 char *str1
; /* use these if not use_wchar */
37 char *str2
; /* note: these point to original texts */
38 pg_wchar
*wstr1
; /* use these if use_wchar */
39 pg_wchar
*wstr2
; /* note: these are palloc'd */
40 int len1
; /* string lengths in logical characters */
42 /* Skip table for Boyer-Moore-Horspool search algorithm: */
43 int skiptablemask
; /* mask for ANDing with skiptable subscripts */
44 int skiptable
[256]; /* skip distance for given mismatched char */
47 #define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
48 #define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
49 #define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
50 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
51 #define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
53 static int text_cmp(text
*arg1
, text
*arg2
);
54 static int32
text_length(Datum str
);
55 static int text_position(text
*t1
, text
*t2
);
56 static void text_position_setup(text
*t1
, text
*t2
, TextPositionState
*state
);
57 static int text_position_next(int start_pos
, TextPositionState
*state
);
58 static void text_position_cleanup(TextPositionState
*state
);
59 static text
*text_substring(Datum str
,
62 bool length_not_specified
);
63 static void appendStringInfoText(StringInfo str
, const text
*t
);
66 /*****************************************************************************
67 * CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
68 *****************************************************************************/
73 * Create a text value from a null-terminated C string.
75 * The new text value is freshly palloc'd with a full-size VARHDR.
78 cstring_to_text(const char *s
)
80 return cstring_to_text_with_len(s
, strlen(s
));
84 * cstring_to_text_with_len
86 * Same as cstring_to_text except the caller specifies the string length;
87 * the string need not be null-terminated.
90 cstring_to_text_with_len(const char *s
, int len
)
92 text
*result
= (text
*) palloc(len
+ VARHDRSZ
);
94 SET_VARSIZE(result
, len
+ VARHDRSZ
);
95 memcpy(VARDATA(result
), s
, len
);
103 * Create a palloc'd, null-terminated C string from a text value.
105 * We support being passed a compressed or toasted text value.
106 * This is a bit bogus since such values shouldn't really be referred to as
107 * "text *", but it seems useful for robustness. If we didn't handle that
108 * case here, we'd need another routine that did, anyway.
111 text_to_cstring(const text
*t
)
113 /* must cast away the const, unfortunately */
114 text
*tunpacked
= pg_detoast_datum_packed((struct varlena
*) t
);
115 int len
= VARSIZE_ANY_EXHDR(tunpacked
);
118 result
= (char *) palloc(len
+ 1);
119 memcpy(result
, VARDATA_ANY(tunpacked
), len
);
129 * text_to_cstring_buffer
131 * Copy a text value into a caller-supplied buffer of size dst_len.
133 * The text string is truncated if necessary to fit. The result is
134 * guaranteed null-terminated (unless dst_len == 0).
136 * We support being passed a compressed or toasted text value.
137 * This is a bit bogus since such values shouldn't really be referred to as
138 * "text *", but it seems useful for robustness. If we didn't handle that
139 * case here, we'd need another routine that did, anyway.
142 text_to_cstring_buffer(const text
*src
, char *dst
, size_t dst_len
)
144 /* must cast away the const, unfortunately */
145 text
*srcunpacked
= pg_detoast_datum_packed((struct varlena
*) src
);
146 size_t src_len
= VARSIZE_ANY_EXHDR(srcunpacked
);
151 if (dst_len
>= src_len
)
153 else /* ensure truncation is encoding-safe */
154 dst_len
= pg_mbcliplen(VARDATA_ANY(srcunpacked
), src_len
, dst_len
);
155 memcpy(dst
, VARDATA_ANY(srcunpacked
), dst_len
);
159 if (srcunpacked
!= src
)
164 /*****************************************************************************
165 * USER I/O ROUTINES *
166 *****************************************************************************/
169 #define VAL(CH) ((CH) - '0')
170 #define DIG(VAL) ((VAL) + '0')
173 * byteain - converts from printable representation of byte array
175 * Non-printable characters must be passed as '\nnn' (octal) and are
176 * converted to internal form. '\' must be passed as '\\'.
177 * ereport(ERROR, ...) if bad form.
180 * The input is scanned twice.
181 * The error checking of input is minimal.
184 byteain(PG_FUNCTION_ARGS
)
186 char *inputText
= PG_GETARG_CSTRING(0);
192 for (byte
= 0, tp
= inputText
; *tp
!= '\0'; byte
++)
196 else if ((tp
[0] == '\\') &&
197 (tp
[1] >= '0' && tp
[1] <= '3') &&
198 (tp
[2] >= '0' && tp
[2] <= '7') &&
199 (tp
[3] >= '0' && tp
[3] <= '7'))
201 else if ((tp
[0] == '\\') &&
207 * one backslash, not followed by 0 or ### valid octal
210 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
211 errmsg("invalid input syntax for type bytea")));
217 result
= (bytea
*) palloc(byte
);
218 SET_VARSIZE(result
, byte
);
221 rp
= VARDATA(result
);
226 else if ((tp
[0] == '\\') &&
227 (tp
[1] >= '0' && tp
[1] <= '3') &&
228 (tp
[2] >= '0' && tp
[2] <= '7') &&
229 (tp
[3] >= '0' && tp
[3] <= '7'))
235 *rp
++ = byte
+VAL(tp
[3]);
239 else if ((tp
[0] == '\\') &&
248 * We should never get here. The first pass should not allow it.
251 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION
),
252 errmsg("invalid input syntax for type bytea")));
256 PG_RETURN_BYTEA_P(result
);
260 * byteaout - converts to printable representation of byte array
262 * Non-printable characters are inserted as '\nnn' (octal) and '\' as
265 * NULL vlena should be an error--returning string with NULL for now.
268 byteaout(PG_FUNCTION_ARGS
)
270 bytea
*vlena
= PG_GETARG_BYTEA_PP(0);
274 int val
; /* holds unprintable chars */
278 len
= 1; /* empty string has 1 char */
279 vp
= VARDATA_ANY(vlena
);
280 for (i
= VARSIZE_ANY_EXHDR(vlena
); i
!= 0; i
--, vp
++)
284 else if ((unsigned char) *vp
< 0x20 || (unsigned char) *vp
> 0x7e)
289 rp
= result
= (char *) palloc(len
);
290 vp
= VARDATA_ANY(vlena
);
291 for (i
= VARSIZE_ANY_EXHDR(vlena
); i
!= 0; i
--, vp
++)
298 else if ((unsigned char) *vp
< 0x20 || (unsigned char) *vp
> 0x7e)
302 rp
[3] = DIG(val
& 07);
304 rp
[2] = DIG(val
& 07);
306 rp
[1] = DIG(val
& 03);
313 PG_RETURN_CSTRING(result
);
317 * bytearecv - converts external binary format to bytea
320 bytearecv(PG_FUNCTION_ARGS
)
322 StringInfo buf
= (StringInfo
) PG_GETARG_POINTER(0);
326 nbytes
= buf
->len
- buf
->cursor
;
327 result
= (bytea
*) palloc(nbytes
+ VARHDRSZ
);
328 SET_VARSIZE(result
, nbytes
+ VARHDRSZ
);
329 pq_copymsgbytes(buf
, VARDATA(result
), nbytes
);
330 PG_RETURN_BYTEA_P(result
);
334 * byteasend - converts bytea to binary format
336 * This is a special case: just copy the input...
339 byteasend(PG_FUNCTION_ARGS
)
341 bytea
*vlena
= PG_GETARG_BYTEA_P_COPY(0);
343 PG_RETURN_BYTEA_P(vlena
);
348 * textin - converts "..." to internal representation
351 textin(PG_FUNCTION_ARGS
)
353 char *inputText
= PG_GETARG_CSTRING(0);
355 PG_RETURN_TEXT_P(cstring_to_text(inputText
));
359 * textout - converts internal representation to "..."
362 textout(PG_FUNCTION_ARGS
)
364 Datum txt
= PG_GETARG_DATUM(0);
366 PG_RETURN_CSTRING(TextDatumGetCString(txt
));
370 * textrecv - converts external binary format to text
373 textrecv(PG_FUNCTION_ARGS
)
375 StringInfo buf
= (StringInfo
) PG_GETARG_POINTER(0);
380 str
= pq_getmsgtext(buf
, buf
->len
- buf
->cursor
, &nbytes
);
382 result
= cstring_to_text_with_len(str
, nbytes
);
384 PG_RETURN_TEXT_P(result
);
388 * textsend - converts text to binary format
391 textsend(PG_FUNCTION_ARGS
)
393 text
*t
= PG_GETARG_TEXT_PP(0);
396 pq_begintypsend(&buf
);
397 pq_sendtext(&buf
, VARDATA_ANY(t
), VARSIZE_ANY_EXHDR(t
));
398 PG_RETURN_BYTEA_P(pq_endtypsend(&buf
));
403 * unknownin - converts "..." to internal representation
406 unknownin(PG_FUNCTION_ARGS
)
408 char *str
= PG_GETARG_CSTRING(0);
410 /* representation is same as cstring */
411 PG_RETURN_CSTRING(pstrdup(str
));
415 * unknownout - converts internal representation to "..."
418 unknownout(PG_FUNCTION_ARGS
)
420 /* representation is same as cstring */
421 char *str
= PG_GETARG_CSTRING(0);
423 PG_RETURN_CSTRING(pstrdup(str
));
427 * unknownrecv - converts external binary format to unknown
430 unknownrecv(PG_FUNCTION_ARGS
)
432 StringInfo buf
= (StringInfo
) PG_GETARG_POINTER(0);
436 str
= pq_getmsgtext(buf
, buf
->len
- buf
->cursor
, &nbytes
);
437 /* representation is same as cstring */
438 PG_RETURN_CSTRING(str
);
442 * unknownsend - converts unknown to binary format
445 unknownsend(PG_FUNCTION_ARGS
)
447 /* representation is same as cstring */
448 char *str
= PG_GETARG_CSTRING(0);
451 pq_begintypsend(&buf
);
452 pq_sendtext(&buf
, str
, strlen(str
));
453 PG_RETURN_BYTEA_P(pq_endtypsend(&buf
));
457 /* ========== PUBLIC ROUTINES ========== */
461 * returns the logical length of a text*
462 * (which is less than the VARSIZE of the text*)
465 textlen(PG_FUNCTION_ARGS
)
467 Datum str
= PG_GETARG_DATUM(0);
469 /* try to avoid decompressing argument */
470 PG_RETURN_INT32(text_length(str
));
475 * Does the real work for textlen()
477 * This is broken out so it can be called directly by other string processing
478 * functions. Note that the argument is passed as a Datum, to indicate that
479 * it may still be in compressed form. We can avoid decompressing it at all
483 text_length(Datum str
)
485 /* fastpath when max encoding length is one */
486 if (pg_database_encoding_max_length() == 1)
487 PG_RETURN_INT32(toast_raw_datum_size(str
) - VARHDRSZ
);
490 text
*t
= DatumGetTextPP(str
);
492 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t
),
493 VARSIZE_ANY_EXHDR(t
)));
499 * returns the physical length of a text*
500 * (which is less than the VARSIZE of the text*)
503 textoctetlen(PG_FUNCTION_ARGS
)
505 Datum str
= PG_GETARG_DATUM(0);
507 /* We need not detoast the input at all */
508 PG_RETURN_INT32(toast_raw_datum_size(str
) - VARHDRSZ
);
513 * takes two text* and returns a text* that is the concatenation of
516 * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
517 * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
518 * Allocate space for output in all cases.
519 * XXX - thomas 1997-07-10
522 textcat(PG_FUNCTION_ARGS
)
524 text
*t1
= PG_GETARG_TEXT_PP(0);
525 text
*t2
= PG_GETARG_TEXT_PP(1);
532 len1
= VARSIZE_ANY_EXHDR(t1
);
536 len2
= VARSIZE_ANY_EXHDR(t2
);
540 len
= len1
+ len2
+ VARHDRSZ
;
541 result
= (text
*) palloc(len
);
543 /* Set size of result string... */
544 SET_VARSIZE(result
, len
);
546 /* Fill data field of result string... */
547 ptr
= VARDATA(result
);
549 memcpy(ptr
, VARDATA_ANY(t1
), len1
);
551 memcpy(ptr
+ len1
, VARDATA_ANY(t2
), len2
);
553 PG_RETURN_TEXT_P(result
);
557 * charlen_to_bytelen()
558 * Compute the number of bytes occupied by n characters starting at *p
560 * It is caller's responsibility that there actually are n characters;
561 * the string need not be null-terminated.
564 charlen_to_bytelen(const char *p
, int n
)
566 if (pg_database_encoding_max_length() == 1)
568 /* Optimization for single-byte encodings */
575 for (s
= p
; n
> 0; n
--)
584 * Return a substring starting at the specified position.
585 * - thomas 1997-12-31
589 * - starting position (is one-based)
592 * If the starting position is zero or less, then return from the start of the string
593 * adjusting the length to be consistent with the "negative start" per SQL92.
594 * If the length is less than zero, an error is raised.
596 * Added multibyte support.
597 * - Tatsuo Ishii 1998-4-21
598 * Changed behavior if starting position is less than one to conform to SQL92 behavior.
599 * Formerly returned the entire string; now returns a portion.
600 * - Thomas Lockhart 1998-12-10
601 * Now uses faster TOAST-slicing interface
602 * - John Gray 2002-02-22
603 * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
604 * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
605 * error; if E < 1, return '', not entire string). Fixed MB related bug when
606 * S > LC and < LC + 4 sometimes garbage characters are returned.
607 * - Joe Conway 2002-08-10
610 text_substr(PG_FUNCTION_ARGS
)
612 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
619 * text_substr_no_len -
620 * Wrapper to avoid opr_sanity failure due to
621 * one function accepting a different number of args.
624 text_substr_no_len(PG_FUNCTION_ARGS
)
626 PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
633 * Does the real work for text_substr() and text_substr_no_len()
635 * This is broken out so it can be called directly by other string processing
636 * functions. Note that the argument is passed as a Datum, to indicate that
637 * it may still be in compressed/toasted form. We can avoid detoasting all
638 * of it in some cases.
640 * The result is always a freshly palloc'd datum.
643 text_substring(Datum str
, int32 start
, int32 length
, bool length_not_specified
)
645 int32 eml
= pg_database_encoding_max_length();
646 int32 S
= start
; /* start position */
647 int32 S1
; /* adjusted start position */
648 int32 L1
; /* adjusted substring length */
650 /* life is easy if the encoding max length is 1 */
655 if (length_not_specified
) /* special case - get length to end of
664 * A negative value for L is the only way for the end position to
665 * be before the start. SQL99 says to throw an error.
669 (errcode(ERRCODE_SUBSTRING_ERROR
),
670 errmsg("negative substring length not allowed")));
673 * A zero or negative value for the end position can happen if the
674 * start was negative or one. SQL99 says to return a zero-length
678 return cstring_to_text("");
684 * If the start position is past the end of the string, SQL99 says to
685 * return a zero-length string -- DatumGetTextPSlice() will do
686 * that for us. Convert to zero-based starting position
688 return DatumGetTextPSlice(str
, S1
- 1, L1
);
693 * When encoding max length is > 1, we can't get LC without
694 * detoasting, so we'll grab a conservatively large slice now and go
695 * back later to do the right thing
708 * if S is past the end of the string, the tuple toaster will return a
709 * zero-length string to us
714 * We need to start at position zero because there is no way to know
715 * in advance which byte offset corresponds to the supplied start
720 if (length_not_specified
) /* special case - get length to end of
722 slice_size
= L1
= -1;
728 * A negative value for L is the only way for the end position to
729 * be before the start. SQL99 says to throw an error.
733 (errcode(ERRCODE_SUBSTRING_ERROR
),
734 errmsg("negative substring length not allowed")));
737 * A zero or negative value for the end position can happen if the
738 * start was negative or one. SQL99 says to return a zero-length
742 return cstring_to_text("");
745 * if E is past the end of the string, the tuple toaster will
746 * truncate the length for us
751 * Total slice size in bytes can't be any longer than the start
752 * position plus substring length times the encoding max length.
754 slice_size
= (S1
+ L1
) * eml
;
758 * If we're working with an untoasted source, no need to do an extra
761 if (VARATT_IS_COMPRESSED(DatumGetPointer(str
)) ||
762 VARATT_IS_EXTERNAL(DatumGetPointer(str
)))
763 slice
= DatumGetTextPSlice(str
, slice_start
, slice_size
);
765 slice
= (text
*) DatumGetPointer(str
);
767 /* see if we got back an empty string */
768 if (VARSIZE_ANY_EXHDR(slice
) == 0)
770 if (slice
!= (text
*) DatumGetPointer(str
))
772 return cstring_to_text("");
775 /* Now we can get the actual length of the slice in MB characters */
776 slice_strlen
= pg_mbstrlen_with_len(VARDATA_ANY(slice
),
777 VARSIZE_ANY_EXHDR(slice
));
780 * Check that the start position wasn't > slice_strlen. If so, SQL99
781 * says to return a zero-length string.
783 if (S1
> slice_strlen
)
785 if (slice
!= (text
*) DatumGetPointer(str
))
787 return cstring_to_text("");
791 * Adjust L1 and E1 now that we know the slice string length. Again
792 * remember that S1 is one based, and slice_start is zero based.
795 E1
= Min(S1
+ L1
, slice_start
+ 1 + slice_strlen
);
797 E1
= slice_start
+ 1 + slice_strlen
;
800 * Find the start position in the slice; remember S1 is not zero based
802 p
= VARDATA_ANY(slice
);
803 for (i
= 0; i
< S1
- 1; i
++)
806 /* hang onto a pointer to our start position */
810 * Count the actual bytes used by the substring of the requested
813 for (i
= S1
; i
< E1
; i
++)
816 ret
= (text
*) palloc(VARHDRSZ
+ (p
- s
));
817 SET_VARSIZE(ret
, VARHDRSZ
+ (p
- s
));
818 memcpy(VARDATA(ret
), s
, (p
- s
));
820 if (slice
!= (text
*) DatumGetPointer(str
))
826 elog(ERROR
, "invalid backend encoding: encoding max length < 1");
828 /* not reached: suppress compiler warning */
834 * Return the position of the specified substring.
835 * Implements the SQL92 POSITION() function.
836 * Ref: A Guide To The SQL Standard, Date & Darwen, 1997
837 * - thomas 1997-07-27
840 textpos(PG_FUNCTION_ARGS
)
842 text
*str
= PG_GETARG_TEXT_PP(0);
843 text
*search_str
= PG_GETARG_TEXT_PP(1);
845 PG_RETURN_INT32((int32
) text_position(str
, search_str
));
850 * Does the real work for textpos()
853 * t1 - string to be searched
854 * t2 - pattern to match within t1
856 * Character index of the first matched char, starting from 1,
859 * This is broken out so it can be called directly by other string processing
863 text_position(text
*t1
, text
*t2
)
865 TextPositionState state
;
868 text_position_setup(t1
, t2
, &state
);
869 result
= text_position_next(1, &state
);
870 text_position_cleanup(&state
);
876 * text_position_setup, text_position_next, text_position_cleanup -
877 * Component steps of text_position()
879 * These are broken out so that a string can be efficiently searched for
880 * multiple occurrences of the same pattern. text_position_next may be
881 * called multiple times with increasing values of start_pos, which is
882 * the 1-based character position to start the search from. The "state"
883 * variable is normally just a local variable in the caller.
887 text_position_setup(text
*t1
, text
*t2
, TextPositionState
*state
)
889 int len1
= VARSIZE_ANY_EXHDR(t1
);
890 int len2
= VARSIZE_ANY_EXHDR(t2
);
892 if (pg_database_encoding_max_length() == 1)
894 /* simple case - single byte encoding */
895 state
->use_wchar
= false;
896 state
->str1
= VARDATA_ANY(t1
);
897 state
->str2
= VARDATA_ANY(t2
);
903 /* not as simple - multibyte encoding */
907 p1
= (pg_wchar
*) palloc((len1
+ 1) * sizeof(pg_wchar
));
908 len1
= pg_mb2wchar_with_len(VARDATA_ANY(t1
), p1
, len1
);
909 p2
= (pg_wchar
*) palloc((len2
+ 1) * sizeof(pg_wchar
));
910 len2
= pg_mb2wchar_with_len(VARDATA_ANY(t2
), p2
, len2
);
912 state
->use_wchar
= true;
920 * Prepare the skip table for Boyer-Moore-Horspool searching. In these
921 * notes we use the terminology that the "haystack" is the string to be
922 * searched (t1) and the "needle" is the pattern being sought (t2).
924 * If the needle is empty or bigger than the haystack then there is no
925 * point in wasting cycles initializing the table. We also choose not to
926 * use B-M-H for needles of length 1, since the skip table can't possibly
927 * save anything in that case.
929 if (len1
>= len2
&& len2
> 1)
931 int searchlength
= len1
- len2
;
937 * First we must determine how much of the skip table to use. The
938 * declaration of TextPositionState allows up to 256 elements, but for
939 * short search problems we don't really want to have to initialize so
940 * many elements --- it would take too long in comparison to the
941 * actual search time. So we choose a useful skip table size based on
942 * the haystack length minus the needle length. The closer the needle
943 * length is to the haystack length the less useful skipping becomes.
945 * Note: since we use bit-masking to select table elements, the skip
946 * table size MUST be a power of 2, and so the mask must be 2^N-1.
948 if (searchlength
< 16)
950 else if (searchlength
< 64)
952 else if (searchlength
< 128)
954 else if (searchlength
< 512)
956 else if (searchlength
< 2048)
958 else if (searchlength
< 4096)
962 state
->skiptablemask
= skiptablemask
;
965 * Initialize the skip table. We set all elements to the needle
966 * length, since this is the correct skip distance for any character
967 * not found in the needle.
969 for (i
= 0; i
<= skiptablemask
; i
++)
970 state
->skiptable
[i
] = len2
;
973 * Now examine the needle. For each character except the last one,
974 * set the corresponding table element to the appropriate skip
975 * distance. Note that when two characters share the same skip table
976 * entry, the one later in the needle must determine the skip
981 if (!state
->use_wchar
)
983 const char *str2
= state
->str2
;
985 for (i
= 0; i
< last
; i
++)
986 state
->skiptable
[(unsigned char) str2
[i
] & skiptablemask
] = last
- i
;
990 const pg_wchar
*wstr2
= state
->wstr2
;
992 for (i
= 0; i
< last
; i
++)
993 state
->skiptable
[wstr2
[i
] & skiptablemask
] = last
- i
;
999 text_position_next(int start_pos
, TextPositionState
*state
)
1001 int haystack_len
= state
->len1
;
1002 int needle_len
= state
->len2
;
1003 int skiptablemask
= state
->skiptablemask
;
1005 Assert(start_pos
> 0); /* else caller error */
1007 if (needle_len
<= 0)
1008 return start_pos
; /* result for empty pattern */
1010 start_pos
--; /* adjust for zero based arrays */
1012 /* Done if the needle can't possibly fit */
1013 if (haystack_len
< start_pos
+ needle_len
)
1016 if (!state
->use_wchar
)
1018 /* simple case - single byte encoding */
1019 const char *haystack
= state
->str1
;
1020 const char *needle
= state
->str2
;
1021 const char *haystack_end
= &haystack
[haystack_len
];
1024 if (needle_len
== 1)
1026 /* No point in using B-M-H for a one-character needle */
1027 char nchar
= *needle
;
1029 hptr
= &haystack
[start_pos
];
1030 while (hptr
< haystack_end
)
1033 return hptr
- haystack
+ 1;
1039 const char *needle_last
= &needle
[needle_len
- 1];
1041 /* Start at startpos plus the length of the needle */
1042 hptr
= &haystack
[start_pos
+ needle_len
- 1];
1043 while (hptr
< haystack_end
)
1045 /* Match the needle scanning *backward* */
1053 /* Matched it all? If so, return 1-based position */
1055 return p
- haystack
+ 1;
1060 * No match, so use the haystack char at hptr to decide how
1061 * far to advance. If the needle had any occurrence of that
1062 * character (or more precisely, one sharing the same
1063 * skiptable entry) before its last character, then we advance
1064 * far enough to align the last such needle character with
1065 * that haystack position. Otherwise we can advance by the
1066 * whole needle length.
1068 hptr
+= state
->skiptable
[(unsigned char) *hptr
& skiptablemask
];
1074 /* The multibyte char version. This works exactly the same way. */
1075 const pg_wchar
*haystack
= state
->wstr1
;
1076 const pg_wchar
*needle
= state
->wstr2
;
1077 const pg_wchar
*haystack_end
= &haystack
[haystack_len
];
1078 const pg_wchar
*hptr
;
1080 if (needle_len
== 1)
1082 /* No point in using B-M-H for a one-character needle */
1083 pg_wchar nchar
= *needle
;
1085 hptr
= &haystack
[start_pos
];
1086 while (hptr
< haystack_end
)
1089 return hptr
- haystack
+ 1;
1095 const pg_wchar
*needle_last
= &needle
[needle_len
- 1];
1097 /* Start at startpos plus the length of the needle */
1098 hptr
= &haystack
[start_pos
+ needle_len
- 1];
1099 while (hptr
< haystack_end
)
1101 /* Match the needle scanning *backward* */
1102 const pg_wchar
*nptr
;
1109 /* Matched it all? If so, return 1-based position */
1111 return p
- haystack
+ 1;
1116 * No match, so use the haystack char at hptr to decide how
1117 * far to advance. If the needle had any occurrence of that
1118 * character (or more precisely, one sharing the same
1119 * skiptable entry) before its last character, then we advance
1120 * far enough to align the last such needle character with
1121 * that haystack position. Otherwise we can advance by the
1122 * whole needle length.
1124 hptr
+= state
->skiptable
[*hptr
& skiptablemask
];
1129 return 0; /* not found */
1133 text_position_cleanup(TextPositionState
*state
)
1135 if (state
->use_wchar
)
1137 pfree(state
->wstr1
);
1138 pfree(state
->wstr2
);
1143 * Comparison function for text strings with given lengths.
1144 * Includes locale support, but must copy strings to temporary memory
1145 * to allow null-termination for inputs to strcoll().
1146 * Returns an integer less than, equal to, or greater than zero, indicating
1147 * whether arg1 is less than, equal to, or greater than arg2.
1150 varstr_cmp(char *arg1
, int len1
, char *arg2
, int len2
)
1155 * Unfortunately, there is no strncoll(), so in the non-C locale case we
1156 * have to do some memory copying. This turns out to be significantly
1157 * slower, so we optimize the case where LC_COLLATE is C. We also try to
1158 * optimize relatively-short strings by avoiding palloc/pfree overhead.
1160 if (lc_collate_is_c())
1162 result
= strncmp(arg1
, arg2
, Min(len1
, len2
));
1163 if ((result
== 0) && (len1
!= len2
))
1164 result
= (len1
< len2
) ? -1 : 1;
1168 #define STACKBUFLEN 1024
1170 char a1buf
[STACKBUFLEN
];
1171 char a2buf
[STACKBUFLEN
];
1176 /* Win32 does not have UTF-8, so we need to map to UTF-16 */
1177 if (GetDatabaseEncoding() == PG_UTF8
)
1183 if (len1
>= STACKBUFLEN
/ 2)
1185 a1len
= len1
* 2 + 2;
1186 a1p
= palloc(a1len
);
1190 a1len
= STACKBUFLEN
;
1193 if (len2
>= STACKBUFLEN
/ 2)
1195 a2len
= len2
* 2 + 2;
1196 a2p
= palloc(a2len
);
1200 a2len
= STACKBUFLEN
;
1204 /* MultiByteToWideChar() does not work for zero-length input */
1209 r
= MultiByteToWideChar(CP_UTF8
, 0, arg1
, len1
,
1210 (LPWSTR
) a1p
, a1len
/ 2);
1213 (errmsg("could not convert string to UTF-16: error %lu",
1216 ((LPWSTR
) a1p
)[r
] = 0;
1222 r
= MultiByteToWideChar(CP_UTF8
, 0, arg2
, len2
,
1223 (LPWSTR
) a2p
, a2len
/ 2);
1226 (errmsg("could not convert string to UTF-16: error %lu",
1229 ((LPWSTR
) a2p
)[r
] = 0;
1232 result
= wcscoll((LPWSTR
) a1p
, (LPWSTR
) a2p
);
1233 if (result
== 2147483647) /* _NLSCMPERROR; missing from mingw
1236 (errmsg("could not compare Unicode strings: %m")));
1239 * In some locales wcscoll() can claim that nonidentical strings
1240 * are equal. Believing that would be bad news for a number of
1241 * reasons, so we follow Perl's lead and sort "equal" strings
1242 * according to strcmp (on the UTF-8 representation).
1246 result
= strncmp(arg1
, arg2
, Min(len1
, len2
));
1247 if ((result
== 0) && (len1
!= len2
))
1248 result
= (len1
< len2
) ? -1 : 1;
1260 if (len1
>= STACKBUFLEN
)
1261 a1p
= (char *) palloc(len1
+ 1);
1264 if (len2
>= STACKBUFLEN
)
1265 a2p
= (char *) palloc(len2
+ 1);
1269 memcpy(a1p
, arg1
, len1
);
1271 memcpy(a2p
, arg2
, len2
);
1274 result
= strcoll(a1p
, a2p
);
1277 * In some locales strcoll() can claim that nonidentical strings are
1278 * equal. Believing that would be bad news for a number of reasons,
1279 * so we follow Perl's lead and sort "equal" strings according to
1283 result
= strcmp(a1p
, a2p
);
1296 * Internal comparison function for text strings.
1297 * Returns an integer less than, equal to, or greater than zero
1300 text_cmp(text
*arg1
, text
*arg2
)
1307 a1p
= VARDATA_ANY(arg1
);
1308 a2p
= VARDATA_ANY(arg2
);
1310 len1
= VARSIZE_ANY_EXHDR(arg1
);
1311 len2
= VARSIZE_ANY_EXHDR(arg2
);
1313 return varstr_cmp(a1p
, len1
, a2p
, len2
);
1317 * Comparison functions for text strings.
1319 * Note: btree indexes need these routines not to leak memory; therefore,
1320 * be careful to free working copies of toasted datums. Most places don't
1321 * need to be so careful.
1325 texteq(PG_FUNCTION_ARGS
)
1327 text
*arg1
= PG_GETARG_TEXT_PP(0);
1328 text
*arg2
= PG_GETARG_TEXT_PP(1);
1332 * Since we only care about equality or not-equality, we can avoid all the
1333 * expense of strcoll() here, and just do bitwise comparison.
1335 if (VARSIZE_ANY_EXHDR(arg1
) != VARSIZE_ANY_EXHDR(arg2
))
1338 result
= (strncmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
),
1339 VARSIZE_ANY_EXHDR(arg1
)) == 0);
1341 PG_FREE_IF_COPY(arg1
, 0);
1342 PG_FREE_IF_COPY(arg2
, 1);
1344 PG_RETURN_BOOL(result
);
1348 textne(PG_FUNCTION_ARGS
)
1350 text
*arg1
= PG_GETARG_TEXT_PP(0);
1351 text
*arg2
= PG_GETARG_TEXT_PP(1);
1355 * Since we only care about equality or not-equality, we can avoid all the
1356 * expense of strcoll() here, and just do bitwise comparison.
1358 if (VARSIZE_ANY_EXHDR(arg1
) != VARSIZE_ANY_EXHDR(arg2
))
1361 result
= (strncmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
),
1362 VARSIZE_ANY_EXHDR(arg1
)) != 0);
1364 PG_FREE_IF_COPY(arg1
, 0);
1365 PG_FREE_IF_COPY(arg2
, 1);
1367 PG_RETURN_BOOL(result
);
1371 text_lt(PG_FUNCTION_ARGS
)
1373 text
*arg1
= PG_GETARG_TEXT_PP(0);
1374 text
*arg2
= PG_GETARG_TEXT_PP(1);
1377 result
= (text_cmp(arg1
, arg2
) < 0);
1379 PG_FREE_IF_COPY(arg1
, 0);
1380 PG_FREE_IF_COPY(arg2
, 1);
1382 PG_RETURN_BOOL(result
);
1386 text_le(PG_FUNCTION_ARGS
)
1388 text
*arg1
= PG_GETARG_TEXT_PP(0);
1389 text
*arg2
= PG_GETARG_TEXT_PP(1);
1392 result
= (text_cmp(arg1
, arg2
) <= 0);
1394 PG_FREE_IF_COPY(arg1
, 0);
1395 PG_FREE_IF_COPY(arg2
, 1);
1397 PG_RETURN_BOOL(result
);
1401 text_gt(PG_FUNCTION_ARGS
)
1403 text
*arg1
= PG_GETARG_TEXT_PP(0);
1404 text
*arg2
= PG_GETARG_TEXT_PP(1);
1407 result
= (text_cmp(arg1
, arg2
) > 0);
1409 PG_FREE_IF_COPY(arg1
, 0);
1410 PG_FREE_IF_COPY(arg2
, 1);
1412 PG_RETURN_BOOL(result
);
1416 text_ge(PG_FUNCTION_ARGS
)
1418 text
*arg1
= PG_GETARG_TEXT_PP(0);
1419 text
*arg2
= PG_GETARG_TEXT_PP(1);
1422 result
= (text_cmp(arg1
, arg2
) >= 0);
1424 PG_FREE_IF_COPY(arg1
, 0);
1425 PG_FREE_IF_COPY(arg2
, 1);
1427 PG_RETURN_BOOL(result
);
1431 bttextcmp(PG_FUNCTION_ARGS
)
1433 text
*arg1
= PG_GETARG_TEXT_PP(0);
1434 text
*arg2
= PG_GETARG_TEXT_PP(1);
1437 result
= text_cmp(arg1
, arg2
);
1439 PG_FREE_IF_COPY(arg1
, 0);
1440 PG_FREE_IF_COPY(arg2
, 1);
1442 PG_RETURN_INT32(result
);
1447 text_larger(PG_FUNCTION_ARGS
)
1449 text
*arg1
= PG_GETARG_TEXT_PP(0);
1450 text
*arg2
= PG_GETARG_TEXT_PP(1);
1453 result
= ((text_cmp(arg1
, arg2
) > 0) ? arg1
: arg2
);
1455 PG_RETURN_TEXT_P(result
);
1459 text_smaller(PG_FUNCTION_ARGS
)
1461 text
*arg1
= PG_GETARG_TEXT_PP(0);
1462 text
*arg2
= PG_GETARG_TEXT_PP(1);
1465 result
= ((text_cmp(arg1
, arg2
) < 0) ? arg1
: arg2
);
1467 PG_RETURN_TEXT_P(result
);
1472 * The following operators support character-by-character comparison
1473 * of text datums, to allow building indexes suitable for LIKE clauses.
1474 * Note that the regular texteq/textne comparison operators are assumed
1475 * to be compatible with these!
1479 internal_text_pattern_compare(text
*arg1
, text
*arg2
)
1485 len1
= VARSIZE_ANY_EXHDR(arg1
);
1486 len2
= VARSIZE_ANY_EXHDR(arg2
);
1488 result
= strncmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
1491 else if (len1
< len2
)
1493 else if (len1
> len2
)
1501 text_pattern_lt(PG_FUNCTION_ARGS
)
1503 text
*arg1
= PG_GETARG_TEXT_PP(0);
1504 text
*arg2
= PG_GETARG_TEXT_PP(1);
1507 result
= internal_text_pattern_compare(arg1
, arg2
);
1509 PG_FREE_IF_COPY(arg1
, 0);
1510 PG_FREE_IF_COPY(arg2
, 1);
1512 PG_RETURN_BOOL(result
< 0);
1517 text_pattern_le(PG_FUNCTION_ARGS
)
1519 text
*arg1
= PG_GETARG_TEXT_PP(0);
1520 text
*arg2
= PG_GETARG_TEXT_PP(1);
1523 result
= internal_text_pattern_compare(arg1
, arg2
);
1525 PG_FREE_IF_COPY(arg1
, 0);
1526 PG_FREE_IF_COPY(arg2
, 1);
1528 PG_RETURN_BOOL(result
<= 0);
1533 text_pattern_ge(PG_FUNCTION_ARGS
)
1535 text
*arg1
= PG_GETARG_TEXT_PP(0);
1536 text
*arg2
= PG_GETARG_TEXT_PP(1);
1539 result
= internal_text_pattern_compare(arg1
, arg2
);
1541 PG_FREE_IF_COPY(arg1
, 0);
1542 PG_FREE_IF_COPY(arg2
, 1);
1544 PG_RETURN_BOOL(result
>= 0);
1549 text_pattern_gt(PG_FUNCTION_ARGS
)
1551 text
*arg1
= PG_GETARG_TEXT_PP(0);
1552 text
*arg2
= PG_GETARG_TEXT_PP(1);
1555 result
= internal_text_pattern_compare(arg1
, arg2
);
1557 PG_FREE_IF_COPY(arg1
, 0);
1558 PG_FREE_IF_COPY(arg2
, 1);
1560 PG_RETURN_BOOL(result
> 0);
1565 bttext_pattern_cmp(PG_FUNCTION_ARGS
)
1567 text
*arg1
= PG_GETARG_TEXT_PP(0);
1568 text
*arg2
= PG_GETARG_TEXT_PP(1);
1571 result
= internal_text_pattern_compare(arg1
, arg2
);
1573 PG_FREE_IF_COPY(arg1
, 0);
1574 PG_FREE_IF_COPY(arg2
, 1);
1576 PG_RETURN_INT32(result
);
1580 /*-------------------------------------------------------------
1583 * get the number of bytes contained in an instance of type 'bytea'
1584 *-------------------------------------------------------------
1587 byteaoctetlen(PG_FUNCTION_ARGS
)
1589 Datum str
= PG_GETARG_DATUM(0);
1591 /* We need not detoast the input at all */
1592 PG_RETURN_INT32(toast_raw_datum_size(str
) - VARHDRSZ
);
1597 * takes two bytea* and returns a bytea* that is the concatenation of
1600 * Cloned from textcat and modified as required.
1603 byteacat(PG_FUNCTION_ARGS
)
1605 bytea
*t1
= PG_GETARG_BYTEA_PP(0);
1606 bytea
*t2
= PG_GETARG_BYTEA_PP(1);
1613 len1
= VARSIZE_ANY_EXHDR(t1
);
1617 len2
= VARSIZE_ANY_EXHDR(t2
);
1621 len
= len1
+ len2
+ VARHDRSZ
;
1622 result
= (bytea
*) palloc(len
);
1624 /* Set size of result string... */
1625 SET_VARSIZE(result
, len
);
1627 /* Fill data field of result string... */
1628 ptr
= VARDATA(result
);
1630 memcpy(ptr
, VARDATA_ANY(t1
), len1
);
1632 memcpy(ptr
+ len1
, VARDATA_ANY(t2
), len2
);
1634 PG_RETURN_BYTEA_P(result
);
1637 #define PG_STR_GET_BYTEA(str_) \
1638 DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1642 * Return a substring starting at the specified position.
1643 * Cloned from text_substr and modified as required.
1647 * - starting position (is one-based)
1648 * - string length (optional)
1650 * If the starting position is zero or less, then return from the start of the string
1651 * adjusting the length to be consistent with the "negative start" per SQL92.
1652 * If the length is less than zero, an ERROR is thrown. If no third argument
1653 * (length) is provided, the length to the end of the string is assumed.
1656 bytea_substr(PG_FUNCTION_ARGS
)
1658 int S
= PG_GETARG_INT32(1); /* start position */
1659 int S1
; /* adjusted start position */
1660 int L1
; /* adjusted substring length */
1664 if (fcinfo
->nargs
== 2)
1667 * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1668 * the end of the string if we pass it a negative value for length.
1675 int E
= S
+ PG_GETARG_INT32(2);
1678 * A negative value for L is the only way for the end position to be
1679 * before the start. SQL99 says to throw an error.
1683 (errcode(ERRCODE_SUBSTRING_ERROR
),
1684 errmsg("negative substring length not allowed")));
1687 * A zero or negative value for the end position can happen if the
1688 * start was negative or one. SQL99 says to return a zero-length
1692 PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1698 * If the start position is past the end of the string, SQL99 says to
1699 * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1700 * for us. Convert to zero-based starting position
1702 PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1
- 1, L1
));
1706 * bytea_substr_no_len -
1707 * Wrapper to avoid opr_sanity failure due to
1708 * one function accepting a different number of args.
1711 bytea_substr_no_len(PG_FUNCTION_ARGS
)
1713 return bytea_substr(fcinfo
);
1718 * Return the position of the specified substring.
1719 * Implements the SQL92 POSITION() function.
1720 * Cloned from textpos and modified as required.
1723 byteapos(PG_FUNCTION_ARGS
)
1725 bytea
*t1
= PG_GETARG_BYTEA_PP(0);
1726 bytea
*t2
= PG_GETARG_BYTEA_PP(1);
1735 len1
= VARSIZE_ANY_EXHDR(t1
);
1736 len2
= VARSIZE_ANY_EXHDR(t2
);
1739 PG_RETURN_INT32(1); /* result for empty pattern */
1741 p1
= VARDATA_ANY(t1
);
1742 p2
= VARDATA_ANY(t2
);
1746 for (p
= 0; p
<= px
; p
++)
1748 if ((*p2
== *p1
) && (memcmp(p1
, p2
, len2
) == 0))
1756 PG_RETURN_INT32(pos
);
1759 /*-------------------------------------------------------------
1762 * this routine treats "bytea" as an array of bytes.
1763 * It returns the Nth byte (a number between 0 and 255).
1764 *-------------------------------------------------------------
1767 byteaGetByte(PG_FUNCTION_ARGS
)
1769 bytea
*v
= PG_GETARG_BYTEA_PP(0);
1770 int32 n
= PG_GETARG_INT32(1);
1774 len
= VARSIZE_ANY_EXHDR(v
);
1776 if (n
< 0 || n
>= len
)
1778 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1779 errmsg("index %d out of valid range, 0..%d",
1782 byte
= ((unsigned char *) VARDATA_ANY(v
))[n
];
1784 PG_RETURN_INT32(byte
);
1787 /*-------------------------------------------------------------
1790 * This routine treats a "bytea" type like an array of bits.
1791 * It returns the value of the Nth bit (0 or 1).
1793 *-------------------------------------------------------------
1796 byteaGetBit(PG_FUNCTION_ARGS
)
1798 bytea
*v
= PG_GETARG_BYTEA_PP(0);
1799 int32 n
= PG_GETARG_INT32(1);
1805 len
= VARSIZE_ANY_EXHDR(v
);
1807 if (n
< 0 || n
>= len
* 8)
1809 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1810 errmsg("index %d out of valid range, 0..%d",
1816 byte
= ((unsigned char *) VARDATA_ANY(v
))[byteNo
];
1818 if (byte
&(1 << bitNo
))
1824 /*-------------------------------------------------------------
1827 * Given an instance of type 'bytea' creates a new one with
1828 * the Nth byte set to the given value.
1830 *-------------------------------------------------------------
1833 byteaSetByte(PG_FUNCTION_ARGS
)
1835 bytea
*v
= PG_GETARG_BYTEA_P(0);
1836 int32 n
= PG_GETARG_INT32(1);
1837 int32 newByte
= PG_GETARG_INT32(2);
1841 len
= VARSIZE(v
) - VARHDRSZ
;
1843 if (n
< 0 || n
>= len
)
1845 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1846 errmsg("index %d out of valid range, 0..%d",
1850 * Make a copy of the original varlena.
1852 res
= (bytea
*) palloc(VARSIZE(v
));
1853 memcpy((char *) res
, (char *) v
, VARSIZE(v
));
1858 ((unsigned char *) VARDATA(res
))[n
] = newByte
;
1860 PG_RETURN_BYTEA_P(res
);
1863 /*-------------------------------------------------------------
1866 * Given an instance of type 'bytea' creates a new one with
1867 * the Nth bit set to the given value.
1869 *-------------------------------------------------------------
1872 byteaSetBit(PG_FUNCTION_ARGS
)
1874 bytea
*v
= PG_GETARG_BYTEA_P(0);
1875 int32 n
= PG_GETARG_INT32(1);
1876 int32 newBit
= PG_GETARG_INT32(2);
1884 len
= VARSIZE(v
) - VARHDRSZ
;
1886 if (n
< 0 || n
>= len
* 8)
1888 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR
),
1889 errmsg("index %d out of valid range, 0..%d",
1898 if (newBit
!= 0 && newBit
!= 1)
1900 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
1901 errmsg("new bit must be 0 or 1")));
1904 * Make a copy of the original varlena.
1906 res
= (bytea
*) palloc(VARSIZE(v
));
1907 memcpy((char *) res
, (char *) v
, VARSIZE(v
));
1912 oldByte
= ((unsigned char *) VARDATA(res
))[byteNo
];
1915 newByte
= oldByte
& (~(1 << bitNo
));
1917 newByte
= oldByte
| (1 << bitNo
);
1919 ((unsigned char *) VARDATA(res
))[byteNo
] = newByte
;
1921 PG_RETURN_BYTEA_P(res
);
1926 * Converts a text type to a Name type.
1929 text_name(PG_FUNCTION_ARGS
)
1931 text
*s
= PG_GETARG_TEXT_PP(0);
1935 len
= VARSIZE_ANY_EXHDR(s
);
1937 /* Truncate oversize input */
1938 if (len
>= NAMEDATALEN
)
1939 len
= NAMEDATALEN
- 1;
1941 result
= (Name
) palloc(NAMEDATALEN
);
1942 memcpy(NameStr(*result
), VARDATA_ANY(s
), len
);
1944 /* now null pad to full length... */
1945 while (len
< NAMEDATALEN
)
1947 *(NameStr(*result
) + len
) = '\0';
1951 PG_RETURN_NAME(result
);
1955 * Converts a Name type to a text type.
1958 name_text(PG_FUNCTION_ARGS
)
1960 Name s
= PG_GETARG_NAME(0);
1962 PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s
)));
1967 * textToQualifiedNameList - convert a text object to list of names
1969 * This implements the input parsing needed by nextval() and other
1970 * functions that take a text parameter representing a qualified name.
1971 * We split the name at dots, downcase if not double-quoted, and
1972 * truncate names if they're too long.
1975 textToQualifiedNameList(text
*textval
)
1982 /* Convert to C string (handles possible detoasting). */
1983 /* Note we rely on being able to modify rawname below. */
1984 rawname
= text_to_cstring(textval
);
1986 if (!SplitIdentifierString(rawname
, '.', &namelist
))
1988 (errcode(ERRCODE_INVALID_NAME
),
1989 errmsg("invalid name syntax")));
1991 if (namelist
== NIL
)
1993 (errcode(ERRCODE_INVALID_NAME
),
1994 errmsg("invalid name syntax")));
1996 foreach(l
, namelist
)
1998 char *curname
= (char *) lfirst(l
);
2000 result
= lappend(result
, makeString(pstrdup(curname
)));
2004 list_free(namelist
);
2010 * SplitIdentifierString --- parse a string containing identifiers
2012 * This is the guts of textToQualifiedNameList, and is exported for use in
2013 * other situations such as parsing GUC variables. In the GUC case, it's
2014 * important to avoid memory leaks, so the API is designed to minimize the
2015 * amount of stuff that needs to be allocated and freed.
2018 * rawstring: the input string; must be overwritable! On return, it's
2019 * been modified to contain the separated identifiers.
2020 * separator: the separator punctuation expected between identifiers
2021 * (typically '.' or ','). Whitespace may also appear around
2024 * namelist: filled with a palloc'd list of pointers to identifiers within
2025 * rawstring. Caller should list_free() this even on error return.
2027 * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2029 * Note that an empty string is considered okay here, though not in
2030 * textToQualifiedNameList.
2033 SplitIdentifierString(char *rawstring
, char separator
,
2036 char *nextp
= rawstring
;
2041 while (isspace((unsigned char) *nextp
))
2042 nextp
++; /* skip leading whitespace */
2045 return true; /* allow empty string */
2047 /* At the top of the loop, we are at start of a new identifier. */
2055 /* Quoted name --- collapse quote-quote pairs, no downcasing */
2056 curname
= nextp
+ 1;
2059 endp
= strchr(nextp
+ 1, '\"');
2061 return false; /* mismatched quotes */
2062 if (endp
[1] != '\"')
2063 break; /* found end of quoted name */
2064 /* Collapse adjacent quotes into one quote, and look again */
2065 memmove(endp
, endp
+ 1, strlen(endp
));
2068 /* endp now points at the terminating quote */
2073 /* Unquoted name --- extends to separator or whitespace */
2078 while (*nextp
&& *nextp
!= separator
&&
2079 !isspace((unsigned char) *nextp
))
2082 if (curname
== nextp
)
2083 return false; /* empty unquoted name not allowed */
2086 * Downcase the identifier, using same code as main lexer does.
2088 * XXX because we want to overwrite the input in-place, we cannot
2089 * support a downcasing transformation that increases the string
2090 * length. This is not a problem given the current implementation
2091 * of downcase_truncate_identifier, but we'll probably have to do
2092 * something about this someday.
2094 len
= endp
- curname
;
2095 downname
= downcase_truncate_identifier(curname
, len
, false);
2096 Assert(strlen(downname
) <= len
);
2097 strncpy(curname
, downname
, len
);
2101 while (isspace((unsigned char) *nextp
))
2102 nextp
++; /* skip trailing whitespace */
2104 if (*nextp
== separator
)
2107 while (isspace((unsigned char) *nextp
))
2108 nextp
++; /* skip leading whitespace for next */
2109 /* we expect another name, so done remains false */
2111 else if (*nextp
== '\0')
2114 return false; /* invalid syntax */
2116 /* Now safe to overwrite separator with a null */
2119 /* Truncate name if it's overlength */
2120 truncate_identifier(curname
, strlen(curname
), false);
2123 * Finished isolating current name --- add it to list
2125 *namelist
= lappend(*namelist
, curname
);
2127 /* Loop back if we didn't reach end of string */
2134 /*****************************************************************************
2135 * Comparison Functions used for bytea
2137 * Note: btree indexes need these routines not to leak memory; therefore,
2138 * be careful to free working copies of toasted datums. Most places don't
2139 * need to be so careful.
2140 *****************************************************************************/
2143 byteaeq(PG_FUNCTION_ARGS
)
2145 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2146 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2151 len1
= VARSIZE_ANY_EXHDR(arg1
);
2152 len2
= VARSIZE_ANY_EXHDR(arg2
);
2154 /* fast path for different-length inputs */
2158 result
= (memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), len1
) == 0);
2160 PG_FREE_IF_COPY(arg1
, 0);
2161 PG_FREE_IF_COPY(arg2
, 1);
2163 PG_RETURN_BOOL(result
);
2167 byteane(PG_FUNCTION_ARGS
)
2169 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2170 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2175 len1
= VARSIZE_ANY_EXHDR(arg1
);
2176 len2
= VARSIZE_ANY_EXHDR(arg2
);
2178 /* fast path for different-length inputs */
2182 result
= (memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), len1
) != 0);
2184 PG_FREE_IF_COPY(arg1
, 0);
2185 PG_FREE_IF_COPY(arg2
, 1);
2187 PG_RETURN_BOOL(result
);
2191 bytealt(PG_FUNCTION_ARGS
)
2193 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2194 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2199 len1
= VARSIZE_ANY_EXHDR(arg1
);
2200 len2
= VARSIZE_ANY_EXHDR(arg2
);
2202 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2204 PG_FREE_IF_COPY(arg1
, 0);
2205 PG_FREE_IF_COPY(arg2
, 1);
2207 PG_RETURN_BOOL((cmp
< 0) || ((cmp
== 0) && (len1
< len2
)));
2211 byteale(PG_FUNCTION_ARGS
)
2213 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2214 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2219 len1
= VARSIZE_ANY_EXHDR(arg1
);
2220 len2
= VARSIZE_ANY_EXHDR(arg2
);
2222 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2224 PG_FREE_IF_COPY(arg1
, 0);
2225 PG_FREE_IF_COPY(arg2
, 1);
2227 PG_RETURN_BOOL((cmp
< 0) || ((cmp
== 0) && (len1
<= len2
)));
2231 byteagt(PG_FUNCTION_ARGS
)
2233 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2234 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2239 len1
= VARSIZE_ANY_EXHDR(arg1
);
2240 len2
= VARSIZE_ANY_EXHDR(arg2
);
2242 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2244 PG_FREE_IF_COPY(arg1
, 0);
2245 PG_FREE_IF_COPY(arg2
, 1);
2247 PG_RETURN_BOOL((cmp
> 0) || ((cmp
== 0) && (len1
> len2
)));
2251 byteage(PG_FUNCTION_ARGS
)
2253 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2254 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2259 len1
= VARSIZE_ANY_EXHDR(arg1
);
2260 len2
= VARSIZE_ANY_EXHDR(arg2
);
2262 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2264 PG_FREE_IF_COPY(arg1
, 0);
2265 PG_FREE_IF_COPY(arg2
, 1);
2267 PG_RETURN_BOOL((cmp
> 0) || ((cmp
== 0) && (len1
>= len2
)));
2271 byteacmp(PG_FUNCTION_ARGS
)
2273 bytea
*arg1
= PG_GETARG_BYTEA_PP(0);
2274 bytea
*arg2
= PG_GETARG_BYTEA_PP(1);
2279 len1
= VARSIZE_ANY_EXHDR(arg1
);
2280 len2
= VARSIZE_ANY_EXHDR(arg2
);
2282 cmp
= memcmp(VARDATA_ANY(arg1
), VARDATA_ANY(arg2
), Min(len1
, len2
));
2283 if ((cmp
== 0) && (len1
!= len2
))
2284 cmp
= (len1
< len2
) ? -1 : 1;
2286 PG_FREE_IF_COPY(arg1
, 0);
2287 PG_FREE_IF_COPY(arg2
, 1);
2289 PG_RETURN_INT32(cmp
);
2293 * appendStringInfoText
2295 * Append a text to str.
2296 * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2299 appendStringInfoText(StringInfo str
, const text
*t
)
2301 appendBinaryStringInfo(str
, VARDATA_ANY(t
), VARSIZE_ANY_EXHDR(t
));
2306 * replace all occurrences of 'old_sub_str' in 'orig_str'
2307 * with 'new_sub_str' to form 'new_str'
2309 * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2310 * otherwise returns 'new_str'
2313 replace_text(PG_FUNCTION_ARGS
)
2315 text
*src_text
= PG_GETARG_TEXT_PP(0);
2316 text
*from_sub_text
= PG_GETARG_TEXT_PP(1);
2317 text
*to_sub_text
= PG_GETARG_TEXT_PP(2);
2319 int from_sub_text_len
;
2320 TextPositionState state
;
2328 text_position_setup(src_text
, from_sub_text
, &state
);
2331 * Note: we check the converted string length, not the original, because
2332 * they could be different if the input contained invalid encoding.
2334 src_text_len
= state
.len1
;
2335 from_sub_text_len
= state
.len2
;
2337 /* Return unmodified source string if empty source or pattern */
2338 if (src_text_len
< 1 || from_sub_text_len
< 1)
2340 text_position_cleanup(&state
);
2341 PG_RETURN_TEXT_P(src_text
);
2345 curr_posn
= text_position_next(1, &state
);
2347 /* When the from_sub_text is not found, there is nothing to do. */
2350 text_position_cleanup(&state
);
2351 PG_RETURN_TEXT_P(src_text
);
2354 /* start_ptr points to the start_posn'th character of src_text */
2355 start_ptr
= VARDATA_ANY(src_text
);
2357 initStringInfo(&str
);
2361 CHECK_FOR_INTERRUPTS();
2363 /* copy the data skipped over by last text_position_next() */
2364 chunk_len
= charlen_to_bytelen(start_ptr
, curr_posn
- start_posn
);
2365 appendBinaryStringInfo(&str
, start_ptr
, chunk_len
);
2367 appendStringInfoText(&str
, to_sub_text
);
2369 start_posn
= curr_posn
;
2370 start_ptr
+= chunk_len
;
2371 start_posn
+= from_sub_text_len
;
2372 start_ptr
+= charlen_to_bytelen(start_ptr
, from_sub_text_len
);
2374 curr_posn
= text_position_next(start_posn
, &state
);
2376 while (curr_posn
> 0);
2378 /* copy trailing data */
2379 chunk_len
= ((char *) src_text
+ VARSIZE_ANY(src_text
)) - start_ptr
;
2380 appendBinaryStringInfo(&str
, start_ptr
, chunk_len
);
2382 text_position_cleanup(&state
);
2384 ret_text
= cstring_to_text_with_len(str
.data
, str
.len
);
2387 PG_RETURN_TEXT_P(ret_text
);
2391 * check_replace_text_has_escape_char
2393 * check whether replace_text contains escape char.
2396 check_replace_text_has_escape_char(const text
*replace_text
)
2398 const char *p
= VARDATA_ANY(replace_text
);
2399 const char *p_end
= p
+ VARSIZE_ANY_EXHDR(replace_text
);
2401 if (pg_database_encoding_max_length() == 1)
2403 for (; p
< p_end
; p
++)
2411 for (; p
< p_end
; p
+= pg_mblen(p
))
2422 * appendStringInfoRegexpSubstr
2424 * Append replace_text to str, substituting regexp back references for
2425 * \n escapes. start_ptr is the start of the match in the source string,
2426 * at logical character position data_pos.
2429 appendStringInfoRegexpSubstr(StringInfo str
, text
*replace_text
,
2431 char *start_ptr
, int data_pos
)
2433 const char *p
= VARDATA_ANY(replace_text
);
2434 const char *p_end
= p
+ VARSIZE_ANY_EXHDR(replace_text
);
2435 int eml
= pg_database_encoding_max_length();
2439 const char *chunk_start
= p
;
2443 /* Find next escape char. */
2446 for (; p
< p_end
&& *p
!= '\\'; p
++)
2451 for (; p
< p_end
&& *p
!= '\\'; p
+= pg_mblen(p
))
2455 /* Copy the text we just scanned over, if any. */
2456 if (p
> chunk_start
)
2457 appendBinaryStringInfo(str
, chunk_start
, p
- chunk_start
);
2459 /* Done if at end of string, else advance over escape char. */
2466 /* Escape at very end of input. Treat same as unexpected char */
2467 appendStringInfoChar(str
, '\\');
2471 if (*p
>= '1' && *p
<= '9')
2473 /* Use the back reference of regexp. */
2476 so
= pmatch
[idx
].rm_so
;
2477 eo
= pmatch
[idx
].rm_eo
;
2482 /* Use the entire matched string. */
2483 so
= pmatch
[0].rm_so
;
2484 eo
= pmatch
[0].rm_eo
;
2487 else if (*p
== '\\')
2489 /* \\ means transfer one \ to output. */
2490 appendStringInfoChar(str
, '\\');
2497 * If escape char is not followed by any expected char, just treat
2498 * it as ordinary data to copy. (XXX would it be better to throw
2501 appendStringInfoChar(str
, '\\');
2505 if (so
!= -1 && eo
!= -1)
2508 * Copy the text that is back reference of regexp. Note so and eo
2509 * are counted in characters not bytes.
2514 Assert(so
>= data_pos
);
2515 chunk_start
= start_ptr
;
2516 chunk_start
+= charlen_to_bytelen(chunk_start
, so
- data_pos
);
2517 chunk_len
= charlen_to_bytelen(chunk_start
, eo
- so
);
2518 appendBinaryStringInfo(str
, chunk_start
, chunk_len
);
2523 #define REGEXP_REPLACE_BACKREF_CNT 10
2526 * replace_text_regexp
2528 * replace text that matches to regexp in src_text to replace_text.
2530 * Note: to avoid having to include regex.h in builtins.h, we declare
2531 * the regexp argument as void *, but really it's regex_t *.
2534 replace_text_regexp(text
*src_text
, void *regexp
,
2535 text
*replace_text
, bool glob
)
2538 regex_t
*re
= (regex_t
*) regexp
;
2539 int src_text_len
= VARSIZE_ANY_EXHDR(src_text
);
2541 regmatch_t pmatch
[REGEXP_REPLACE_BACKREF_CNT
];
2549 initStringInfo(&buf
);
2551 /* Convert data string to wide characters. */
2552 data
= (pg_wchar
*) palloc((src_text_len
+ 1) * sizeof(pg_wchar
));
2553 data_len
= pg_mb2wchar_with_len(VARDATA_ANY(src_text
), data
, src_text_len
);
2555 /* Check whether replace_text has escape char. */
2556 have_escape
= check_replace_text_has_escape_char(replace_text
);
2558 /* start_ptr points to the data_pos'th character of src_text */
2559 start_ptr
= (char *) VARDATA_ANY(src_text
);
2563 while (search_start
<= data_len
)
2567 CHECK_FOR_INTERRUPTS();
2569 regexec_result
= pg_regexec(re
,
2573 NULL
, /* no details */
2574 REGEXP_REPLACE_BACKREF_CNT
,
2578 if (regexec_result
== REG_NOMATCH
)
2581 if (regexec_result
!= REG_OKAY
)
2585 pg_regerror(regexec_result
, re
, errMsg
, sizeof(errMsg
));
2587 (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION
),
2588 errmsg("regular expression failed: %s", errMsg
)));
2592 * Copy the text to the left of the match position. Note we are given
2593 * character not byte indexes.
2595 if (pmatch
[0].rm_so
- data_pos
> 0)
2599 chunk_len
= charlen_to_bytelen(start_ptr
,
2600 pmatch
[0].rm_so
- data_pos
);
2601 appendBinaryStringInfo(&buf
, start_ptr
, chunk_len
);
2604 * Advance start_ptr over that text, to avoid multiple rescans of
2605 * it if the replace_text contains multiple back-references.
2607 start_ptr
+= chunk_len
;
2608 data_pos
= pmatch
[0].rm_so
;
2612 * Copy the replace_text. Process back references when the
2613 * replace_text has escape characters.
2616 appendStringInfoRegexpSubstr(&buf
, replace_text
, pmatch
,
2617 start_ptr
, data_pos
);
2619 appendStringInfoText(&buf
, replace_text
);
2621 /* Advance start_ptr and data_pos over the matched text. */
2622 start_ptr
+= charlen_to_bytelen(start_ptr
,
2623 pmatch
[0].rm_eo
- data_pos
);
2624 data_pos
= pmatch
[0].rm_eo
;
2627 * When global option is off, replace the first instance only.
2633 * Search from next character when the matching text is zero width.
2635 search_start
= data_pos
;
2636 if (pmatch
[0].rm_so
== pmatch
[0].rm_eo
)
2641 * Copy the text to the right of the last match.
2643 if (data_pos
< data_len
)
2647 chunk_len
= ((char *) src_text
+ VARSIZE_ANY(src_text
)) - start_ptr
;
2648 appendBinaryStringInfo(&buf
, start_ptr
, chunk_len
);
2651 ret_text
= cstring_to_text_with_len(buf
.data
, buf
.len
);
2660 * parse input string
2661 * return ord item (1 based)
2662 * based on provided field separator
2665 split_text(PG_FUNCTION_ARGS
)
2667 text
*inputstring
= PG_GETARG_TEXT_PP(0);
2668 text
*fldsep
= PG_GETARG_TEXT_PP(1);
2669 int fldnum
= PG_GETARG_INT32(2);
2670 int inputstring_len
;
2672 TextPositionState state
;
2677 /* field number is 1 based */
2680 (errcode(ERRCODE_INVALID_PARAMETER_VALUE
),
2681 errmsg("field position must be greater than zero")));
2683 text_position_setup(inputstring
, fldsep
, &state
);
2686 * Note: we check the converted string length, not the original, because
2687 * they could be different if the input contained invalid encoding.
2689 inputstring_len
= state
.len1
;
2690 fldsep_len
= state
.len2
;
2692 /* return empty string for empty input string */
2693 if (inputstring_len
< 1)
2695 text_position_cleanup(&state
);
2696 PG_RETURN_TEXT_P(cstring_to_text(""));
2699 /* empty field separator */
2702 text_position_cleanup(&state
);
2703 /* if first field, return input string, else empty string */
2705 PG_RETURN_TEXT_P(inputstring
);
2707 PG_RETURN_TEXT_P(cstring_to_text(""));
2710 /* identify bounds of first field */
2712 end_posn
= text_position_next(1, &state
);
2714 /* special case if fldsep not found at all */
2717 text_position_cleanup(&state
);
2718 /* if field 1 requested, return input string, else empty string */
2720 PG_RETURN_TEXT_P(inputstring
);
2722 PG_RETURN_TEXT_P(cstring_to_text(""));
2725 while (end_posn
> 0 && --fldnum
> 0)
2727 /* identify bounds of next field */
2728 start_posn
= end_posn
+ fldsep_len
;
2729 end_posn
= text_position_next(start_posn
, &state
);
2732 text_position_cleanup(&state
);
2736 /* N'th field separator not found */
2737 /* if last field requested, return it, else empty string */
2739 result_text
= text_substring(PointerGetDatum(inputstring
),
2744 result_text
= cstring_to_text("");
2748 /* non-last field requested */
2749 result_text
= text_substring(PointerGetDatum(inputstring
),
2751 end_posn
- start_posn
,
2755 PG_RETURN_TEXT_P(result_text
);
2760 * parse input string
2761 * return text array of elements
2762 * based on provided field separator
2765 text_to_array(PG_FUNCTION_ARGS
)
2767 text
*inputstring
= PG_GETARG_TEXT_PP(0);
2768 text
*fldsep
= PG_GETARG_TEXT_PP(1);
2769 int inputstring_len
;
2771 TextPositionState state
;
2778 ArrayBuildState
*astate
= NULL
;
2780 text_position_setup(inputstring
, fldsep
, &state
);
2783 * Note: we check the converted string length, not the original, because
2784 * they could be different if the input contained invalid encoding.
2786 inputstring_len
= state
.len1
;
2787 fldsep_len
= state
.len2
;
2789 /* return NULL for empty input string */
2790 if (inputstring_len
< 1)
2792 text_position_cleanup(&state
);
2797 * empty field separator return one element, 1D, array using the input
2802 text_position_cleanup(&state
);
2803 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo
, TEXTOID
,
2804 PointerGetDatum(inputstring
), 1));
2808 /* start_ptr points to the start_posn'th character of inputstring */
2809 start_ptr
= VARDATA_ANY(inputstring
);
2811 for (fldnum
= 1;; fldnum
++) /* field number is 1 based */
2813 CHECK_FOR_INTERRUPTS();
2815 end_posn
= text_position_next(start_posn
, &state
);
2819 /* fetch last field */
2820 chunk_len
= ((char *) inputstring
+ VARSIZE_ANY(inputstring
)) - start_ptr
;
2824 /* fetch non-last field */
2825 chunk_len
= charlen_to_bytelen(start_ptr
, end_posn
- start_posn
);
2828 /* must build a temp text datum to pass to accumArrayResult */
2829 result_text
= cstring_to_text_with_len(start_ptr
, chunk_len
);
2831 /* stash away this field */
2832 astate
= accumArrayResult(astate
,
2833 PointerGetDatum(result_text
),
2836 CurrentMemoryContext
);
2843 start_posn
= end_posn
;
2844 start_ptr
+= chunk_len
;
2845 start_posn
+= fldsep_len
;
2846 start_ptr
+= charlen_to_bytelen(start_ptr
, fldsep_len
);
2849 text_position_cleanup(&state
);
2851 PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate
,
2852 CurrentMemoryContext
));
2857 * concatenate Cstring representation of input array elements
2858 * using provided field separator
2861 array_to_text(PG_FUNCTION_ARGS
)
2863 ArrayType
*v
= PG_GETARG_ARRAYTYPE_P(0);
2864 char *fldsep
= text_to_cstring(PG_GETARG_TEXT_PP(1));
2873 bool printed
= false;
2878 ArrayMetaState
*my_extra
;
2880 ndims
= ARR_NDIM(v
);
2882 nitems
= ArrayGetNItems(ndims
, dims
);
2884 /* if there are no elements, return an empty string */
2886 PG_RETURN_TEXT_P(cstring_to_text(""));
2888 element_type
= ARR_ELEMTYPE(v
);
2889 initStringInfo(&buf
);
2892 * We arrange to look up info about element type, including its output
2893 * conversion proc, only once per series of calls, assuming the element
2894 * type doesn't change underneath us.
2896 my_extra
= (ArrayMetaState
*) fcinfo
->flinfo
->fn_extra
;
2897 if (my_extra
== NULL
)
2899 fcinfo
->flinfo
->fn_extra
= MemoryContextAlloc(fcinfo
->flinfo
->fn_mcxt
,
2900 sizeof(ArrayMetaState
));
2901 my_extra
= (ArrayMetaState
*) fcinfo
->flinfo
->fn_extra
;
2902 my_extra
->element_type
= ~element_type
;
2905 if (my_extra
->element_type
!= element_type
)
2908 * Get info about element type, including its output conversion proc
2910 get_type_io_data(element_type
, IOFunc_output
,
2911 &my_extra
->typlen
, &my_extra
->typbyval
,
2912 &my_extra
->typalign
, &my_extra
->typdelim
,
2913 &my_extra
->typioparam
, &my_extra
->typiofunc
);
2914 fmgr_info_cxt(my_extra
->typiofunc
, &my_extra
->proc
,
2915 fcinfo
->flinfo
->fn_mcxt
);
2916 my_extra
->element_type
= element_type
;
2918 typlen
= my_extra
->typlen
;
2919 typbyval
= my_extra
->typbyval
;
2920 typalign
= my_extra
->typalign
;
2922 p
= ARR_DATA_PTR(v
);
2923 bitmap
= ARR_NULLBITMAP(v
);
2926 for (i
= 0; i
< nitems
; i
++)
2931 /* Get source element, checking for NULL */
2932 if (bitmap
&& (*bitmap
& bitmask
) == 0)
2934 /* we ignore nulls */
2938 itemvalue
= fetch_att(p
, typbyval
, typlen
);
2940 value
= OutputFunctionCall(&my_extra
->proc
, itemvalue
);
2943 appendStringInfo(&buf
, "%s%s", fldsep
, value
);
2945 appendStringInfoString(&buf
, value
);
2948 p
= att_addlength_pointer(p
, typlen
, p
);
2949 p
= (char *) att_align_nominal(p
, typalign
);
2952 /* advance bitmap pointer if any */
2956 if (bitmask
== 0x100)
2964 PG_RETURN_TEXT_P(cstring_to_text_with_len(buf
.data
, buf
.len
));
2969 * Convert a int32 to a string containing a base 16 (hex) representation of
2973 to_hex32(PG_FUNCTION_ARGS
)
2975 uint32 value
= (uint32
) PG_GETARG_INT32(0);
2977 const char *digits
= "0123456789abcdef";
2978 char buf
[32]; /* bigger than needed, but reasonable */
2980 ptr
= buf
+ sizeof(buf
) - 1;
2985 *--ptr
= digits
[value
% HEXBASE
];
2987 } while (ptr
> buf
&& value
);
2989 PG_RETURN_TEXT_P(cstring_to_text(ptr
));
2993 * Convert a int64 to a string containing a base 16 (hex) representation of
2997 to_hex64(PG_FUNCTION_ARGS
)
2999 uint64 value
= (uint64
) PG_GETARG_INT64(0);
3001 const char *digits
= "0123456789abcdef";
3002 char buf
[32]; /* bigger than needed, but reasonable */
3004 ptr
= buf
+ sizeof(buf
) - 1;
3009 *--ptr
= digits
[value
% HEXBASE
];
3011 } while (ptr
> buf
&& value
);
3013 PG_RETURN_TEXT_P(cstring_to_text(ptr
));
3017 * Create an md5 hash of a text string and return it as hex
3019 * md5 produces a 16 byte (128 bit) hash; double it for hex
3021 #define MD5_HASH_LEN 32
3024 md5_text(PG_FUNCTION_ARGS
)
3026 text
*in_text
= PG_GETARG_TEXT_PP(0);
3028 char hexsum
[MD5_HASH_LEN
+ 1];
3030 /* Calculate the length of the buffer using varlena metadata */
3031 len
= VARSIZE_ANY_EXHDR(in_text
);
3033 /* get the hash result */
3034 if (pg_md5_hash(VARDATA_ANY(in_text
), len
, hexsum
) == false)
3036 (errcode(ERRCODE_OUT_OF_MEMORY
),
3037 errmsg("out of memory")));
3039 /* convert to text and return it */
3040 PG_RETURN_TEXT_P(cstring_to_text(hexsum
));
3044 * Create an md5 hash of a bytea field and return it as a hex string:
3045 * 16-byte md5 digest is represented in 32 hex characters.
3048 md5_bytea(PG_FUNCTION_ARGS
)
3050 bytea
*in
= PG_GETARG_BYTEA_PP(0);
3052 char hexsum
[MD5_HASH_LEN
+ 1];
3054 len
= VARSIZE_ANY_EXHDR(in
);
3055 if (pg_md5_hash(VARDATA_ANY(in
), len
, hexsum
) == false)
3057 (errcode(ERRCODE_OUT_OF_MEMORY
),
3058 errmsg("out of memory")));
3060 PG_RETURN_TEXT_P(cstring_to_text(hexsum
));
3064 * Return the size of a datum, possibly compressed
3066 * Works on any data type
3069 pg_column_size(PG_FUNCTION_ARGS
)
3071 Datum value
= PG_GETARG_DATUM(0);
3075 /* On first call, get the input type's typlen, and save at *fn_extra */
3076 if (fcinfo
->flinfo
->fn_extra
== NULL
)
3078 /* Lookup the datatype of the supplied argument */
3079 Oid argtypeid
= get_fn_expr_argtype(fcinfo
->flinfo
, 0);
3081 typlen
= get_typlen(argtypeid
);
3082 if (typlen
== 0) /* should not happen */
3083 elog(ERROR
, "cache lookup failed for type %u", argtypeid
);
3085 fcinfo
->flinfo
->fn_extra
= MemoryContextAlloc(fcinfo
->flinfo
->fn_mcxt
,
3087 *((int *) fcinfo
->flinfo
->fn_extra
) = typlen
;
3090 typlen
= *((int *) fcinfo
->flinfo
->fn_extra
);
3094 /* varlena type, possibly toasted */
3095 result
= toast_datum_size(value
);
3097 else if (typlen
== -2)
3100 result
= strlen(DatumGetCString(value
)) + 1;
3104 /* ordinary fixed-width type */
3108 PG_RETURN_INT32(result
);