src/backend/utils/adt/varlena.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * varlena.c
   4  *        Functions for the variable-length built-in types.
   5  *
   6  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
   7  * Portions Copyright (c) 1994, Regents of the University of California
   8  *
   9  *
  10  * IDENTIFICATION
  11  *        $PostgreSQL$
  12  *
  13  *-------------------------------------------------------------------------
  14  */
  15 #include "postgres.h"
  16
  17 #include <ctype.h>
  18
  19 #include "access/tuptoaster.h"
  20 #include "catalog/pg_type.h"
  21 #include "libpq/md5.h"
  22 #include "libpq/pqformat.h"
  23 #include "miscadmin.h"
  24 #include "parser/scansup.h"
  25 #include "regex/regex.h"
  26 #include "utils/builtins.h"
  27 #include "utils/lsyscache.h"
  28 #include "utils/pg_locale.h"
  29
  30
  31 typedef struct varlena unknown;
  32
  33 typedef struct
  34 {
  35         bool            use_wchar;              /* T if multibyte encoding */
  36         char       *str1;                       /* use these if not use_wchar */
  37         char       *str2;                       /* note: these point to original texts */
  38         pg_wchar   *wstr1;                      /* use these if use_wchar */
  39         pg_wchar   *wstr2;                      /* note: these are palloc'd */
  40         int                     len1;                   /* string lengths in logical characters */
  41         int                     len2;
  42         /* Skip table for Boyer-Moore-Horspool search algorithm: */
  43         int                     skiptablemask;  /* mask for ANDing with skiptable subscripts */
  44         int                     skiptable[256]; /* skip distance for given mismatched char */
  45 } TextPositionState;
  46
  47 #define DatumGetUnknownP(X)                     ((unknown *) PG_DETOAST_DATUM(X))
  48 #define DatumGetUnknownPCopy(X)         ((unknown *) PG_DETOAST_DATUM_COPY(X))
  49 #define PG_GETARG_UNKNOWN_P(n)          DatumGetUnknownP(PG_GETARG_DATUM(n))
  50 #define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
  51 #define PG_RETURN_UNKNOWN_P(x)          PG_RETURN_POINTER(x)
  52
  53 static int      text_cmp(text *arg1, text *arg2);
  54 static int32 text_length(Datum str);
  55 static int      text_position(text *t1, text *t2);
  56 static void text_position_setup(text *t1, text *t2, TextPositionState *state);
  57 static int      text_position_next(int start_pos, TextPositionState *state);
  58 static void text_position_cleanup(TextPositionState *state);
  59 static text *text_substring(Datum str,
  60                            int32 start,
  61                            int32 length,
  62                            bool length_not_specified);
  63 static void appendStringInfoText(StringInfo str, const text *t);
  64
  65
  66 /*****************************************************************************
  67  *       CONVERSION ROUTINES EXPORTED FOR USE BY C CODE                                                  *
  68  *****************************************************************************/
  69
  70 /*
  71  * cstring_to_text
  72  *
  73  * Create a text value from a null-terminated C string.
  74  *
  75  * The new text value is freshly palloc'd with a full-size VARHDR.
  76  */
  77 text *
  78 cstring_to_text(const char *s)
  79 {
  80         return cstring_to_text_with_len(s, strlen(s));
  81 }
  82
  83 /*
  84  * cstring_to_text_with_len
  85  *
  86  * Same as cstring_to_text except the caller specifies the string length;
  87  * the string need not be null_terminated.
  88  */
  89 text *
  90 cstring_to_text_with_len(const char *s, int len)
  91 {
  92         text       *result = (text *) palloc(len + VARHDRSZ);
  93
  94         SET_VARSIZE(result, len + VARHDRSZ);
  95         memcpy(VARDATA(result), s, len);
  96
  97         return result;
  98 }
  99
 100 /*
 101  * text_to_cstring
 102  *
 103  * Create a palloc'd, null-terminated C string from a text value.
 104  *
 105  * We support being passed a compressed or toasted text value.
 106  * This is a bit bogus since such values shouldn't really be referred to as
 107  * "text *", but it seems useful for robustness.  If we didn't handle that
 108  * case here, we'd need another routine that did, anyway.
 109  */
 110 char *
 111 text_to_cstring(const text *t)
 112 {
 113         /* must cast away the const, unfortunately */
 114         text       *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
 115         int                     len = VARSIZE_ANY_EXHDR(tunpacked);
 116         char       *result;
 117
 118         result = (char *) palloc(len + 1);
 119         memcpy(result, VARDATA_ANY(tunpacked), len);
 120         result[len] = '\0';
 121
 122         if (tunpacked != t)
 123                 pfree(tunpacked);
 124
 125         return result;
 126 }
 127
 128 /*
 129  * text_to_cstring_buffer
 130  *
 131  * Copy a text value into a caller-supplied buffer of size dst_len.
 132  *
 133  * The text string is truncated if necessary to fit.  The result is
 134  * guaranteed null-terminated (unless dst_len == 0).
 135  *
 136  * We support being passed a compressed or toasted text value.
 137  * This is a bit bogus since such values shouldn't really be referred to as
 138  * "text *", but it seems useful for robustness.  If we didn't handle that
 139  * case here, we'd need another routine that did, anyway.
 140  */
 141 void
 142 text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
 143 {
 144         /* must cast away the const, unfortunately */
 145         text       *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
 146         size_t          src_len = VARSIZE_ANY_EXHDR(srcunpacked);
 147
 148         if (dst_len > 0)
 149         {
 150                 dst_len--;
 151                 if (dst_len >= src_len)
 152                         dst_len = src_len;
 153                 else    /* ensure truncation is encoding-safe */
 154                         dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
 155                 memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
 156                 dst[dst_len] = '\0';
 157         }
 158
 159         if (srcunpacked != src)
 160                 pfree(srcunpacked);
 161 }
 162
 163
 164 /*****************************************************************************
 165  *       USER I/O ROUTINES                                                                                                               *
 166  *****************************************************************************/
 167
 168
 169 #define VAL(CH)                 ((CH) - '0')
 170 #define DIG(VAL)                ((VAL) + '0')
 171
 172 /*
 173  *              byteain                 - converts from printable representation of byte array
 174  *
 175  *              Non-printable characters must be passed as '\nnn' (octal) and are
 176  *              converted to internal form.  '\' must be passed as '\\'.
 177  *              ereport(ERROR, ...) if bad form.
 178  *
 179  *              BUGS:
 180  *                              The input is scanned twice.
 181  *                              The error checking of input is minimal.
 182  */
 183 Datum
 184 byteain(PG_FUNCTION_ARGS)
 185 {
 186         char       *inputText = PG_GETARG_CSTRING(0);
 187         char       *tp;
 188         char       *rp;
 189         int byte;
 190         bytea      *result;
 191
 192         for (byte = 0, tp = inputText; *tp != '\0'; byte ++)
 193         {
 194                 if (tp[0] != '\\')
 195                         tp++;
 196                 else if ((tp[0] == '\\') &&
 197                                  (tp[1] >= '0' && tp[1] <= '3') &&
 198                                  (tp[2] >= '0' && tp[2] <= '7') &&
 199                                  (tp[3] >= '0' && tp[3] <= '7'))
 200                         tp += 4;
 201                 else if ((tp[0] == '\\') &&
 202                                  (tp[1] == '\\'))
 203                         tp += 2;
 204                 else
 205                 {
 206                         /*
 207                          * one backslash, not followed by 0 or ### valid octal
 208                          */
 209                         ereport(ERROR,
 210                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 211                                          errmsg("invalid input syntax for type bytea")));
 212                 }
 213         }
 214
 215         byte      +=VARHDRSZ;
 216
 217         result = (bytea *) palloc(byte);
 218         SET_VARSIZE(result, byte);
 219
 220         tp = inputText;
 221         rp = VARDATA(result);
 222         while (*tp != '\0')
 223         {
 224                 if (tp[0] != '\\')
 225                         *rp++ = *tp++;
 226                 else if ((tp[0] == '\\') &&
 227                                  (tp[1] >= '0' && tp[1] <= '3') &&
 228                                  (tp[2] >= '0' && tp[2] <= '7') &&
 229                                  (tp[3] >= '0' && tp[3] <= '7'))
 230                 {
 231                         byte = VAL(tp[1]);
 232                         byte     <<=3;
 233                         byte      +=VAL(tp[2]);
 234                         byte     <<=3;
 235                         *rp++ = byte +VAL(tp[3]);
 236
 237                         tp += 4;
 238                 }
 239                 else if ((tp[0] == '\\') &&
 240                                  (tp[1] == '\\'))
 241                 {
 242                         *rp++ = '\\';
 243                         tp += 2;
 244                 }
 245                 else
 246                 {
 247                         /*
 248                          * We should never get here. The first pass should not allow it.
 249                          */
 250                         ereport(ERROR,
 251                                         (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
 252                                          errmsg("invalid input syntax for type bytea")));
 253                 }
 254         }
 255
 256         PG_RETURN_BYTEA_P(result);
 257 }
 258
 259 /*
 260  *              byteaout                - converts to printable representation of byte array
 261  *
 262  *              Non-printable characters are inserted as '\nnn' (octal) and '\' as
 263  *              '\\'.
 264  *
 265  *              NULL vlena should be an error--returning string with NULL for now.
 266  */
 267 Datum
 268 byteaout(PG_FUNCTION_ARGS)
 269 {
 270         bytea      *vlena = PG_GETARG_BYTEA_PP(0);
 271         char       *result;
 272         char       *vp;
 273         char       *rp;
 274         int                     val;                    /* holds unprintable chars */
 275         int                     i;
 276         int                     len;
 277
 278         len = 1;                                        /* empty string has 1 char */
 279         vp = VARDATA_ANY(vlena);
 280         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 281         {
 282                 if (*vp == '\\')
 283                         len += 2;
 284                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 285                         len += 4;
 286                 else
 287                         len++;
 288         }
 289         rp = result = (char *) palloc(len);
 290         vp = VARDATA_ANY(vlena);
 291         for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
 292         {
 293                 if (*vp == '\\')
 294                 {
 295                         *rp++ = '\\';
 296                         *rp++ = '\\';
 297                 }
 298                 else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
 299                 {
 300                         val = *vp;
 301                         rp[0] = '\\';
 302                         rp[3] = DIG(val & 07);
 303                         val >>= 3;
 304                         rp[2] = DIG(val & 07);
 305                         val >>= 3;
 306                         rp[1] = DIG(val & 03);
 307                         rp += 4;
 308                 }
 309                 else
 310                         *rp++ = *vp;
 311         }
 312         *rp = '\0';
 313         PG_RETURN_CSTRING(result);
 314 }
 315
 316 /*
 317  *              bytearecv                       - converts external binary format to bytea
 318  */
 319 Datum
 320 bytearecv(PG_FUNCTION_ARGS)
 321 {
 322         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 323         bytea      *result;
 324         int                     nbytes;
 325
 326         nbytes = buf->len - buf->cursor;
 327         result = (bytea *) palloc(nbytes + VARHDRSZ);
 328         SET_VARSIZE(result, nbytes + VARHDRSZ);
 329         pq_copymsgbytes(buf, VARDATA(result), nbytes);
 330         PG_RETURN_BYTEA_P(result);
 331 }
 332
 333 /*
 334  *              byteasend                       - converts bytea to binary format
 335  *
 336  * This is a special case: just copy the input...
 337  */
 338 Datum
 339 byteasend(PG_FUNCTION_ARGS)
 340 {
 341         bytea      *vlena = PG_GETARG_BYTEA_P_COPY(0);
 342
 343         PG_RETURN_BYTEA_P(vlena);
 344 }
 345
 346
 347 /*
 348  *              textin                  - converts "..." to internal representation
 349  */
 350 Datum
 351 textin(PG_FUNCTION_ARGS)
 352 {
 353         char       *inputText = PG_GETARG_CSTRING(0);
 354
 355         PG_RETURN_TEXT_P(cstring_to_text(inputText));
 356 }
 357
 358 /*
 359  *              textout                 - converts internal representation to "..."
 360  */
 361 Datum
 362 textout(PG_FUNCTION_ARGS)
 363 {
 364         Datum           txt = PG_GETARG_DATUM(0);
 365
 366         PG_RETURN_CSTRING(TextDatumGetCString(txt));
 367 }
 368
 369 /*
 370  *              textrecv                        - converts external binary format to text
 371  */
 372 Datum
 373 textrecv(PG_FUNCTION_ARGS)
 374 {
 375         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 376         text       *result;
 377         char       *str;
 378         int                     nbytes;
 379
 380         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 381
 382         result = cstring_to_text_with_len(str, nbytes);
 383         pfree(str);
 384         PG_RETURN_TEXT_P(result);
 385 }
 386
 387 /*
 388  *              textsend                        - converts text to binary format
 389  */
 390 Datum
 391 textsend(PG_FUNCTION_ARGS)
 392 {
 393         text       *t = PG_GETARG_TEXT_PP(0);
 394         StringInfoData buf;
 395
 396         pq_begintypsend(&buf);
 397         pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
 398         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 399 }
 400
 401
 402 /*
 403  *              unknownin                       - converts "..." to internal representation
 404  */
 405 Datum
 406 unknownin(PG_FUNCTION_ARGS)
 407 {
 408         char       *str = PG_GETARG_CSTRING(0);
 409
 410         /* representation is same as cstring */
 411         PG_RETURN_CSTRING(pstrdup(str));
 412 }
 413
 414 /*
 415  *              unknownout                      - converts internal representation to "..."
 416  */
 417 Datum
 418 unknownout(PG_FUNCTION_ARGS)
 419 {
 420         /* representation is same as cstring */
 421         char       *str = PG_GETARG_CSTRING(0);
 422
 423         PG_RETURN_CSTRING(pstrdup(str));
 424 }
 425
 426 /*
 427  *              unknownrecv                     - converts external binary format to unknown
 428  */
 429 Datum
 430 unknownrecv(PG_FUNCTION_ARGS)
 431 {
 432         StringInfo      buf = (StringInfo) PG_GETARG_POINTER(0);
 433         char       *str;
 434         int                     nbytes;
 435
 436         str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 437         /* representation is same as cstring */
 438         PG_RETURN_CSTRING(str);
 439 }
 440
 441 /*
 442  *              unknownsend                     - converts unknown to binary format
 443  */
 444 Datum
 445 unknownsend(PG_FUNCTION_ARGS)
 446 {
 447         /* representation is same as cstring */
 448         char       *str = PG_GETARG_CSTRING(0);
 449         StringInfoData buf;
 450
 451         pq_begintypsend(&buf);
 452         pq_sendtext(&buf, str, strlen(str));
 453         PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 454 }
 455
 456
 457 /* ========== PUBLIC ROUTINES ========== */
 458
 459 /*
 460  * textlen -
 461  *        returns the logical length of a text*
 462  *         (which is less than the VARSIZE of the text*)
 463  */
 464 Datum
 465 textlen(PG_FUNCTION_ARGS)
 466 {
 467         Datum           str = PG_GETARG_DATUM(0);
 468
 469         /* try to avoid decompressing argument */
 470         PG_RETURN_INT32(text_length(str));
 471 }
 472
 473 /*
 474  * text_length -
 475  *      Does the real work for textlen()
 476  *
 477  *      This is broken out so it can be called directly by other string processing
 478  *      functions.      Note that the argument is passed as a Datum, to indicate that
 479  *      it may still be in compressed form.  We can avoid decompressing it at all
 480  *      in some cases.
 481  */
 482 static int32
 483 text_length(Datum str)
 484 {
 485         /* fastpath when max encoding length is one */
 486         if (pg_database_encoding_max_length() == 1)
 487                 PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 488         else
 489         {
 490                 text       *t = DatumGetTextPP(str);
 491
 492                 PG_RETURN_INT32(pg_mbstrlen_with_len(VARDATA_ANY(t),
 493                                                                                          VARSIZE_ANY_EXHDR(t)));
 494         }
 495 }
 496
 497 /*
 498  * textoctetlen -
 499  *        returns the physical length of a text*
 500  *         (which is less than the VARSIZE of the text*)
 501  */
 502 Datum
 503 textoctetlen(PG_FUNCTION_ARGS)
 504 {
 505         Datum           str = PG_GETARG_DATUM(0);
 506
 507         /* We need not detoast the input at all */
 508         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
 509 }
 510
 511 /*
 512  * textcat -
 513  *        takes two text* and returns a text* that is the concatenation of
 514  *        the two.
 515  *
 516  * Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
 517  * Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
 518  * Allocate space for output in all cases.
 519  * XXX - thomas 1997-07-10
 520  */
 521 Datum
 522 textcat(PG_FUNCTION_ARGS)
 523 {
 524         text       *t1 = PG_GETARG_TEXT_PP(0);
 525         text       *t2 = PG_GETARG_TEXT_PP(1);
 526         int                     len1,
 527                                 len2,
 528                                 len;
 529         text       *result;
 530         char       *ptr;
 531
 532         len1 = VARSIZE_ANY_EXHDR(t1);
 533         if (len1 < 0)
 534                 len1 = 0;
 535
 536         len2 = VARSIZE_ANY_EXHDR(t2);
 537         if (len2 < 0)
 538                 len2 = 0;
 539
 540         len = len1 + len2 + VARHDRSZ;
 541         result = (text *) palloc(len);
 542
 543         /* Set size of result string... */
 544         SET_VARSIZE(result, len);
 545
 546         /* Fill data field of result string... */
 547         ptr = VARDATA(result);
 548         if (len1 > 0)
 549                 memcpy(ptr, VARDATA_ANY(t1), len1);
 550         if (len2 > 0)
 551                 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
 552
 553         PG_RETURN_TEXT_P(result);
 554 }
 555
 556 /*
 557  * charlen_to_bytelen()
 558  *      Compute the number of bytes occupied by n characters starting at *p
 559  *
 560  * It is caller's responsibility that there actually are n characters;
 561  * the string need not be null-terminated.
 562  */
 563 static int
 564 charlen_to_bytelen(const char *p, int n)
 565 {
 566         if (pg_database_encoding_max_length() == 1)
 567         {
 568                 /* Optimization for single-byte encodings */
 569                 return n;
 570         }
 571         else
 572         {
 573                 const char *s;
 574
 575                 for (s = p; n > 0; n--)
 576                         s += pg_mblen(s);
 577
 578                 return s - p;
 579         }
 580 }
 581
 582 /*
 583  * text_substr()
 584  * Return a substring starting at the specified position.
 585  * - thomas 1997-12-31
 586  *
 587  * Input:
 588  *      - string
 589  *      - starting position (is one-based)
 590  *      - string length
 591  *
 592  * If the starting position is zero or less, then return from the start of the string
 593  *      adjusting the length to be consistent with the "negative start" per SQL92.
 594  * If the length is less than zero, return the remaining string.
 595  *
 596  * Added multibyte support.
 597  * - Tatsuo Ishii 1998-4-21
 598  * Changed behavior if starting position is less than one to conform to SQL92 behavior.
 599  * Formerly returned the entire string; now returns a portion.
 600  * - Thomas Lockhart 1998-12-10
 601  * Now uses faster TOAST-slicing interface
 602  * - John Gray 2002-02-22
 603  * Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
 604  * behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
 605  * error; if E < 1, return '', not entire string). Fixed MB related bug when
 606  * S > LC and < LC + 4 sometimes garbage characters are returned.
 607  * - Joe Conway 2002-08-10
 608  */
 609 Datum
 610 text_substr(PG_FUNCTION_ARGS)
 611 {
 612         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 613                                                                         PG_GETARG_INT32(1),
 614                                                                         PG_GETARG_INT32(2),
 615                                                                         false));
 616 }
 617
 618 /*
 619  * text_substr_no_len -
 620  *        Wrapper to avoid opr_sanity failure due to
 621  *        one function accepting a different number of args.
 622  */
 623 Datum
 624 text_substr_no_len(PG_FUNCTION_ARGS)
 625 {
 626         PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
 627                                                                         PG_GETARG_INT32(1),
 628                                                                         -1, true));
 629 }
 630
 631 /*
 632  * text_substring -
 633  *      Does the real work for text_substr() and text_substr_no_len()
 634  *
 635  *      This is broken out so it can be called directly by other string processing
 636  *      functions.      Note that the argument is passed as a Datum, to indicate that
 637  *      it may still be in compressed/toasted form.  We can avoid detoasting all
 638  *      of it in some cases.
 639  *
 640  *      The result is always a freshly palloc'd datum.
 641  */
 642 static text *
 643 text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
 644 {
 645         int32           eml = pg_database_encoding_max_length();
 646         int32           S = start;              /* start position */
 647         int32           S1;                             /* adjusted start position */
 648         int32           L1;                             /* adjusted substring length */
 649
 650         /* life is easy if the encoding max length is 1 */
 651         if (eml == 1)
 652         {
 653                 S1 = Max(S, 1);
 654
 655                 if (length_not_specified)               /* special case - get length to end of
 656                                                                                  * string */
 657                         L1 = -1;
 658                 else
 659                 {
 660                         /* end position */
 661                         int                     E = S + length;
 662
 663                         /*
 664                          * A negative value for L is the only way for the end position to
 665                          * be before the start. SQL99 says to throw an error.
 666                          */
 667                         if (E < S)
 668                                 ereport(ERROR,
 669                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 670                                                  errmsg("negative substring length not allowed")));
 671
 672                         /*
 673                          * A zero or negative value for the end position can happen if the
 674                          * start was negative or one. SQL99 says to return a zero-length
 675                          * string.
 676                          */
 677                         if (E < 1)
 678                                 return cstring_to_text("");
 679
 680                         L1 = E - S1;
 681                 }
 682
 683                 /*
 684                  * If the start position is past the end of the string, SQL99 says to
 685                  * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
 686                  * that for us. Convert to zero-based starting position
 687                  */
 688                 return DatumGetTextPSlice(str, S1 - 1, L1);
 689         }
 690         else if (eml > 1)
 691         {
 692                 /*
 693                  * When encoding max length is > 1, we can't get LC without
 694                  * detoasting, so we'll grab a conservatively large slice now and go
 695                  * back later to do the right thing
 696                  */
 697                 int32           slice_start;
 698                 int32           slice_size;
 699                 int32           slice_strlen;
 700                 text       *slice;
 701                 int32           E1;
 702                 int32           i;
 703                 char       *p;
 704                 char       *s;
 705                 text       *ret;
 706
 707                 /*
 708                  * if S is past the end of the string, the tuple toaster will return a
 709                  * zero-length string to us
 710                  */
 711                 S1 = Max(S, 1);
 712
 713                 /*
 714                  * We need to start at position zero because there is no way to know
 715                  * in advance which byte offset corresponds to the supplied start
 716                  * position.
 717                  */
 718                 slice_start = 0;
 719
 720                 if (length_not_specified)               /* special case - get length to end of
 721                                                                                  * string */
 722                         slice_size = L1 = -1;
 723                 else
 724                 {
 725                         int                     E = S + length;
 726
 727                         /*
 728                          * A negative value for L is the only way for the end position to
 729                          * be before the start. SQL99 says to throw an error.
 730                          */
 731                         if (E < S)
 732                                 ereport(ERROR,
 733                                                 (errcode(ERRCODE_SUBSTRING_ERROR),
 734                                                  errmsg("negative substring length not allowed")));
 735
 736                         /*
 737                          * A zero or negative value for the end position can happen if the
 738                          * start was negative or one. SQL99 says to return a zero-length
 739                          * string.
 740                          */
 741                         if (E < 1)
 742                                 return cstring_to_text("");
 743
 744                         /*
 745                          * if E is past the end of the string, the tuple toaster will
 746                          * truncate the length for us
 747                          */
 748                         L1 = E - S1;
 749
 750                         /*
 751                          * Total slice size in bytes can't be any longer than the start
 752                          * position plus substring length times the encoding max length.
 753                          */
 754                         slice_size = (S1 + L1) * eml;
 755                 }
 756
 757                 /*
 758                  * If we're working with an untoasted source, no need to do an extra
 759                  * copying step.
 760                  */
 761                 if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
 762                         VARATT_IS_EXTERNAL(DatumGetPointer(str)))
 763                         slice = DatumGetTextPSlice(str, slice_start, slice_size);
 764                 else
 765                         slice = (text *) DatumGetPointer(str);
 766
 767                 /* see if we got back an empty string */
 768                 if (VARSIZE_ANY_EXHDR(slice) == 0)
 769                 {
 770                         if (slice != (text *) DatumGetPointer(str))
 771                                 pfree(slice);
 772                         return cstring_to_text("");
 773                 }
 774
 775                 /* Now we can get the actual length of the slice in MB characters */
 776                 slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
 777                                                                                         VARSIZE_ANY_EXHDR(slice));
 778
 779                 /*
 780                  * Check that the start position wasn't > slice_strlen. If so, SQL99
 781                  * says to return a zero-length string.
 782                  */
 783                 if (S1 > slice_strlen)
 784                 {
 785                         if (slice != (text *) DatumGetPointer(str))
 786                                 pfree(slice);
 787                         return cstring_to_text("");
 788                 }
 789
 790                 /*
 791                  * Adjust L1 and E1 now that we know the slice string length. Again
 792                  * remember that S1 is one based, and slice_start is zero based.
 793                  */
 794                 if (L1 > -1)
 795                         E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
 796                 else
 797                         E1 = slice_start + 1 + slice_strlen;
 798
 799                 /*
 800                  * Find the start position in the slice; remember S1 is not zero based
 801                  */
 802                 p = VARDATA_ANY(slice);
 803                 for (i = 0; i < S1 - 1; i++)
 804                         p += pg_mblen(p);
 805
 806                 /* hang onto a pointer to our start position */
 807                 s = p;
 808
 809                 /*
 810                  * Count the actual bytes used by the substring of the requested
 811                  * length.
 812                  */
 813                 for (i = S1; i < E1; i++)
 814                         p += pg_mblen(p);
 815
 816                 ret = (text *) palloc(VARHDRSZ + (p - s));
 817                 SET_VARSIZE(ret, VARHDRSZ + (p - s));
 818                 memcpy(VARDATA(ret), s, (p - s));
 819
 820                 if (slice != (text *) DatumGetPointer(str))
 821                         pfree(slice);
 822
 823                 return ret;
 824         }
 825         else
 826                 elog(ERROR, "invalid backend encoding: encoding max length < 1");
 827
 828         /* not reached: suppress compiler warning */
 829         return NULL;
 830 }
 831
 832 /*
 833  * textpos -
 834  *        Return the position of the specified substring.
 835  *        Implements the SQL92 POSITION() function.
 836  *        Ref: A Guide To The SQL Standard, Date & Darwen, 1997
 837  * - thomas 1997-07-27
 838  */
 839 Datum
 840 textpos(PG_FUNCTION_ARGS)
 841 {
 842         text       *str = PG_GETARG_TEXT_PP(0);
 843         text       *search_str = PG_GETARG_TEXT_PP(1);
 844
 845         PG_RETURN_INT32((int32) text_position(str, search_str));
 846 }
 847
 848 /*
 849  * text_position -
 850  *      Does the real work for textpos()
 851  *
 852  * Inputs:
 853  *              t1 - string to be searched
 854  *              t2 - pattern to match within t1
 855  * Result:
 856  *              Character index of the first matched char, starting from 1,
 857  *              or 0 if no match.
 858  *
 859  *      This is broken out so it can be called directly by other string processing
 860  *      functions.
 861  */
 862 static int
 863 text_position(text *t1, text *t2)
 864 {
 865         TextPositionState state;
 866         int                     result;
 867
 868         text_position_setup(t1, t2, &state);
 869         result = text_position_next(1, &state);
 870         text_position_cleanup(&state);
 871         return result;
 872 }
 873
 874
 875 /*
 876  * text_position_setup, text_position_next, text_position_cleanup -
 877  *      Component steps of text_position()
 878  *
 879  * These are broken out so that a string can be efficiently searched for
 880  * multiple occurrences of the same pattern.  text_position_next may be
 881  * called multiple times with increasing values of start_pos, which is
 882  * the 1-based character position to start the search from.  The "state"
 883  * variable is normally just a local variable in the caller.
 884  */
 885
 886 static void
 887 text_position_setup(text *t1, text *t2, TextPositionState *state)
 888 {
 889         int                     len1 = VARSIZE_ANY_EXHDR(t1);
 890         int                     len2 = VARSIZE_ANY_EXHDR(t2);
 891
 892         if (pg_database_encoding_max_length() == 1)
 893         {
 894                 /* simple case - single byte encoding */
 895                 state->use_wchar = false;
 896                 state->str1 = VARDATA_ANY(t1);
 897                 state->str2 = VARDATA_ANY(t2);
 898                 state->len1 = len1;
 899                 state->len2 = len2;
 900         }
 901         else
 902         {
 903                 /* not as simple - multibyte encoding */
 904                 pg_wchar   *p1,
 905                                    *p2;
 906
 907                 p1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
 908                 len1 = pg_mb2wchar_with_len(VARDATA_ANY(t1), p1, len1);
 909                 p2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
 910                 len2 = pg_mb2wchar_with_len(VARDATA_ANY(t2), p2, len2);
 911
 912                 state->use_wchar = true;
 913                 state->wstr1 = p1;
 914                 state->wstr2 = p2;
 915                 state->len1 = len1;
 916                 state->len2 = len2;
 917         }
 918
 919         /*
 920          * Prepare the skip table for Boyer-Moore-Horspool searching.  In these
 921          * notes we use the terminology that the "haystack" is the string to be
 922          * searched (t1) and the "needle" is the pattern being sought (t2).
 923          *
 924          * If the needle is empty or bigger than the haystack then there is no
 925          * point in wasting cycles initializing the table.      We also choose not to
 926          * use B-M-H for needles of length 1, since the skip table can't possibly
 927          * save anything in that case.
 928          */
 929         if (len1 >= len2 && len2 > 1)
 930         {
 931                 int                     searchlength = len1 - len2;
 932                 int                     skiptablemask;
 933                 int                     last;
 934                 int                     i;
 935
 936                 /*
 937                  * First we must determine how much of the skip table to use.  The
 938                  * declaration of TextPositionState allows up to 256 elements, but for
 939                  * short search problems we don't really want to have to initialize so
 940                  * many elements --- it would take too long in comparison to the
 941                  * actual search time.  So we choose a useful skip table size based on
 942                  * the haystack length minus the needle length.  The closer the needle
 943                  * length is to the haystack length the less useful skipping becomes.
 944                  *
 945                  * Note: since we use bit-masking to select table elements, the skip
 946                  * table size MUST be a power of 2, and so the mask must be 2^N-1.
 947                  */
 948                 if (searchlength < 16)
 949                         skiptablemask = 3;
 950                 else if (searchlength < 64)
 951                         skiptablemask = 7;
 952                 else if (searchlength < 128)
 953                         skiptablemask = 15;
 954                 else if (searchlength < 512)
 955                         skiptablemask = 31;
 956                 else if (searchlength < 2048)
 957                         skiptablemask = 63;
 958                 else if (searchlength < 4096)
 959                         skiptablemask = 127;
 960                 else
 961                         skiptablemask = 255;
 962                 state->skiptablemask = skiptablemask;
 963
 964                 /*
 965                  * Initialize the skip table.  We set all elements to the needle
 966                  * length, since this is the correct skip distance for any character
 967                  * not found in the needle.
 968                  */
 969                 for (i = 0; i <= skiptablemask; i++)
 970                         state->skiptable[i] = len2;
 971
 972                 /*
 973                  * Now examine the needle.      For each character except the last one,
 974                  * set the corresponding table element to the appropriate skip
 975                  * distance.  Note that when two characters share the same skip table
 976                  * entry, the one later in the needle must determine the skip
 977                  * distance.
 978                  */
 979                 last = len2 - 1;
 980
 981                 if (!state->use_wchar)
 982                 {
 983                         const char *str2 = state->str2;
 984
 985                         for (i = 0; i < last; i++)
 986                                 state->skiptable[(unsigned char) str2[i] & skiptablemask] = last - i;
 987                 }
 988                 else
 989                 {
 990                         const pg_wchar *wstr2 = state->wstr2;
 991
 992                         for (i = 0; i < last; i++)
 993                                 state->skiptable[wstr2[i] & skiptablemask] = last - i;
 994                 }
 995         }
 996 }
 997
 998 static int
 999 text_position_next(int start_pos, TextPositionState *state)
1000 {
1001         int                     haystack_len = state->len1;
1002         int                     needle_len = state->len2;
1003         int                     skiptablemask = state->skiptablemask;
1004
1005         Assert(start_pos > 0);          /* else caller error */
1006
1007         if (needle_len <= 0)
1008                 return start_pos;               /* result for empty pattern */
1009
1010         start_pos--;                            /* adjust for zero based arrays */
1011
1012         /* Done if the needle can't possibly fit */
1013         if (haystack_len < start_pos + needle_len)
1014                 return 0;
1015
1016         if (!state->use_wchar)
1017         {
1018                 /* simple case - single byte encoding */
1019                 const char *haystack = state->str1;
1020                 const char *needle = state->str2;
1021                 const char *haystack_end = &haystack[haystack_len];
1022                 const char *hptr;
1023
1024                 if (needle_len == 1)
1025                 {
1026                         /* No point in using B-M-H for a one-character needle */
1027                         char            nchar = *needle;
1028
1029                         hptr = &haystack[start_pos];
1030                         while (hptr < haystack_end)
1031                         {
1032                                 if (*hptr == nchar)
1033                                         return hptr - haystack + 1;
1034                                 hptr++;
1035                         }
1036                 }
1037                 else
1038                 {
1039                         const char *needle_last = &needle[needle_len - 1];
1040
1041                         /* Start at startpos plus the length of the needle */
1042                         hptr = &haystack[start_pos + needle_len - 1];
1043                         while (hptr < haystack_end)
1044                         {
1045                                 /* Match the needle scanning *backward* */
1046                                 const char *nptr;
1047                                 const char *p;
1048
1049                                 nptr = needle_last;
1050                                 p = hptr;
1051                                 while (*nptr == *p)
1052                                 {
1053                                         /* Matched it all?      If so, return 1-based position */
1054                                         if (nptr == needle)
1055                                                 return p - haystack + 1;
1056                                         nptr--, p--;
1057                                 }
1058
1059                                 /*
1060                                  * No match, so use the haystack char at hptr to decide how
1061                                  * far to advance.      If the needle had any occurrence of that
1062                                  * character (or more precisely, one sharing the same
1063                                  * skiptable entry) before its last character, then we advance
1064                                  * far enough to align the last such needle character with
1065                                  * that haystack position.      Otherwise we can advance by the
1066                                  * whole needle length.
1067                                  */
1068                                 hptr += state->skiptable[(unsigned char) *hptr & skiptablemask];
1069                         }
1070                 }
1071         }
1072         else
1073         {
1074                 /* The multibyte char version. This works exactly the same way. */
1075                 const pg_wchar *haystack = state->wstr1;
1076                 const pg_wchar *needle = state->wstr2;
1077                 const pg_wchar *haystack_end = &haystack[haystack_len];
1078                 const pg_wchar *hptr;
1079
1080                 if (needle_len == 1)
1081                 {
1082                         /* No point in using B-M-H for a one-character needle */
1083                         pg_wchar        nchar = *needle;
1084
1085                         hptr = &haystack[start_pos];
1086                         while (hptr < haystack_end)
1087                         {
1088                                 if (*hptr == nchar)
1089                                         return hptr - haystack + 1;
1090                                 hptr++;
1091                         }
1092                 }
1093                 else
1094                 {
1095                         const pg_wchar *needle_last = &needle[needle_len - 1];
1096
1097                         /* Start at startpos plus the length of the needle */
1098                         hptr = &haystack[start_pos + needle_len - 1];
1099                         while (hptr < haystack_end)
1100                         {
1101                                 /* Match the needle scanning *backward* */
1102                                 const pg_wchar *nptr;
1103                                 const pg_wchar *p;
1104
1105                                 nptr = needle_last;
1106                                 p = hptr;
1107                                 while (*nptr == *p)
1108                                 {
1109                                         /* Matched it all?      If so, return 1-based position */
1110                                         if (nptr == needle)
1111                                                 return p - haystack + 1;
1112                                         nptr--, p--;
1113                                 }
1114
1115                                 /*
1116                                  * No match, so use the haystack char at hptr to decide how
1117                                  * far to advance.      If the needle had any occurrence of that
1118                                  * character (or more precisely, one sharing the same
1119                                  * skiptable entry) before its last character, then we advance
1120                                  * far enough to align the last such needle character with
1121                                  * that haystack position.      Otherwise we can advance by the
1122                                  * whole needle length.
1123                                  */
1124                                 hptr += state->skiptable[*hptr & skiptablemask];
1125                         }
1126                 }
1127         }
1128
1129         return 0;                                       /* not found */
1130 }
1131
1132 static void
1133 text_position_cleanup(TextPositionState *state)
1134 {
1135         if (state->use_wchar)
1136         {
1137                 pfree(state->wstr1);
1138                 pfree(state->wstr2);
1139         }
1140 }
1141
/* varstr_cmp()
 * Compare two strings that are given as pointer plus byte length.
 *
 * The comparison honors the locale, which requires copying the strings into
 * temporary memory so that null-terminated inputs can be handed to strcoll().
 * The result is an integer less than, equal to, or greater than zero,
 * according to whether arg1 is less than, equal to, or greater than arg2.
 */
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
{
#define STACKBUFLEN		1024

	char		buf1[STACKBUFLEN];
	char		buf2[STACKBUFLEN];
	char	   *s1;
	char	   *s2;
	int			cmp;

	/*
	 * There is no strncoll(), so outside the C locale some memory copying
	 * is unavoidable.  That is significantly slower, hence the shortcut for
	 * LC_COLLATE = C taken here.  For relatively short strings the copies go
	 * into the stack buffers above, avoiding palloc/pfree overhead.
	 */
	if (lc_collate_is_c())
	{
		cmp = strncmp(arg1, arg2, Min(len1, len2));
		if ((cmp == 0) && (len1 != len2))
			cmp = (len1 < len2) ? -1 : 1;
		return cmp;
	}

#ifdef WIN32
	/* Win32 does not have UTF-8, so we need to map to UTF-16 */
	if (GetDatabaseEncoding() == PG_UTF8)
	{
		int			cap1;		/* size of s1's buffer, in bytes */
		int			cap2;		/* size of s2's buffer, in bytes */
		int			nwide;		/* wide characters produced by conversion */

		if (len1 < STACKBUFLEN / 2)
		{
			cap1 = STACKBUFLEN;
			s1 = buf1;
		}
		else
		{
			cap1 = len1 * 2 + 2;
			s1 = palloc(cap1);
		}
		if (len2 < STACKBUFLEN / 2)
		{
			cap2 = STACKBUFLEN;
			s2 = buf2;
		}
		else
		{
			cap2 = len2 * 2 + 2;
			s2 = palloc(cap2);
		}

		/* The conversion API does not work for zero-length input */
		nwide = 0;
		if (len1 != 0)
		{
			nwide = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
										(LPWSTR) s1, cap1 / 2);
			if (!nwide)
				ereport(ERROR,
						(errmsg("could not convert string to UTF-16: error %lu",
								GetLastError())));
		}
		((LPWSTR) s1)[nwide] = 0;

		nwide = 0;
		if (len2 != 0)
		{
			nwide = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
										(LPWSTR) s2, cap2 / 2);
			if (!nwide)
				ereport(ERROR,
						(errmsg("could not convert string to UTF-16: error %lu",
								GetLastError())));
		}
		((LPWSTR) s2)[nwide] = 0;

		errno = 0;
		cmp = wcscoll((LPWSTR) s1, (LPWSTR) s2);
		if (cmp == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
			ereport(ERROR,
					(errmsg("could not compare Unicode strings: %m")));

		/*
		 * In some locales wcscoll() can claim that nonidentical strings are
		 * equal.  Believing that would be bad news for a number of reasons,
		 * so we follow Perl's lead and sort "equal" strings according to
		 * strcmp (on the UTF-8 representation).
		 */
		if (cmp == 0)
		{
			cmp = strncmp(arg1, arg2, Min(len1, len2));
			if ((cmp == 0) && (len1 != len2))
				cmp = (len1 < len2) ? -1 : 1;
		}

		if (s1 != buf1)
			pfree(s1);
		if (s2 != buf2)
			pfree(s2);

		return cmp;
	}
#endif   /* WIN32 */

	/* Make null-terminated copies, on the stack when they fit */
	s1 = (len1 >= STACKBUFLEN) ? (char *) palloc(len1 + 1) : buf1;
	s2 = (len2 >= STACKBUFLEN) ? (char *) palloc(len2 + 1) : buf2;

	memcpy(s1, arg1, len1);
	s1[len1] = '\0';
	memcpy(s2, arg2, len2);
	s2[len2] = '\0';

	cmp = strcoll(s1, s2);

	/*
	 * In some locales strcoll() can claim that nonidentical strings are
	 * equal.  Believing that would be bad news for a number of reasons, so
	 * we follow Perl's lead and sort "equal" strings according to strcmp().
	 */
	if (cmp == 0)
		cmp = strcmp(s1, s2);

	if (s1 != buf1)
		pfree(s1);
	if (s2 != buf2)
		pfree(s2);

	return cmp;
}
1293
1294
1295 /* text_cmp()
1296  * Internal comparison function for text strings.
1297  * Returns -1, 0 or 1
1298  */
1299 static int
1300 text_cmp(text *arg1, text *arg2)
1301 {
1302         char       *a1p,
1303                            *a2p;
1304         int                     len1,
1305                                 len2;
1306
1307         a1p = VARDATA_ANY(arg1);
1308         a2p = VARDATA_ANY(arg2);
1309
1310         len1 = VARSIZE_ANY_EXHDR(arg1);
1311         len2 = VARSIZE_ANY_EXHDR(arg2);
1312
1313         return varstr_cmp(a1p, len1, a2p, len2);
1314 }
1315
1316 /*
1317  * Comparison functions for text strings.
1318  *
1319  * Note: btree indexes need these routines not to leak memory; therefore,
1320  * be careful to free working copies of toasted datums.  Most places don't
1321  * need to be so careful.
1322  */
1323
1324 Datum
1325 texteq(PG_FUNCTION_ARGS)
1326 {
1327         text       *arg1 = PG_GETARG_TEXT_PP(0);
1328         text       *arg2 = PG_GETARG_TEXT_PP(1);
1329         bool            result;
1330
1331         /*
1332          * Since we only care about equality or not-equality, we can avoid all the
1333          * expense of strcoll() here, and just do bitwise comparison.
1334          */
1335         if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1336                 result = false;
1337         else
1338                 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1339                                                   VARSIZE_ANY_EXHDR(arg1)) == 0);
1340
1341         PG_FREE_IF_COPY(arg1, 0);
1342         PG_FREE_IF_COPY(arg2, 1);
1343
1344         PG_RETURN_BOOL(result);
1345 }
1346
1347 Datum
1348 textne(PG_FUNCTION_ARGS)
1349 {
1350         text       *arg1 = PG_GETARG_TEXT_PP(0);
1351         text       *arg2 = PG_GETARG_TEXT_PP(1);
1352         bool            result;
1353
1354         /*
1355          * Since we only care about equality or not-equality, we can avoid all the
1356          * expense of strcoll() here, and just do bitwise comparison.
1357          */
1358         if (VARSIZE_ANY_EXHDR(arg1) != VARSIZE_ANY_EXHDR(arg2))
1359                 result = true;
1360         else
1361                 result = (strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2),
1362                                                   VARSIZE_ANY_EXHDR(arg1)) != 0);
1363
1364         PG_FREE_IF_COPY(arg1, 0);
1365         PG_FREE_IF_COPY(arg2, 1);
1366
1367         PG_RETURN_BOOL(result);
1368 }
1369
1370 Datum
1371 text_lt(PG_FUNCTION_ARGS)
1372 {
1373         text       *arg1 = PG_GETARG_TEXT_PP(0);
1374         text       *arg2 = PG_GETARG_TEXT_PP(1);
1375         bool            result;
1376
1377         result = (text_cmp(arg1, arg2) < 0);
1378
1379         PG_FREE_IF_COPY(arg1, 0);
1380         PG_FREE_IF_COPY(arg2, 1);
1381
1382         PG_RETURN_BOOL(result);
1383 }
1384
1385 Datum
1386 text_le(PG_FUNCTION_ARGS)
1387 {
1388         text       *arg1 = PG_GETARG_TEXT_PP(0);
1389         text       *arg2 = PG_GETARG_TEXT_PP(1);
1390         bool            result;
1391
1392         result = (text_cmp(arg1, arg2) <= 0);
1393
1394         PG_FREE_IF_COPY(arg1, 0);
1395         PG_FREE_IF_COPY(arg2, 1);
1396
1397         PG_RETURN_BOOL(result);
1398 }
1399
1400 Datum
1401 text_gt(PG_FUNCTION_ARGS)
1402 {
1403         text       *arg1 = PG_GETARG_TEXT_PP(0);
1404         text       *arg2 = PG_GETARG_TEXT_PP(1);
1405         bool            result;
1406
1407         result = (text_cmp(arg1, arg2) > 0);
1408
1409         PG_FREE_IF_COPY(arg1, 0);
1410         PG_FREE_IF_COPY(arg2, 1);
1411
1412         PG_RETURN_BOOL(result);
1413 }
1414
1415 Datum
1416 text_ge(PG_FUNCTION_ARGS)
1417 {
1418         text       *arg1 = PG_GETARG_TEXT_PP(0);
1419         text       *arg2 = PG_GETARG_TEXT_PP(1);
1420         bool            result;
1421
1422         result = (text_cmp(arg1, arg2) >= 0);
1423
1424         PG_FREE_IF_COPY(arg1, 0);
1425         PG_FREE_IF_COPY(arg2, 1);
1426
1427         PG_RETURN_BOOL(result);
1428 }
1429
1430 Datum
1431 bttextcmp(PG_FUNCTION_ARGS)
1432 {
1433         text       *arg1 = PG_GETARG_TEXT_PP(0);
1434         text       *arg2 = PG_GETARG_TEXT_PP(1);
1435         int32           result;
1436
1437         result = text_cmp(arg1, arg2);
1438
1439         PG_FREE_IF_COPY(arg1, 0);
1440         PG_FREE_IF_COPY(arg2, 1);
1441
1442         PG_RETURN_INT32(result);
1443 }
1444
1445
1446 Datum
1447 text_larger(PG_FUNCTION_ARGS)
1448 {
1449         text       *arg1 = PG_GETARG_TEXT_PP(0);
1450         text       *arg2 = PG_GETARG_TEXT_PP(1);
1451         text       *result;
1452
1453         result = ((text_cmp(arg1, arg2) > 0) ? arg1 : arg2);
1454
1455         PG_RETURN_TEXT_P(result);
1456 }
1457
1458 Datum
1459 text_smaller(PG_FUNCTION_ARGS)
1460 {
1461         text       *arg1 = PG_GETARG_TEXT_PP(0);
1462         text       *arg2 = PG_GETARG_TEXT_PP(1);
1463         text       *result;
1464
1465         result = ((text_cmp(arg1, arg2) < 0) ? arg1 : arg2);
1466
1467         PG_RETURN_TEXT_P(result);
1468 }
1469
1470
1471 /*
1472  * The following operators support character-by-character comparison
1473  * of text datums, to allow building indexes suitable for LIKE clauses.
1474  * Note that the regular texteq/textne comparison operators are assumed
1475  * to be compatible with these!
1476  */
1477
1478 static int
1479 internal_text_pattern_compare(text *arg1, text *arg2)
1480 {
1481         int                     result;
1482         int                     len1,
1483                                 len2;
1484
1485         len1 = VARSIZE_ANY_EXHDR(arg1);
1486         len2 = VARSIZE_ANY_EXHDR(arg2);
1487
1488         result = strncmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
1489         if (result != 0)
1490                 return result;
1491         else if (len1 < len2)
1492                 return -1;
1493         else if (len1 > len2)
1494                 return 1;
1495         else
1496                 return 0;
1497 }
1498
1499
1500 Datum
1501 text_pattern_lt(PG_FUNCTION_ARGS)
1502 {
1503         text       *arg1 = PG_GETARG_TEXT_PP(0);
1504         text       *arg2 = PG_GETARG_TEXT_PP(1);
1505         int                     result;
1506
1507         result = internal_text_pattern_compare(arg1, arg2);
1508
1509         PG_FREE_IF_COPY(arg1, 0);
1510         PG_FREE_IF_COPY(arg2, 1);
1511
1512         PG_RETURN_BOOL(result < 0);
1513 }
1514
1515
1516 Datum
1517 text_pattern_le(PG_FUNCTION_ARGS)
1518 {
1519         text       *arg1 = PG_GETARG_TEXT_PP(0);
1520         text       *arg2 = PG_GETARG_TEXT_PP(1);
1521         int                     result;
1522
1523         result = internal_text_pattern_compare(arg1, arg2);
1524
1525         PG_FREE_IF_COPY(arg1, 0);
1526         PG_FREE_IF_COPY(arg2, 1);
1527
1528         PG_RETURN_BOOL(result <= 0);
1529 }
1530
1531
1532 Datum
1533 text_pattern_ge(PG_FUNCTION_ARGS)
1534 {
1535         text       *arg1 = PG_GETARG_TEXT_PP(0);
1536         text       *arg2 = PG_GETARG_TEXT_PP(1);
1537         int                     result;
1538
1539         result = internal_text_pattern_compare(arg1, arg2);
1540
1541         PG_FREE_IF_COPY(arg1, 0);
1542         PG_FREE_IF_COPY(arg2, 1);
1543
1544         PG_RETURN_BOOL(result >= 0);
1545 }
1546
1547
1548 Datum
1549 text_pattern_gt(PG_FUNCTION_ARGS)
1550 {
1551         text       *arg1 = PG_GETARG_TEXT_PP(0);
1552         text       *arg2 = PG_GETARG_TEXT_PP(1);
1553         int                     result;
1554
1555         result = internal_text_pattern_compare(arg1, arg2);
1556
1557         PG_FREE_IF_COPY(arg1, 0);
1558         PG_FREE_IF_COPY(arg2, 1);
1559
1560         PG_RETURN_BOOL(result > 0);
1561 }
1562
1563
1564 Datum
1565 bttext_pattern_cmp(PG_FUNCTION_ARGS)
1566 {
1567         text       *arg1 = PG_GETARG_TEXT_PP(0);
1568         text       *arg2 = PG_GETARG_TEXT_PP(1);
1569         int                     result;
1570
1571         result = internal_text_pattern_compare(arg1, arg2);
1572
1573         PG_FREE_IF_COPY(arg1, 0);
1574         PG_FREE_IF_COPY(arg2, 1);
1575
1576         PG_RETURN_INT32(result);
1577 }
1578
1579
1580 /*-------------------------------------------------------------
1581  * byteaoctetlen
1582  *
1583  * get the number of bytes contained in an instance of type 'bytea'
1584  *-------------------------------------------------------------
1585  */
1586 Datum
1587 byteaoctetlen(PG_FUNCTION_ARGS)
1588 {
1589         Datum           str = PG_GETARG_DATUM(0);
1590
1591         /* We need not detoast the input at all */
1592         PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
1593 }
1594
1595 /*
1596  * byteacat -
1597  *        takes two bytea* and returns a bytea* that is the concatenation of
1598  *        the two.
1599  *
1600  * Cloned from textcat and modified as required.
1601  */
1602 Datum
1603 byteacat(PG_FUNCTION_ARGS)
1604 {
1605         bytea      *t1 = PG_GETARG_BYTEA_PP(0);
1606         bytea      *t2 = PG_GETARG_BYTEA_PP(1);
1607         int                     len1,
1608                                 len2,
1609                                 len;
1610         bytea      *result;
1611         char       *ptr;
1612
1613         len1 = VARSIZE_ANY_EXHDR(t1);
1614         if (len1 < 0)
1615                 len1 = 0;
1616
1617         len2 = VARSIZE_ANY_EXHDR(t2);
1618         if (len2 < 0)
1619                 len2 = 0;
1620
1621         len = len1 + len2 + VARHDRSZ;
1622         result = (bytea *) palloc(len);
1623
1624         /* Set size of result string... */
1625         SET_VARSIZE(result, len);
1626
1627         /* Fill data field of result string... */
1628         ptr = VARDATA(result);
1629         if (len1 > 0)
1630                 memcpy(ptr, VARDATA_ANY(t1), len1);
1631         if (len2 > 0)
1632                 memcpy(ptr + len1, VARDATA_ANY(t2), len2);
1633
1634         PG_RETURN_BYTEA_P(result);
1635 }
1636
1637 #define PG_STR_GET_BYTEA(str_) \
1638         DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
1639
1640 /*
1641  * bytea_substr()
1642  * Return a substring starting at the specified position.
1643  * Cloned from text_substr and modified as required.
1644  *
1645  * Input:
1646  *      - string
1647  *      - starting position (is one-based)
1648  *      - string length (optional)
1649  *
1650  * If the starting position is zero or less, then return from the start of the string
1651  * adjusting the length to be consistent with the "negative start" per SQL92.
1652  * If the length is less than zero, an ERROR is thrown. If no third argument
1653  * (length) is provided, the length to the end of the string is assumed.
1654  */
1655 Datum
1656 bytea_substr(PG_FUNCTION_ARGS)
1657 {
1658         int                     S = PG_GETARG_INT32(1); /* start position */
1659         int                     S1;                             /* adjusted start position */
1660         int                     L1;                             /* adjusted substring length */
1661
1662         S1 = Max(S, 1);
1663
1664         if (fcinfo->nargs == 2)
1665         {
1666                 /*
1667                  * Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
1668                  * the end of the string if we pass it a negative value for length.
1669                  */
1670                 L1 = -1;
1671         }
1672         else
1673         {
1674                 /* end position */
1675                 int                     E = S + PG_GETARG_INT32(2);
1676
1677                 /*
1678                  * A negative value for L is the only way for the end position to be
1679                  * before the start. SQL99 says to throw an error.
1680                  */
1681                 if (E < S)
1682                         ereport(ERROR,
1683                                         (errcode(ERRCODE_SUBSTRING_ERROR),
1684                                          errmsg("negative substring length not allowed")));
1685
1686                 /*
1687                  * A zero or negative value for the end position can happen if the
1688                  * start was negative or one. SQL99 says to return a zero-length
1689                  * string.
1690                  */
1691                 if (E < 1)
1692                         PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
1693
1694                 L1 = E - S1;
1695         }
1696
1697         /*
1698          * If the start position is past the end of the string, SQL99 says to
1699          * return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
1700          * for us. Convert to zero-based starting position
1701          */
1702         PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
1703 }
1704
1705 /*
1706  * bytea_substr_no_len -
1707  *        Wrapper to avoid opr_sanity failure due to
1708  *        one function accepting a different number of args.
1709  */
1710 Datum
1711 bytea_substr_no_len(PG_FUNCTION_ARGS)
1712 {
1713         return bytea_substr(fcinfo);
1714 }
1715
1716 /*
1717  * byteapos -
1718  *        Return the position of the specified substring.
1719  *        Implements the SQL92 POSITION() function.
1720  * Cloned from textpos and modified as required.
1721  */
1722 Datum
1723 byteapos(PG_FUNCTION_ARGS)
1724 {
1725         bytea      *t1 = PG_GETARG_BYTEA_PP(0);
1726         bytea      *t2 = PG_GETARG_BYTEA_PP(1);
1727         int                     pos;
1728         int                     px,
1729                                 p;
1730         int                     len1,
1731                                 len2;
1732         char       *p1,
1733                            *p2;
1734
1735         len1 = VARSIZE_ANY_EXHDR(t1);
1736         len2 = VARSIZE_ANY_EXHDR(t2);
1737
1738         if (len2 <= 0)
1739                 PG_RETURN_INT32(1);             /* result for empty pattern */
1740
1741         p1 = VARDATA_ANY(t1);
1742         p2 = VARDATA_ANY(t2);
1743
1744         pos = 0;
1745         px = (len1 - len2);
1746         for (p = 0; p <= px; p++)
1747         {
1748                 if ((*p2 == *p1) && (memcmp(p1, p2, len2) == 0))
1749                 {
1750                         pos = p + 1;
1751                         break;
1752                 };
1753                 p1++;
1754         };
1755
1756         PG_RETURN_INT32(pos);
1757 }
1758
1759 /*-------------------------------------------------------------
1760  * byteaGetByte
1761  *
1762  * this routine treats "bytea" as an array of bytes.
1763  * It returns the Nth byte (a number between 0 and 255).
1764  *-------------------------------------------------------------
1765  */
1766 Datum
1767 byteaGetByte(PG_FUNCTION_ARGS)
1768 {
1769         bytea      *v = PG_GETARG_BYTEA_PP(0);
1770         int32           n = PG_GETARG_INT32(1);
1771         int                     len;
1772         int byte;
1773
1774         len = VARSIZE_ANY_EXHDR(v);
1775
1776         if (n < 0 || n >= len)
1777                 ereport(ERROR,
1778                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1779                                  errmsg("index %d out of valid range, 0..%d",
1780                                                 n, len - 1)));
1781
1782         byte = ((unsigned char *) VARDATA_ANY(v))[n];
1783
1784         PG_RETURN_INT32(byte);
1785 }
1786
1787 /*-------------------------------------------------------------
1788  * byteaGetBit
1789  *
1790  * This routine treats a "bytea" type like an array of bits.
1791  * It returns the value of the Nth bit (0 or 1).
1792  *
1793  *-------------------------------------------------------------
1794  */
1795 Datum
1796 byteaGetBit(PG_FUNCTION_ARGS)
1797 {
1798         bytea      *v = PG_GETARG_BYTEA_PP(0);
1799         int32           n = PG_GETARG_INT32(1);
1800         int                     byteNo,
1801                                 bitNo;
1802         int                     len;
1803         int byte;
1804
1805         len = VARSIZE_ANY_EXHDR(v);
1806
1807         if (n < 0 || n >= len * 8)
1808                 ereport(ERROR,
1809                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1810                                  errmsg("index %d out of valid range, 0..%d",
1811                                                 n, len * 8 - 1)));
1812
1813         byteNo = n / 8;
1814         bitNo = n % 8;
1815
1816         byte = ((unsigned char *) VARDATA_ANY(v))[byteNo];
1817
1818         if (byte &(1 << bitNo))
1819                 PG_RETURN_INT32(1);
1820         else
1821                 PG_RETURN_INT32(0);
1822 }
1823
1824 /*-------------------------------------------------------------
1825  * byteaSetByte
1826  *
1827  * Given an instance of type 'bytea' creates a new one with
1828  * the Nth byte set to the given value.
1829  *
1830  *-------------------------------------------------------------
1831  */
1832 Datum
1833 byteaSetByte(PG_FUNCTION_ARGS)
1834 {
1835         bytea      *v = PG_GETARG_BYTEA_P(0);
1836         int32           n = PG_GETARG_INT32(1);
1837         int32           newByte = PG_GETARG_INT32(2);
1838         int                     len;
1839         bytea      *res;
1840
1841         len = VARSIZE(v) - VARHDRSZ;
1842
1843         if (n < 0 || n >= len)
1844                 ereport(ERROR,
1845                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1846                                  errmsg("index %d out of valid range, 0..%d",
1847                                                 n, len - 1)));
1848
1849         /*
1850          * Make a copy of the original varlena.
1851          */
1852         res = (bytea *) palloc(VARSIZE(v));
1853         memcpy((char *) res, (char *) v, VARSIZE(v));
1854
1855         /*
1856          * Now set the byte.
1857          */
1858         ((unsigned char *) VARDATA(res))[n] = newByte;
1859
1860         PG_RETURN_BYTEA_P(res);
1861 }
1862
1863 /*-------------------------------------------------------------
1864  * byteaSetBit
1865  *
1866  * Given an instance of type 'bytea' creates a new one with
1867  * the Nth bit set to the given value.
1868  *
1869  *-------------------------------------------------------------
1870  */
1871 Datum
1872 byteaSetBit(PG_FUNCTION_ARGS)
1873 {
1874         bytea      *v = PG_GETARG_BYTEA_P(0);
1875         int32           n = PG_GETARG_INT32(1);
1876         int32           newBit = PG_GETARG_INT32(2);
1877         bytea      *res;
1878         int                     len;
1879         int                     oldByte,
1880                                 newByte;
1881         int                     byteNo,
1882                                 bitNo;
1883
1884         len = VARSIZE(v) - VARHDRSZ;
1885
1886         if (n < 0 || n >= len * 8)
1887                 ereport(ERROR,
1888                                 (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
1889                                  errmsg("index %d out of valid range, 0..%d",
1890                                                 n, len * 8 - 1)));
1891
1892         byteNo = n / 8;
1893         bitNo = n % 8;
1894
1895         /*
1896          * sanity check!
1897          */
1898         if (newBit != 0 && newBit != 1)
1899                 ereport(ERROR,
1900                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1901                                  errmsg("new bit must be 0 or 1")));
1902
1903         /*
1904          * Make a copy of the original varlena.
1905          */
1906         res = (bytea *) palloc(VARSIZE(v));
1907         memcpy((char *) res, (char *) v, VARSIZE(v));
1908
1909         /*
1910          * Update the byte.
1911          */
1912         oldByte = ((unsigned char *) VARDATA(res))[byteNo];
1913
1914         if (newBit == 0)
1915                 newByte = oldByte & (~(1 << bitNo));
1916         else
1917                 newByte = oldByte | (1 << bitNo);
1918
1919         ((unsigned char *) VARDATA(res))[byteNo] = newByte;
1920
1921         PG_RETURN_BYTEA_P(res);
1922 }
1923
1924
1925 /* text_name()
1926  * Converts a text type to a Name type.
1927  */
1928 Datum
1929 text_name(PG_FUNCTION_ARGS)
1930 {
1931         text       *s = PG_GETARG_TEXT_PP(0);
1932         Name            result;
1933         int                     len;
1934
1935         len = VARSIZE_ANY_EXHDR(s);
1936
1937         /* Truncate oversize input */
1938         if (len >= NAMEDATALEN)
1939                 len = NAMEDATALEN - 1;
1940
1941         result = (Name) palloc(NAMEDATALEN);
1942         memcpy(NameStr(*result), VARDATA_ANY(s), len);
1943
1944         /* now null pad to full length... */
1945         while (len < NAMEDATALEN)
1946         {
1947                 *(NameStr(*result) + len) = '\0';
1948                 len++;
1949         }
1950
1951         PG_RETURN_NAME(result);
1952 }
1953
1954 /* name_text()
1955  * Converts a Name type to a text type.
1956  */
1957 Datum
1958 name_text(PG_FUNCTION_ARGS)
1959 {
1960         Name            s = PG_GETARG_NAME(0);
1961
1962         PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
1963 }
1964
1965
1966 /*
1967  * textToQualifiedNameList - convert a text object to list of names
1968  *
1969  * This implements the input parsing needed by nextval() and other
1970  * functions that take a text parameter representing a qualified name.
1971  * We split the name at dots, downcase if not double-quoted, and
1972  * truncate names if they're too long.
1973  */
1974 List *
1975 textToQualifiedNameList(text *textval)
1976 {
1977         char       *rawname;
1978         List       *result = NIL;
1979         List       *namelist;
1980         ListCell   *l;
1981
1982         /* Convert to C string (handles possible detoasting). */
1983         /* Note we rely on being able to modify rawname below. */
1984         rawname = text_to_cstring(textval);
1985
1986         if (!SplitIdentifierString(rawname, '.', &namelist))
1987                 ereport(ERROR,
1988                                 (errcode(ERRCODE_INVALID_NAME),
1989                                  errmsg("invalid name syntax")));
1990
1991         if (namelist == NIL)
1992                 ereport(ERROR,
1993                                 (errcode(ERRCODE_INVALID_NAME),
1994                                  errmsg("invalid name syntax")));
1995
1996         foreach(l, namelist)
1997         {
1998                 char       *curname = (char *) lfirst(l);
1999
2000                 result = lappend(result, makeString(pstrdup(curname)));
2001         }
2002
2003         pfree(rawname);
2004         list_free(namelist);
2005
2006         return result;
2007 }
2008
2009 /*
2010  * SplitIdentifierString --- parse a string containing identifiers
2011  *
2012  * This is the guts of textToQualifiedNameList, and is exported for use in
2013  * other situations such as parsing GUC variables.      In the GUC case, it's
2014  * important to avoid memory leaks, so the API is designed to minimize the
2015  * amount of stuff that needs to be allocated and freed.
2016  *
2017  * Inputs:
2018  *      rawstring: the input string; must be overwritable!      On return, it's
2019  *                         been modified to contain the separated identifiers.
2020  *      separator: the separator punctuation expected between identifiers
2021  *                         (typically '.' or ',').      Whitespace may also appear around
2022  *                         identifiers.
2023  * Outputs:
2024  *      namelist: filled with a palloc'd list of pointers to identifiers within
2025  *                        rawstring.  Caller should list_free() this even on error return.
2026  *
2027  * Returns TRUE if okay, FALSE if there is a syntax error in the string.
2028  *
2029  * Note that an empty string is considered okay here, though not in
2030  * textToQualifiedNameList.
2031  */
2032 bool
2033 SplitIdentifierString(char *rawstring, char separator,
2034                                           List **namelist)
2035 {
2036         char       *nextp = rawstring;
2037         bool            done = false;
2038
2039         *namelist = NIL;
2040
2041         while (isspace((unsigned char) *nextp))
2042                 nextp++;                                /* skip leading whitespace */
2043
2044         if (*nextp == '\0')
2045                 return true;                    /* allow empty string */
2046
2047         /* At the top of the loop, we are at start of a new identifier. */
2048         do
2049         {
2050                 char       *curname;
2051                 char       *endp;
2052
2053                 if (*nextp == '\"')
2054                 {
2055                         /* Quoted name --- collapse quote-quote pairs, no downcasing */
2056                         curname = nextp + 1;
2057                         for (;;)
2058                         {
2059                                 endp = strchr(nextp + 1, '\"');
2060                                 if (endp == NULL)
2061                                         return false;           /* mismatched quotes */
2062                                 if (endp[1] != '\"')
2063                                         break;          /* found end of quoted name */
2064                                 /* Collapse adjacent quotes into one quote, and look again */
2065                                 memmove(endp, endp + 1, strlen(endp));
2066                                 nextp = endp;
2067                         }
2068                         /* endp now points at the terminating quote */
2069                         nextp = endp + 1;
2070                 }
2071                 else
2072                 {
2073                         /* Unquoted name --- extends to separator or whitespace */
2074                         char       *downname;
2075                         int                     len;
2076
2077                         curname = nextp;
2078                         while (*nextp && *nextp != separator &&
2079                                    !isspace((unsigned char) *nextp))
2080                                 nextp++;
2081                         endp = nextp;
2082                         if (curname == nextp)
2083                                 return false;   /* empty unquoted name not allowed */
2084
2085                         /*
2086                          * Downcase the identifier, using same code as main lexer does.
2087                          *
2088                          * XXX because we want to overwrite the input in-place, we cannot
2089                          * support a downcasing transformation that increases the string
2090                          * length.      This is not a problem given the current implementation
2091                          * of downcase_truncate_identifier, but we'll probably have to do
2092                          * something about this someday.
2093                          */
2094                         len = endp - curname;
2095                         downname = downcase_truncate_identifier(curname, len, false);
2096                         Assert(strlen(downname) <= len);
2097                         strncpy(curname, downname, len);
2098                         pfree(downname);
2099                 }
2100
2101                 while (isspace((unsigned char) *nextp))
2102                         nextp++;                        /* skip trailing whitespace */
2103
2104                 if (*nextp == separator)
2105                 {
2106                         nextp++;
2107                         while (isspace((unsigned char) *nextp))
2108                                 nextp++;                /* skip leading whitespace for next */
2109                         /* we expect another name, so done remains false */
2110                 }
2111                 else if (*nextp == '\0')
2112                         done = true;
2113                 else
2114                         return false;           /* invalid syntax */
2115
2116                 /* Now safe to overwrite separator with a null */
2117                 *endp = '\0';
2118
2119                 /* Truncate name if it's overlength */
2120                 truncate_identifier(curname, strlen(curname), false);
2121
2122                 /*
2123                  * Finished isolating current name --- add it to list
2124                  */
2125                 *namelist = lappend(*namelist, curname);
2126
2127                 /* Loop back if we didn't reach end of string */
2128         } while (!done);
2129
2130         return true;
2131 }
2132
2133
2134 /*****************************************************************************
2135  *      Comparison Functions used for bytea
2136  *
2137  * Note: btree indexes need these routines not to leak memory; therefore,
2138  * be careful to free working copies of toasted datums.  Most places don't
2139  * need to be so careful.
2140  *****************************************************************************/
2141
2142 Datum
2143 byteaeq(PG_FUNCTION_ARGS)
2144 {
2145         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2146         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2147         int                     len1,
2148                                 len2;
2149         bool            result;
2150
2151         len1 = VARSIZE_ANY_EXHDR(arg1);
2152         len2 = VARSIZE_ANY_EXHDR(arg2);
2153
2154         /* fast path for different-length inputs */
2155         if (len1 != len2)
2156                 result = false;
2157         else
2158                 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) == 0);
2159
2160         PG_FREE_IF_COPY(arg1, 0);
2161         PG_FREE_IF_COPY(arg2, 1);
2162
2163         PG_RETURN_BOOL(result);
2164 }
2165
2166 Datum
2167 byteane(PG_FUNCTION_ARGS)
2168 {
2169         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2170         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2171         int                     len1,
2172                                 len2;
2173         bool            result;
2174
2175         len1 = VARSIZE_ANY_EXHDR(arg1);
2176         len2 = VARSIZE_ANY_EXHDR(arg2);
2177
2178         /* fast path for different-length inputs */
2179         if (len1 != len2)
2180                 result = true;
2181         else
2182                 result = (memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), len1) != 0);
2183
2184         PG_FREE_IF_COPY(arg1, 0);
2185         PG_FREE_IF_COPY(arg2, 1);
2186
2187         PG_RETURN_BOOL(result);
2188 }
2189
2190 Datum
2191 bytealt(PG_FUNCTION_ARGS)
2192 {
2193         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2194         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2195         int                     len1,
2196                                 len2;
2197         int                     cmp;
2198
2199         len1 = VARSIZE_ANY_EXHDR(arg1);
2200         len2 = VARSIZE_ANY_EXHDR(arg2);
2201
2202         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2203
2204         PG_FREE_IF_COPY(arg1, 0);
2205         PG_FREE_IF_COPY(arg2, 1);
2206
2207         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
2208 }
2209
2210 Datum
2211 byteale(PG_FUNCTION_ARGS)
2212 {
2213         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2214         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2215         int                     len1,
2216                                 len2;
2217         int                     cmp;
2218
2219         len1 = VARSIZE_ANY_EXHDR(arg1);
2220         len2 = VARSIZE_ANY_EXHDR(arg2);
2221
2222         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2223
2224         PG_FREE_IF_COPY(arg1, 0);
2225         PG_FREE_IF_COPY(arg2, 1);
2226
2227         PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
2228 }
2229
2230 Datum
2231 byteagt(PG_FUNCTION_ARGS)
2232 {
2233         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2234         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2235         int                     len1,
2236                                 len2;
2237         int                     cmp;
2238
2239         len1 = VARSIZE_ANY_EXHDR(arg1);
2240         len2 = VARSIZE_ANY_EXHDR(arg2);
2241
2242         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2243
2244         PG_FREE_IF_COPY(arg1, 0);
2245         PG_FREE_IF_COPY(arg2, 1);
2246
2247         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
2248 }
2249
2250 Datum
2251 byteage(PG_FUNCTION_ARGS)
2252 {
2253         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2254         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2255         int                     len1,
2256                                 len2;
2257         int                     cmp;
2258
2259         len1 = VARSIZE_ANY_EXHDR(arg1);
2260         len2 = VARSIZE_ANY_EXHDR(arg2);
2261
2262         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2263
2264         PG_FREE_IF_COPY(arg1, 0);
2265         PG_FREE_IF_COPY(arg2, 1);
2266
2267         PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
2268 }
2269
2270 Datum
2271 byteacmp(PG_FUNCTION_ARGS)
2272 {
2273         bytea      *arg1 = PG_GETARG_BYTEA_PP(0);
2274         bytea      *arg2 = PG_GETARG_BYTEA_PP(1);
2275         int                     len1,
2276                                 len2;
2277         int                     cmp;
2278
2279         len1 = VARSIZE_ANY_EXHDR(arg1);
2280         len2 = VARSIZE_ANY_EXHDR(arg2);
2281
2282         cmp = memcmp(VARDATA_ANY(arg1), VARDATA_ANY(arg2), Min(len1, len2));
2283         if ((cmp == 0) && (len1 != len2))
2284                 cmp = (len1 < len2) ? -1 : 1;
2285
2286         PG_FREE_IF_COPY(arg1, 0);
2287         PG_FREE_IF_COPY(arg2, 1);
2288
2289         PG_RETURN_INT32(cmp);
2290 }
2291
2292 /*
2293  * appendStringInfoText
2294  *
2295  * Append a text to str.
2296  * Like appendStringInfoString(str, text_to_cstring(t)) but faster.
2297  */
2298 static void
2299 appendStringInfoText(StringInfo str, const text *t)
2300 {
2301         appendBinaryStringInfo(str, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
2302 }
2303
2304 /*
2305  * replace_text
2306  * replace all occurrences of 'old_sub_str' in 'orig_str'
2307  * with 'new_sub_str' to form 'new_str'
2308  *
2309  * returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
2310  * otherwise returns 'new_str'
2311  */
2312 Datum
2313 replace_text(PG_FUNCTION_ARGS)
2314 {
2315         text       *src_text = PG_GETARG_TEXT_PP(0);
2316         text       *from_sub_text = PG_GETARG_TEXT_PP(1);
2317         text       *to_sub_text = PG_GETARG_TEXT_PP(2);
2318         int                     src_text_len;
2319         int                     from_sub_text_len;
2320         TextPositionState state;
2321         text       *ret_text;
2322         int                     start_posn;
2323         int                     curr_posn;
2324         int                     chunk_len;
2325         char       *start_ptr;
2326         StringInfoData str;
2327
2328         text_position_setup(src_text, from_sub_text, &state);
2329
2330         /*
2331          * Note: we check the converted string length, not the original, because
2332          * they could be different if the input contained invalid encoding.
2333          */
2334         src_text_len = state.len1;
2335         from_sub_text_len = state.len2;
2336
2337         /* Return unmodified source string if empty source or pattern */
2338         if (src_text_len < 1 || from_sub_text_len < 1)
2339         {
2340                 text_position_cleanup(&state);
2341                 PG_RETURN_TEXT_P(src_text);
2342         }
2343
2344         start_posn = 1;
2345         curr_posn = text_position_next(1, &state);
2346
2347         /* When the from_sub_text is not found, there is nothing to do. */
2348         if (curr_posn == 0)
2349         {
2350                 text_position_cleanup(&state);
2351                 PG_RETURN_TEXT_P(src_text);
2352         }
2353
2354         /* start_ptr points to the start_posn'th character of src_text */
2355         start_ptr = VARDATA_ANY(src_text);
2356
2357         initStringInfo(&str);
2358
2359         do
2360         {
2361                 CHECK_FOR_INTERRUPTS();
2362
2363                 /* copy the data skipped over by last text_position_next() */
2364                 chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
2365                 appendBinaryStringInfo(&str, start_ptr, chunk_len);
2366
2367                 appendStringInfoText(&str, to_sub_text);
2368
2369                 start_posn = curr_posn;
2370                 start_ptr += chunk_len;
2371                 start_posn += from_sub_text_len;
2372                 start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
2373
2374                 curr_posn = text_position_next(start_posn, &state);
2375         }
2376         while (curr_posn > 0);
2377
2378         /* copy trailing data */
2379         chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2380         appendBinaryStringInfo(&str, start_ptr, chunk_len);
2381
2382         text_position_cleanup(&state);
2383
2384         ret_text = cstring_to_text_with_len(str.data, str.len);
2385         pfree(str.data);
2386
2387         PG_RETURN_TEXT_P(ret_text);
2388 }
2389
2390 /*
2391  * check_replace_text_has_escape_char
2392  *
2393  * check whether replace_text contains escape char.
2394  */
2395 static bool
2396 check_replace_text_has_escape_char(const text *replace_text)
2397 {
2398         const char *p = VARDATA_ANY(replace_text);
2399         const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2400
2401         if (pg_database_encoding_max_length() == 1)
2402         {
2403                 for (; p < p_end; p++)
2404                 {
2405                         if (*p == '\\')
2406                                 return true;
2407                 }
2408         }
2409         else
2410         {
2411                 for (; p < p_end; p += pg_mblen(p))
2412                 {
2413                         if (*p == '\\')
2414                                 return true;
2415                 }
2416         }
2417
2418         return false;
2419 }
2420
2421 /*
2422  * appendStringInfoRegexpSubstr
2423  *
2424  * Append replace_text to str, substituting regexp back references for
2425  * \n escapes.  start_ptr is the start of the match in the source string,
2426  * at logical character position data_pos.
2427  */
2428 static void
2429 appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
2430                                                          regmatch_t *pmatch,
2431                                                          char *start_ptr, int data_pos)
2432 {
2433         const char *p = VARDATA_ANY(replace_text);
2434         const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
2435         int                     eml = pg_database_encoding_max_length();
2436
2437         for (;;)
2438         {
2439                 const char *chunk_start = p;
2440                 int                     so;
2441                 int                     eo;
2442
2443                 /* Find next escape char. */
2444                 if (eml == 1)
2445                 {
2446                         for (; p < p_end && *p != '\\'; p++)
2447                                  /* nothing */ ;
2448                 }
2449                 else
2450                 {
2451                         for (; p < p_end && *p != '\\'; p += pg_mblen(p))
2452                                  /* nothing */ ;
2453                 }
2454
2455                 /* Copy the text we just scanned over, if any. */
2456                 if (p > chunk_start)
2457                         appendBinaryStringInfo(str, chunk_start, p - chunk_start);
2458
2459                 /* Done if at end of string, else advance over escape char. */
2460                 if (p >= p_end)
2461                         break;
2462                 p++;
2463
2464                 if (p >= p_end)
2465                 {
2466                         /* Escape at very end of input.  Treat same as unexpected char */
2467                         appendStringInfoChar(str, '\\');
2468                         break;
2469                 }
2470
2471                 if (*p >= '1' && *p <= '9')
2472                 {
2473                         /* Use the back reference of regexp. */
2474                         int                     idx = *p - '0';
2475
2476                         so = pmatch[idx].rm_so;
2477                         eo = pmatch[idx].rm_eo;
2478                         p++;
2479                 }
2480                 else if (*p == '&')
2481                 {
2482                         /* Use the entire matched string. */
2483                         so = pmatch[0].rm_so;
2484                         eo = pmatch[0].rm_eo;
2485                         p++;
2486                 }
2487                 else if (*p == '\\')
2488                 {
2489                         /* \\ means transfer one \ to output. */
2490                         appendStringInfoChar(str, '\\');
2491                         p++;
2492                         continue;
2493                 }
2494                 else
2495                 {
2496                         /*
2497                          * If escape char is not followed by any expected char, just treat
2498                          * it as ordinary data to copy.  (XXX would it be better to throw
2499                          * an error?)
2500                          */
2501                         appendStringInfoChar(str, '\\');
2502                         continue;
2503                 }
2504
2505                 if (so != -1 && eo != -1)
2506                 {
2507                         /*
2508                          * Copy the text that is back reference of regexp.      Note so and eo
2509                          * are counted in characters not bytes.
2510                          */
2511                         char       *chunk_start;
2512                         int                     chunk_len;
2513
2514                         Assert(so >= data_pos);
2515                         chunk_start = start_ptr;
2516                         chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
2517                         chunk_len = charlen_to_bytelen(chunk_start, eo - so);
2518                         appendBinaryStringInfo(str, chunk_start, chunk_len);
2519                 }
2520         }
2521 }
2522
2523 #define REGEXP_REPLACE_BACKREF_CNT              10
2524
2525 /*
2526  * replace_text_regexp
2527  *
2528  * replace text that matches to regexp in src_text to replace_text.
2529  *
2530  * Note: to avoid having to include regex.h in builtins.h, we declare
2531  * the regexp argument as void *, but really it's regex_t *.
2532  */
2533 text *
2534 replace_text_regexp(text *src_text, void *regexp,
2535                                         text *replace_text, bool glob)
2536 {
2537         text       *ret_text;
2538         regex_t    *re = (regex_t *) regexp;
2539         int                     src_text_len = VARSIZE_ANY_EXHDR(src_text);
2540         StringInfoData buf;
2541         regmatch_t      pmatch[REGEXP_REPLACE_BACKREF_CNT];
2542         pg_wchar   *data;
2543         size_t          data_len;
2544         int                     search_start;
2545         int                     data_pos;
2546         char       *start_ptr;
2547         bool            have_escape;
2548
2549         initStringInfo(&buf);
2550
2551         /* Convert data string to wide characters. */
2552         data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
2553         data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
2554
2555         /* Check whether replace_text has escape char. */
2556         have_escape = check_replace_text_has_escape_char(replace_text);
2557
2558         /* start_ptr points to the data_pos'th character of src_text */
2559         start_ptr = (char *) VARDATA_ANY(src_text);
2560         data_pos = 0;
2561
2562         search_start = 0;
2563         while (search_start <= data_len)
2564         {
2565                 int                     regexec_result;
2566
2567                 CHECK_FOR_INTERRUPTS();
2568
2569                 regexec_result = pg_regexec(re,
2570                                                                         data,
2571                                                                         data_len,
2572                                                                         search_start,
2573                                                                         NULL,           /* no details */
2574                                                                         REGEXP_REPLACE_BACKREF_CNT,
2575                                                                         pmatch,
2576                                                                         0);
2577
2578                 if (regexec_result == REG_NOMATCH)
2579                         break;
2580
2581                 if (regexec_result != REG_OKAY)
2582                 {
2583                         char            errMsg[100];
2584
2585                         pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
2586                         ereport(ERROR,
2587                                         (errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
2588                                          errmsg("regular expression failed: %s", errMsg)));
2589                 }
2590
2591                 /*
2592                  * Copy the text to the left of the match position.  Note we are given
2593                  * character not byte indexes.
2594                  */
2595                 if (pmatch[0].rm_so - data_pos > 0)
2596                 {
2597                         int                     chunk_len;
2598
2599                         chunk_len = charlen_to_bytelen(start_ptr,
2600                                                                                    pmatch[0].rm_so - data_pos);
2601                         appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2602
2603                         /*
2604                          * Advance start_ptr over that text, to avoid multiple rescans of
2605                          * it if the replace_text contains multiple back-references.
2606                          */
2607                         start_ptr += chunk_len;
2608                         data_pos = pmatch[0].rm_so;
2609                 }
2610
2611                 /*
2612                  * Copy the replace_text. Process back references when the
2613                  * replace_text has escape characters.
2614                  */
2615                 if (have_escape)
2616                         appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
2617                                                                                  start_ptr, data_pos);
2618                 else
2619                         appendStringInfoText(&buf, replace_text);
2620
2621                 /* Advance start_ptr and data_pos over the matched text. */
2622                 start_ptr += charlen_to_bytelen(start_ptr,
2623                                                                                 pmatch[0].rm_eo - data_pos);
2624                 data_pos = pmatch[0].rm_eo;
2625
2626                 /*
2627                  * When global option is off, replace the first instance only.
2628                  */
2629                 if (!glob)
2630                         break;
2631
2632                 /*
2633                  * Search from next character when the matching text is zero width.
2634                  */
2635                 search_start = data_pos;
2636                 if (pmatch[0].rm_so == pmatch[0].rm_eo)
2637                         search_start++;
2638         }
2639
2640         /*
2641          * Copy the text to the right of the last match.
2642          */
2643         if (data_pos < data_len)
2644         {
2645                 int                     chunk_len;
2646
2647                 chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
2648                 appendBinaryStringInfo(&buf, start_ptr, chunk_len);
2649         }
2650
2651         ret_text = cstring_to_text_with_len(buf.data, buf.len);
2652         pfree(buf.data);
2653         pfree(data);
2654
2655         return ret_text;
2656 }
2657
2658 /*
2659  * split_text
2660  * parse input string
2661  * return ord item (1 based)
2662  * based on provided field separator
2663  */
2664 Datum
2665 split_text(PG_FUNCTION_ARGS)
2666 {
2667         text       *inputstring = PG_GETARG_TEXT_PP(0);
2668         text       *fldsep = PG_GETARG_TEXT_PP(1);
2669         int                     fldnum = PG_GETARG_INT32(2);
2670         int                     inputstring_len;
2671         int                     fldsep_len;
2672         TextPositionState state;
2673         int                     start_posn;
2674         int                     end_posn;
2675         text       *result_text;
2676
2677         /* field number is 1 based */
2678         if (fldnum < 1)
2679                 ereport(ERROR,
2680                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
2681                                  errmsg("field position must be greater than zero")));
2682
2683         text_position_setup(inputstring, fldsep, &state);
2684
2685         /*
2686          * Note: we check the converted string length, not the original, because
2687          * they could be different if the input contained invalid encoding.
2688          */
2689         inputstring_len = state.len1;
2690         fldsep_len = state.len2;
2691
2692         /* return empty string for empty input string */
2693         if (inputstring_len < 1)
2694         {
2695                 text_position_cleanup(&state);
2696                 PG_RETURN_TEXT_P(cstring_to_text(""));
2697         }
2698
2699         /* empty field separator */
2700         if (fldsep_len < 1)
2701         {
2702                 text_position_cleanup(&state);
2703                 /* if first field, return input string, else empty string */
2704                 if (fldnum == 1)
2705                         PG_RETURN_TEXT_P(inputstring);
2706                 else
2707                         PG_RETURN_TEXT_P(cstring_to_text(""));
2708         }
2709
2710         /* identify bounds of first field */
2711         start_posn = 1;
2712         end_posn = text_position_next(1, &state);
2713
2714         /* special case if fldsep not found at all */
2715         if (end_posn == 0)
2716         {
2717                 text_position_cleanup(&state);
2718                 /* if field 1 requested, return input string, else empty string */
2719                 if (fldnum == 1)
2720                         PG_RETURN_TEXT_P(inputstring);
2721                 else
2722                         PG_RETURN_TEXT_P(cstring_to_text(""));
2723         }
2724
2725         while (end_posn > 0 && --fldnum > 0)
2726         {
2727                 /* identify bounds of next field */
2728                 start_posn = end_posn + fldsep_len;
2729                 end_posn = text_position_next(start_posn, &state);
2730         }
2731
2732         text_position_cleanup(&state);
2733
2734         if (fldnum > 0)
2735         {
2736                 /* N'th field separator not found */
2737                 /* if last field requested, return it, else empty string */
2738                 if (fldnum == 1)
2739                         result_text = text_substring(PointerGetDatum(inputstring),
2740                                                                                  start_posn,
2741                                                                                  -1,
2742                                                                                  true);
2743                 else
2744                         result_text = cstring_to_text("");
2745         }
2746         else
2747         {
2748                 /* non-last field requested */
2749                 result_text = text_substring(PointerGetDatum(inputstring),
2750                                                                          start_posn,
2751                                                                          end_posn - start_posn,
2752                                                                          false);
2753         }
2754
2755         PG_RETURN_TEXT_P(result_text);
2756 }
2757
2758 /*
2759  * text_to_array
2760  * parse input string
2761  * return text array of elements
2762  * based on provided field separator
2763  */
2764 Datum
2765 text_to_array(PG_FUNCTION_ARGS)
2766 {
2767         text       *inputstring = PG_GETARG_TEXT_PP(0);
2768         text       *fldsep = PG_GETARG_TEXT_PP(1);
2769         int                     inputstring_len;
2770         int                     fldsep_len;
2771         TextPositionState state;
2772         int                     fldnum;
2773         int                     start_posn;
2774         int                     end_posn;
2775         int                     chunk_len;
2776         char       *start_ptr;
2777         text       *result_text;
2778         ArrayBuildState *astate = NULL;
2779
2780         text_position_setup(inputstring, fldsep, &state);
2781
2782         /*
2783          * Note: we check the converted string length, not the original, because
2784          * they could be different if the input contained invalid encoding.
2785          */
2786         inputstring_len = state.len1;
2787         fldsep_len = state.len2;
2788
2789         /* return NULL for empty input string */
2790         if (inputstring_len < 1)
2791         {
2792                 text_position_cleanup(&state);
2793                 PG_RETURN_NULL();
2794         }
2795
2796         /*
2797          * empty field separator return one element, 1D, array using the input
2798          * string
2799          */
2800         if (fldsep_len < 1)
2801         {
2802                 text_position_cleanup(&state);
2803                 PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID,
2804                                                                                    PointerGetDatum(inputstring), 1));
2805         }
2806
2807         start_posn = 1;
2808         /* start_ptr points to the start_posn'th character of inputstring */
2809         start_ptr = VARDATA_ANY(inputstring);
2810
2811         for (fldnum = 1;; fldnum++) /* field number is 1 based */
2812         {
2813                 CHECK_FOR_INTERRUPTS();
2814
2815                 end_posn = text_position_next(start_posn, &state);
2816
2817                 if (end_posn == 0)
2818                 {
2819                         /* fetch last field */
2820                         chunk_len = ((char *) inputstring + VARSIZE_ANY(inputstring)) - start_ptr;
2821                 }
2822                 else
2823                 {
2824                         /* fetch non-last field */
2825                         chunk_len = charlen_to_bytelen(start_ptr, end_posn - start_posn);
2826                 }
2827
2828                 /* must build a temp text datum to pass to accumArrayResult */
2829                 result_text = cstring_to_text_with_len(start_ptr, chunk_len);
2830
2831                 /* stash away this field */
2832                 astate = accumArrayResult(astate,
2833                                                                   PointerGetDatum(result_text),
2834                                                                   false,
2835                                                                   TEXTOID,
2836                                                                   CurrentMemoryContext);
2837
2838                 pfree(result_text);
2839
2840                 if (end_posn == 0)
2841                         break;
2842
2843                 start_posn = end_posn;
2844                 start_ptr += chunk_len;
2845                 start_posn += fldsep_len;
2846                 start_ptr += charlen_to_bytelen(start_ptr, fldsep_len);
2847         }
2848
2849         text_position_cleanup(&state);
2850
2851         PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate,
2852                                                                                   CurrentMemoryContext));
2853 }
2854
2855 /*
2856  * array_to_text
2857  * concatenate Cstring representation of input array elements
2858  * using provided field separator
2859  */
2860 Datum
2861 array_to_text(PG_FUNCTION_ARGS)
2862 {
2863         ArrayType  *v = PG_GETARG_ARRAYTYPE_P(0);
2864         char       *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
2865         int                     nitems,
2866                            *dims,
2867                                 ndims;
2868         Oid                     element_type;
2869         int                     typlen;
2870         bool            typbyval;
2871         char            typalign;
2872         StringInfoData buf;
2873         bool            printed = false;
2874         char       *p;
2875         bits8      *bitmap;
2876         int                     bitmask;
2877         int                     i;
2878         ArrayMetaState *my_extra;
2879
2880         ndims = ARR_NDIM(v);
2881         dims = ARR_DIMS(v);
2882         nitems = ArrayGetNItems(ndims, dims);
2883
2884         /* if there are no elements, return an empty string */
2885         if (nitems == 0)
2886                 PG_RETURN_TEXT_P(cstring_to_text(""));
2887
2888         element_type = ARR_ELEMTYPE(v);
2889         initStringInfo(&buf);
2890
2891         /*
2892          * We arrange to look up info about element type, including its output
2893          * conversion proc, only once per series of calls, assuming the element
2894          * type doesn't change underneath us.
2895          */
2896         my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2897         if (my_extra == NULL)
2898         {
2899                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
2900                                                                                                           sizeof(ArrayMetaState));
2901                 my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
2902                 my_extra->element_type = ~element_type;
2903         }
2904
2905         if (my_extra->element_type != element_type)
2906         {
2907                 /*
2908                  * Get info about element type, including its output conversion proc
2909                  */
2910                 get_type_io_data(element_type, IOFunc_output,
2911                                                  &my_extra->typlen, &my_extra->typbyval,
2912                                                  &my_extra->typalign, &my_extra->typdelim,
2913                                                  &my_extra->typioparam, &my_extra->typiofunc);
2914                 fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
2915                                           fcinfo->flinfo->fn_mcxt);
2916                 my_extra->element_type = element_type;
2917         }
2918         typlen = my_extra->typlen;
2919         typbyval = my_extra->typbyval;
2920         typalign = my_extra->typalign;
2921
2922         p = ARR_DATA_PTR(v);
2923         bitmap = ARR_NULLBITMAP(v);
2924         bitmask = 1;
2925
2926         for (i = 0; i < nitems; i++)
2927         {
2928                 Datum           itemvalue;
2929                 char       *value;
2930
2931                 /* Get source element, checking for NULL */
2932                 if (bitmap && (*bitmap & bitmask) == 0)
2933                 {
2934                         /* we ignore nulls */
2935                 }
2936                 else
2937                 {
2938                         itemvalue = fetch_att(p, typbyval, typlen);
2939
2940                         value = OutputFunctionCall(&my_extra->proc, itemvalue);
2941
2942                         if (printed)
2943                                 appendStringInfo(&buf, "%s%s", fldsep, value);
2944                         else
2945                                 appendStringInfoString(&buf, value);
2946                         printed = true;
2947
2948                         p = att_addlength_pointer(p, typlen, p);
2949                         p = (char *) att_align_nominal(p, typalign);
2950                 }
2951
2952                 /* advance bitmap pointer if any */
2953                 if (bitmap)
2954                 {
2955                         bitmask <<= 1;
2956                         if (bitmask == 0x100)
2957                         {
2958                                 bitmap++;
2959                                 bitmask = 1;
2960                         }
2961                 }
2962         }
2963
2964         PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len));
2965 }
2966
2967 #define HEXBASE 16
2968 /*
2969  * Convert a int32 to a string containing a base 16 (hex) representation of
2970  * the number.
2971  */
2972 Datum
2973 to_hex32(PG_FUNCTION_ARGS)
2974 {
2975         uint32          value = (uint32) PG_GETARG_INT32(0);
2976         char       *ptr;
2977         const char *digits = "0123456789abcdef";
2978         char            buf[32];                /* bigger than needed, but reasonable */
2979
2980         ptr = buf + sizeof(buf) - 1;
2981         *ptr = '\0';
2982
2983         do
2984         {
2985                 *--ptr = digits[value % HEXBASE];
2986                 value /= HEXBASE;
2987         } while (ptr > buf && value);
2988
2989         PG_RETURN_TEXT_P(cstring_to_text(ptr));
2990 }
2991
2992 /*
2993  * Convert a int64 to a string containing a base 16 (hex) representation of
2994  * the number.
2995  */
2996 Datum
2997 to_hex64(PG_FUNCTION_ARGS)
2998 {
2999         uint64          value = (uint64) PG_GETARG_INT64(0);
3000         char       *ptr;
3001         const char *digits = "0123456789abcdef";
3002         char            buf[32];                /* bigger than needed, but reasonable */
3003
3004         ptr = buf + sizeof(buf) - 1;
3005         *ptr = '\0';
3006
3007         do
3008         {
3009                 *--ptr = digits[value % HEXBASE];
3010                 value /= HEXBASE;
3011         } while (ptr > buf && value);
3012
3013         PG_RETURN_TEXT_P(cstring_to_text(ptr));
3014 }
3015
3016 /*
3017  * Create an md5 hash of a text string and return it as hex
3018  *
3019  * md5 produces a 16 byte (128 bit) hash; double it for hex
3020  */
3021 #define MD5_HASH_LEN  32
3022
3023 Datum
3024 md5_text(PG_FUNCTION_ARGS)
3025 {
3026         text       *in_text = PG_GETARG_TEXT_PP(0);
3027         size_t          len;
3028         char            hexsum[MD5_HASH_LEN + 1];
3029
3030         /* Calculate the length of the buffer using varlena metadata */
3031         len = VARSIZE_ANY_EXHDR(in_text);
3032
3033         /* get the hash result */
3034         if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
3035                 ereport(ERROR,
3036                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3037                                  errmsg("out of memory")));
3038
3039         /* convert to text and return it */
3040         PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3041 }
3042
3043 /*
3044  * Create an md5 hash of a bytea field and return it as a hex string:
3045  * 16-byte md5 digest is represented in 32 hex characters.
3046  */
3047 Datum
3048 md5_bytea(PG_FUNCTION_ARGS)
3049 {
3050         bytea      *in = PG_GETARG_BYTEA_PP(0);
3051         size_t          len;
3052         char            hexsum[MD5_HASH_LEN + 1];
3053
3054         len = VARSIZE_ANY_EXHDR(in);
3055         if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
3056                 ereport(ERROR,
3057                                 (errcode(ERRCODE_OUT_OF_MEMORY),
3058                                  errmsg("out of memory")));
3059
3060         PG_RETURN_TEXT_P(cstring_to_text(hexsum));
3061 }
3062
3063 /*
3064  * Return the size of a datum, possibly compressed
3065  *
3066  * Works on any data type
3067  */
3068 Datum
3069 pg_column_size(PG_FUNCTION_ARGS)
3070 {
3071         Datum           value = PG_GETARG_DATUM(0);
3072         int32           result;
3073         int                     typlen;
3074
3075         /* On first call, get the input type's typlen, and save at *fn_extra */
3076         if (fcinfo->flinfo->fn_extra == NULL)
3077         {
3078                 /* Lookup the datatype of the supplied argument */
3079                 Oid                     argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
3080
3081                 typlen = get_typlen(argtypeid);
3082                 if (typlen == 0)                /* should not happen */
3083                         elog(ERROR, "cache lookup failed for type %u", argtypeid);
3084
3085                 fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
3086                                                                                                           sizeof(int));
3087                 *((int *) fcinfo->flinfo->fn_extra) = typlen;
3088         }
3089         else
3090                 typlen = *((int *) fcinfo->flinfo->fn_extra);
3091
3092         if (typlen == -1)
3093         {
3094                 /* varlena type, possibly toasted */
3095                 result = toast_datum_size(value);
3096         }
3097         else if (typlen == -2)
3098         {
3099                 /* cstring */
3100                 result = strlen(DatumGetCString(value)) + 1;
3101         }
3102         else
3103         {
3104                 /* ordinary fixed-width type */
3105                 result = typlen;
3106         }
3107
3108         PG_RETURN_INT32(result);
3109 }