src/backend/utils/adt/formatting.c

   1 /* -----------------------------------------------------------------------
   2  * formatting.c
   3  *
   4  * src/backend/utils/adt/formatting.c
   5  *
   6  *
   7  *       Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
   8  *
   9  *
  10  *       TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
  11  *
  12  *       The PostgreSQL routines for a timestamp/int/float/numeric formatting,
  13  *       inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
  14  *
  15  *
  16  *       Cache & Memory:
   17  *      The routines use their own internal cache for format pictures.
   18  *
   19  *      The cache uses a static buffer and is persistent across transactions.  If
   20  *      the format picture is bigger than the cache buffer, the parser is called
   21  *      every time.
  22  *
  23  *       NOTE for Number version:
   24  *      Everything in this version is implemented as keywords ( => suffixes
   25  *      are not used), because a format picture is for *one* item (number)
   26  *      only.  This differs from the timestamp version, where each keyword
   27  *      can have a suffix.
  28  *
  29  *       NOTE for Timestamp routines:
   30  *      In this module the POSIX 'struct tm' type is *not* used, but rather
   31  *      the PgSQL type, which has tm_mon based on one (*not* zero) and
   32  *      a year that is *not* based on 1900; the full year number is used.
   33  *      The module supports AD / BC / AM / PM.
  34  *
  35  *      Supported types for to_char():
  36  *
  37  *              Timestamp, Numeric, int4, int8, float4, float8
  38  *
  39  *      Supported types for reverse conversion:
  40  *
  41  *              Timestamp       - to_timestamp()
  42  *              Date            - to_date()
  43  *              Numeric         - to_number()
  44  *
  45  *
  46  *      Karel Zak
  47  *
  48  * TODO
  49  *      - better number building (formatting) / parsing, now it isn't
  50  *                ideal code
  51  *      - use Assert()
  52  *      - add support for roman number to standard number conversion
  53  *      - add support for number spelling
  54  *      - add support for string to string formatting (we must be better
  55  *        than Oracle :-),
  56  *              to_char('Hello', 'X X X X X') -> 'H e l l o'
  57  *
  58  * -----------------------------------------------------------------------
  59  */
  60
   61 #ifdef DEBUG_TO_FROM_CHAR
   62 #define DEBUG_elog_output       DEBUG3  /* elog level used by DEBUG_TMFC() and DEBUG_TM() */
   63 #endif
  64
  65 #include "postgres.h"
  66
  67 #include <ctype.h>
  68 #include <unistd.h>
  69 #include <math.h>
  70 #include <float.h>
  71 #include <limits.h>
  72 #include <wctype.h>
  73
  74 #ifdef USE_ICU
  75 #include <unicode/ustring.h>
  76 #endif
  77
  78 #include "catalog/pg_collation.h"
  79 #include "catalog/pg_type.h"
  80 #include "common/unicode_case.h"
  81 #include "common/unicode_category.h"
  82 #include "mb/pg_wchar.h"
  83 #include "nodes/miscnodes.h"
  84 #include "parser/scansup.h"
  85 #include "utils/builtins.h"
  86 #include "utils/date.h"
  87 #include "utils/datetime.h"
  88 #include "utils/formatting.h"
  89 #include "utils/memutils.h"
  90 #include "utils/numeric.h"
  91 #include "utils/pg_locale.h"
  92 #include "varatt.h"
  93
  94
   95 /* ----------
   96  * Routine flags (each one is a distinct bit)
   97  * ----------
   98  */
   99 #define DCH_FLAG                0x1             /* DATE-TIME flag       */
  100 #define NUM_FLAG                0x2             /* NUMBER flag  */
  101 #define STD_FLAG                0x4             /* STANDARD flag        */
 102
  103 /* ----------
  104  * KeyWord Index (ascii from position 32 (' ') to 126 (~))
  105  * ----------
  106  */
  107 #define KeyWord_INDEX_SIZE              ('~' - ' ')     /* number of index slots */
  108 #define KeyWord_INDEX_FILTER(_c)        ((_c) <= ' ' || (_c) >= '~' ? 0 : 1)    /* 1 only for chars strictly between ' ' and '~' */
 109
  110 /* ----------
  111  * Maximal length of one node
  112  * ----------
  113  */
  114 #define DCH_MAX_ITEM_SIZ           12   /* max localized day name               */
  115 #define NUM_MAX_ITEM_SIZ                8       /* roman number (RN has 15 chars); NOTE(review): 8 < 15, presumably RN output is sized separately -- confirm */
 116
 117
  118 /* ----------
  119  * Format parser structs
  120  * ----------
  121  */
  122 typedef struct                  /* one entry of a prefix/suffix table such as DCH_suff[] */
  123 {
  124         const char *name;                       /* suffix string                */
  125         int                     len,                    /* suffix length                */
  126                                 id,                             /* DCH_S_* code, OR'ed into node->suffix */
  127                                 type;                   /* SUFFTYPE_PREFIX or SUFFTYPE_POSTFIX */
  128 } KeySuffix;
 129
  130 /* ----------
  131  * FromCharDateMode
  132  * ----------
  133  *
  134  * This value identifies which of several distinct (and mutually
  135  * exclusive) date conventions a keyword belongs to.
  136  */
  137 typedef enum
  138 {
  139         FROM_CHAR_DATE_NONE = 0,        /* Value does not affect date mode. */
  140         FROM_CHAR_DATE_GREGORIAN,       /* Gregorian (day, month, year) style date */
  141         FROM_CHAR_DATE_ISOWEEK,         /* ISO 8601 week date */
  142 } FromCharDateMode;
 143
  144 typedef struct                  /* one entry of a keyword table such as DCH_keywords[] */
  145 {
  146         const char *name;                       /* keyword text, e.g. "HH24" */
  147         int                     len;                    /* length of name */
  148         int                     id;                             /* keyword id (a DCH_* value in DCH_keywords[]) */
  149         bool            is_digit;               /* NOTE(review): presumably true for numeric fields -- confirm at use sites */
  150         FromCharDateMode date_mode;     /* date convention the keyword belongs to */
  151 } KeyWord;
 152
  153 typedef struct                  /* one node of a parsed format picture */
  154 {
  155         uint8           type;                   /* NODE_TYPE_XXX, see below */
  156         char            character[MAX_MULTIBYTE_CHAR_LEN + 1];  /* if type is CHAR */
  157         uint8           suffix;                 /* keyword prefix/suffix code (OR of DCH_S_* codes), if any */
  158         const KeyWord *key;                     /* if type is ACTION */
  159 } FormatNode;
 160
  161 #define NODE_TYPE_END           1       /* NODE_TYPE_*: values for FormatNode.type */
  162 #define NODE_TYPE_ACTION        2
  163 #define NODE_TYPE_CHAR          3
  164 #define NODE_TYPE_SEPARATOR     4
  165 #define NODE_TYPE_SPACE         5
  166
  167 #define SUFFTYPE_PREFIX         1       /* SUFFTYPE_*: values for KeySuffix.type */
  168 #define SUFFTYPE_POSTFIX        2
  169
  170 #define CLOCK_24_HOUR           0       /* CLOCK_*: 12 or 24 hour clock (see TmFromChar.clock) */
  171 #define CLOCK_12_HOUR           1
 172
 173
  174 /* ----------
  175  * Full month names and short day names (both arrays are NULL-terminated)
  176  * ----------
  177  */
  178 static const char *const months_full[] = {
  179         "January", "February", "March", "April", "May", "June", "July",
  180         "August", "September", "October", "November", "December", NULL
  181 };
  182
  183 static const char *const days_short[] = {
  184         "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
  185 };
 186
  187 /* ----------
  188  * AD / BC
  189  * ----------
  190  *      There is no 0 AD.  Years go from 1 BC to 1 AD, so for non-interval
  191  *      values ADJUST_YEAR turns a year <= 0 into the positive number
  192  *      1 - year (0 -> 1, -1 -> 2).  For interval years, we just return the year.
  193  */
  194 #define ADJUST_YEAR(year, is_interval)  ((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))
  195
  196 #define A_D_STR         "A.D."
  197 #define a_d_STR         "a.d."
  198 #define AD_STR          "AD"
  199 #define ad_STR          "ad"
  200
  201 #define B_C_STR         "B.C."
  202 #define b_c_STR         "b.c."
  203 #define BC_STR          "BC"
  204 #define bc_STR          "bc"
 205
  206 /*
  207  * AD / BC strings for seq_search.
  208  *
  209  * These are given in two variants, a long form with periods and a standard
  210  * form without.
  211  *
  212  * The array is laid out such that matches for AD have an even index, and
  213  * matches for BC have an odd index.  So the boolean value for BC is given by
  214  * taking the array index of the match, modulo 2.  Both arrays end with NULL.
  215  */
  216 static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
  217 static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
 218
  219 /* ----------
  220  * AM / PM marker strings (with and without periods, upper and lower case)
  221  * ----------
  222  */
  223 #define A_M_STR         "A.M."
  224 #define a_m_STR         "a.m."
  225 #define AM_STR          "AM"
  226 #define am_STR          "am"
  227
  228 #define P_M_STR         "P.M."
  229 #define p_m_STR         "p.m."
  230 #define PM_STR          "PM"
  231 #define pm_STR          "pm"
 232
  233 /*
  234  * AM / PM strings for seq_search.
  235  *
  236  * These are given in two variants, a long form with periods and a standard
  237  * form without.
  238  *
  239  * The array is laid out such that matches for AM have an even index, and
  240  * matches for PM have an odd index.  So the boolean value for PM is given by
  241  * taking the array index of the match, modulo 2.  Both arrays end with NULL.
  242  */
  243 static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
  244 static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
 245
  246 /* ----------
  247  * Months as roman numerals, listed from XII down to I (NULL-terminated)
  248  * (Must be in reverse order for seq_search (in FROM_CHAR), because
  249  *      'VIII' must have higher precedence than 'V')
  250  * ----------
  251  */
  252 static const char *const rm_months_upper[] =
  253 {"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};
  254
  255 static const char *const rm_months_lower[] =
  256 {"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};
 257
  258 /* ----------
  259  * Roman numerals for 1-9 (rm1), 10-90 (rm10) and 100-900 (rm100); NULL-terminated
  260  * ----------
  261  */
  262 static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
  263 static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
  264 static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
 265
  266 /* ----------
  267  * Ordinal postfixes, upper case (numTH) and lower case (numth); NULL-terminated
  268  * ----------
  269  */
  270 static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
  271 static const char *const numth[] = {"st", "nd", "rd", "th", NULL};
 272
  273 /* ----------
  274  * Flags & Options:
  275  * ----------
  276  */
  277 #define TH_UPPER                1       /* TH_*: the values S_TH_TYPE() evaluates to */
  278 #define TH_LOWER                2
 279
  280 /* ----------
  281  * Number description struct
  282  * ----------
  283  */
  284 typedef struct
  285 {
  286         int                     pre,                    /* (count) numbers before decimal */
  287                                 post,                   /* (count) numbers after decimal  */
  288                                 lsign,                  /* want locale's sign             */
  289                                 flag,                   /* number parameters (OR of NUM_F_* bits) */
  290                                 pre_lsign_num,  /* tmp value for lsign            */
  291                                 multi,                  /* multiplier for 'V'             */
  292                                 zero_start,             /* position of first zero         */
  293                                 zero_end,               /* position of last zero          */
  294                                 need_locale;    /* is locale data needed?         */
  295 } NUMDesc;
 296
  297 /* ----------
  298  * Flags for NUMBER version (bits tested in NUMDesc.flag by the IS_* macros)
  299  * ----------
  300  */
  301 #define NUM_F_DECIMAL           (1 << 1)
  302 #define NUM_F_LDECIMAL          (1 << 2)
  303 #define NUM_F_ZERO                      (1 << 3)
  304 #define NUM_F_BLANK                     (1 << 4)
  305 #define NUM_F_FILLMODE          (1 << 5)
  306 #define NUM_F_LSIGN                     (1 << 6)
  307 #define NUM_F_BRACKET           (1 << 7)
  308 #define NUM_F_MINUS                     (1 << 8)
  309 #define NUM_F_PLUS                      (1 << 9)
  310 #define NUM_F_ROMAN                     (1 << 10)
  311 #define NUM_F_MULTI                     (1 << 11)
  312 #define NUM_F_PLUS_POST         (1 << 12)
  313 #define NUM_F_MINUS_POST        (1 << 13)
  314 #define NUM_F_EEEE                      (1 << 14)
  315
  316 #define NUM_LSIGN_PRE   (-1)    /* NOTE(review): NUM_LSIGN_* are presumably values of NUMDesc.lsign -- confirm */
  317 #define NUM_LSIGN_POST  1
  318 #define NUM_LSIGN_NONE  0
 319
  320 /* ----------
  321  * Tests: each yields nonzero when the matching NUM_F_* bit is set in (_f)->flag
  322  * ----------
  323  */
  324 #define IS_DECIMAL(_f)  ((_f)->flag & NUM_F_DECIMAL)
  325 #define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
  326 #define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
  327 #define IS_BLANK(_f)    ((_f)->flag & NUM_F_BLANK)
  328 #define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
  329 #define IS_BRACKET(_f)  ((_f)->flag & NUM_F_BRACKET)
  330 #define IS_MINUS(_f)    ((_f)->flag & NUM_F_MINUS)
  331 #define IS_LSIGN(_f)    ((_f)->flag & NUM_F_LSIGN)
  332 #define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
  333 #define IS_ROMAN(_f)    ((_f)->flag & NUM_F_ROMAN)
  334 #define IS_MULTI(_f)    ((_f)->flag & NUM_F_MULTI)
  335 #define IS_EEEE(_f)             ((_f)->flag & NUM_F_EEEE)
 336
  337 /* ----------
  338  * Format picture cache
  339  *
  340  * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
  341  * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
  342  *
  343  * For simplicity, the cache entries are fixed-size, so they allow for the
  344  * worst case of a FormatNode for each byte in the picture string.
  345  *
  346  * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
  347  * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
  348  * we don't waste too much space by palloc'ing them individually.  Be sure
  349  * to adjust those macros if you add fields to those structs.
  350  *
  351  * The max number of entries in the two caches is DCH_CACHE_ENTRIES
  352  * and NUM_CACHE_ENTRIES, respectively.
  353  * ----------
  354  */
  355 #define DCH_CACHE_OVERHEAD \
  356         MAXALIGN(sizeof(bool) + sizeof(int))    /* NOTE(review): DCHCacheEntry has two bool fields but one is counted; presumably covered by alignment padding -- confirm */
  357 #define NUM_CACHE_OVERHEAD \
  358         MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
  359
  360 #define DCH_CACHE_SIZE \
  361         ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
  362 #define NUM_CACHE_SIZE \
  363         ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
  364
  365 #define DCH_CACHE_ENTRIES       20
  366 #define NUM_CACHE_ENTRIES       20
 367
  368 typedef struct                  /* one cached date/time format picture */
  369 {
  370         FormatNode      format[DCH_CACHE_SIZE + 1];     /* FormatNodes for the picture */
  371         char            str[DCH_CACHE_SIZE + 1];        /* the format picture string */
  372         bool            std;                    /* NOTE(review): presumably the "standard" (STD_FLAG) marker -- confirm */
  373         bool            valid;
  374         int                     age;                    /* NOTE(review): presumably tied to DCHCounter aging -- confirm */
  375 } DCHCacheEntry;
 376
  377 typedef struct                  /* one cached number format picture */
  378 {
  379         FormatNode      format[NUM_CACHE_SIZE + 1];     /* FormatNodes for the picture */
  380         char            str[NUM_CACHE_SIZE + 1];        /* the format picture string */
  381         bool            valid;
  382         int                     age;                    /* NOTE(review): presumably tied to NUMCounter aging -- confirm */
  383         NUMDesc         Num;                    /* number description stored with the picture */
  384 } NUMCacheEntry;
 385
  386 /* global cache for date/time format pictures (array of pointers to entries) */
  387 static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
  388 static int      n_DCHCache = 0;         /* current number of entries */
  389 static int      DCHCounter = 0;         /* aging-event counter */
  390
  391 /* global cache for number format pictures (array of pointers to entries) */
  392 static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
  393 static int      n_NUMCache = 0;         /* current number of entries */
  394 static int      NUMCounter = 0;         /* aging-event counter */
 395
  396 /* ----------
  397  * For char->date/time conversion (a TmFromChar is cleared with ZERO_tmfc)
  398  * ----------
  399  */
  400 typedef struct
  401 {
  402         FromCharDateMode mode;
  403         int                     hh,
  404                                 pm,
  405                                 mi,
  406                                 ss,
  407                                 ssss,
  408                                 d,                              /* stored as 1-7, Sunday = 1, 0 means missing */
  409                                 dd,
  410                                 ddd,
  411                                 mm,
  412                                 ms,
  413                                 year,
  414                                 bc,
  415                                 ww,
  416                                 w,
  417                                 cc,
  418                                 j,
  419                                 us,
  420                                 yysz,                   /* is it YY or YYYY ? */
  421                                 clock,                  /* 12 or 24 hour clock? (see CLOCK_* constants) */
  422                                 tzsign,                 /* +1, -1, or 0 if no TZH/TZM fields */
  423                                 tzh,
  424                                 tzm,
  425                                 ff;                             /* fractional precision */
  426         bool            has_tz;                 /* was there a TZ field? */
  427         int                     gmtoffset;              /* GMT offset of fixed-offset zone abbrev */
  428         pg_tz      *tzp;                        /* pg_tz for dynamic abbrev */
  429         char       *abbrev;                     /* dynamic abbrev */
  430 } TmFromChar;
  431
  432 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar)) /* zero-fill the whole struct */
  433
  434 struct fmt_tz                                   /* do_to_timestamp's timezone info output */
  435 {
  436         bool            has_tz;                 /* was there any TZ/TZH/TZM field? */
  437         int                     gmtoffset;              /* GMT offset in seconds */
  438 };
 439
  440 /* ----------
  441  * Debug (both macros are empty unless DEBUG_TO_FROM_CHAR is defined).  NOTE(review): DEBUG_TM reads tm_isdst, which struct fmt_tm lacks.
  442  * ----------
  443  */
  444 #ifdef DEBUG_TO_FROM_CHAR
  445 #define DEBUG_TMFC(_X) \
  446                 elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
  447                         (_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
  448                         (_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
  449                         (_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
  450                         (_X)->yysz, (_X)->clock)
  451 #define DEBUG_TM(_X) \
  452                 elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nnisdst %d\nmon %d\n",\
  453                         (_X)->tm_sec, (_X)->tm_year,\
  454                         (_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
  455                         (_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
  456 #else
  457 #define DEBUG_TMFC(_X)
  458 #define DEBUG_TM(_X)
  459 #endif
 460
  461 /* ----------
  462  * Datetime to char conversion
  463  *
  464  * To support intervals as well as timestamps, we use a custom "tm" struct
  465  * that is almost like struct pg_tm, but has a 64-bit tm_hour field.
  466  * We omit the tm_isdst and tm_zone fields, which are not used here.
  467  * ----------
  468  */
  469 struct fmt_tm
  470 {
  471         int                     tm_sec;
  472         int                     tm_min;
  473         int64           tm_hour;                /* 64-bit, unlike struct pg_tm */
  474         int                     tm_mday;
  475         int                     tm_mon;
  476         int                     tm_year;
  477         int                     tm_wday;
  478         int                     tm_yday;
  479         long int        tm_gmtoff;
  480 };
 481
  482 typedef struct TmToChar
  483 {
  484         struct fmt_tm tm;                       /* almost the classic 'tm' struct */
  485         fsec_t          fsec;                   /* fractional seconds */
  486         const char *tzn;                        /* timezone */
  487 } TmToChar;
  488
  489 #define tmtcTm(_X)      (&(_X)->tm)     /* pointer to the embedded struct fmt_tm */
  490 #define tmtcTzn(_X) ((_X)->tzn)         /* the tzn field itself (usable as an lvalue) */
  491 #define tmtcFsec(_X)    ((_X)->fsec)    /* the fsec field itself (usable as an lvalue) */
 492
  493 /* Note: this is used to copy pg_tm to fmt_tm field by field, so not quite a bitwise copy */
  494 #define COPY_tm(_DST, _SRC) \
  495 do {    \
  496         (_DST)->tm_sec = (_SRC)->tm_sec; \
  497         (_DST)->tm_min = (_SRC)->tm_min; \
  498         (_DST)->tm_hour = (_SRC)->tm_hour; \
  499         (_DST)->tm_mday = (_SRC)->tm_mday; \
  500         (_DST)->tm_mon = (_SRC)->tm_mon; \
  501         (_DST)->tm_year = (_SRC)->tm_year; \
  502         (_DST)->tm_wday = (_SRC)->tm_wday; \
  503         (_DST)->tm_yday = (_SRC)->tm_yday; \
  504         (_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \
  505 } while(0)
  506
  507 /* Caution: this is used to zero both pg_tm and fmt_tm structs */
  508 #define ZERO_tm(_X) \
  509 do {    \
  510         memset(_X, 0, sizeof(*(_X))); \
  511         (_X)->tm_mday = (_X)->tm_mon = 1; /* day and month start at 1, not 0 */ \
  512 } while(0)
  513
  514 #define ZERO_tmtc(_X) \
  515 do { \
  516         ZERO_tm( tmtcTm(_X) ); \
  517         tmtcFsec(_X) = 0; \
  518         tmtcTzn(_X) = NULL; \
  519 } while(0)
 520
  521 /*
  522  *      to_char(time) appears to to_char() as an interval, so this check
  523  *      is really for interval and time data types.  It reads a variable named is_interval at the point of use.
  524  */
  525 #define INVALID_FOR_INTERVAL  \
  526 do { \
  527         if (is_interval) \
  528                 ereport(ERROR, \
  529                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
  530                                  errmsg("invalid format specification for an interval value"), \
  531                                  errhint("Intervals are not tied to specific calendar dates."))); \
  532 } while(0)
 533
 534 /*****************************************************************************
 535  *                      KeyWord definitions
 536  *****************************************************************************/
 537
  538 /* ----------
  539  * Suffixes (FormatNode.suffix is an OR of these codes)
  540  * ----------
  541  */
  542 #define DCH_S_FM        0x01
  543 #define DCH_S_TH        0x02
  544 #define DCH_S_th        0x04
  545 #define DCH_S_SP        0x08
  546 #define DCH_S_TM        0x10
  547
  548 /* ----------
  549  * Suffix tests (each takes a suffix code and yields 1 or 0, except S_TH_TYPE)
  550  * ----------
  551  */
  552 #define S_THth(_s)      ((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
  553 #define S_TH(_s)        (((_s) & DCH_S_TH) ? 1 : 0)
  554 #define S_th(_s)        (((_s) & DCH_S_th) ? 1 : 0)
  555 #define S_TH_TYPE(_s)   (((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)       /* TH_LOWER whenever DCH_S_TH is unset */
  556
  557 /* Oracle toggles FM behavior, we don't; see docs. */
  558 #define S_FM(_s)        (((_s) & DCH_S_FM) ? 1 : 0)
  559 #define S_SP(_s)        (((_s) & DCH_S_SP) ? 1 : 0)
  560 #define S_TM(_s)        (((_s) & DCH_S_TM) ? 1 : 0)
 561
  562 /* ----------
  563  * Suffixes definition for DATE-TIME TO/FROM CHAR
  564  * ----------
  565  */
  566 #define TM_SUFFIX_LEN   2       /* length of the "TM" prefix */
  567
  568 static const KeySuffix DCH_suff[] = {
  569         {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
  570         {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
  571         {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
  572         {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
  573         {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
  574         {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
  575         {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
  576         /* last: terminator entry with a NULL name */
  577         {NULL, 0, 0, 0}
  578 };
 579
 580
 581 /* ----------
 582  * Format-pictures (KeyWord).
 583  *
  584  * The KeyWord table is sorted alphabetically, *BUT* similar strings are
  585  *                sorted from complicated to easy:
 586  *
 587  *      (example: "DDD","DD","Day","D" )
 588  *
  589  * (This specific sort order is needed by the sequential search algorithm,
  590  * because the strings have no exact end: which keyword is in "HH12blabla",
  591  * "HH" or "HH12"?  "HH12" must be tried first, because "HH" is also in the
  592  * string, but it is not the right match.)
 593  *
 594  * (!)
  595  *       - The position of each keyword must match its position in the enum DCH/NUM_poz.
 596  * (!)
 597  *
  598  * For fast search the 'int index[]' is used; the index is an ASCII table from
  599  * position 32 (' ') to 126 (~), holding the DCH_ / NUM_ enum value for each ASCII
  600  * position, or -1 if the char is not used by any KeyWord. Search example for
  601  * string "MM":
  602  *      1)      look up index['M' - 32],
  603  *      2)      take the keyword's position (enum DCH_MI) from the index
  604  *      3)      run a sequential search in keywords[] from this position
 605  *
 606  * ----------
 607  */
 608
  609 typedef enum
  610 {
  611         DCH_A_D,
  612         DCH_A_M,
  613         DCH_AD,
  614         DCH_AM,
  615         DCH_B_C,
  616         DCH_BC,
  617         DCH_CC,
  618         DCH_DAY,
  619         DCH_DDD,
  620         DCH_DD,
  621         DCH_DY,
  622         DCH_Day,
  623         DCH_Dy,
  624         DCH_D,
  625         DCH_FF1,
  626         DCH_FF2,
  627         DCH_FF3,
  628         DCH_FF4,
  629         DCH_FF5,
  630         DCH_FF6,
  631         DCH_FX,                                         /* global suffix */
  632         DCH_HH24,
  633         DCH_HH12,
  634         DCH_HH,
  635         DCH_IDDD,
  636         DCH_ID,
  637         DCH_IW,
  638         DCH_IYYY,
  639         DCH_IYY,
  640         DCH_IY,
  641         DCH_I,
  642         DCH_J,
  643         DCH_MI,
  644         DCH_MM,
  645         DCH_MONTH,
  646         DCH_MON,
  647         DCH_MS,
  648         DCH_Month,
  649         DCH_Mon,
  650         DCH_OF,
  651         DCH_P_M,
  652         DCH_PM,
  653         DCH_Q,
  654         DCH_RM,
  655         DCH_SSSSS,                                      /* note: the "SSSSS" entry in DCH_keywords[] carries id DCH_SSSS */
  656         DCH_SSSS,
  657         DCH_SS,
  658         DCH_TZH,
  659         DCH_TZM,
  660         DCH_TZ,
  661         DCH_US,
  662         DCH_WW,
  663         DCH_W,
  664         DCH_Y_YYY,
  665         DCH_YYYY,
  666         DCH_YYY,
  667         DCH_YY,
  668         DCH_Y,
  669         DCH_a_d,                                        /* lower-case spellings start here */
  670         DCH_a_m,
  671         DCH_ad,
  672         DCH_am,
  673         DCH_b_c,
  674         DCH_bc,
  675         DCH_cc,                                         /* note: the "cc" entry in DCH_keywords[] carries id DCH_CC */
  676         DCH_day,
  677         DCH_ddd,
  678         DCH_dd,
  679         DCH_dy,
  680         DCH_d,
  681         DCH_ff1,
  682         DCH_ff2,
  683         DCH_ff3,
  684         DCH_ff4,
  685         DCH_ff5,
  686         DCH_ff6,
  687         DCH_fx,
  688         DCH_hh24,
  689         DCH_hh12,
  690         DCH_hh,
  691         DCH_iddd,
  692         DCH_id,
  693         DCH_iw,
  694         DCH_iyyy,
  695         DCH_iyy,
  696         DCH_iy,
  697         DCH_i,
  698         DCH_j,
  699         DCH_mi,
  700         DCH_mm,
  701         DCH_month,
  702         DCH_mon,
  703         DCH_ms,
  704         DCH_of,
  705         DCH_p_m,
  706         DCH_pm,
  707         DCH_q,
  708         DCH_rm,
  709         DCH_sssss,
  710         DCH_ssss,
  711         DCH_ss,
  712         DCH_tzh,
  713         DCH_tzm,
  714         DCH_tz,
  715         DCH_us,
  716         DCH_ww,
  717         DCH_w,
  718         DCH_y_yyy,
  719         DCH_yyyy,
  720         DCH_yyy,
  721         DCH_yy,
  722         DCH_y,
  723
  724         /* last: not a keyword; its value is the number of entries above */
  725         _DCH_last_
  726 }                       DCH_poz;
 727
  728 typedef enum
  729 {
  730         NUM_COMMA,
  731         NUM_DEC,
  732         NUM_0,
  733         NUM_9,
  734         NUM_B,
  735         NUM_C,
  736         NUM_D,
  737         NUM_E,
  738         NUM_FM,
  739         NUM_G,
  740         NUM_L,
  741         NUM_MI,
  742         NUM_PL,
  743         NUM_PR,
  744         NUM_RN,
  745         NUM_SG,
  746         NUM_SP,
  747         NUM_S,
  748         NUM_TH,
  749         NUM_V,
  750         NUM_b,                                          /* lower-case spellings start here */
  751         NUM_c,
  752         NUM_d,
  753         NUM_e,
  754         NUM_fm,
  755         NUM_g,
  756         NUM_l,
  757         NUM_mi,
  758         NUM_pl,
  759         NUM_pr,
  760         NUM_rn,
  761         NUM_sg,
  762         NUM_sp,
  763         NUM_s,
  764         NUM_th,
  765         NUM_v,
  766
  767         /* last: not a keyword; its value is the number of entries above */
  768         _NUM_last_
  769 }                       NUM_poz;
 770
 771 /* ----------
 772  * KeyWords for DATE-TIME version
 773  * ----------
 774  */
 775 static const KeyWord DCH_keywords[] = {
 776 /*      name, len, id, is_digit, date_mode */
 777         {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE},       /* A */
 778         {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
 779         {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
 780         {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
 781         {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE},       /* B */
 782         {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
 783         {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},   /* C */
 784         {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE},        /* D */
 785         {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
 786         {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
 787         {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
 788         {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
 789         {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
 790         {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
 791         {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},        /* F */
 792         {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
 793         {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
 794         {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
 795         {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
 796         {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
 797         {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
 798         {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},       /* H */
 799         {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
 800         {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
 801         {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},    /* I */
 802         {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
 803         {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
 804         {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
 805         {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
 806         {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
 807         {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
 808         {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
 809         {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},   /* M */
 810         {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
 811         {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
 812         {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
 813         {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
 814         {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
 815         {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
 816         {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},  /* O */
 817         {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE},       /* P */
 818         {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
 819         {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
 820         {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
 821         {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},      /* S */
 822         {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
 823         {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
 824         {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},        /* T */
 825         {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
 826         {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
 827         {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE},   /* U */
 828         {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},      /* W */
 829         {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
 830         {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},        /* Y */
 831         {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
 832         {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
 833         {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
 834         {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
 835         {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE},       /* a */
 836         {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
 837         {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
 838         {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
 839         {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE},       /* b */
 840         {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
 841         {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE},   /* c */
 842         {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE},        /* d */
 843         {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
 844         {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
 845         {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
 846         {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
 847         {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE},        /* f */
 848         {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
 849         {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
 850         {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
 851         {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
 852         {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
 853         {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
 854         {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE},       /* h */
 855         {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
 856         {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
 857         {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK},    /* i */
 858         {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
 859         {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
 860         {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
 861         {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
 862         {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
 863         {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
 864         {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
 865         {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE},   /* m */
 866         {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
 867         {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
 868         {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
 869         {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
 870         {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE},  /* o */
 871         {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE},       /* p */
 872         {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
 873         {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
 874         {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
 875         {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE},      /* s */
 876         {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
 877         {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
 878         {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE},        /* t */
 879         {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
 880         {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},
 881         {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE},   /* u */
 882         {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN},      /* w */
 883         {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
 884         {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN},        /* y */
 885         {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
 886         {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
 887         {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
 888         {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
 889
 890         /* last */
 891         {NULL, 0, 0, 0, 0}
 892 };
 893
/* ----------
 * KeyWords for NUMBER version
 *
 * Each entry is {name, len, id}.  The is_digit and date_mode fields are not
 * relevant here, which is why the initializers stop after the id.
 *
 * The order of the entries matters: index_seq_search() starts at the entry
 * selected by NUM_index[] and walks forward for as long as the first
 * character of the entry still equals the first input character, returning
 * the first entry whose whole name is a prefix of the input.  Therefore
 *      - entries sharing a first character must be adjacent, and
 *      - a longer name must come before a shorter name that is a prefix
 *        of it ("SG" and "SP" before "S").
 *
 * The right-hand comment column ("is in Index") marks, for each first
 * character, the entry that NUM_index[] points to.  Most lower-case
 * spellings carry the same id as the upper-case one; "rn" and "th" have ids
 * of their own.
 *
 * NOTE(review): NUM_index[] values are used directly as offsets into this
 * array, so the entry order presumably has to agree with the order of the
 * NUM_* constants -- confirm against their definition before reordering.
 * ----------
 */
static const KeyWord NUM_keywords[] = {
/*      name, len, id                   is in Index */
        {",", 1, NUM_COMMA},            /* , */
        {".", 1, NUM_DEC},                      /* . */
        {"0", 1, NUM_0},                        /* 0 */
        {"9", 1, NUM_9},                        /* 9 */
        {"B", 1, NUM_B},                        /* B */
        {"C", 1, NUM_C},                        /* C */
        {"D", 1, NUM_D},                        /* D */
        {"EEEE", 4, NUM_E},                     /* E */
        {"FM", 2, NUM_FM},                      /* F */
        {"G", 1, NUM_G},                        /* G */
        {"L", 1, NUM_L},                        /* L */
        {"MI", 2, NUM_MI},                      /* M */
        {"PL", 2, NUM_PL},                      /* P */
        {"PR", 2, NUM_PR},
        {"RN", 2, NUM_RN},                      /* R */
        {"SG", 2, NUM_SG},                      /* S */
        {"SP", 2, NUM_SP},
        {"S", 1, NUM_S},
        {"TH", 2, NUM_TH},                      /* T */
        {"V", 1, NUM_V},                        /* V */
        {"b", 1, NUM_B},                        /* b */
        {"c", 1, NUM_C},                        /* c */
        {"d", 1, NUM_D},                        /* d */
        {"eeee", 4, NUM_E},                     /* e */
        {"fm", 2, NUM_FM},                      /* f */
        {"g", 1, NUM_G},                        /* g */
        {"l", 1, NUM_L},                        /* l */
        {"mi", 2, NUM_MI},                      /* m */
        {"pl", 2, NUM_PL},                      /* p */
        {"pr", 2, NUM_PR},
        {"rn", 2, NUM_rn},                      /* r */
        {"sg", 2, NUM_SG},                      /* s */
        {"sp", 2, NUM_SP},
        {"s", 1, NUM_S},
        {"th", 2, NUM_th},                      /* t */
        {"v", 1, NUM_V},                        /* v */

        /* last: the NULL name is what stops the scan in index_seq_search() */
        {NULL, 0, 0}
};
 942
 943
/* ----------
 * KeyWords index for DATE-TIME version
 *
 * Lookup table read by index_seq_search(): the slot for character c is
 * (c - ' ').  A slot holds -1 when no keyword begins with c; otherwise it
 * holds the offset, in the keyword array passed together with this table,
 * of the first keyword beginning with c.
 *
 * Layout: the first row has 8 slots (character codes 32..39) and every
 * later row has 10, so each later row starts at a code that is a multiple
 * of ten and the column header gives the last digit of the code.
 * ----------
 */
static const int DCH_index[KeyWord_INDEX_SIZE] = {
/*
0       1       2       3       4       5       6       7       8       9
*/
        /*---- first 0..31 chars are skipped ----*/

        /* codes 32..39 */
        -1, -1, -1, -1, -1, -1, -1, -1,
        /* codes 40..59 */
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        /* codes 60..89: 'A' is 65, 'Y' is 89 */
        -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
        DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
        DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
        /* codes 90..125: 'a' is 97, 'y' is 121 */
        -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
        DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
        -1, DCH_of, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tzh, DCH_us, -1, DCH_ww,
        -1, DCH_y_yyy, -1, -1, -1, -1

        /*---- no entries are listed for chars 126 ('~') and above ----*/
};
 967
/* ----------
 * KeyWords index for NUMBER version
 *
 * Lookup table read by index_seq_search(): the slot for character c is
 * (c - ' ').  A slot holds -1 when no keyword begins with c; otherwise it
 * holds the offset, in the keyword array passed together with this table,
 * of the first keyword beginning with c.
 *
 * Layout: the first row has 8 slots (character codes 32..39) and every
 * later row has 10, so each later row starts at a code that is a multiple
 * of ten and the column header gives the last digit of the code.
 * ----------
 */
static const int NUM_index[KeyWord_INDEX_SIZE] = {
/*
0       1       2       3       4       5       6       7       8       9
*/
        /*---- first 0..31 chars are skipped ----*/

        /* codes 32..39 */
        -1, -1, -1, -1, -1, -1, -1, -1,
        /* codes 40..59: ',' is 44, '.' is 46, '0' is 48, '9' is 57 */
        -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
        -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
        /* codes 60..89: 'B' is 66, 'V' is 86 */
        -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
        NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
        NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
        /* codes 90..125: 'b' is 98, 'v' is 118 */
        -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
        NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
        -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
        -1, -1, -1, -1, -1, -1

        /*---- no entries are listed for chars 126 ('~') and above ----*/
};
 991
 992 /* ----------
 993  * Number processor struct
 994  * ----------
 995  */
 996 typedef struct NUMProc
 997 {
 998         bool            is_to_char;
 999         NUMDesc    *Num;                        /* number description           */
1000
1001         int                     sign,                   /* '-' or '+'                   */
1002                                 sign_wrote,             /* was sign write               */
1003                                 num_count,              /* number of write digits       */
1004                                 num_in,                 /* is inside number             */
1005                                 num_curr,               /* current position in number   */
1006                                 out_pre_spaces, /* spaces before first digit    */
1007
1008                                 read_dec,               /* to_number - was read dec. point      */
1009                                 read_post,              /* to_number - number of dec. digit */
1010                                 read_pre;               /* to_number - number non-dec. digit */
1011
1012         char       *number,                     /* string with number   */
1013                            *number_p,           /* pointer to current number position */
1014                            *inout,                      /* in / out buffer      */
1015                            *inout_p,            /* pointer to current inout position */
1016                            *last_relevant,      /* last relevant number after decimal point */
1017
1018                            *L_negative_sign,    /* Locale */
1019                            *L_positive_sign,
1020                            *decimal,
1021                            *L_thousands_sep,
1022                            *L_currency_symbol;
1023 } NUMProc;
1024
/*
 * Return flags for DCH_from_char().
 *
 * Each constant is a single distinct bit, so the flags can be OR-ed together
 * into one mask.
 *
 * NOTE(review): DCH_from_char() is declared as returning void in this file;
 * presumably the bits are reported through a flags output argument instead
 * (do_to_timestamp() has a "uint32 *flags" parameter) -- confirm.
 */
#define DCH_DATED       (1 << 0)
#define DCH_TIMED       (1 << 1)
#define DCH_ZONED       (1 << 2)
1029
/* ----------
 * Functions
 *
 * Forward declarations of this file's static functions, so that they can be
 * called before the point where they are defined.
 * ----------
 */

/* format-picture parser and the lookup helpers it calls */
static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
                                                                           const int *index);
static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
static bool is_separator_char(const char *str);
static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
                                                 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);

static void DCH_to_char(FormatNode *node, bool is_interval,
                                                TmToChar *in, char *out, Oid collid);
static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
                                                  Oid collid, bool std, Node *escontext);

/* declared only in builds that define DEBUG_TO_FROM_CHAR */
#ifdef DEBUG_TO_FROM_CHAR
static void dump_index(const KeyWord *k, const int *index);
static void dump_node(FormatNode *node, int max);
#endif

static const char *get_th(char *num, int type);
static char *str_numth(char *dest, char *num, int type);
static int      adjust_partial_year_to_2020(int year);
static int      strspace_len(const char *str);
static bool from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
                                                           Node *escontext);
static bool from_char_set_int(int *dest, const int value, const FormatNode *node,
                                                          Node *escontext);
static int      from_char_parse_int_len(int *dest, const char **src, const int len,
                                                                        FormatNode *node, Node *escontext);
static int      from_char_parse_int(int *dest, const char **src, FormatNode *node,
                                                                Node *escontext);
static int      seq_search_ascii(const char *name, const char *const *array, int *len);
static int      seq_search_localized(const char *name, char **array, int *len,
                                                                 Oid collid);
static bool from_char_seq_search(int *dest, const char **src,
                                                                 const char *const *array,
                                                                 char **localized_array, Oid collid,
                                                                 FormatNode *node, Node *escontext);
static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
                                                        struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
                                                        int *fprec, uint32 *flags, Node *escontext);
static char *fill_str(char *str, int c, int max);
static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
static char *int_to_roman(int number);
static void NUM_prepare_locale(NUMProc *Np);
static char *get_last_relevant_decnum(char *num);
static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
static void NUM_numpart_to_char(NUMProc *Np, int id);
static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
                                                   char *number, int input_len, int to_char_out_pre_spaces,
                                                   int sign, bool is_to_char, Oid collid);
static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
static NUMCacheEntry *NUM_cache_getnew(const char *str);
static NUMCacheEntry *NUM_cache_search(const char *str);
static NUMCacheEntry *NUM_cache_fetch(const char *str);
1090
1091
1092 /* ----------
1093  * Fast sequential search, use index for data selection which
1094  * go to seq. cycle (it is very fast for unwanted strings)
1095  * (can't be used binary search in format parsing)
1096  * ----------
1097  */
1098 static const KeyWord *
1099 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1100 {
1101         int                     poz;
1102
1103         if (!KeyWord_INDEX_FILTER(*str))
1104                 return NULL;
1105
1106         if ((poz = *(index + (*str - ' '))) > -1)
1107         {
1108                 const KeyWord *k = kw + poz;
1109
1110                 do
1111                 {
1112                         if (strncmp(str, k->name, k->len) == 0)
1113                                 return k;
1114                         k++;
1115                         if (!k->name)
1116                                 return NULL;
1117                 } while (*str == *k->name);
1118         }
1119         return NULL;
1120 }
1121
1122 static const KeySuffix *
1123 suff_search(const char *str, const KeySuffix *suf, int type)
1124 {
1125         const KeySuffix *s;
1126
1127         for (s = suf; s->name != NULL; s++)
1128         {
1129                 if (s->type != type)
1130                         continue;
1131
1132                 if (strncmp(str, s->name, s->len) == 0)
1133                         return s;
1134         }
1135         return NULL;
1136 }
1137
/*
 * Report whether the first byte of "str" is a separator character, that is
 * a printable ASCII character (0x21..0x7E) which is neither an ASCII letter
 * nor an ASCII digit.  Only that one byte is examined.
 */
static bool
is_separator_char(const char *str)
{
        const char      c = *str;

        /* outside the printable ASCII range (space itself is excluded too) */
        if (c <= 0x20 || c >= 0x7F)
                return false;

        if (c >= 'A' && c <= 'Z')
                return false;
        if (c >= 'a' && c <= 'z')
                return false;
        if (c >= '0' && c <= '9')
                return false;

        return true;
}
1147
1148 /* ----------
1149  * Prepare NUMDesc (number description struct) via FormatNode struct
1150  * ----------
1151  */
1152 static void
1153 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1154 {
1155         if (n->type != NODE_TYPE_ACTION)
1156                 return;
1157
1158         if (IS_EEEE(num) && n->key->id != NUM_E)
1159                 ereport(ERROR,
1160                                 (errcode(ERRCODE_SYNTAX_ERROR),
1161                                  errmsg("\"EEEE\" must be the last pattern used")));
1162
1163         switch (n->key->id)
1164         {
1165                 case NUM_9:
1166                         if (IS_BRACKET(num))
1167                                 ereport(ERROR,
1168                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1169                                                  errmsg("\"9\" must be ahead of \"PR\"")));
1170                         if (IS_MULTI(num))
1171                         {
1172                                 ++num->multi;
1173                                 break;
1174                         }
1175                         if (IS_DECIMAL(num))
1176                                 ++num->post;
1177                         else
1178                                 ++num->pre;
1179                         break;
1180
1181                 case NUM_0:
1182                         if (IS_BRACKET(num))
1183                                 ereport(ERROR,
1184                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1185                                                  errmsg("\"0\" must be ahead of \"PR\"")));
1186                         if (!IS_ZERO(num) && !IS_DECIMAL(num))
1187                         {
1188                                 num->flag |= NUM_F_ZERO;
1189                                 num->zero_start = num->pre + 1;
1190                         }
1191                         if (!IS_DECIMAL(num))
1192                                 ++num->pre;
1193                         else
1194                                 ++num->post;
1195
1196                         num->zero_end = num->pre + num->post;
1197                         break;
1198
1199                 case NUM_B:
1200                         if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1201                                 num->flag |= NUM_F_BLANK;
1202                         break;
1203
1204                 case NUM_D:
1205                         num->flag |= NUM_F_LDECIMAL;
1206                         num->need_locale = true;
1207                         /* FALLTHROUGH */
1208                 case NUM_DEC:
1209                         if (IS_DECIMAL(num))
1210                                 ereport(ERROR,
1211                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1212                                                  errmsg("multiple decimal points")));
1213                         if (IS_MULTI(num))
1214                                 ereport(ERROR,
1215                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1216                                                  errmsg("cannot use \"V\" and decimal point together")));
1217                         num->flag |= NUM_F_DECIMAL;
1218                         break;
1219
1220                 case NUM_FM:
1221                         num->flag |= NUM_F_FILLMODE;
1222                         break;
1223
1224                 case NUM_S:
1225                         if (IS_LSIGN(num))
1226                                 ereport(ERROR,
1227                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1228                                                  errmsg("cannot use \"S\" twice")));
1229                         if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1230                                 ereport(ERROR,
1231                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1232                                                  errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1233                         if (!IS_DECIMAL(num))
1234                         {
1235                                 num->lsign = NUM_LSIGN_PRE;
1236                                 num->pre_lsign_num = num->pre;
1237                                 num->need_locale = true;
1238                                 num->flag |= NUM_F_LSIGN;
1239                         }
1240                         else if (num->lsign == NUM_LSIGN_NONE)
1241                         {
1242                                 num->lsign = NUM_LSIGN_POST;
1243                                 num->need_locale = true;
1244                                 num->flag |= NUM_F_LSIGN;
1245                         }
1246                         break;
1247
1248                 case NUM_MI:
1249                         if (IS_LSIGN(num))
1250                                 ereport(ERROR,
1251                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1252                                                  errmsg("cannot use \"S\" and \"MI\" together")));
1253                         num->flag |= NUM_F_MINUS;
1254                         if (IS_DECIMAL(num))
1255                                 num->flag |= NUM_F_MINUS_POST;
1256                         break;
1257
1258                 case NUM_PL:
1259                         if (IS_LSIGN(num))
1260                                 ereport(ERROR,
1261                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1262                                                  errmsg("cannot use \"S\" and \"PL\" together")));
1263                         num->flag |= NUM_F_PLUS;
1264                         if (IS_DECIMAL(num))
1265                                 num->flag |= NUM_F_PLUS_POST;
1266                         break;
1267
1268                 case NUM_SG:
1269                         if (IS_LSIGN(num))
1270                                 ereport(ERROR,
1271                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1272                                                  errmsg("cannot use \"S\" and \"SG\" together")));
1273                         num->flag |= NUM_F_MINUS;
1274                         num->flag |= NUM_F_PLUS;
1275                         break;
1276
1277                 case NUM_PR:
1278                         if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1279                                 ereport(ERROR,
1280                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1281                                                  errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1282                         num->flag |= NUM_F_BRACKET;
1283                         break;
1284
1285                 case NUM_rn:
1286                 case NUM_RN:
1287                         num->flag |= NUM_F_ROMAN;
1288                         break;
1289
1290                 case NUM_L:
1291                 case NUM_G:
1292                         num->need_locale = true;
1293                         break;
1294
1295                 case NUM_V:
1296                         if (IS_DECIMAL(num))
1297                                 ereport(ERROR,
1298                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1299                                                  errmsg("cannot use \"V\" and decimal point together")));
1300                         num->flag |= NUM_F_MULTI;
1301                         break;
1302
1303                 case NUM_E:
1304                         if (IS_EEEE(num))
1305                                 ereport(ERROR,
1306                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1307                                                  errmsg("cannot use \"EEEE\" twice")));
1308                         if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1309                                 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1310                                 IS_ROMAN(num) || IS_MULTI(num))
1311                                 ereport(ERROR,
1312                                                 (errcode(ERRCODE_SYNTAX_ERROR),
1313                                                  errmsg("\"EEEE\" is incompatible with other formats"),
1314                                                  errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1315                         num->flag |= NUM_F_EEEE;
1316                         break;
1317         }
1318 }
1319
1320 /* ----------
1321  * Format parser, search small keywords and keyword's suffixes, and make
1322  * format-node tree.
1323  *
1324  * for DATE-TIME & NUMBER version
1325  * ----------
1326  */
1327 static void
1328 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1329                          const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1330 {
1331         FormatNode *n;
1332
1333 #ifdef DEBUG_TO_FROM_CHAR
1334         elog(DEBUG_elog_output, "to_char/number(): run parser");
1335 #endif
1336
1337         n = node;
1338
1339         while (*str)
1340         {
1341                 int                     suffix = 0;
1342                 const KeySuffix *s;
1343
1344                 /*
1345                  * Prefix
1346                  */
1347                 if ((flags & DCH_FLAG) &&
1348                         (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1349                 {
1350                         suffix |= s->id;
1351                         if (s->len)
1352                                 str += s->len;
1353                 }
1354
1355                 /*
1356                  * Keyword
1357                  */
1358                 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1359                 {
1360                         n->type = NODE_TYPE_ACTION;
1361                         n->suffix = suffix;
1362                         if (n->key->len)
1363                                 str += n->key->len;
1364
1365                         /*
1366                          * NUM version: Prepare global NUMDesc struct
1367                          */
1368                         if (flags & NUM_FLAG)
1369                                 NUMDesc_prepare(Num, n);
1370
1371                         /*
1372                          * Postfix
1373                          */
1374                         if ((flags & DCH_FLAG) && *str &&
1375                                 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1376                         {
1377                                 n->suffix |= s->id;
1378                                 if (s->len)
1379                                         str += s->len;
1380                         }
1381
1382                         n++;
1383                 }
1384                 else if (*str)
1385                 {
1386                         int                     chlen;
1387
1388                         if ((flags & STD_FLAG) && *str != '"')
1389                         {
1390                                 /*
1391                                  * Standard mode, allow only following separators: "-./,':; ".
1392                                  * However, we support double quotes even in standard mode
1393                                  * (see below).  This is our extension of standard mode.
1394                                  */
1395                                 if (strchr("-./,':; ", *str) == NULL)
1396                                         ereport(ERROR,
1397                                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1398                                                          errmsg("invalid datetime format separator: \"%s\"",
1399                                                                         pnstrdup(str, pg_mblen(str)))));
1400
1401                                 if (*str == ' ')
1402                                         n->type = NODE_TYPE_SPACE;
1403                                 else
1404                                         n->type = NODE_TYPE_SEPARATOR;
1405
1406                                 n->character[0] = *str;
1407                                 n->character[1] = '\0';
1408                                 n->key = NULL;
1409                                 n->suffix = 0;
1410                                 n++;
1411                                 str++;
1412                         }
1413                         else if (*str == '"')
1414                         {
1415                                 /*
1416                                  * Process double-quoted literal string, if any
1417                                  */
1418                                 str++;
1419                                 while (*str)
1420                                 {
1421                                         if (*str == '"')
1422                                         {
1423                                                 str++;
1424                                                 break;
1425                                         }
1426                                         /* backslash quotes the next character, if any */
1427                                         if (*str == '\\' && *(str + 1))
1428                                                 str++;
1429                                         chlen = pg_mblen(str);
1430                                         n->type = NODE_TYPE_CHAR;
1431                                         memcpy(n->character, str, chlen);
1432                                         n->character[chlen] = '\0';
1433                                         n->key = NULL;
1434                                         n->suffix = 0;
1435                                         n++;
1436                                         str += chlen;
1437                                 }
1438                         }
1439                         else
1440                         {
1441                                 /*
1442                                  * Outside double-quoted strings, backslash is only special if
1443                                  * it immediately precedes a double quote.
1444                                  */
1445                                 if (*str == '\\' && *(str + 1) == '"')
1446                                         str++;
1447                                 chlen = pg_mblen(str);
1448
1449                                 if ((flags & DCH_FLAG) && is_separator_char(str))
1450                                         n->type = NODE_TYPE_SEPARATOR;
1451                                 else if (isspace((unsigned char) *str))
1452                                         n->type = NODE_TYPE_SPACE;
1453                                 else
1454                                         n->type = NODE_TYPE_CHAR;
1455
1456                                 memcpy(n->character, str, chlen);
1457                                 n->character[chlen] = '\0';
1458                                 n->key = NULL;
1459                                 n->suffix = 0;
1460                                 n++;
1461                                 str += chlen;
1462                         }
1463                 }
1464         }
1465
1466         n->type = NODE_TYPE_END;
1467         n->suffix = 0;
1468 }
1469
/* ----------
 * DEBUG: Dump the FormatNode Tree (debug)
 *
 * Compiled only when DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

/* printable forms of the TH/th and FM suffix bits; " " when the bit is unset */
#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)   (S_FM(_suf) ? "FM" : " ")

/*
 * Log one line per node of the array starting at "node", stopping after the
 * NODE_TYPE_END node or after node number "max", whichever comes first.
 *
 * Keyword nodes are shown with their keyword name and suffix flags; literal
 * nodes (NODE_TYPE_CHAR, NODE_TYPE_SEPARATOR, NODE_TYPE_SPACE -- all three
 * are produced by parse_format()) are shown with their character.
 */
static void
dump_node(FormatNode *node, int max)
{
        FormatNode *n;
        int                     a;

        elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

        for (a = 0, n = node; a <= max; n++, a++)
        {
                if (n->type == NODE_TYPE_ACTION)
                        elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
                                 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
                else if (n->type == NODE_TYPE_CHAR)
                        elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
                                 a, n->character);
                else if (n->type == NODE_TYPE_SEPARATOR)
                        elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
                                 a, n->character);
                else if (n->type == NODE_TYPE_SPACE)
                        elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
                                 a, n->character);
                else if (n->type == NODE_TYPE_END)
                {
                        elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
                        return;
                }
                else
                        elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
        }
}
#endif                                                  /* DEBUG */
1505
1506 /*****************************************************************************
1507  *                      Private utils
1508  *****************************************************************************/
1509
1510 /* ----------
1511  * Return ST/ND/RD/TH for simple (1..9) numbers
1512  * type --> 0 upper, 1 lower
1513  * ----------
1514  */
1515 static const char *
1516 get_th(char *num, int type)
1517 {
1518         int                     len = strlen(num),
1519                                 last;
1520
1521         last = *(num + (len - 1));
1522         if (!isdigit((unsigned char) last))
1523                 ereport(ERROR,
1524                                 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1525                                  errmsg("\"%s\" is not a number", num)));
1526
1527         /*
1528          * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1529          * 'ST/st', 'ND/nd', 'RD/rd', respectively
1530          */
1531         if ((len > 1) && (num[len - 2] == '1'))
1532                 last = 0;
1533
1534         switch (last)
1535         {
1536                 case '1':
1537                         if (type == TH_UPPER)
1538                                 return numTH[0];
1539                         return numth[0];
1540                 case '2':
1541                         if (type == TH_UPPER)
1542                                 return numTH[1];
1543                         return numth[1];
1544                 case '3':
1545                         if (type == TH_UPPER)
1546                                 return numTH[2];
1547                         return numth[2];
1548                 default:
1549                         if (type == TH_UPPER)
1550                                 return numTH[3];
1551                         return numth[3];
1552         }
1553 }
1554
/* ----------
 * Turn a number string into an ordinal number string by appending the
 * suffix chosen by get_th().
 *
 * dest --> output buffer; may be the same pointer as num, in which case the
 *			suffix is appended in place.  It must have room for the number
 *			plus the suffix.
 * num	--> NUL-terminated number text
 * type --> passed through to get_th()
 *
 * Returns dest.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *suffix;

	/* copy the digits first unless we are working in place */
	if (num != dest)
		strcpy(dest, num);

	suffix = get_th(num, type);

	return strcat(dest, suffix);
}
1568
1569 /*****************************************************************************
1570  *                      upper/lower/initcap functions
1571  *****************************************************************************/
1572
1573 #ifdef USE_ICU
1574
1575 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1576                                                                          const UChar *src, int32_t srcLength,
1577                                                                          const char *locale,
1578                                                                          UErrorCode *pErrorCode);
1579
1580 static int32_t
1581 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1582                                  UChar **buff_dest, UChar *buff_source, int32_t len_source)
1583 {
1584         UErrorCode      status;
1585         int32_t         len_dest;
1586
1587         len_dest = len_source;          /* try first with same length */
1588         *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1589         status = U_ZERO_ERROR;
1590         len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1591                                         mylocale->info.icu.locale, &status);
1592         if (status == U_BUFFER_OVERFLOW_ERROR)
1593         {
1594                 /* try again with adjusted length */
1595                 pfree(*buff_dest);
1596                 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1597                 status = U_ZERO_ERROR;
1598                 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1599                                                 mylocale->info.icu.locale, &status);
1600         }
1601         if (U_FAILURE(status))
1602                 ereport(ERROR,
1603                                 (errmsg("case conversion failed: %s", u_errorName(status))));
1604         return len_dest;
1605 }
1606
1607 static int32_t
1608 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1609                                                 const UChar *src, int32_t srcLength,
1610                                                 const char *locale,
1611                                                 UErrorCode *pErrorCode)
1612 {
1613         return u_strToTitle(dest, destCapacity, src, srcLength,
1614                                                 NULL, locale, pErrorCode);
1615 }
1616
1617 #endif                                                  /* USE_ICU */
1618
1619 /*
1620  * If the system provides the needed functions for wide-character manipulation
1621  * (which are all standardized by C99), then we implement upper/lower/initcap
1622  * using wide-character functions, if necessary.  Otherwise we use the
1623  * traditional <ctype.h> functions, which of course will not work as desired
1624  * in multibyte character sets.  Note that in either case we are effectively
1625  * assuming that the database character encoding matches the encoding implied
1626  * by LC_CTYPE.
1627  */
1628
1629 /*
1630  * collation-aware, wide-character-aware lower function
1631  *
1632  * We pass the number of bytes so we can pass varlena and char*
1633  * to this function.  The result is a palloc'd, null-terminated string.
1634  */
1635 char *
1636 str_tolower(const char *buff, size_t nbytes, Oid collid)
1637 {
1638         char       *result;
1639         pg_locale_t mylocale;
1640
1641         if (!buff)
1642                 return NULL;
1643
1644         if (!OidIsValid(collid))
1645         {
1646                 /*
1647                  * This typically means that the parser could not resolve a conflict
1648                  * of implicit collations, so report it that way.
1649                  */
1650                 ereport(ERROR,
1651                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1652                                  errmsg("could not determine which collation to use for %s function",
1653                                                 "lower()"),
1654                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
1655         }
1656
1657         mylocale = pg_newlocale_from_collation(collid);
1658
1659         /* C/POSIX collations use this path regardless of database encoding */
1660         if (mylocale->ctype_is_c)
1661         {
1662                 result = asc_tolower(buff, nbytes);
1663         }
1664         else
1665         {
1666 #ifdef USE_ICU
1667                 if (mylocale->provider == COLLPROVIDER_ICU)
1668                 {
1669                         int32_t         len_uchar;
1670                         int32_t         len_conv;
1671                         UChar      *buff_uchar;
1672                         UChar      *buff_conv;
1673
1674                         len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1675                         len_conv = icu_convert_case(u_strToLower, mylocale,
1676                                                                                 &buff_conv, buff_uchar, len_uchar);
1677                         icu_from_uchar(&result, buff_conv, len_conv);
1678                         pfree(buff_uchar);
1679                         pfree(buff_conv);
1680                 }
1681                 else
1682 #endif
1683                 if (mylocale->provider == COLLPROVIDER_BUILTIN)
1684                 {
1685                         const char *src = buff;
1686                         size_t          srclen = nbytes;
1687                         size_t          dstsize;
1688                         char       *dst;
1689                         size_t          needed;
1690
1691                         Assert(GetDatabaseEncoding() == PG_UTF8);
1692
1693                         /* first try buffer of equal size plus terminating NUL */
1694                         dstsize = srclen + 1;
1695                         dst = palloc(dstsize);
1696
1697                         needed = unicode_strlower(dst, dstsize, src, srclen);
1698                         if (needed + 1 > dstsize)
1699                         {
1700                                 /* grow buffer if needed and retry */
1701                                 dstsize = needed + 1;
1702                                 dst = repalloc(dst, dstsize);
1703                                 needed = unicode_strlower(dst, dstsize, src, srclen);
1704                                 Assert(needed + 1 == dstsize);
1705                         }
1706
1707                         Assert(dst[needed] == '\0');
1708                         result = dst;
1709                 }
1710                 else
1711                 {
1712                         Assert(mylocale->provider == COLLPROVIDER_LIBC);
1713
1714                         if (pg_database_encoding_max_length() > 1)
1715                         {
1716                                 wchar_t    *workspace;
1717                                 size_t          curr_char;
1718                                 size_t          result_size;
1719
1720                                 /* Overflow paranoia */
1721                                 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1722                                         ereport(ERROR,
1723                                                         (errcode(ERRCODE_OUT_OF_MEMORY),
1724                                                          errmsg("out of memory")));
1725
1726                                 /* Output workspace cannot have more codes than input bytes */
1727                                 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1728
1729                                 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1730
1731                                 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1732                                         workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1733
1734                                 /*
1735                                  * Make result large enough; case change might change number
1736                                  * of bytes
1737                                  */
1738                                 result_size = curr_char * pg_database_encoding_max_length() + 1;
1739                                 result = palloc(result_size);
1740
1741                                 wchar2char(result, workspace, result_size, mylocale);
1742                                 pfree(workspace);
1743                         }
1744                         else
1745                         {
1746                                 char       *p;
1747
1748                                 result = pnstrdup(buff, nbytes);
1749
1750                                 /*
1751                                  * Note: we assume that tolower_l() will not be so broken as
1752                                  * to need an isupper_l() guard test.  When using the default
1753                                  * collation, we apply the traditional Postgres behavior that
1754                                  * forces ASCII-style treatment of I/i, but in non-default
1755                                  * collations you get exactly what the collation says.
1756                                  */
1757                                 for (p = result; *p; p++)
1758                                 {
1759                                         if (mylocale->is_default)
1760                                                 *p = pg_tolower((unsigned char) *p);
1761                                         else
1762                                                 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1763                                 }
1764                         }
1765                 }
1766         }
1767
1768         return result;
1769 }
1770
1771 /*
1772  * collation-aware, wide-character-aware upper function
1773  *
1774  * We pass the number of bytes so we can pass varlena and char*
1775  * to this function.  The result is a palloc'd, null-terminated string.
1776  */
1777 char *
1778 str_toupper(const char *buff, size_t nbytes, Oid collid)
1779 {
1780         char       *result;
1781         pg_locale_t mylocale;
1782
1783         if (!buff)
1784                 return NULL;
1785
1786         if (!OidIsValid(collid))
1787         {
1788                 /*
1789                  * This typically means that the parser could not resolve a conflict
1790                  * of implicit collations, so report it that way.
1791                  */
1792                 ereport(ERROR,
1793                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1794                                  errmsg("could not determine which collation to use for %s function",
1795                                                 "upper()"),
1796                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
1797         }
1798
1799         mylocale = pg_newlocale_from_collation(collid);
1800
1801         /* C/POSIX collations use this path regardless of database encoding */
1802         if (mylocale->ctype_is_c)
1803         {
1804                 result = asc_toupper(buff, nbytes);
1805         }
1806         else
1807         {
1808 #ifdef USE_ICU
1809                 if (mylocale->provider == COLLPROVIDER_ICU)
1810                 {
1811                         int32_t         len_uchar,
1812                                                 len_conv;
1813                         UChar      *buff_uchar;
1814                         UChar      *buff_conv;
1815
1816                         len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1817                         len_conv = icu_convert_case(u_strToUpper, mylocale,
1818                                                                                 &buff_conv, buff_uchar, len_uchar);
1819                         icu_from_uchar(&result, buff_conv, len_conv);
1820                         pfree(buff_uchar);
1821                         pfree(buff_conv);
1822                 }
1823                 else
1824 #endif
1825                 if (mylocale->provider == COLLPROVIDER_BUILTIN)
1826                 {
1827                         const char *src = buff;
1828                         size_t          srclen = nbytes;
1829                         size_t          dstsize;
1830                         char       *dst;
1831                         size_t          needed;
1832
1833                         Assert(GetDatabaseEncoding() == PG_UTF8);
1834
1835                         /* first try buffer of equal size plus terminating NUL */
1836                         dstsize = srclen + 1;
1837                         dst = palloc(dstsize);
1838
1839                         needed = unicode_strupper(dst, dstsize, src, srclen);
1840                         if (needed + 1 > dstsize)
1841                         {
1842                                 /* grow buffer if needed and retry */
1843                                 dstsize = needed + 1;
1844                                 dst = repalloc(dst, dstsize);
1845                                 needed = unicode_strupper(dst, dstsize, src, srclen);
1846                                 Assert(needed + 1 == dstsize);
1847                         }
1848
1849                         Assert(dst[needed] == '\0');
1850                         result = dst;
1851                 }
1852                 else
1853                 {
1854                         Assert(mylocale->provider == COLLPROVIDER_LIBC);
1855
1856                         if (pg_database_encoding_max_length() > 1)
1857                         {
1858                                 wchar_t    *workspace;
1859                                 size_t          curr_char;
1860                                 size_t          result_size;
1861
1862                                 /* Overflow paranoia */
1863                                 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1864                                         ereport(ERROR,
1865                                                         (errcode(ERRCODE_OUT_OF_MEMORY),
1866                                                          errmsg("out of memory")));
1867
1868                                 /* Output workspace cannot have more codes than input bytes */
1869                                 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1870
1871                                 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1872
1873                                 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1874                                         workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1875
1876                                 /*
1877                                  * Make result large enough; case change might change number
1878                                  * of bytes
1879                                  */
1880                                 result_size = curr_char * pg_database_encoding_max_length() + 1;
1881                                 result = palloc(result_size);
1882
1883                                 wchar2char(result, workspace, result_size, mylocale);
1884                                 pfree(workspace);
1885                         }
1886                         else
1887                         {
1888                                 char       *p;
1889
1890                                 result = pnstrdup(buff, nbytes);
1891
1892                                 /*
1893                                  * Note: we assume that toupper_l() will not be so broken as
1894                                  * to need an islower_l() guard test.  When using the default
1895                                  * collation, we apply the traditional Postgres behavior that
1896                                  * forces ASCII-style treatment of I/i, but in non-default
1897                                  * collations you get exactly what the collation says.
1898                                  */
1899                                 for (p = result; *p; p++)
1900                                 {
1901                                         if (mylocale->is_default)
1902                                                 *p = pg_toupper((unsigned char) *p);
1903                                         else
1904                                                 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1905                                 }
1906                         }
1907                 }
1908         }
1909
1910         return result;
1911 }
1912
/*
 * Iteration state for initcap_wbnext(), the word-boundary callback that
 * str_initcap() passes to unicode_strtitle().
 */
struct WordBoundaryState
{
        /* text being scanned */
        const char *str;
        /* scanning never goes past this offset; also returned when done */
        size_t          len;
        /* offset of the next character to examine */
        size_t          offset;
        /* false until the first boundary has been returned */
        bool            init;
        /* pg_u_isalnum() result for the character at the last boundary */
        bool            prev_alnum;
};
1921
1922 /*
1923  * Simple word boundary iterator that draws boundaries each time the result of
1924  * pg_u_isalnum() changes.
1925  */
1926 static size_t
1927 initcap_wbnext(void *state)
1928 {
1929         struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
1930
1931         while (wbstate->offset < wbstate->len &&
1932                    wbstate->str[wbstate->offset] != '\0')
1933         {
1934                 pg_wchar        u = utf8_to_unicode((unsigned char *) wbstate->str +
1935                                                                                 wbstate->offset);
1936                 bool            curr_alnum = pg_u_isalnum(u, true);
1937
1938                 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
1939                 {
1940                         size_t          prev_offset = wbstate->offset;
1941
1942                         wbstate->init = true;
1943                         wbstate->offset += unicode_utf8len(u);
1944                         wbstate->prev_alnum = curr_alnum;
1945                         return prev_offset;
1946                 }
1947
1948                 wbstate->offset += unicode_utf8len(u);
1949         }
1950
1951         return wbstate->len;
1952 }
1953
1954 /*
1955  * collation-aware, wide-character-aware initcap function
1956  *
1957  * We pass the number of bytes so we can pass varlena and char*
1958  * to this function.  The result is a palloc'd, null-terminated string.
1959  */
1960 char *
1961 str_initcap(const char *buff, size_t nbytes, Oid collid)
1962 {
1963         char       *result;
1964         int                     wasalnum = false;
1965         pg_locale_t mylocale;
1966
1967         if (!buff)
1968                 return NULL;
1969
1970         if (!OidIsValid(collid))
1971         {
1972                 /*
1973                  * This typically means that the parser could not resolve a conflict
1974                  * of implicit collations, so report it that way.
1975                  */
1976                 ereport(ERROR,
1977                                 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1978                                  errmsg("could not determine which collation to use for %s function",
1979                                                 "initcap()"),
1980                                  errhint("Use the COLLATE clause to set the collation explicitly.")));
1981         }
1982
1983         mylocale = pg_newlocale_from_collation(collid);
1984
1985         /* C/POSIX collations use this path regardless of database encoding */
1986         if (mylocale->ctype_is_c)
1987         {
1988                 result = asc_initcap(buff, nbytes);
1989         }
1990         else
1991         {
1992 #ifdef USE_ICU
1993                 if (mylocale->provider == COLLPROVIDER_ICU)
1994                 {
1995                         int32_t         len_uchar,
1996                                                 len_conv;
1997                         UChar      *buff_uchar;
1998                         UChar      *buff_conv;
1999
2000                         len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
2001                         len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
2002                                                                                 &buff_conv, buff_uchar, len_uchar);
2003                         icu_from_uchar(&result, buff_conv, len_conv);
2004                         pfree(buff_uchar);
2005                         pfree(buff_conv);
2006                 }
2007                 else
2008 #endif
2009                 if (mylocale->provider == COLLPROVIDER_BUILTIN)
2010                 {
2011                         const char *src = buff;
2012                         size_t          srclen = nbytes;
2013                         size_t          dstsize;
2014                         char       *dst;
2015                         size_t          needed;
2016                         struct WordBoundaryState wbstate = {
2017                                 .str = src,
2018                                 .len = srclen,
2019                                 .offset = 0,
2020                                 .init = false,
2021                                 .prev_alnum = false,
2022                         };
2023
2024                         Assert(GetDatabaseEncoding() == PG_UTF8);
2025
2026                         /* first try buffer of equal size plus terminating NUL */
2027                         dstsize = srclen + 1;
2028                         dst = palloc(dstsize);
2029
2030                         needed = unicode_strtitle(dst, dstsize, src, srclen,
2031                                                                           initcap_wbnext, &wbstate);
2032                         if (needed + 1 > dstsize)
2033                         {
2034                                 /* reset iterator */
2035                                 wbstate.offset = 0;
2036                                 wbstate.init = false;
2037
2038                                 /* grow buffer if needed and retry */
2039                                 dstsize = needed + 1;
2040                                 dst = repalloc(dst, dstsize);
2041                                 needed = unicode_strtitle(dst, dstsize, src, srclen,
2042                                                                                   initcap_wbnext, &wbstate);
2043                                 Assert(needed + 1 == dstsize);
2044                         }
2045
2046                         result = dst;
2047                 }
2048                 else
2049                 {
2050                         Assert(mylocale->provider == COLLPROVIDER_LIBC);
2051
2052                         if (pg_database_encoding_max_length() > 1)
2053                         {
2054                                 wchar_t    *workspace;
2055                                 size_t          curr_char;
2056                                 size_t          result_size;
2057
2058                                 /* Overflow paranoia */
2059                                 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
2060                                         ereport(ERROR,
2061                                                         (errcode(ERRCODE_OUT_OF_MEMORY),
2062                                                          errmsg("out of memory")));
2063
2064                                 /* Output workspace cannot have more codes than input bytes */
2065                                 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
2066
2067                                 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
2068
2069                                 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
2070                                 {
2071                                         if (wasalnum)
2072                                                 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
2073                                         else
2074                                                 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
2075                                         wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
2076                                 }
2077
2078                                 /*
2079                                  * Make result large enough; case change might change number
2080                                  * of bytes
2081                                  */
2082                                 result_size = curr_char * pg_database_encoding_max_length() + 1;
2083                                 result = palloc(result_size);
2084
2085                                 wchar2char(result, workspace, result_size, mylocale);
2086                                 pfree(workspace);
2087                         }
2088                         else
2089                         {
2090                                 char       *p;
2091
2092                                 result = pnstrdup(buff, nbytes);
2093
2094                                 /*
2095                                  * Note: we assume that toupper_l()/tolower_l() will not be so
2096                                  * broken as to need guard tests.  When using the default
2097                                  * collation, we apply the traditional Postgres behavior that
2098                                  * forces ASCII-style treatment of I/i, but in non-default
2099                                  * collations you get exactly what the collation says.
2100                                  */
2101                                 for (p = result; *p; p++)
2102                                 {
2103                                         if (mylocale->is_default)
2104                                         {
2105                                                 if (wasalnum)
2106                                                         *p = pg_tolower((unsigned char) *p);
2107                                                 else
2108                                                         *p = pg_toupper((unsigned char) *p);
2109                                         }
2110                                         else
2111                                         {
2112                                                 if (wasalnum)
2113                                                         *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2114                                                 else
2115                                                         *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2116                                         }
2117                                         wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2118                                 }
2119                         }
2120                 }
2121         }
2122
2123         return result;
2124 }
2125
/*
 * asc_tolower
 *		Lower-case a string with pg_ascii_tolower(), ignoring any locale.
 *
 * The input is described by pointer plus byte count, so both varlena
 * payloads and ordinary C strings can be passed.  Returns a palloc'd,
 * NUL-terminated copy, or NULL when 'buff' is NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	/* convert the copy in place, up to its terminating NUL */
	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_tolower((unsigned char) copy[i]);

	return copy;
}
2148
/*
 * asc_toupper
 *		Upper-case a string with pg_ascii_toupper(), ignoring any locale.
 *
 * The input is described by pointer plus byte count, so both varlena
 * payloads and ordinary C strings can be passed.  Returns a palloc'd,
 * NUL-terminated copy, or NULL when 'buff' is NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *copy;
	size_t		i;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	/* convert the copy in place, up to its terminating NUL */
	for (i = 0; copy[i] != '\0'; i++)
		copy[i] = pg_ascii_toupper((unsigned char) copy[i]);

	return copy;
}
2171
/*
 * asc_initcap
 *		ASCII-only initcap: a character that follows an ASCII letter or digit
 *		is lower-cased, every other character is upper-cased.
 *
 * The input is described by pointer plus byte count, so both varlena
 * payloads and ordinary C strings can be passed.  Returns a palloc'd,
 * NUL-terminated copy, or NULL when 'buff' is NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *copy;
	char	   *cp;
	int			in_word = false;

	if (buff == NULL)
		return NULL;

	copy = pnstrdup(buff, nbytes);

	for (cp = copy; *cp != '\0'; cp++)
	{
		unsigned char in = (unsigned char) *cp;
		char		out;

		out = in_word ? pg_ascii_tolower(in) : pg_ascii_toupper(in);
		*cp = out;

		/* we don't trust isalnum() here, so test the ASCII ranges by hand */
		in_word = ((out >= '0' && out <= '9') ||
				   (out >= 'A' && out <= 'Z') ||
				   (out >= 'a' && out <= 'z'));
	}

	return copy;
}
2206
2207 /* convenience routines for when the input is null-terminated */
2208
2209 static char *
2210 str_tolower_z(const char *buff, Oid collid)
2211 {
2212         return str_tolower(buff, strlen(buff), collid);
2213 }
2214
2215 static char *
2216 str_toupper_z(const char *buff, Oid collid)
2217 {
2218         return str_toupper(buff, strlen(buff), collid);
2219 }
2220
2221 static char *
2222 str_initcap_z(const char *buff, Oid collid)
2223 {
2224         return str_initcap(buff, strlen(buff), collid);
2225 }
2226
/* asc_tolower() for a null-terminated input; its length comes from strlen() */
static char *
asc_tolower_z(const char *buff)
{
        size_t          nbytes = strlen(buff);

        return asc_tolower(buff, nbytes);
}
2232
/* asc_toupper() for a null-terminated input; its length comes from strlen() */
static char *
asc_toupper_z(const char *buff)
{
        size_t          nbytes = strlen(buff);

        return asc_toupper(buff, nbytes);
}
2238
2239 /* asc_initcap_z is not currently needed */
2240
2241
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix flags '_suf', advance 'ptr' past up to two
 * characters, using pg_mblen() to get the byte length of each one.  Each
 * step is skipped when 'ptr' already points at the terminating NUL, so the
 * pointer is never moved beyond the end of the string.
 *
 * Note that 'ptr' is evaluated several times and is modified in place, so it
 * must be a side-effect-free lvalue.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
        do { \
                if (S_THth(_suf)) \
                { \
                        if (*(ptr)) (ptr) += pg_mblen(ptr); \
                        if (*(ptr)) (ptr) += pg_mblen(ptr); \
                } \
        } while (0)
2256
2257
2258 #ifdef DEBUG_TO_FROM_CHAR
2259 /* -----------
2260  * DEBUG: Call for debug and for index checking; (Show ASCII char
2261  * and defined keyword for each used position
2262  * ----------
2263  */
2264 static void
2265 dump_index(const KeyWord *k, const int *index)
2266 {
2267         int                     i,
2268                                 count = 0,
2269                                 free_i = 0;
2270
2271         elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");
2272
2273         for (i = 0; i < KeyWord_INDEX_SIZE; i++)
2274         {
2275                 if (index[i] != -1)
2276                 {
2277                         elog(DEBUG_elog_output, "\t%c: %s, ", i + 32, k[index[i]].name);
2278                         count++;
2279                 }
2280                 else
2281                 {
2282                         free_i++;
2283                         elog(DEBUG_elog_output, "\t(%d) %c %d", i, i + 32, index[i]);
2284                 }
2285         }
2286         elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
2287                  count, free_i);
2288 }
2289 #endif                                                  /* DEBUG */
2290
2291 /* ----------
2292  * Return true if next format picture is not digit value
2293  * ----------
2294  */
2295 static bool
2296 is_next_separator(FormatNode *n)
2297 {
2298         if (n->type == NODE_TYPE_END)
2299                 return false;
2300
2301         if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2302                 return true;
2303
2304         /*
2305          * Next node
2306          */
2307         n++;
2308
2309         /* end of format string is treated like a non-digit separator */
2310         if (n->type == NODE_TYPE_END)
2311                 return true;
2312
2313         if (n->type == NODE_TYPE_ACTION)
2314         {
2315                 if (n->key->is_digit)
2316                         return false;
2317
2318                 return true;
2319         }
2320         else if (n->character[1] == '\0' &&
2321                          isdigit((unsigned char) n->character[0]))
2322                 return false;
2323
2324         return true;                            /* some non-digit input (separator) */
2325 }
2326
2327
/*
 * Expand an abbreviated year to the full year lying closest to 2020; this is
 * effectively what happens when we assume '70' is 1970 and '69' is 2069.
 *
 *      below 70        -> add 2000
 *      70 .. 99        -> add 1900
 *      100 .. 519      -> add 2000
 *      520 .. 999      -> add 1000
 *      1000 and up     -> returned unchanged
 */
static int
adjust_partial_year_to_2020(int year)
{
        int         century_base;

        /* anything with four or more digits is already a full year */
        if (year >= 1000)
                return year;

        if (year < 70)
                century_base = 2000;
        else if (year < 100)
                century_base = 1900;
        else if (year < 520)
                century_base = 2000;
        else
                century_base = 1000;

        return year + century_base;
}
2350
2351
/*
 * Count the whitespace characters (per isspace()) at the start of 'str'.
 * Counting stops at the first non-whitespace character or at the end of the
 * string.
 */
static int
strspace_len(const char *str)
{
        const char *p = str;

        while (*p != '\0' && isspace((unsigned char) *p))
                p++;

        return (int) (p - str);
}
2364
2365 /*
2366  * Set the date mode of a from-char conversion.
2367  *
2368  * Puke if the date mode has already been set, and the caller attempts to set
2369  * it to a conflicting mode.
2370  *
2371  * Returns true on success, false on failure (if escontext points to an
2372  * ErrorSaveContext; otherwise errors are thrown).
2373  */
2374 static bool
2375 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
2376                                    Node *escontext)
2377 {
2378         if (mode != FROM_CHAR_DATE_NONE)
2379         {
2380                 if (tmfc->mode == FROM_CHAR_DATE_NONE)
2381                         tmfc->mode = mode;
2382                 else if (tmfc->mode != mode)
2383                         ereturn(escontext, false,
2384                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2385                                          errmsg("invalid combination of date conventions"),
2386                                          errhint("Do not mix Gregorian and ISO week date "
2387                                                          "conventions in a formatting template.")));
2388         }
2389         return true;
2390 }
2391
2392 /*
2393  * Set the integer pointed to by 'dest' to the given value.
2394  *
2395  * Puke if the destination integer has previously been set to some other
2396  * non-zero value.
2397  *
2398  * Returns true on success, false on failure (if escontext points to an
2399  * ErrorSaveContext; otherwise errors are thrown).
2400  */
2401 static bool
2402 from_char_set_int(int *dest, const int value, const FormatNode *node,
2403                                   Node *escontext)
2404 {
2405         if (*dest != 0 && *dest != value)
2406                 ereturn(escontext, false,
2407                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2408                                  errmsg("conflicting values for \"%s\" field in formatting string",
2409                                                 node->key->name),
2410                                  errdetail("This value contradicts a previous setting "
2411                                                    "for the same field type.")));
2412         *dest = value;
2413         return true;
2414 }
2415
2416 /*
2417  * Read a single integer from the source string, into the int pointed to by
2418  * 'dest'. If 'dest' is NULL, the result is discarded.
2419  *
2420  * In fixed-width mode (the node does not have the FM suffix), consume at most
2421  * 'len' characters.  However, any leading whitespace isn't counted in 'len'.
2422  *
2423  * We use strtol() to recover the integer value from the source string, in
2424  * accordance with the given FormatNode.
2425  *
2426  * If the conversion completes successfully, src will have been advanced to
2427  * point at the character immediately following the last character used in the
2428  * conversion.
2429  *
2430  * Returns the number of characters consumed, or -1 on error (if escontext
2431  * points to an ErrorSaveContext; otherwise errors are thrown).
2432  *
2433  * Note that from_char_parse_int() provides a more convenient wrapper where
2434  * the length of the field is the same as the length of the format keyword (as
2435  * with DD and MI).
2436  */
2437 static int
2438 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2439                                                 Node *escontext)
2440 {
2441         long            result;
2442         char            copy[DCH_MAX_ITEM_SIZ + 1];
2443         const char *init = *src;
2444         int                     used;
2445
2446         /*
2447          * Skip any whitespace before parsing the integer.
2448          */
2449         *src += strspace_len(*src);
2450
2451         Assert(len <= DCH_MAX_ITEM_SIZ);
2452         used = (int) strlcpy(copy, *src, len + 1);
2453
2454         if (S_FM(node->suffix) || is_next_separator(node))
2455         {
2456                 /*
2457                  * This node is in Fill Mode, or the next node is known to be a
2458                  * non-digit value, so we just slurp as many characters as we can get.
2459                  */
2460                 char       *endptr;
2461
2462                 errno = 0;
2463                 result = strtol(init, &endptr, 10);
2464                 *src = endptr;
2465         }
2466         else
2467         {
2468                 /*
2469                  * We need to pull exactly the number of characters given in 'len' out
2470                  * of the string, and convert those.
2471                  */
2472                 char       *last;
2473
2474                 if (used < len)
2475                         ereturn(escontext, -1,
2476                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2477                                          errmsg("source string too short for \"%s\" formatting field",
2478                                                         node->key->name),
2479                                          errdetail("Field requires %d characters, but only %d remain.",
2480                                                            len, used),
2481                                          errhint("If your source string is not fixed-width, "
2482                                                          "try using the \"FM\" modifier.")));
2483
2484                 errno = 0;
2485                 result = strtol(copy, &last, 10);
2486                 used = last - copy;
2487
2488                 if (used > 0 && used < len)
2489                         ereturn(escontext, -1,
2490                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2491                                          errmsg("invalid value \"%s\" for \"%s\"",
2492                                                         copy, node->key->name),
2493                                          errdetail("Field requires %d characters, but only %d could be parsed.",
2494                                                            len, used),
2495                                          errhint("If your source string is not fixed-width, "
2496                                                          "try using the \"FM\" modifier.")));
2497
2498                 *src += used;
2499         }
2500
2501         if (*src == init)
2502                 ereturn(escontext, -1,
2503                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2504                                  errmsg("invalid value \"%s\" for \"%s\"",
2505                                                 copy, node->key->name),
2506                                  errdetail("Value must be an integer.")));
2507
2508         if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2509                 ereturn(escontext, -1,
2510                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2511                                  errmsg("value for \"%s\" in source string is out of range",
2512                                                 node->key->name),
2513                                  errdetail("Value must be in the range %d to %d.",
2514                                                    INT_MIN, INT_MAX)));
2515
2516         if (dest != NULL)
2517         {
2518                 if (!from_char_set_int(dest, (int) result, node, escontext))
2519                         return -1;
2520         }
2521
2522         return *src - init;
2523 }
2524
2525 /*
2526  * Call from_char_parse_int_len(), using the length of the format keyword as
2527  * the expected length of the field.
2528  *
2529  * Don't call this function if the field differs in length from the format
2530  * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2531  * In such cases, call from_char_parse_int_len() instead to specify the
2532  * required length explicitly.
2533  */
2534 static int
2535 from_char_parse_int(int *dest, const char **src, FormatNode *node,
2536                                         Node *escontext)
2537 {
2538         return from_char_parse_int_len(dest, src, node->key->len, node, escontext);
2539 }
2540
/*
 * Look through the NULL-terminated "array" for the first entry that is a
 * case-insensitive prefix of "name".
 *
 * Returns the array index of that entry, or -1 if there is none.
 *
 * *len receives the length of the matched entry, or 0 if nothing matched.
 *
 * Case is folded with pg_ascii_tolower, so this is only suitable for
 * comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
        unsigned char firstc;
        int         idx;

        *len = 0;

        /* an empty string cannot match any entry */
        if (*name == '\0')
                return -1;

        /* fold the first character once; it rules out most entries cheaply */
        firstc = pg_ascii_tolower((unsigned char) *name);

        for (idx = 0; array[idx] != NULL; idx++)
        {
                const char *entry = array[idx];
                int         matched;

                if (pg_ascii_tolower((unsigned char) entry[0]) != firstc)
                        continue;

                /* extend the match until the entry ends, "name" ends, or they differ */
                matched = 1;
                while (entry[matched] != '\0' &&
                           name[matched] != '\0' &&
                           pg_ascii_tolower((unsigned char) entry[matched]) ==
                           pg_ascii_tolower((unsigned char) name[matched]))
                        matched++;

                /* success only if the whole array entry was consumed */
                if (entry[matched] == '\0')
                {
                        *len = matched;
                        return idx;
                }
        }

        return -1;
}
2597
2598 /*
2599  * Sequentially search an array of possibly non-English words for
2600  * a case-insensitive match to the initial character(s) of "name".
2601  *
2602  * This has the same API as seq_search_ascii(), but we use a more general
2603  * case-folding transformation to achieve case-insensitivity.  Case folding
2604  * is done per the rules of the collation identified by "collid".
2605  *
2606  * The array is treated as const, but we don't declare it that way because
2607  * the arrays exported by pg_locale.c aren't const.
2608  */
2609 static int
2610 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2611 {
2612         char      **a;
2613         char       *upper_name;
2614         char       *lower_name;
2615
2616         *len = 0;
2617
2618         /* empty string can't match anything */
2619         if (!*name)
2620                 return -1;
2621
2622         /*
2623          * The case-folding processing done below is fairly expensive, so before
2624          * doing that, make a quick pass to see if there is an exact match.
2625          */
2626         for (a = array; *a != NULL; a++)
2627         {
2628                 int                     element_len = strlen(*a);
2629
2630                 if (strncmp(name, *a, element_len) == 0)
2631                 {
2632                         *len = element_len;
2633                         return a - array;
2634                 }
2635         }
2636
2637         /*
2638          * Fold to upper case, then to lower case, so that we can match reliably
2639          * even in languages in which case conversions are not injective.
2640          */
2641         upper_name = str_toupper(name, strlen(name), collid);
2642         lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2643         pfree(upper_name);
2644
2645         for (a = array; *a != NULL; a++)
2646         {
2647                 char       *upper_element;
2648                 char       *lower_element;
2649                 int                     element_len;
2650
2651                 /* Likewise upper/lower-case array element */
2652                 upper_element = str_toupper(*a, strlen(*a), collid);
2653                 lower_element = str_tolower(upper_element, strlen(upper_element),
2654                                                                         collid);
2655                 pfree(upper_element);
2656                 element_len = strlen(lower_element);
2657
2658                 /* Match? */
2659                 if (strncmp(lower_name, lower_element, element_len) == 0)
2660                 {
2661                         *len = element_len;
2662                         pfree(lower_element);
2663                         pfree(lower_name);
2664                         return a - array;
2665                 }
2666                 pfree(lower_element);
2667         }
2668
2669         pfree(lower_name);
2670         return -1;
2671 }
2672
2673 /*
2674  * Perform a sequential search in 'array' (or 'localized_array', if that's
2675  * not NULL) for an entry matching the first character(s) of the 'src'
2676  * string case-insensitively.
2677  *
2678  * The 'array' is presumed to be English words (all-ASCII), but
2679  * if 'localized_array' is supplied, that might be non-English
2680  * so we need a more expensive case-folding transformation
2681  * (which will follow the rules of the collation 'collid').
2682  *
2683  * If a match is found, copy the array index of the match into the integer
2684  * pointed to by 'dest' and advance 'src' to the end of the part of the string
2685  * which matched.
2686  *
2687  * Returns true on match, false on failure (if escontext points to an
2688  * ErrorSaveContext; otherwise errors are thrown).
2689  *
2690  * 'node' is used only for error reports: node->key->name identifies the
2691  * field type we were searching for.
2692  */
2693 static bool
2694 from_char_seq_search(int *dest, const char **src, const char *const *array,
2695                                          char **localized_array, Oid collid,
2696                                          FormatNode *node, Node *escontext)
2697 {
2698         int                     len;
2699
2700         if (localized_array == NULL)
2701                 *dest = seq_search_ascii(*src, array, &len);
2702         else
2703                 *dest = seq_search_localized(*src, localized_array, &len, collid);
2704
2705         if (len <= 0)
2706         {
2707                 /*
2708                  * In the error report, truncate the string at the next whitespace (if
2709                  * any) to avoid including irrelevant data.
2710                  */
2711                 char       *copy = pstrdup(*src);
2712                 char       *c;
2713
2714                 for (c = copy; *c; c++)
2715                 {
2716                         if (scanner_isspace(*c))
2717                         {
2718                                 *c = '\0';
2719                                 break;
2720                         }
2721                 }
2722
2723                 ereturn(escontext, false,
2724                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2725                                  errmsg("invalid value \"%s\" for \"%s\"",
2726                                                 copy, node->key->name),
2727                                  errdetail("The given value did not match any of "
2728                                                    "the allowed values for this field.")));
2729         }
2730         *src += len;
2731         return true;
2732 }
2733
2734 /* ----------
2735  * Process a TmToChar struct as denoted by a list of FormatNodes.
2736  * The formatted data is written to the string pointed to by 'out'.
2737  * ----------
2738  */
2739 static void
2740 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2741 {
2742         FormatNode *n;
2743         char       *s;
2744         struct fmt_tm *tm = &in->tm;
2745         int                     i;
2746
2747         /* cache localized days and months */
2748         cache_locale_time();
2749
2750         s = out;
2751         for (n = node; n->type != NODE_TYPE_END; n++)
2752         {
2753                 if (n->type != NODE_TYPE_ACTION)
2754                 {
2755                         strcpy(s, n->character);
2756                         s += strlen(s);
2757                         continue;
2758                 }
2759
2760                 switch (n->key->id)
2761                 {
2762                         case DCH_A_M:
2763                         case DCH_P_M:
2764                                 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2765                                            ? P_M_STR : A_M_STR);
2766                                 s += strlen(s);
2767                                 break;
2768                         case DCH_AM:
2769                         case DCH_PM:
2770                                 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2771                                            ? PM_STR : AM_STR);
2772                                 s += strlen(s);
2773                                 break;
2774                         case DCH_a_m:
2775                         case DCH_p_m:
2776                                 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2777                                            ? p_m_STR : a_m_STR);
2778                                 s += strlen(s);
2779                                 break;
2780                         case DCH_am:
2781                         case DCH_pm:
2782                                 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2783                                            ? pm_STR : am_STR);
2784                                 s += strlen(s);
2785                                 break;
2786                         case DCH_HH:
2787                         case DCH_HH12:
2788
2789                                 /*
2790                                  * display time as shown on a 12-hour clock, even for
2791                                  * intervals
2792                                  */
2793                                 sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2794                                                 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ?
2795                                                 (long long) (HOURS_PER_DAY / 2) :
2796                                                 (long long) (tm->tm_hour % (HOURS_PER_DAY / 2)));
2797                                 if (S_THth(n->suffix))
2798                                         str_numth(s, s, S_TH_TYPE(n->suffix));
2799                                 s += strlen(s);
2800                                 break;
2801                         case DCH_HH24:
2802                                 sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2803                                                 (long long) tm->tm_hour);
2804                                 if (S_THth(n->suffix))
2805                                         str_numth(s, s, S_TH_TYPE(n->suffix));
2806                                 s += strlen(s);
2807                                 break;
2808                         case DCH_MI:
2809                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2810                                                 tm->tm_min);
2811                                 if (S_THth(n->suffix))
2812                                         str_numth(s, s, S_TH_TYPE(n->suffix));
2813                                 s += strlen(s);
2814                                 break;
2815                         case DCH_SS:
2816                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2817                                                 tm->tm_sec);
2818                                 if (S_THth(n->suffix))
2819                                         str_numth(s, s, S_TH_TYPE(n->suffix));
2820                                 s += strlen(s);
2821                                 break;
2822
2823 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2824                                 sprintf(s, frac_fmt, (int) (frac_val)); \
2825                                 if (S_THth(n->suffix)) \
2826                                         str_numth(s, s, S_TH_TYPE(n->suffix)); \
2827                                 s += strlen(s)
2828
2829                         case DCH_FF1:           /* tenth of second */
2830                                 DCH_to_char_fsec("%01d", in->fsec / 100000);
2831                                 break;
2832                         case DCH_FF2:           /* hundredth of second */
2833                                 DCH_to_char_fsec("%02d", in->fsec / 10000);
2834                                 break;
2835                         case DCH_FF3:
2836                         case DCH_MS:            /* millisecond */
2837                                 DCH_to_char_fsec("%03d", in->fsec / 1000);
2838                                 break;
2839                         case DCH_FF4:           /* tenth of a millisecond */
2840                                 DCH_to_char_fsec("%04d", in->fsec / 100);
2841                                 break;
2842                         case DCH_FF5:           /* hundredth of a millisecond */
2843                                 DCH_to_char_fsec("%05d", in->fsec / 10);
2844                                 break;
2845                         case DCH_FF6:
2846                         case DCH_US:            /* microsecond */
2847                                 DCH_to_char_fsec("%06d", in->fsec);
2848                                 break;
2849 #undef DCH_to_char_fsec
2850                         case DCH_SSSS:
2851                                 sprintf(s, "%lld",
2852                                                 (long long) (tm->tm_hour * SECS_PER_HOUR +
2853                                                                          tm->tm_min * SECS_PER_MINUTE +
2854                                                                          tm->tm_sec));
2855                                 if (S_THth(n->suffix))
2856                                         str_numth(s, s, S_TH_TYPE(n->suffix));
2857                                 s += strlen(s);
2858                                 break;
2859                         case DCH_tz:
2860                                 INVALID_FOR_INTERVAL;
2861                                 if (tmtcTzn(in))
2862                                 {
2863                                         /* We assume here that timezone names aren't localized */
2864                                         char       *p = asc_tolower_z(tmtcTzn(in));
2865
2866                                         strcpy(s, p);
2867                                         pfree(p);
2868                                         s += strlen(s);
2869                                 }
2870                                 break;
2871                         case DCH_TZ:
2872                                 INVALID_FOR_INTERVAL;
2873                                 if (tmtcTzn(in))
2874                                 {
2875                                         strcpy(s, tmtcTzn(in));
2876                                         s += strlen(s);
2877                                 }
2878                                 break;
2879                         case DCH_TZH:
2880                                 INVALID_FOR_INTERVAL;
2881                                 sprintf(s, "%c%02d",
2882                                                 (tm->tm_gmtoff >= 0) ? '+' : '-',
2883                                                 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2884                                 s += strlen(s);
2885                                 break;
2886                         case DCH_TZM:
2887                                 INVALID_FOR_INTERVAL;
2888                                 sprintf(s, "%02d",
2889                                                 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2890                                 s += strlen(s);
2891                                 break;
2892                         case DCH_OF:
2893                                 INVALID_FOR_INTERVAL;
2894                                 sprintf(s, "%c%0*d",
2895                                                 (tm->tm_gmtoff >= 0) ? '+' : '-',
2896                                                 S_FM(n->suffix) ? 0 : 2,
2897                                                 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2898                                 s += strlen(s);
2899                                 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2900                                 {
2901                                         sprintf(s, ":%02d",
2902                                                         (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2903                                         s += strlen(s);
2904                                 }
2905                                 break;
2906                         case DCH_A_D:
2907                         case DCH_B_C:
2908                                 INVALID_FOR_INTERVAL;
2909                                 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2910                                 s += strlen(s);
2911                                 break;
2912                         case DCH_AD:
2913                         case DCH_BC:
2914                                 INVALID_FOR_INTERVAL;
2915                                 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2916                                 s += strlen(s);
2917                                 break;
2918                         case DCH_a_d:
2919                         case DCH_b_c:
2920                                 INVALID_FOR_INTERVAL;
2921                                 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2922                                 s += strlen(s);
2923                                 break;
2924                         case DCH_ad:
2925                         case DCH_bc:
2926                                 INVALID_FOR_INTERVAL;
2927                                 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2928                                 s += strlen(s);
2929                                 break;
2930                         case DCH_MONTH:
2931                                 INVALID_FOR_INTERVAL;
2932                                 if (!tm->tm_mon)
2933                                         break;
2934                                 if (S_TM(n->suffix))
2935                                 {
2936                                         char       *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2937
2938                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2939                                                 strcpy(s, str);
2940                                         else
2941                                                 ereport(ERROR,
2942                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2943                                                                  errmsg("localized string format value too long")));
2944                                 }
2945                                 else
2946                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2947                                                         asc_toupper_z(months_full[tm->tm_mon - 1]));
2948                                 s += strlen(s);
2949                                 break;
2950                         case DCH_Month:
2951                                 INVALID_FOR_INTERVAL;
2952                                 if (!tm->tm_mon)
2953                                         break;
2954                                 if (S_TM(n->suffix))
2955                                 {
2956                                         char       *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2957
2958                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2959                                                 strcpy(s, str);
2960                                         else
2961                                                 ereport(ERROR,
2962                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2963                                                                  errmsg("localized string format value too long")));
2964                                 }
2965                                 else
2966                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2967                                                         months_full[tm->tm_mon - 1]);
2968                                 s += strlen(s);
2969                                 break;
2970                         case DCH_month:
2971                                 INVALID_FOR_INTERVAL;
2972                                 if (!tm->tm_mon)
2973                                         break;
2974                                 if (S_TM(n->suffix))
2975                                 {
2976                                         char       *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2977
2978                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2979                                                 strcpy(s, str);
2980                                         else
2981                                                 ereport(ERROR,
2982                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2983                                                                  errmsg("localized string format value too long")));
2984                                 }
2985                                 else
2986                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2987                                                         asc_tolower_z(months_full[tm->tm_mon - 1]));
2988                                 s += strlen(s);
2989                                 break;
2990                         case DCH_MON:
2991                                 INVALID_FOR_INTERVAL;
2992                                 if (!tm->tm_mon)
2993                                         break;
2994                                 if (S_TM(n->suffix))
2995                                 {
2996                                         char       *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2997
2998                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2999                                                 strcpy(s, str);
3000                                         else
3001                                                 ereport(ERROR,
3002                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3003                                                                  errmsg("localized string format value too long")));
3004                                 }
3005                                 else
3006                                         strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
3007                                 s += strlen(s);
3008                                 break;
3009                         case DCH_Mon:
3010                                 INVALID_FOR_INTERVAL;
3011                                 if (!tm->tm_mon)
3012                                         break;
3013                                 if (S_TM(n->suffix))
3014                                 {
3015                                         char       *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
3016
3017                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3018                                                 strcpy(s, str);
3019                                         else
3020                                                 ereport(ERROR,
3021                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3022                                                                  errmsg("localized string format value too long")));
3023                                 }
3024                                 else
3025                                         strcpy(s, months[tm->tm_mon - 1]);
3026                                 s += strlen(s);
3027                                 break;
3028                         case DCH_mon:
3029                                 INVALID_FOR_INTERVAL;
3030                                 if (!tm->tm_mon)
3031                                         break;
3032                                 if (S_TM(n->suffix))
3033                                 {
3034                                         char       *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
3035
3036                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3037                                                 strcpy(s, str);
3038                                         else
3039                                                 ereport(ERROR,
3040                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3041                                                                  errmsg("localized string format value too long")));
3042                                 }
3043                                 else
3044                                         strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
3045                                 s += strlen(s);
3046                                 break;
3047                         case DCH_MM:
3048                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
3049                                                 tm->tm_mon);
3050                                 if (S_THth(n->suffix))
3051                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3052                                 s += strlen(s);
3053                                 break;
3054                         case DCH_DAY:
3055                                 INVALID_FOR_INTERVAL;
3056                                 if (S_TM(n->suffix))
3057                                 {
3058                                         char       *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
3059
3060                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3061                                                 strcpy(s, str);
3062                                         else
3063                                                 ereport(ERROR,
3064                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3065                                                                  errmsg("localized string format value too long")));
3066                                 }
3067                                 else
3068                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3069                                                         asc_toupper_z(days[tm->tm_wday]));
3070                                 s += strlen(s);
3071                                 break;
3072                         case DCH_Day:
3073                                 INVALID_FOR_INTERVAL;
3074                                 if (S_TM(n->suffix))
3075                                 {
3076                                         char       *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
3077
3078                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3079                                                 strcpy(s, str);
3080                                         else
3081                                                 ereport(ERROR,
3082                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3083                                                                  errmsg("localized string format value too long")));
3084                                 }
3085                                 else
3086                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3087                                                         days[tm->tm_wday]);
3088                                 s += strlen(s);
3089                                 break;
3090                         case DCH_day:
3091                                 INVALID_FOR_INTERVAL;
3092                                 if (S_TM(n->suffix))
3093                                 {
3094                                         char       *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3095
3096                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3097                                                 strcpy(s, str);
3098                                         else
3099                                                 ereport(ERROR,
3100                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3101                                                                  errmsg("localized string format value too long")));
3102                                 }
3103                                 else
3104                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3105                                                         asc_tolower_z(days[tm->tm_wday]));
3106                                 s += strlen(s);
3107                                 break;
3108                         case DCH_DY:
3109                                 INVALID_FOR_INTERVAL;
3110                                 if (S_TM(n->suffix))
3111                                 {
3112                                         char       *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3113
3114                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3115                                                 strcpy(s, str);
3116                                         else
3117                                                 ereport(ERROR,
3118                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3119                                                                  errmsg("localized string format value too long")));
3120                                 }
3121                                 else
3122                                         strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3123                                 s += strlen(s);
3124                                 break;
3125                         case DCH_Dy:
3126                                 INVALID_FOR_INTERVAL;
3127                                 if (S_TM(n->suffix))
3128                                 {
3129                                         char       *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3130
3131                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3132                                                 strcpy(s, str);
3133                                         else
3134                                                 ereport(ERROR,
3135                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3136                                                                  errmsg("localized string format value too long")));
3137                                 }
3138                                 else
3139                                         strcpy(s, days_short[tm->tm_wday]);
3140                                 s += strlen(s);
3141                                 break;
3142                         case DCH_dy:
3143                                 INVALID_FOR_INTERVAL;
3144                                 if (S_TM(n->suffix))
3145                                 {
3146                                         char       *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3147
3148                                         if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3149                                                 strcpy(s, str);
3150                                         else
3151                                                 ereport(ERROR,
3152                                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3153                                                                  errmsg("localized string format value too long")));
3154                                 }
3155                                 else
3156                                         strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3157                                 s += strlen(s);
3158                                 break;
3159                         case DCH_DDD:
3160                         case DCH_IDDD:
3161                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3162                                                 (n->key->id == DCH_DDD) ?
3163                                                 tm->tm_yday :
3164                                                 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3165                                 if (S_THth(n->suffix))
3166                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3167                                 s += strlen(s);
3168                                 break;
3169                         case DCH_DD:
3170                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3171                                 if (S_THth(n->suffix))
3172                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3173                                 s += strlen(s);
3174                                 break;
3175                         case DCH_D:
3176                                 INVALID_FOR_INTERVAL;
3177                                 sprintf(s, "%d", tm->tm_wday + 1);
3178                                 if (S_THth(n->suffix))
3179                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3180                                 s += strlen(s);
3181                                 break;
3182                         case DCH_ID:
3183                                 INVALID_FOR_INTERVAL;
3184                                 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3185                                 if (S_THth(n->suffix))
3186                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3187                                 s += strlen(s);
3188                                 break;
3189                         case DCH_WW:
3190                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3191                                                 (tm->tm_yday - 1) / 7 + 1);
3192                                 if (S_THth(n->suffix))
3193                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3194                                 s += strlen(s);
3195                                 break;
3196                         case DCH_IW:
3197                                 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3198                                                 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3199                                 if (S_THth(n->suffix))
3200                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3201                                 s += strlen(s);
3202                                 break;
3203                         case DCH_Q:
3204                                 if (!tm->tm_mon)
3205                                         break;
3206                                 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3207                                 if (S_THth(n->suffix))
3208                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3209                                 s += strlen(s);
3210                                 break;
3211                         case DCH_CC:
3212                                 if (is_interval)        /* straight calculation */
3213                                         i = tm->tm_year / 100;
3214                                 else
3215                                 {
3216                                         if (tm->tm_year > 0)
3217                                                 /* Century 20 == 1901 - 2000 */
3218                                                 i = (tm->tm_year - 1) / 100 + 1;
3219                                         else
3220                                                 /* Century 6BC == 600BC - 501BC */
3221                                                 i = tm->tm_year / 100 - 1;
3222                                 }
3223                                 if (i <= 99 && i >= -99)
3224                                         sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3225                                 else
3226                                         sprintf(s, "%d", i);
3227                                 if (S_THth(n->suffix))
3228                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3229                                 s += strlen(s);
3230                                 break;
3231                         case DCH_Y_YYY:
3232                                 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3233                                 sprintf(s, "%d,%03d", i,
3234                                                 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3235                                 if (S_THth(n->suffix))
3236                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3237                                 s += strlen(s);
3238                                 break;
3239                         case DCH_YYYY:
3240                         case DCH_IYYY:
3241                                 sprintf(s, "%0*d",
3242                                                 S_FM(n->suffix) ? 0 :
3243                                                 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3244                                                 (n->key->id == DCH_YYYY ?
3245                                                  ADJUST_YEAR(tm->tm_year, is_interval) :
3246                                                  ADJUST_YEAR(date2isoyear(tm->tm_year,
3247                                                                                                   tm->tm_mon,
3248                                                                                                   tm->tm_mday),
3249                                                                          is_interval)));
3250                                 if (S_THth(n->suffix))
3251                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3252                                 s += strlen(s);
3253                                 break;
3254                         case DCH_YYY:
3255                         case DCH_IYY:
3256                                 sprintf(s, "%0*d",
3257                                                 S_FM(n->suffix) ? 0 :
3258                                                 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3259                                                 (n->key->id == DCH_YYY ?
3260                                                  ADJUST_YEAR(tm->tm_year, is_interval) :
3261                                                  ADJUST_YEAR(date2isoyear(tm->tm_year,
3262                                                                                                   tm->tm_mon,
3263                                                                                                   tm->tm_mday),
3264                                                                          is_interval)) % 1000);
3265                                 if (S_THth(n->suffix))
3266                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3267                                 s += strlen(s);
3268                                 break;
3269                         case DCH_YY:
3270                         case DCH_IY:
3271                                 sprintf(s, "%0*d",
3272                                                 S_FM(n->suffix) ? 0 :
3273                                                 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3274                                                 (n->key->id == DCH_YY ?
3275                                                  ADJUST_YEAR(tm->tm_year, is_interval) :
3276                                                  ADJUST_YEAR(date2isoyear(tm->tm_year,
3277                                                                                                   tm->tm_mon,
3278                                                                                                   tm->tm_mday),
3279                                                                          is_interval)) % 100);
3280                                 if (S_THth(n->suffix))
3281                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3282                                 s += strlen(s);
3283                                 break;
3284                         case DCH_Y:
3285                         case DCH_I:
3286                                 sprintf(s, "%1d",
3287                                                 (n->key->id == DCH_Y ?
3288                                                  ADJUST_YEAR(tm->tm_year, is_interval) :
3289                                                  ADJUST_YEAR(date2isoyear(tm->tm_year,
3290                                                                                                   tm->tm_mon,
3291                                                                                                   tm->tm_mday),
3292                                                                          is_interval)) % 10);
3293                                 if (S_THth(n->suffix))
3294                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3295                                 s += strlen(s);
3296                                 break;
3297                         case DCH_RM:
3298                                 /* FALLTHROUGH */
3299                         case DCH_rm:
3300
3301                                 /*
3302                                  * For intervals, values like '12 month' will be reduced to 0
3303                                  * month and some years.  These should be processed.
3304                                  */
3305                                 if (!tm->tm_mon && !tm->tm_year)
3306                                         break;
3307                                 else
3308                                 {
3309                                         int                     mon = 0;
3310                                         const char *const *months;
3311
3312                                         if (n->key->id == DCH_RM)
3313                                                 months = rm_months_upper;
3314                                         else
3315                                                 months = rm_months_lower;
3316
3317                                         /*
3318                                          * Compute the position in the roman-numeral array.  Note
3319                                          * that the contents of the array are reversed, December
3320                                          * being first and January last.
3321                                          */
3322                                         if (tm->tm_mon == 0)
3323                                         {
3324                                                 /*
3325                                                  * This case is special, and tracks the case of full
3326                                                  * interval years.
3327                                                  */
3328                                                 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3329                                         }
3330                                         else if (tm->tm_mon < 0)
3331                                         {
3332                                                 /*
3333                                                  * Negative case.  In this case, the calculation is
3334                                                  * reversed, where -1 means December, -2 November,
3335                                                  * etc.
3336                                                  */
3337                                                 mon = -1 * (tm->tm_mon + 1);
3338                                         }
3339                                         else
3340                                         {
3341                                                 /*
3342                                                  * Common case, with a strictly positive value.  The
3343                                                  * position in the array matches with the value of
3344                                                  * tm_mon.
3345                                                  */
3346                                                 mon = MONTHS_PER_YEAR - tm->tm_mon;
3347                                         }
3348
3349                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3350                                                         months[mon]);
3351                                         s += strlen(s);
3352                                 }
3353                                 break;
3354                         case DCH_W:
3355                                 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3356                                 if (S_THth(n->suffix))
3357                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3358                                 s += strlen(s);
3359                                 break;
3360                         case DCH_J:
3361                                 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3362                                 if (S_THth(n->suffix))
3363                                         str_numth(s, s, S_TH_TYPE(n->suffix));
3364                                 s += strlen(s);
3365                                 break;
3366                 }
3367         }
3368
3369         *s = '\0';
3370 }
3371
3372 /*
3373  * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3374  * The TmFromChar struct pointed to by 'out' is populated with the results.
3375  *
3376  * 'collid' identifies the collation to use, if needed.
3377  * 'std' specifies standard parsing mode.
3378  *
3379  * If escontext points to an ErrorSaveContext, data errors will be reported
3380  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
3381  * whether an error occurred.  Otherwise, errors are thrown.
3382  *
3383  * Note: we currently don't have any to_interval() function, so there
3384  * is no need here for INVALID_FOR_INTERVAL checks.
3385  */
3386 static void
3387 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3388                           Oid collid, bool std, Node *escontext)
3389 {
3390         FormatNode *n;
3391         const char *s;
3392         int                     len,
3393                                 value;
3394         bool            fx_mode = std;
3395
3396         /* number of extra skipped characters (more than given in format string) */
3397         int                     extra_skip = 0;
3398
3399         /* cache localized days and months */
3400         cache_locale_time();
3401
3402         for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3403         {
3404                 /*
3405                  * Ignore spaces at the beginning of the string and before fields when
3406                  * not in FX (fixed width) mode.
3407                  */
3408                 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3409                         (n->type == NODE_TYPE_ACTION || n == node))
3410                 {
3411                         while (*s != '\0' && isspace((unsigned char) *s))
3412                         {
3413                                 s++;
3414                                 extra_skip++;
3415                         }
3416                 }
3417
3418                 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3419                 {
3420                         if (std)
3421                         {
3422                                 /*
3423                                  * Standard mode requires strict matching between format
3424                                  * string separators/spaces and input string.
3425                                  */
3426                                 Assert(n->character[0] && !n->character[1]);
3427
3428                                 if (*s == n->character[0])
3429                                         s++;
3430                                 else
3431                                         ereturn(escontext,,
3432                                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3433                                                          errmsg("unmatched format separator \"%c\"",
3434                                                                         n->character[0])));
3435                         }
3436                         else if (!fx_mode)
3437                         {
3438                                 /*
3439                                  * In non FX (fixed format) mode one format string space or
3440                                  * separator match to one space or separator in input string.
3441                                  * Or match nothing if there is no space or separator in the
3442                                  * current position of input string.
3443                                  */
3444                                 extra_skip--;
3445                                 if (isspace((unsigned char) *s) || is_separator_char(s))
3446                                 {
3447                                         s++;
3448                                         extra_skip++;
3449                                 }
3450                         }
3451                         else
3452                         {
3453                                 /*
3454                                  * In FX mode, on format string space or separator we consume
3455                                  * exactly one character from input string.  Notice we don't
3456                                  * insist that the consumed character match the format's
3457                                  * character.
3458                                  */
3459                                 s += pg_mblen(s);
3460                         }
3461                         continue;
3462                 }
3463                 else if (n->type != NODE_TYPE_ACTION)
3464                 {
3465                         /*
3466                          * Text character, so consume one character from input string.
3467                          * Notice we don't insist that the consumed character match the
3468                          * format's character.
3469                          */
3470                         if (!fx_mode)
3471                         {
3472                                 /*
3473                                  * In non FX mode we might have skipped some extra characters
3474                                  * (more than specified in format string) before.  In this
3475                                  * case we don't skip input string character, because it might
3476                                  * be part of field.
3477                                  */
3478                                 if (extra_skip > 0)
3479                                         extra_skip--;
3480                                 else
3481                                         s += pg_mblen(s);
3482                         }
3483                         else
3484                         {
3485                                 int                     chlen = pg_mblen(s);
3486
3487                                 /*
3488                                  * Standard mode requires strict match of format characters.
3489                                  */
3490                                 if (std && n->type == NODE_TYPE_CHAR &&
3491                                         strncmp(s, n->character, chlen) != 0)
3492                                         ereturn(escontext,,
3493                                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3494                                                          errmsg("unmatched format character \"%s\"",
3495                                                                         n->character)));
3496
3497                                 s += chlen;
3498                         }
3499                         continue;
3500                 }
3501
3502                 if (!from_char_set_mode(out, n->key->date_mode, escontext))
3503                         return;
3504
3505                 switch (n->key->id)
3506                 {
3507                         case DCH_FX:
3508                                 fx_mode = true;
3509                                 break;
3510                         case DCH_A_M:
3511                         case DCH_P_M:
3512                         case DCH_a_m:
3513                         case DCH_p_m:
3514                                 if (!from_char_seq_search(&value, &s, ampm_strings_long,
3515                                                                                   NULL, InvalidOid,
3516                                                                                   n, escontext))
3517                                         return;
3518                                 if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3519                                         return;
3520                                 out->clock = CLOCK_12_HOUR;
3521                                 break;
3522                         case DCH_AM:
3523                         case DCH_PM:
3524                         case DCH_am:
3525                         case DCH_pm:
3526                                 if (!from_char_seq_search(&value, &s, ampm_strings,
3527                                                                                   NULL, InvalidOid,
3528                                                                                   n, escontext))
3529                                         return;
3530                                 if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3531                                         return;
3532                                 out->clock = CLOCK_12_HOUR;
3533                                 break;
3534                         case DCH_HH:
3535                         case DCH_HH12:
3536                                 if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3537                                         return;
3538                                 out->clock = CLOCK_12_HOUR;
3539                                 SKIP_THth(s, n->suffix);
3540                                 break;
3541                         case DCH_HH24:
3542                                 if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3543                                         return;
3544                                 SKIP_THth(s, n->suffix);
3545                                 break;
3546                         case DCH_MI:
3547                                 if (from_char_parse_int(&out->mi, &s, n, escontext) < 0)
3548                                         return;
3549                                 SKIP_THth(s, n->suffix);
3550                                 break;
3551                         case DCH_SS:
3552                                 if (from_char_parse_int(&out->ss, &s, n, escontext) < 0)
3553                                         return;
3554                                 SKIP_THth(s, n->suffix);
3555                                 break;
3556                         case DCH_MS:            /* millisecond */
3557                                 len = from_char_parse_int_len(&out->ms, &s, 3, n, escontext);
3558                                 if (len < 0)
3559                                         return;
3560
3561                                 /*
3562                                  * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3563                                  */
3564                                 out->ms *= len == 1 ? 100 :
3565                                         len == 2 ? 10 : 1;
3566
3567                                 SKIP_THth(s, n->suffix);
3568                                 break;
3569                         case DCH_FF1:
3570                         case DCH_FF2:
3571                         case DCH_FF3:
3572                         case DCH_FF4:
3573                         case DCH_FF5:
3574                         case DCH_FF6:
3575                                 out->ff = n->key->id - DCH_FF1 + 1;
3576                                 /* FALLTHROUGH */
3577                         case DCH_US:            /* microsecond */
3578                                 len = from_char_parse_int_len(&out->us, &s,
3579                                                                                           n->key->id == DCH_US ? 6 :
3580                                                                                           out->ff, n, escontext);
3581                                 if (len < 0)
3582                                         return;
3583
3584                                 out->us *= len == 1 ? 100000 :
3585                                         len == 2 ? 10000 :
3586                                         len == 3 ? 1000 :
3587                                         len == 4 ? 100 :
3588                                         len == 5 ? 10 : 1;
3589
3590                                 SKIP_THth(s, n->suffix);
3591                                 break;
3592                         case DCH_SSSS:
3593                                 if (from_char_parse_int(&out->ssss, &s, n, escontext) < 0)
3594                                         return;
3595                                 SKIP_THth(s, n->suffix);
3596                                 break;
3597                         case DCH_tz:
3598                         case DCH_TZ:
3599                                 {
3600                                         int                     tzlen;
3601
3602                                         tzlen = DecodeTimezoneAbbrevPrefix(s,
3603                                                                                                            &out->gmtoffset,
3604                                                                                                            &out->tzp);
3605                                         if (tzlen > 0)
3606                                         {
3607                                                 out->has_tz = true;
3608                                                 /* we only need the zone abbrev for DYNTZ case */
3609                                                 if (out->tzp)
3610                                                         out->abbrev = pnstrdup(s, tzlen);
3611                                                 out->tzsign = 0;        /* drop any earlier TZH/TZM info */
3612                                                 s += tzlen;
3613                                                 break;
3614                                         }
3615                                         else if (isalpha((unsigned char) *s))
3616                                         {
3617                                                 /*
3618                                                  * It doesn't match any abbreviation, but it starts
3619                                                  * with a letter.  OF format certainly won't succeed;
3620                                                  * assume it's a misspelled abbreviation and complain
3621                                                  * accordingly.
3622                                                  */
3623                                                 ereturn(escontext,,
3624                                                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3625                                                                  errmsg("invalid value \"%s\" for \"%s\"",
3626                                                                                 s, n->key->name),
3627                                                                  errdetail("Time zone abbreviation is not recognized.")));
3628                                         }
3629                                         /* otherwise parse it like OF */
3630                                 }
3631                                 /* FALLTHROUGH */
3632                         case DCH_OF:
3633                                 /* OF is equivalent to TZH or TZH:TZM */
3634                                 /* see TZH comments below */
3635                                 if (*s == '+' || *s == '-' || *s == ' ')
3636                                 {
3637                                         out->tzsign = *s == '-' ? -1 : +1;
3638                                         s++;
3639                                 }
3640                                 else
3641                                 {
3642                                         if (extra_skip > 0 && *(s - 1) == '-')
3643                                                 out->tzsign = -1;
3644                                         else
3645                                                 out->tzsign = +1;
3646                                 }
3647                                 if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3648                                         return;
3649                                 if (*s == ':')
3650                                 {
3651                                         s++;
3652                                         if (from_char_parse_int_len(&out->tzm, &s, 2, n,
3653                                                                                                 escontext) < 0)
3654                                                 return;
3655                                 }
3656                                 break;
3657                         case DCH_TZH:
3658
3659                                 /*
3660                                  * Value of TZH might be negative.  And the issue is that we
3661                                  * might swallow minus sign as the separator.  So, if we have
3662                                  * skipped more characters than specified in the format
3663                                  * string, then we consider prepending last skipped minus to
3664                                  * TZH.
3665                                  */
3666                                 if (*s == '+' || *s == '-' || *s == ' ')
3667                                 {
3668                                         out->tzsign = *s == '-' ? -1 : +1;
3669                                         s++;
3670                                 }
3671                                 else
3672                                 {
3673                                         if (extra_skip > 0 && *(s - 1) == '-')
3674                                                 out->tzsign = -1;
3675                                         else
3676                                                 out->tzsign = +1;
3677                                 }
3678
3679                                 if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3680                                         return;
3681                                 break;
3682                         case DCH_TZM:
3683                                 /* assign positive timezone sign if TZH was not seen before */
3684                                 if (!out->tzsign)
3685                                         out->tzsign = +1;
3686                                 if (from_char_parse_int_len(&out->tzm, &s, 2, n, escontext) < 0)
3687                                         return;
3688                                 break;
3689                         case DCH_A_D:
3690                         case DCH_B_C:
3691                         case DCH_a_d:
3692                         case DCH_b_c:
3693                                 if (!from_char_seq_search(&value, &s, adbc_strings_long,
3694                                                                                   NULL, InvalidOid,
3695                                                                                   n, escontext))
3696                                         return;
3697                                 if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3698                                         return;
3699                                 break;
3700                         case DCH_AD:
3701                         case DCH_BC:
3702                         case DCH_ad:
3703                         case DCH_bc:
3704                                 if (!from_char_seq_search(&value, &s, adbc_strings,
3705                                                                                   NULL, InvalidOid,
3706                                                                                   n, escontext))
3707                                         return;
3708                                 if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3709                                         return;
3710                                 break;
3711                         case DCH_MONTH:
3712                         case DCH_Month:
3713                         case DCH_month:
3714                                 if (!from_char_seq_search(&value, &s, months_full,
3715                                                                                   S_TM(n->suffix) ? localized_full_months : NULL,
3716                                                                                   collid,
3717                                                                                   n, escontext))
3718                                         return;
3719                                 if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3720                                         return;
3721                                 break;
3722                         case DCH_MON:
3723                         case DCH_Mon:
3724                         case DCH_mon:
3725                                 if (!from_char_seq_search(&value, &s, months,
3726                                                                                   S_TM(n->suffix) ? localized_abbrev_months : NULL,
3727                                                                                   collid,
3728                                                                                   n, escontext))
3729                                         return;
3730                                 if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3731                                         return;
3732                                 break;
3733                         case DCH_MM:
3734                                 if (from_char_parse_int(&out->mm, &s, n, escontext) < 0)
3735                                         return;
3736                                 SKIP_THth(s, n->suffix);
3737                                 break;
3738                         case DCH_DAY:
3739                         case DCH_Day:
3740                         case DCH_day:
3741                                 if (!from_char_seq_search(&value, &s, days,
3742                                                                                   S_TM(n->suffix) ? localized_full_days : NULL,
3743                                                                                   collid,
3744                                                                                   n, escontext))
3745                                         return;
3746                                 if (!from_char_set_int(&out->d, value, n, escontext))
3747                                         return;
3748                                 out->d++;
3749                                 break;
3750                         case DCH_DY:
3751                         case DCH_Dy:
3752                         case DCH_dy:
3753                                 if (!from_char_seq_search(&value, &s, days_short,
3754                                                                                   S_TM(n->suffix) ? localized_abbrev_days : NULL,
3755                                                                                   collid,
3756                                                                                   n, escontext))
3757                                         return;
3758                                 if (!from_char_set_int(&out->d, value, n, escontext))
3759                                         return;
3760                                 out->d++;
3761                                 break;
3762                         case DCH_DDD:
3763                                 if (from_char_parse_int(&out->ddd, &s, n, escontext) < 0)
3764                                         return;
3765                                 SKIP_THth(s, n->suffix);
3766                                 break;
3767                         case DCH_IDDD:
3768                                 if (from_char_parse_int_len(&out->ddd, &s, 3, n, escontext) < 0)
3769                                         return;
3770                                 SKIP_THth(s, n->suffix);
3771                                 break;
3772                         case DCH_DD:
3773                                 if (from_char_parse_int(&out->dd, &s, n, escontext) < 0)
3774                                         return;
3775                                 SKIP_THth(s, n->suffix);
3776                                 break;
3777                         case DCH_D:
3778                                 if (from_char_parse_int(&out->d, &s, n, escontext) < 0)
3779                                         return;
3780                                 SKIP_THth(s, n->suffix);
3781                                 break;
3782                         case DCH_ID:
3783                                 if (from_char_parse_int_len(&out->d, &s, 1, n, escontext) < 0)
3784                                         return;
3785                                 /* Shift numbering to match Gregorian where Sunday = 1 */
3786                                 if (++out->d > 7)
3787                                         out->d = 1;
3788                                 SKIP_THth(s, n->suffix);
3789                                 break;
3790                         case DCH_WW:
3791                         case DCH_IW:
3792                                 if (from_char_parse_int(&out->ww, &s, n, escontext) < 0)
3793                                         return;
3794                                 SKIP_THth(s, n->suffix);
3795                                 break;
3796                         case DCH_Q:
3797
3798                                 /*
3799                                  * We ignore 'Q' when converting to date because it is unclear
3800                                  * which date in the quarter to use, and some people specify
3801                                  * both quarter and month, so if it was honored it might
3802                                  * conflict with the supplied month. That is also why we don't
3803                                  * throw an error.
3804                                  *
3805                                  * We still parse the source string for an integer, but it
3806                                  * isn't stored anywhere in 'out'.
3807                                  */
3808                                 if (from_char_parse_int((int *) NULL, &s, n, escontext) < 0)
3809                                         return;
3810                                 SKIP_THth(s, n->suffix);
3811                                 break;
3812                         case DCH_CC:
3813                                 if (from_char_parse_int(&out->cc, &s, n, escontext) < 0)
3814                                         return;
3815                                 SKIP_THth(s, n->suffix);
3816                                 break;
3817                         case DCH_Y_YYY:
3818                                 {
3819                                         int                     matched,
3820                                                                 years,
3821                                                                 millennia,
3822                                                                 nch;
3823
3824                                         matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3825                                         if (matched < 2)
3826                                                 ereturn(escontext,,
3827                                                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3828                                                                  errmsg("invalid input string for \"Y,YYY\"")));
3829                                         years += (millennia * 1000);
3830                                         if (!from_char_set_int(&out->year, years, n, escontext))
3831                                                 return;
3832                                         out->yysz = 4;
3833                                         s += nch;
3834                                         SKIP_THth(s, n->suffix);
3835                                 }
3836                                 break;
3837                         case DCH_YYYY:
3838                         case DCH_IYYY:
3839                                 if (from_char_parse_int(&out->year, &s, n, escontext) < 0)
3840                                         return;
3841                                 out->yysz = 4;
3842                                 SKIP_THth(s, n->suffix);
3843                                 break;
3844                         case DCH_YYY:
3845                         case DCH_IYY:
3846                                 len = from_char_parse_int(&out->year, &s, n, escontext);
3847                                 if (len < 0)
3848                                         return;
3849                                 if (len < 4)
3850                                         out->year = adjust_partial_year_to_2020(out->year);
3851                                 out->yysz = 3;
3852                                 SKIP_THth(s, n->suffix);
3853                                 break;
3854                         case DCH_YY:
3855                         case DCH_IY:
3856                                 len = from_char_parse_int(&out->year, &s, n, escontext);
3857                                 if (len < 0)
3858                                         return;
3859                                 if (len < 4)
3860                                         out->year = adjust_partial_year_to_2020(out->year);
3861                                 out->yysz = 2;
3862                                 SKIP_THth(s, n->suffix);
3863                                 break;
3864                         case DCH_Y:
3865                         case DCH_I:
3866                                 len = from_char_parse_int(&out->year, &s, n, escontext);
3867                                 if (len < 0)
3868                                         return;
3869                                 if (len < 4)
3870                                         out->year = adjust_partial_year_to_2020(out->year);
3871                                 out->yysz = 1;
3872                                 SKIP_THth(s, n->suffix);
3873                                 break;
3874                         case DCH_RM:
3875                         case DCH_rm:
3876                                 if (!from_char_seq_search(&value, &s, rm_months_lower,
3877                                                                                   NULL, InvalidOid,
3878                                                                                   n, escontext))
3879                                         return;
3880                                 if (!from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n,
3881                                                                            escontext))
3882                                         return;
3883                                 break;
3884                         case DCH_W:
3885                                 if (from_char_parse_int(&out->w, &s, n, escontext) < 0)
3886                                         return;
3887                                 SKIP_THth(s, n->suffix);
3888                                 break;
3889                         case DCH_J:
3890                                 if (from_char_parse_int(&out->j, &s, n, escontext) < 0)
3891                                         return;
3892                                 SKIP_THth(s, n->suffix);
3893                                 break;
3894                 }
3895
3896                 /* Ignore all spaces after fields */
3897                 if (!fx_mode)
3898                 {
3899                         extra_skip = 0;
3900                         while (*s != '\0' && isspace((unsigned char) *s))
3901                         {
3902                                 s++;
3903                                 extra_skip++;
3904                         }
3905                 }
3906         }
3907
3908         /*
3909          * Standard parsing mode doesn't allow unmatched format patterns or
3910          * trailing characters in the input string.
3911          */
3912         if (std)
3913         {
3914                 if (n->type != NODE_TYPE_END)
3915                         ereturn(escontext,,
3916                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3917                                          errmsg("input string is too short for datetime format")));
3918
3919                 while (*s != '\0' && isspace((unsigned char) *s))
3920                         s++;
3921
3922                 if (*s != '\0')
3923                         ereturn(escontext,,
3924                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3925                                          errmsg("trailing characters remain in input string after datetime format")));
3926         }
3927 }
3928
3929 /*
3930  * The invariant for DCH cache entry management is that DCHCounter is equal
3931  * to the maximum age value among the existing entries, and we increment it
3932  * whenever an access occurs.  If we approach overflow, deal with that by
3933  * halving all the age values, so that we retain a fairly accurate idea of
3934  * which entries are oldest.
3935  */
3936 static inline void
3937 DCH_prevent_counter_overflow(void)
3938 {
3939         if (DCHCounter >= (INT_MAX - 1))
3940         {
3941                 for (int i = 0; i < n_DCHCache; i++)
3942                         DCHCache[i]->age >>= 1;
3943                 DCHCounter >>= 1;
3944         }
3945 }
3946
3947 /*
3948  * Get mask of date/time/zone components present in format nodes.
3949  */
3950 static int
3951 DCH_datetime_type(FormatNode *node)
3952 {
3953         FormatNode *n;
3954         int                     flags = 0;
3955
3956         for (n = node; n->type != NODE_TYPE_END; n++)
3957         {
3958                 if (n->type != NODE_TYPE_ACTION)
3959                         continue;
3960
3961                 switch (n->key->id)
3962                 {
3963                         case DCH_FX:
3964                                 break;
3965                         case DCH_A_M:
3966                         case DCH_P_M:
3967                         case DCH_a_m:
3968                         case DCH_p_m:
3969                         case DCH_AM:
3970                         case DCH_PM:
3971                         case DCH_am:
3972                         case DCH_pm:
3973                         case DCH_HH:
3974                         case DCH_HH12:
3975                         case DCH_HH24:
3976                         case DCH_MI:
3977                         case DCH_SS:
3978                         case DCH_MS:            /* millisecond */
3979                         case DCH_US:            /* microsecond */
3980                         case DCH_FF1:
3981                         case DCH_FF2:
3982                         case DCH_FF3:
3983                         case DCH_FF4:
3984                         case DCH_FF5:
3985                         case DCH_FF6:
3986                         case DCH_SSSS:
3987                                 flags |= DCH_TIMED;
3988                                 break;
3989                         case DCH_tz:
3990                         case DCH_TZ:
3991                         case DCH_OF:
3992                         case DCH_TZH:
3993                         case DCH_TZM:
3994                                 flags |= DCH_ZONED;
3995                                 break;
3996                         case DCH_A_D:
3997                         case DCH_B_C:
3998                         case DCH_a_d:
3999                         case DCH_b_c:
4000                         case DCH_AD:
4001                         case DCH_BC:
4002                         case DCH_ad:
4003                         case DCH_bc:
4004                         case DCH_MONTH:
4005                         case DCH_Month:
4006                         case DCH_month:
4007                         case DCH_MON:
4008                         case DCH_Mon:
4009                         case DCH_mon:
4010                         case DCH_MM:
4011                         case DCH_DAY:
4012                         case DCH_Day:
4013                         case DCH_day:
4014                         case DCH_DY:
4015                         case DCH_Dy:
4016                         case DCH_dy:
4017                         case DCH_DDD:
4018                         case DCH_IDDD:
4019                         case DCH_DD:
4020                         case DCH_D:
4021                         case DCH_ID:
4022                         case DCH_WW:
4023                         case DCH_Q:
4024                         case DCH_CC:
4025                         case DCH_Y_YYY:
4026                         case DCH_YYYY:
4027                         case DCH_IYYY:
4028                         case DCH_YYY:
4029                         case DCH_IYY:
4030                         case DCH_YY:
4031                         case DCH_IY:
4032                         case DCH_Y:
4033                         case DCH_I:
4034                         case DCH_RM:
4035                         case DCH_rm:
4036                         case DCH_W:
4037                         case DCH_J:
4038                                 flags |= DCH_DATED;
4039                                 break;
4040                 }
4041         }
4042
4043         return flags;
4044 }
4045
4046 /* select a DCHCacheEntry to hold the given format picture */
4047 static DCHCacheEntry *
4048 DCH_cache_getnew(const char *str, bool std)
4049 {
4050         DCHCacheEntry *ent;
4051
4052         /* Ensure we can advance DCHCounter below */
4053         DCH_prevent_counter_overflow();
4054
4055         /*
4056          * If cache is full, remove oldest entry (or recycle first not-valid one)
4057          */
4058         if (n_DCHCache >= DCH_CACHE_ENTRIES)
4059         {
4060                 DCHCacheEntry *old = DCHCache[0];
4061
4062 #ifdef DEBUG_TO_FROM_CHAR
4063                 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
4064 #endif
4065                 if (old->valid)
4066                 {
4067                         for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
4068                         {
4069                                 ent = DCHCache[i];
4070                                 if (!ent->valid)
4071                                 {
4072                                         old = ent;
4073                                         break;
4074                                 }
4075                                 if (ent->age < old->age)
4076                                         old = ent;
4077                         }
4078                 }
4079 #ifdef DEBUG_TO_FROM_CHAR
4080                 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
4081 #endif
4082                 old->valid = false;
4083                 strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
4084                 old->age = (++DCHCounter);
4085                 /* caller is expected to fill format, then set valid */
4086                 return old;
4087         }
4088         else
4089         {
4090 #ifdef DEBUG_TO_FROM_CHAR
4091                 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
4092 #endif
4093                 Assert(DCHCache[n_DCHCache] == NULL);
4094                 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
4095                         MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
4096                 ent->valid = false;
4097                 strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
4098                 ent->std = std;
4099                 ent->age = (++DCHCounter);
4100                 /* caller is expected to fill format, then set valid */
4101                 ++n_DCHCache;
4102                 return ent;
4103         }
4104 }
4105
4106 /* look for an existing DCHCacheEntry matching the given format picture */
4107 static DCHCacheEntry *
4108 DCH_cache_search(const char *str, bool std)
4109 {
4110         /* Ensure we can advance DCHCounter below */
4111         DCH_prevent_counter_overflow();
4112
4113         for (int i = 0; i < n_DCHCache; i++)
4114         {
4115                 DCHCacheEntry *ent = DCHCache[i];
4116
4117                 if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
4118                 {
4119                         ent->age = (++DCHCounter);
4120                         return ent;
4121                 }
4122         }
4123
4124         return NULL;
4125 }
4126
4127 /* Find or create a DCHCacheEntry for the given format picture */
4128 static DCHCacheEntry *
4129 DCH_cache_fetch(const char *str, bool std)
4130 {
4131         DCHCacheEntry *ent;
4132
4133         if ((ent = DCH_cache_search(str, std)) == NULL)
4134         {
4135                 /*
4136                  * Not in the cache, must run parser and save a new format-picture to
4137                  * the cache.  Do not mark the cache entry valid until parsing
4138                  * succeeds.
4139                  */
4140                 ent = DCH_cache_getnew(str, std);
4141
4142                 parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4143                                          DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4144
4145                 ent->valid = true;
4146         }
4147         return ent;
4148 }
4149
4150 /*
4151  * Format a date/time or interval into a string according to fmt.
4152  * We parse fmt into a list of FormatNodes.  This is then passed to DCH_to_char
4153  * for formatting.
4154  */
4155 static text *
4156 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4157 {
4158         FormatNode *format;
4159         char       *fmt_str,
4160                            *result;
4161         bool            incache;
4162         int                     fmt_len;
4163         text       *res;
4164
4165         /*
4166          * Convert fmt to C string
4167          */
4168         fmt_str = text_to_cstring(fmt);
4169         fmt_len = strlen(fmt_str);
4170
4171         /*
4172          * Allocate workspace for result as C string
4173          */
4174         result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4175         *result = '\0';
4176
4177         if (fmt_len > DCH_CACHE_SIZE)
4178         {
4179                 /*
4180                  * Allocate new memory if format picture is bigger than static cache
4181                  * and do not use cache (call parser always)
4182                  */
4183                 incache = false;
4184
4185                 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4186
4187                 parse_format(format, fmt_str, DCH_keywords,
4188                                          DCH_suff, DCH_index, DCH_FLAG, NULL);
4189         }
4190         else
4191         {
4192                 /*
4193                  * Use cache buffers
4194                  */
4195                 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4196
4197                 incache = true;
4198                 format = ent->format;
4199         }
4200
4201         /* The real work is here */
4202         DCH_to_char(format, is_interval, tmtc, result, collid);
4203
4204         if (!incache)
4205                 pfree(format);
4206
4207         pfree(fmt_str);
4208
4209         /* convert C-string result to TEXT format */
4210         res = cstring_to_text(result);
4211
4212         pfree(result);
4213         return res;
4214 }
4215
4216 /****************************************************************************
4217  *                              Public routines
4218  ***************************************************************************/
4219
4220 /* -------------------
4221  * TIMESTAMP to_char()
4222  * -------------------
4223  */
4224 Datum
4225 timestamp_to_char(PG_FUNCTION_ARGS)
4226 {
4227         Timestamp       dt = PG_GETARG_TIMESTAMP(0);
4228         text       *fmt = PG_GETARG_TEXT_PP(1),
4229                            *res;
4230         TmToChar        tmtc;
4231         struct pg_tm tt;
4232         struct fmt_tm *tm;
4233         int                     thisdate;
4234
4235         if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4236                 PG_RETURN_NULL();
4237
4238         ZERO_tmtc(&tmtc);
4239         tm = tmtcTm(&tmtc);
4240
4241         if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4242                 ereport(ERROR,
4243                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4244                                  errmsg("timestamp out of range")));
4245
4246         /* calculate wday and yday, because timestamp2tm doesn't */
4247         thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4248         tt.tm_wday = (thisdate + 1) % 7;
4249         tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4250
4251         COPY_tm(tm, &tt);
4252
4253         if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4254                 PG_RETURN_NULL();
4255
4256         PG_RETURN_TEXT_P(res);
4257 }
4258
4259 Datum
4260 timestamptz_to_char(PG_FUNCTION_ARGS)
4261 {
4262         TimestampTz dt = PG_GETARG_TIMESTAMP(0);
4263         text       *fmt = PG_GETARG_TEXT_PP(1),
4264                            *res;
4265         TmToChar        tmtc;
4266         int                     tz;
4267         struct pg_tm tt;
4268         struct fmt_tm *tm;
4269         int                     thisdate;
4270
4271         if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4272                 PG_RETURN_NULL();
4273
4274         ZERO_tmtc(&tmtc);
4275         tm = tmtcTm(&tmtc);
4276
4277         if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4278                 ereport(ERROR,
4279                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4280                                  errmsg("timestamp out of range")));
4281
4282         /* calculate wday and yday, because timestamp2tm doesn't */
4283         thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4284         tt.tm_wday = (thisdate + 1) % 7;
4285         tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4286
4287         COPY_tm(tm, &tt);
4288
4289         if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4290                 PG_RETURN_NULL();
4291
4292         PG_RETURN_TEXT_P(res);
4293 }
4294
4295
4296 /* -------------------
4297  * INTERVAL to_char()
4298  * -------------------
4299  */
4300 Datum
4301 interval_to_char(PG_FUNCTION_ARGS)
4302 {
4303         Interval   *it = PG_GETARG_INTERVAL_P(0);
4304         text       *fmt = PG_GETARG_TEXT_PP(1),
4305                            *res;
4306         TmToChar        tmtc;
4307         struct fmt_tm *tm;
4308         struct pg_itm tt,
4309                            *itm = &tt;
4310
4311         if (VARSIZE_ANY_EXHDR(fmt) <= 0 || INTERVAL_NOT_FINITE(it))
4312                 PG_RETURN_NULL();
4313
4314         ZERO_tmtc(&tmtc);
4315         tm = tmtcTm(&tmtc);
4316
4317         interval2itm(*it, itm);
4318         tmtc.fsec = itm->tm_usec;
4319         tm->tm_sec = itm->tm_sec;
4320         tm->tm_min = itm->tm_min;
4321         tm->tm_hour = itm->tm_hour;
4322         tm->tm_mday = itm->tm_mday;
4323         tm->tm_mon = itm->tm_mon;
4324         tm->tm_year = itm->tm_year;
4325
4326         /* wday is meaningless, yday approximates the total span in days */
4327         tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4328
4329         if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4330                 PG_RETURN_NULL();
4331
4332         PG_RETURN_TEXT_P(res);
4333 }
4334
4335 /* ---------------------
4336  * TO_TIMESTAMP()
4337  *
4338  * Make Timestamp from date_str which is formatted at argument 'fmt'
4339  * ( to_timestamp is reverse to_char() )
4340  * ---------------------
4341  */
4342 Datum
4343 to_timestamp(PG_FUNCTION_ARGS)
4344 {
4345         text       *date_txt = PG_GETARG_TEXT_PP(0);
4346         text       *fmt = PG_GETARG_TEXT_PP(1);
4347         Oid                     collid = PG_GET_COLLATION();
4348         Timestamp       result;
4349         int                     tz;
4350         struct pg_tm tm;
4351         struct fmt_tz ftz;
4352         fsec_t          fsec;
4353         int                     fprec;
4354
4355         do_to_timestamp(date_txt, fmt, collid, false,
4356                                         &tm, &fsec, &ftz, &fprec, NULL, NULL);
4357
4358         /* Use the specified time zone, if any. */
4359         if (ftz.has_tz)
4360                 tz = ftz.gmtoffset;
4361         else
4362                 tz = DetermineTimeZoneOffset(&tm, session_timezone);
4363
4364         if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4365                 ereport(ERROR,
4366                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4367                                  errmsg("timestamp out of range")));
4368
4369         /* Use the specified fractional precision, if any. */
4370         if (fprec)
4371                 AdjustTimestampForTypmod(&result, fprec, NULL);
4372
4373         PG_RETURN_TIMESTAMP(result);
4374 }
4375
4376 /* ----------
4377  * TO_DATE
4378  *      Make Date from date_str which is formatted at argument 'fmt'
4379  * ----------
4380  */
4381 Datum
4382 to_date(PG_FUNCTION_ARGS)
4383 {
4384         text       *date_txt = PG_GETARG_TEXT_PP(0);
4385         text       *fmt = PG_GETARG_TEXT_PP(1);
4386         Oid                     collid = PG_GET_COLLATION();
4387         DateADT         result;
4388         struct pg_tm tm;
4389         struct fmt_tz ftz;
4390         fsec_t          fsec;
4391
4392         do_to_timestamp(date_txt, fmt, collid, false,
4393                                         &tm, &fsec, &ftz, NULL, NULL, NULL);
4394
4395         /* Prevent overflow in Julian-day routines */
4396         if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4397                 ereport(ERROR,
4398                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4399                                  errmsg("date out of range: \"%s\"",
4400                                                 text_to_cstring(date_txt))));
4401
4402         result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4403
4404         /* Now check for just-out-of-range dates */
4405         if (!IS_VALID_DATE(result))
4406                 ereport(ERROR,
4407                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4408                                  errmsg("date out of range: \"%s\"",
4409                                                 text_to_cstring(date_txt))));
4410
4411         PG_RETURN_DATEADT(result);
4412 }
4413
4414 /*
4415  * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4416  * as a format string.  The collation 'collid' may be used for case-folding
4417  * rules in some cases.  'strict' specifies standard parsing mode.
4418  *
4419  * The actual data type (returned in 'typid', 'typmod') is determined by
4420  * the presence of date/time/zone components in the format string.
4421  *
4422  * When a timezone component is present, the corresponding offset is
4423  * returned in '*tz'.
4424  *
4425  * If escontext points to an ErrorSaveContext, data errors will be reported
4426  * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
4427  * whether an error occurred.  Otherwise, errors are thrown.
4428  */
4429 Datum
4430 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4431                            Oid *typid, int32 *typmod, int *tz,
4432                            Node *escontext)
4433 {
4434         struct pg_tm tm;
4435         struct fmt_tz ftz;
4436         fsec_t          fsec;
4437         int                     fprec;
4438         uint32          flags;
4439
4440         if (!do_to_timestamp(date_txt, fmt, collid, strict,
4441                                                  &tm, &fsec, &ftz, &fprec, &flags, escontext))
4442                 return (Datum) 0;
4443
4444         *typmod = fprec ? fprec : -1;   /* fractional part precision */
4445
4446         if (flags & DCH_DATED)
4447         {
4448                 if (flags & DCH_TIMED)
4449                 {
4450                         if (flags & DCH_ZONED)
4451                         {
4452                                 TimestampTz result;
4453
4454                                 if (ftz.has_tz)
4455                                 {
4456                                         *tz = ftz.gmtoffset;
4457                                 }
4458                                 else
4459                                 {
4460                                         /*
4461                                          * Time zone is present in format string, but not in input
4462                                          * string.  Assuming do_to_timestamp() triggers no error
4463                                          * this should be possible only in non-strict case.
4464                                          */
4465                                         Assert(!strict);
4466
4467                                         ereturn(escontext, (Datum) 0,
4468                                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4469                                                          errmsg("missing time zone in input string for type timestamptz")));
4470                                 }
4471
4472                                 if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4473                                         ereturn(escontext, (Datum) 0,
4474                                                         (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4475                                                          errmsg("timestamptz out of range")));
4476
4477                                 AdjustTimestampForTypmod(&result, *typmod, escontext);
4478
4479                                 *typid = TIMESTAMPTZOID;
4480                                 return TimestampTzGetDatum(result);
4481                         }
4482                         else
4483                         {
4484                                 Timestamp       result;
4485
4486                                 if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4487                                         ereturn(escontext, (Datum) 0,
4488                                                         (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4489                                                          errmsg("timestamp out of range")));
4490
4491                                 AdjustTimestampForTypmod(&result, *typmod, escontext);
4492
4493                                 *typid = TIMESTAMPOID;
4494                                 return TimestampGetDatum(result);
4495                         }
4496                 }
4497                 else
4498                 {
4499                         if (flags & DCH_ZONED)
4500                         {
4501                                 ereturn(escontext, (Datum) 0,
4502                                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4503                                                  errmsg("datetime format is zoned but not timed")));
4504                         }
4505                         else
4506                         {
4507                                 DateADT         result;
4508
4509                                 /* Prevent overflow in Julian-day routines */
4510                                 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4511                                         ereturn(escontext, (Datum) 0,
4512                                                         (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4513                                                          errmsg("date out of range: \"%s\"",
4514                                                                         text_to_cstring(date_txt))));
4515
4516                                 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4517                                         POSTGRES_EPOCH_JDATE;
4518
4519                                 /* Now check for just-out-of-range dates */
4520                                 if (!IS_VALID_DATE(result))
4521                                         ereturn(escontext, (Datum) 0,
4522                                                         (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4523                                                          errmsg("date out of range: \"%s\"",
4524                                                                         text_to_cstring(date_txt))));
4525
4526                                 *typid = DATEOID;
4527                                 return DateADTGetDatum(result);
4528                         }
4529                 }
4530         }
4531         else if (flags & DCH_TIMED)
4532         {
4533                 if (flags & DCH_ZONED)
4534                 {
4535                         TimeTzADT  *result = palloc(sizeof(TimeTzADT));
4536
4537                         if (ftz.has_tz)
4538                         {
4539                                 *tz = ftz.gmtoffset;
4540                         }
4541                         else
4542                         {
4543                                 /*
4544                                  * Time zone is present in format string, but not in input
4545                                  * string.  Assuming do_to_timestamp() triggers no error this
4546                                  * should be possible only in non-strict case.
4547                                  */
4548                                 Assert(!strict);
4549
4550                                 ereturn(escontext, (Datum) 0,
4551                                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4552                                                  errmsg("missing time zone in input string for type timetz")));
4553                         }
4554
4555                         if (tm2timetz(&tm, fsec, *tz, result) != 0)
4556                                 ereturn(escontext, (Datum) 0,
4557                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4558                                                  errmsg("timetz out of range")));
4559
4560                         AdjustTimeForTypmod(&result->time, *typmod);
4561
4562                         *typid = TIMETZOID;
4563                         return TimeTzADTPGetDatum(result);
4564                 }
4565                 else
4566                 {
4567                         TimeADT         result;
4568
4569                         if (tm2time(&tm, fsec, &result) != 0)
4570                                 ereturn(escontext, (Datum) 0,
4571                                                 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4572                                                  errmsg("time out of range")));
4573
4574                         AdjustTimeForTypmod(&result, *typmod);
4575
4576                         *typid = TIMEOID;
4577                         return TimeADTGetDatum(result);
4578                 }
4579         }
4580         else
4581         {
4582                 ereturn(escontext, (Datum) 0,
4583                                 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4584                                  errmsg("datetime format is not dated and not timed")));
4585         }
4586 }
4587
4588 /*
4589  * Parses the datetime format string in 'fmt_str' and returns true if it
4590  * contains a timezone specifier, false if not.
4591  */
4592 bool
4593 datetime_format_has_tz(const char *fmt_str)
4594 {
4595         bool            incache;
4596         int                     fmt_len = strlen(fmt_str);
4597         int                     result;
4598         FormatNode *format;
4599
4600         if (fmt_len > DCH_CACHE_SIZE)
4601         {
4602                 /*
4603                  * Allocate new memory if format picture is bigger than static cache
4604                  * and do not use cache (call parser always)
4605                  */
4606                 incache = false;
4607
4608                 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4609
4610                 parse_format(format, fmt_str, DCH_keywords,
4611                                          DCH_suff, DCH_index, DCH_FLAG, NULL);
4612         }
4613         else
4614         {
4615                 /*
4616                  * Use cache buffers
4617                  */
4618                 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4619
4620                 incache = true;
4621                 format = ent->format;
4622         }
4623
4624         result = DCH_datetime_type(format);
4625
4626         if (!incache)
4627                 pfree(format);
4628
4629         return result & DCH_ZONED;
4630 }
4631
4632 /*
4633  * do_to_timestamp: shared code for to_timestamp and to_date
4634  *
4635  * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4636  * fractional seconds, struct fmt_tz, and fractional precision.
4637  *
4638  * 'collid' identifies the collation to use, if needed.
4639  * 'std' specifies standard parsing mode.
4640  *
4641  * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4642  * if that is not NULL.
4643  *
4644  * Returns true on success, false on failure (if escontext points to an
4645  * ErrorSaveContext; otherwise errors are thrown).  Note that currently,
4646  * soft-error behavior is provided for bad data but not bad format.
4647  *
4648  * We parse 'fmt' into a list of FormatNodes, which is then passed to
4649  * DCH_from_char to populate a TmFromChar with the parsed contents of
4650  * 'date_txt'.
4651  *
4652  * The TmFromChar is then analysed and converted into the final results in
4653  * struct 'tm', 'fsec', struct 'tz', and 'fprec'.
4654  */
4655 static bool
4656 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4657                                 struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
4658                                 int *fprec, uint32 *flags, Node *escontext)
4659 {
4660         FormatNode *format = NULL;
4661         TmFromChar      tmfc;
4662         int                     fmt_len;
4663         char       *date_str;
4664         int                     fmask;
4665         bool            incache = false;
4666
4667         Assert(tm != NULL);
4668         Assert(fsec != NULL);
4669
4670         date_str = text_to_cstring(date_txt);
4671
4672         ZERO_tmfc(&tmfc);
4673         ZERO_tm(tm);
4674         *fsec = 0;
4675         tz->has_tz = false;
4676         if (fprec)
4677                 *fprec = 0;
4678         if (flags)
4679                 *flags = 0;
4680         fmask = 0;                                      /* bit mask for ValidateDate() */
4681
4682         fmt_len = VARSIZE_ANY_EXHDR(fmt);
4683
4684         if (fmt_len)
4685         {
4686                 char       *fmt_str;
4687
4688                 fmt_str = text_to_cstring(fmt);
4689
4690                 if (fmt_len > DCH_CACHE_SIZE)
4691                 {
4692                         /*
4693                          * Allocate new memory if format picture is bigger than static
4694                          * cache and do not use cache (call parser always)
4695                          */
4696                         format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4697
4698                         parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4699                                                  DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4700                 }
4701                 else
4702                 {
4703                         /*
4704                          * Use cache buffers
4705                          */
4706                         DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4707
4708                         incache = true;
4709                         format = ent->format;
4710                 }
4711
4712 #ifdef DEBUG_TO_FROM_CHAR
4713                 /* dump_node(format, fmt_len); */
4714                 /* dump_index(DCH_keywords, DCH_index); */
4715 #endif
4716
4717                 DCH_from_char(format, date_str, &tmfc, collid, std, escontext);
4718                 pfree(fmt_str);
4719                 if (SOFT_ERROR_OCCURRED(escontext))
4720                         goto fail;
4721
4722                 if (flags)
4723                         *flags = DCH_datetime_type(format);
4724
4725                 if (!incache)
4726                 {
4727                         pfree(format);
4728                         format = NULL;
4729                 }
4730         }
4731
4732         DEBUG_TMFC(&tmfc);
4733
4734         /*
4735          * Convert to_date/to_timestamp input fields to standard 'tm'
4736          */
4737         if (tmfc.ssss)
4738         {
4739                 int                     x = tmfc.ssss;
4740
4741                 tm->tm_hour = x / SECS_PER_HOUR;
4742                 x %= SECS_PER_HOUR;
4743                 tm->tm_min = x / SECS_PER_MINUTE;
4744                 x %= SECS_PER_MINUTE;
4745                 tm->tm_sec = x;
4746         }
4747
4748         if (tmfc.ss)
4749                 tm->tm_sec = tmfc.ss;
4750         if (tmfc.mi)
4751                 tm->tm_min = tmfc.mi;
4752         if (tmfc.hh)
4753                 tm->tm_hour = tmfc.hh;
4754
4755         if (tmfc.clock == CLOCK_12_HOUR)
4756         {
4757                 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4758                 {
4759                         errsave(escontext,
4760                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4761                                          errmsg("hour \"%d\" is invalid for the 12-hour clock",
4762                                                         tm->tm_hour),
4763                                          errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
4764                         goto fail;
4765                 }
4766
4767                 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4768                         tm->tm_hour += HOURS_PER_DAY / 2;
4769                 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4770                         tm->tm_hour = 0;
4771         }
4772
4773         if (tmfc.year)
4774         {
4775                 /*
4776                  * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4777                  * the year in the given century.  Keep in mind that the 21st century
4778                  * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4779                  * 600BC to 501BC.
4780                  */
4781                 if (tmfc.cc && tmfc.yysz <= 2)
4782                 {
4783                         if (tmfc.bc)
4784                                 tmfc.cc = -tmfc.cc;
4785                         tm->tm_year = tmfc.year % 100;
4786                         if (tm->tm_year)
4787                         {
4788                                 if (tmfc.cc >= 0)
4789                                         tm->tm_year += (tmfc.cc - 1) * 100;
4790                                 else
4791                                         tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4792                         }
4793                         else
4794                         {
4795                                 /* find century year for dates ending in "00" */
4796                                 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4797                         }
4798                 }
4799                 else
4800                 {
4801                         /* If a 4-digit year is provided, we use that and ignore CC. */
4802                         tm->tm_year = tmfc.year;
4803                         if (tmfc.bc)
4804                                 tm->tm_year = -tm->tm_year;
4805                         /* correct for our representation of BC years */
4806                         if (tm->tm_year < 0)
4807                                 tm->tm_year++;
4808                 }
4809                 fmask |= DTK_M(YEAR);
4810         }
4811         else if (tmfc.cc)
4812         {
4813                 /* use first year of century */
4814                 if (tmfc.bc)
4815                         tmfc.cc = -tmfc.cc;
4816                 if (tmfc.cc >= 0)
4817                         /* +1 because 21st century started in 2001 */
4818                         tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4819                 else
4820                         /* +1 because year == 599 is 600 BC */
4821                         tm->tm_year = tmfc.cc * 100 + 1;
4822                 fmask |= DTK_M(YEAR);
4823         }
4824
4825         if (tmfc.j)
4826         {
4827                 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4828                 fmask |= DTK_DATE_M;
4829         }
4830
4831         if (tmfc.ww)
4832         {
4833                 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4834                 {
4835                         /*
4836                          * If tmfc.d is not set, then the date is left at the beginning of
4837                          * the ISO week (Monday).
4838                          */
4839                         if (tmfc.d)
4840                                 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4841                         else
4842                                 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4843                         fmask |= DTK_DATE_M;
4844                 }
4845                 else
4846                         tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4847         }
4848
4849         if (tmfc.w)
4850                 tmfc.dd = (tmfc.w - 1) * 7 + 1;
4851         if (tmfc.dd)
4852         {
4853                 tm->tm_mday = tmfc.dd;
4854                 fmask |= DTK_M(DAY);
4855         }
4856         if (tmfc.mm)
4857         {
4858                 tm->tm_mon = tmfc.mm;
4859                 fmask |= DTK_M(MONTH);
4860         }
4861
4862         if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4863         {
4864                 /*
4865                  * The month and day field have not been set, so we use the
4866                  * day-of-year field to populate them.  Depending on the date mode,
4867                  * this field may be interpreted as a Gregorian day-of-year, or an ISO
4868                  * week date day-of-year.
4869                  */
4870
4871                 if (!tm->tm_year && !tmfc.bc)
4872                 {
4873                         errsave(escontext,
4874                                         (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4875                                          errmsg("cannot calculate day of year without year information")));
4876                         goto fail;
4877                 }
4878
4879                 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4880                 {
4881                         int                     j0;             /* zeroth day of the ISO year, in Julian */
4882
4883                         j0 = isoweek2j(tm->tm_year, 1) - 1;
4884
4885                         j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4886                         fmask |= DTK_DATE_M;
4887                 }
4888                 else
4889                 {
4890                         const int  *y;
4891                         int                     i;
4892
4893                         static const int ysum[2][13] = {
4894                                 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4895                         {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4896
4897                         y = ysum[isleap(tm->tm_year)];
4898
4899                         for (i = 1; i <= MONTHS_PER_YEAR; i++)
4900                         {
4901                                 if (tmfc.ddd <= y[i])
4902                                         break;
4903                         }
4904                         if (tm->tm_mon <= 1)
4905                                 tm->tm_mon = i;
4906
4907                         if (tm->tm_mday <= 1)
4908                                 tm->tm_mday = tmfc.ddd - y[i - 1];
4909
4910                         fmask |= DTK_M(MONTH) | DTK_M(DAY);
4911                 }
4912         }
4913
4914         if (tmfc.ms)
4915                 *fsec += tmfc.ms * 1000;
4916         if (tmfc.us)
4917                 *fsec += tmfc.us;
4918         if (fprec)
4919                 *fprec = tmfc.ff;               /* fractional precision, if specified */
4920
4921         /* Range-check date fields according to bit mask computed above */
4922         if (fmask != 0)
4923         {
4924                 /* We already dealt with AD/BC, so pass isjulian = true */
4925                 int                     dterr = ValidateDate(fmask, true, false, false, tm);
4926
4927                 if (dterr != 0)
4928                 {
4929                         /*
4930                          * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4931                          * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4932                          * irrelevant hint about datestyle.
4933                          */
4934                         DateTimeParseError(DTERR_FIELD_OVERFLOW, NULL,
4935                                                            date_str, "timestamp", escontext);
4936                         goto fail;
4937                 }
4938         }
4939
4940         /* Range-check time fields too */
4941         if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4942                 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4943                 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4944                 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4945         {
4946                 DateTimeParseError(DTERR_FIELD_OVERFLOW, NULL,
4947                                                    date_str, "timestamp", escontext);
4948                 goto fail;
4949         }
4950
4951         /*
4952          * If timezone info was present, reduce it to a GMT offset.  (We cannot do
4953          * this until we've filled all of the tm struct, since the zone's offset
4954          * might be time-varying.)
4955          */
4956         if (tmfc.tzsign)
4957         {
4958                 /* TZH and/or TZM fields */
4959                 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4960                         tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4961                 {
4962                         DateTimeParseError(DTERR_TZDISP_OVERFLOW, NULL,
4963                                                            date_str, "timestamp", escontext);
4964                         goto fail;
4965                 }
4966
4967                 tz->has_tz = true;
4968                 tz->gmtoffset = (tmfc.tzh * MINS_PER_HOUR + tmfc.tzm) * SECS_PER_MINUTE;
4969                 /* note we are flipping the sign convention here */
4970                 if (tmfc.tzsign > 0)
4971                         tz->gmtoffset = -tz->gmtoffset;
4972         }
4973         else if (tmfc.has_tz)
4974         {
4975                 /* TZ field */
4976                 tz->has_tz = true;
4977                 if (tmfc.tzp == NULL)
4978                 {
4979                         /* fixed-offset abbreviation; flip the sign convention */
4980                         tz->gmtoffset = -tmfc.gmtoffset;
4981                 }
4982                 else
4983                 {
4984                         /* dynamic-offset abbreviation, resolve using specified time */
4985                         tz->gmtoffset = DetermineTimeZoneAbbrevOffset(tm, tmfc.abbrev,
4986                                                                                                                   tmfc.tzp);
4987                 }
4988         }
4989
4990         DEBUG_TM(tm);
4991
4992         if (format && !incache)
4993                 pfree(format);
4994         pfree(date_str);
4995
4996         return true;
4997
4998 fail:
4999         if (format && !incache)
5000                 pfree(format);
5001         pfree(date_str);
5002
5003         return false;
5004 }
5005
5006
5007 /**********************************************************************
5008  *      the NUMBER version part
5009  *********************************************************************/
5010
5011
/*
 * Fill the first "max" bytes of "str" with the character "c" and
 * NUL-terminate the result.  The caller must supply a buffer of at least
 * max + 1 bytes.  Returns "str" for convenience.
 */
static char *
fill_str(char *str, int c, int max)
{
	char	   *terminator = str + max;

	memset(str, c, max);
	*terminator = '\0';

	return str;
}
5019
/*
 * Reset the number-description fields of *(_n) to zero, so that a format
 * picture can be parsed into it from a clean state.  Note that the argument
 * is evaluated several times, so it must not have side effects.
 */
#define zeroize_NUM(_n) \
do { \
	/* digit counts and zero-padding positions */ \
	(_n)->pre = 0; \
	(_n)->post = 0; \
	(_n)->zero_start = 0; \
	(_n)->zero_end = 0; \
	/* sign handling */ \
	(_n)->lsign = 0; \
	(_n)->pre_lsign_num = 0; \
	/* remaining flags and modifiers */ \
	(_n)->flag = 0; \
	(_n)->need_locale = 0; \
	(_n)->multi = 0; \
} while(0)
5032
5033 /* This works the same as DCH_prevent_counter_overflow */
5034 static inline void
5035 NUM_prevent_counter_overflow(void)
5036 {
5037         if (NUMCounter >= (INT_MAX - 1))
5038         {
5039                 for (int i = 0; i < n_NUMCache; i++)
5040                         NUMCache[i]->age >>= 1;
5041                 NUMCounter >>= 1;
5042         }
5043 }
5044
5045 /* select a NUMCacheEntry to hold the given format picture */
5046 static NUMCacheEntry *
5047 NUM_cache_getnew(const char *str)
5048 {
5049         NUMCacheEntry *ent;
5050
5051         /* Ensure we can advance NUMCounter below */
5052         NUM_prevent_counter_overflow();
5053
5054         /*
5055          * If cache is full, remove oldest entry (or recycle first not-valid one)
5056          */
5057         if (n_NUMCache >= NUM_CACHE_ENTRIES)
5058         {
5059                 NUMCacheEntry *old = NUMCache[0];
5060
5061 #ifdef DEBUG_TO_FROM_CHAR
5062                 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
5063 #endif
5064                 if (old->valid)
5065                 {
5066                         for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
5067                         {
5068                                 ent = NUMCache[i];
5069                                 if (!ent->valid)
5070                                 {
5071                                         old = ent;
5072                                         break;
5073                                 }
5074                                 if (ent->age < old->age)
5075                                         old = ent;
5076                         }
5077                 }
5078 #ifdef DEBUG_TO_FROM_CHAR
5079                 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
5080 #endif
5081                 old->valid = false;
5082                 strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
5083                 old->age = (++NUMCounter);
5084                 /* caller is expected to fill format and Num, then set valid */
5085                 return old;
5086         }
5087         else
5088         {
5089 #ifdef DEBUG_TO_FROM_CHAR
5090                 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
5091 #endif
5092                 Assert(NUMCache[n_NUMCache] == NULL);
5093                 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
5094                         MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
5095                 ent->valid = false;
5096                 strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
5097                 ent->age = (++NUMCounter);
5098                 /* caller is expected to fill format and Num, then set valid */
5099                 ++n_NUMCache;
5100                 return ent;
5101         }
5102 }
5103
5104 /* look for an existing NUMCacheEntry matching the given format picture */
5105 static NUMCacheEntry *
5106 NUM_cache_search(const char *str)
5107 {
5108         /* Ensure we can advance NUMCounter below */
5109         NUM_prevent_counter_overflow();
5110
5111         for (int i = 0; i < n_NUMCache; i++)
5112         {
5113                 NUMCacheEntry *ent = NUMCache[i];
5114
5115                 if (ent->valid && strcmp(ent->str, str) == 0)
5116                 {
5117                         ent->age = (++NUMCounter);
5118                         return ent;
5119                 }
5120         }
5121
5122         return NULL;
5123 }
5124
5125 /* Find or create a NUMCacheEntry for the given format picture */
5126 static NUMCacheEntry *
5127 NUM_cache_fetch(const char *str)
5128 {
5129         NUMCacheEntry *ent;
5130
5131         if ((ent = NUM_cache_search(str)) == NULL)
5132         {
5133                 /*
5134                  * Not in the cache, must run parser and save a new format-picture to
5135                  * the cache.  Do not mark the cache entry valid until parsing
5136                  * succeeds.
5137                  */
5138                 ent = NUM_cache_getnew(str);
5139
5140                 zeroize_NUM(&ent->Num);
5141
5142                 parse_format(ent->format, str, NUM_keywords,
5143                                          NULL, NUM_index, NUM_FLAG, &ent->Num);
5144
5145                 ent->valid = true;
5146         }
5147         return ent;
5148 }
5149
5150 /* ----------
5151  * Cache routine for NUM to_char version
5152  * ----------
5153  */
5154 static FormatNode *
5155 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
5156 {
5157         FormatNode *format = NULL;
5158         char       *str;
5159
5160         str = text_to_cstring(pars_str);
5161
5162         if (len > NUM_CACHE_SIZE)
5163         {
5164                 /*
5165                  * Allocate new memory if format picture is bigger than static cache
5166                  * and do not use cache (call parser always)
5167                  */
5168                 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
5169
5170                 *shouldFree = true;
5171
5172                 zeroize_NUM(Num);
5173
5174                 parse_format(format, str, NUM_keywords,
5175                                          NULL, NUM_index, NUM_FLAG, Num);
5176         }
5177         else
5178         {
5179                 /*
5180                  * Use cache buffers
5181                  */
5182                 NUMCacheEntry *ent = NUM_cache_fetch(str);
5183
5184                 *shouldFree = false;
5185
5186                 format = ent->format;
5187
5188                 /*
5189                  * Copy cache to used struct
5190                  */
5191                 Num->flag = ent->Num.flag;
5192                 Num->lsign = ent->Num.lsign;
5193                 Num->pre = ent->Num.pre;
5194                 Num->post = ent->Num.post;
5195                 Num->pre_lsign_num = ent->Num.pre_lsign_num;
5196                 Num->need_locale = ent->Num.need_locale;
5197                 Num->multi = ent->Num.multi;
5198                 Num->zero_start = ent->Num.zero_start;
5199                 Num->zero_end = ent->Num.zero_end;
5200         }
5201
5202 #ifdef DEBUG_TO_FROM_CHAR
5203         /* dump_node(format, len); */
5204         dump_index(NUM_keywords, NUM_index);
5205 #endif
5206
5207         pfree(str);
5208         return format;
5209 }
5210
5211
5212 /*
5213  * Convert integer to Roman numerals
5214  * Result is upper-case and not blank-padded (NUM_processor converts as needed)
5215  * If input is out-of-range, produce '###############'
5216  */
5217 static char *
5218 int_to_roman(int number)
5219 {
5220         int                     len,
5221                                 num;
5222         char       *p,
5223                            *result,
5224                                 numstr[12];
5225
5226         result = (char *) palloc(16);
5227         *result = '\0';
5228
5229         /*
5230          * This range limit is the same as in Oracle(TM).  The difficulty with
5231          * handling 4000 or more is that we'd need to use more than 3 "M"'s, and
5232          * more than 3 of the same digit isn't considered a valid Roman string.
5233          */
5234         if (number > 3999 || number < 1)
5235         {
5236                 fill_str(result, '#', 15);
5237                 return result;
5238         }
5239
5240         /* Convert to decimal, then examine each digit */
5241         len = snprintf(numstr, sizeof(numstr), "%d", number);
5242         Assert(len > 0 && len <= 4);
5243
5244         for (p = numstr; *p != '\0'; p++, --len)
5245         {
5246                 num = *p - ('0' + 1);
5247                 if (num < 0)
5248                         continue;                       /* ignore zeroes */
5249                 /* switch on current column position */
5250                 switch (len)
5251                 {
5252                         case 4:
5253                                 while (num-- >= 0)
5254                                         strcat(result, "M");
5255                                 break;
5256                         case 3:
5257                                 strcat(result, rm100[num]);
5258                                 break;
5259                         case 2:
5260                                 strcat(result, rm10[num]);
5261                                 break;
5262                         case 1:
5263                                 strcat(result, rm1[num]);
5264                                 break;
5265                 }
5266         }
5267         return result;
5268 }
5269
5270
5271
5272 /* ----------
5273  * Locale
5274  * ----------
5275  */
5276 static void
5277 NUM_prepare_locale(NUMProc *Np)
5278 {
5279         if (Np->Num->need_locale)
5280         {
5281                 struct lconv *lconv;
5282
5283                 /*
5284                  * Get locales
5285                  */
5286                 lconv = PGLC_localeconv();
5287
5288                 /*
5289                  * Positive / Negative number sign
5290                  */
5291                 if (lconv->negative_sign && *lconv->negative_sign)
5292                         Np->L_negative_sign = lconv->negative_sign;
5293                 else
5294                         Np->L_negative_sign = "-";
5295
5296                 if (lconv->positive_sign && *lconv->positive_sign)
5297                         Np->L_positive_sign = lconv->positive_sign;
5298                 else
5299                         Np->L_positive_sign = "+";
5300
5301                 /*
5302                  * Number decimal point
5303                  */
5304                 if (lconv->decimal_point && *lconv->decimal_point)
5305                         Np->decimal = lconv->decimal_point;
5306
5307                 else
5308                         Np->decimal = ".";
5309
5310                 if (!IS_LDECIMAL(Np->Num))
5311                         Np->decimal = ".";
5312
5313                 /*
5314                  * Number thousands separator
5315                  *
5316                  * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5317                  * but "" for thousands_sep, so we set the thousands_sep too.
5318                  * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5319                  */
5320                 if (lconv->thousands_sep && *lconv->thousands_sep)
5321                         Np->L_thousands_sep = lconv->thousands_sep;
5322                 /* Make sure thousands separator doesn't match decimal point symbol. */
5323                 else if (strcmp(Np->decimal, ",") != 0)
5324                         Np->L_thousands_sep = ",";
5325                 else
5326                         Np->L_thousands_sep = ".";
5327
5328                 /*
5329                  * Currency symbol
5330                  */
5331                 if (lconv->currency_symbol && *lconv->currency_symbol)
5332                         Np->L_currency_symbol = lconv->currency_symbol;
5333                 else
5334                         Np->L_currency_symbol = " ";
5335         }
5336         else
5337         {
5338                 /*
5339                  * Default values
5340                  */
5341                 Np->L_negative_sign = "-";
5342                 Np->L_positive_sign = "+";
5343                 Np->decimal = ".";
5344
5345                 Np->L_thousands_sep = ",";
5346                 Np->L_currency_symbol = " ";
5347         }
5348 }
5349
/* ----------
 * Find the last significant character of a number's fractional part.
 *
 * Returns a pointer to the last character after the decimal point that is
 * not '0', or to the decimal point itself if nothing but zeroes follows:
 *	12.0500 --> points at '5'
 *	12.0000 --> points at '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *last;
	char	   *scan;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	last = strchr(num, '.');
	if (last == NULL)
		return NULL;

	/* Walk the fraction, remembering the latest non-zero character */
	for (scan = last + 1; *scan != '\0'; scan++)
	{
		if (*scan != '0')
			last = scan;
	}

	return last;
}
5381
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * Both macros expand to expressions that reference a NUMProc pointer named
 * "Np" and an integer named "input_len", so they can only be used where
 * those two names are in scope.
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	(Np->inout_p <= Np->inout + (input_len - (s)))
5389
/* ----------
 * Number extraction for TO_NUMBER()
 *
 * Examines the input at Np->inout_p on behalf of one format node ("id" is
 * NUM_0, NUM_9 or NUM_DEC, going by the debug output below).  A digit or
 * decimal point found there is appended at Np->number_p, and a sign, if one
 * is recognized, is stored as '-' or '+' in the first byte of Np->number
 * (which holds ' ' while no sign has been read).
 *
 * "input_len" is the length of the input string starting at Np->inout; it is
 * used by OVERLOAD_TEST / AMOUNT_TEST to avoid reading past the end.
 *
 * Note that the character matched as a digit, decimal point or trailing sign
 * is not stepped over here: multi-byte matches advance inout_p by their
 * length minus one only, because (per the comments below) NUM_processor()
 * does the final inout_p++.
 * ----------
 */
static void
NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
{
	/* set once this call has consumed a digit or a decimal point */
	bool		isread = false;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, " --- scan start --- id=%s",
		 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
#endif

	if (OVERLOAD_TEST)
		return;

	/* skip a single space in front of the item */
	if (*Np->inout_p == ' ')
		Np->inout_p++;

	if (OVERLOAD_TEST)
		return;

	/*
	 * read sign before number
	 *
	 * Only attempted while no sign has been stored yet, for digit nodes, and
	 * before any digit has been read.
	 */
	if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
		(Np->read_pre + Np->read_post) == 0)
	{
#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
			 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
#endif

		/*
		 * locale sign
		 */
		if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
		{
			int			x = 0;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
#endif
			/* the whole sign string is consumed here, since a digit follows */
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x;
				*Np->number = '+';
			}
		}
		else
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + - < >
			 */
			if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
										*Np->inout_p == '<'))
			{
				*Np->number = '-';	/* set - */
				Np->inout_p++;
			}
			else if (*Np->inout_p == '+')
			{
				*Np->number = '+';	/* set + */
				Np->inout_p++;
			}
		}
	}

	/* the sign may have been the last thing in the input */
	if (OVERLOAD_TEST)
		return;

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
#endif

	/*
	 * read digit or decimal point
	 */
	if (isdigit((unsigned char) *Np->inout_p))
	{
		/* all fractional digits the format allows have already been read */
		if (Np->read_dec && Np->read_post == Np->Num->post)
			return;

		*Np->number_p = *Np->inout_p;
		Np->number_p++;

		/* count the digit on the proper side of the decimal point */
		if (Np->read_dec)
			Np->read_post++;
		else
			Np->read_pre++;

		isread = true;

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
#endif
	}
	else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
	{
		/*
		 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
		 * Np->decimal is always just "." if we don't have a D format token.
		 * So we just unconditionally match to Np->decimal.
		 */
		int			x = strlen(Np->decimal);

#ifdef DEBUG_TO_FROM_CHAR
		elog(DEBUG_elog_output, "Try read decimal point (%c)",
			 *Np->inout_p);
#endif
		if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
		{
			/* stop on the last byte of the decimal-point string */
			Np->inout_p += x - 1;
			/* the number string always uses '.', whatever the input had */
			*Np->number_p = '.';
			Np->number_p++;
			Np->read_dec = true;
			isread = true;
		}
	}

	if (OVERLOAD_TEST)
		return;

	/*
	 * Read sign behind "last" number
	 *
	 * We need sign detection because determine exact position of post-sign is
	 * difficult:
	 *
	 *	FM9999.9999999S	-> 123.001-
	 *	9.9S			-> .5-
	 *	FM9.999999MI	-> 5.01-
	 */
	if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
	{
		/*
		 * locale sign (NUM_S) is always anchored behind a last number, if: -
		 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
		 * next char is not digit
		 */
		if (IS_LSIGN(Np->Num) && isread &&
			(Np->inout_p + 1) < Np->inout + input_len &&
			!isdigit((unsigned char) *(Np->inout_p + 1)))
		{
			int			x;
			/* remember where we were, then look at the following character */
			char	   *tmp = Np->inout_p++;

#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
#endif
			if ((x = strlen(Np->L_negative_sign)) &&
				AMOUNT_TEST(x) &&
				strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '-';
			}
			else if ((x = strlen(Np->L_positive_sign)) &&
					 AMOUNT_TEST(x) &&
					 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
			{
				Np->inout_p += x - 1;	/* -1 .. NUM_processor() do inout_p++ */
				*Np->number = '+';
			}
			if (*Np->number == ' ')
				/* no sign read */
				Np->inout_p = tmp;
		}

		/*
		 * try read non-locale sign, it's happen only if format is not exact
		 * and we cannot determine sign position of MI/PL/SG, an example:
		 *
		 * FM9.999999MI			   -> 5.01-
		 *
		 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
		 * like to_number('1 -', '9S') where sign is not anchored to last
		 * number.
		 */
		else if (isread == false && IS_LSIGN(Np->Num) == false &&
				 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
		{
#ifdef DEBUG_TO_FROM_CHAR
			elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
#endif

			/*
			 * simple + -
			 */
			if (*Np->inout_p == '-' || *Np->inout_p == '+')
				/* NUM_processor() do inout_p++ */
				*Np->number = *Np->inout_p;
		}
	}
}
5598
/*
 * IS_PREDEC_SPACE
 *      True when all of the following hold for the NUMProc pointed to by _n:
 *      IS_ZERO() is false for the format, number_p still points at the first
 *      character of the number string, that character is '0', and
 *      Num->post is nonzero.  NUM_numpart_to_char() relies on this to render
 *      "0.1" under the picture "9.9" as " .1".
 */
#define IS_PREDEC_SPACE(_n) \
        (IS_ZERO((_n)->Num) == false && \
         (_n)->number_p == (_n)->number && \
         (_n)->number[0] == '0' && \
         (_n)->Num->post != 0)
5604
5605 /* ----------
5606  * Add digit or sign to number-string
      *
      * to_char()-side handler for a single numeric format-picture item.
      * NUM_processor() invokes it for NUM_9, NUM_0, NUM_DEC and NUM_D items
      * when Np->is_to_char is set.
      *
      * Np: processing state.  Output is written at Np->inout_p (which is
      *     advanced past whatever gets written); source characters are taken
      *     from Np->number_p.
      * id: key id of the format-picture item being processed.
      *
      * In order, the function:
      *   1. returns immediately for roman-numeral formats;
      *   2. writes the leading sign (locale sign, '<', blank or '-') if it has
      *      not been written yet and the conditions below are met;
      *   3. for digit / decimal-point items writes a blank, a '0', the decimal
      *      separator or the next character of the number, or nothing;
      *   4. writes the closing '>' (blank for '+') or the trailing locale sign
      *      when Np->num_curr + 1 equals the computed "end";
      *   5. increments Np->num_curr.
      *
      * Np->num_in is cleared on entry and set again only when a '0' or a digit
      * is actually written.
5607  * ----------
5608  */
5609 static void
5610 NUM_numpart_to_char(NUMProc *Np, int id)
5611 {
5612         int                     end;    /* compared with num_curr + 1 to detect the last item */
5613
             /* Nothing is written here for roman-numeral formats. */
5614         if (IS_ROMAN(Np->Num))
5615                 return;
5616
5617         /* Note: 'inout' is not '\0'-terminated yet when the elog() below prints it */
5618
5619 #ifdef DEBUG_TO_FROM_CHAR
5620
5621         /*
5622          * Np->num_curr is the number of the current item in the format-picture;
5623          * it is not the current position in inout!
5624          */
5625         elog(DEBUG_elog_output,
5626                  "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
5627                  Np->sign_wrote,
5628                  Np->num_curr,
5629                  Np->number_p,
5630                  Np->inout);
5631 #endif
             /* Set back to true below only if a '0' or a digit gets written. */
5632         Np->num_in = false;
5633
5634         /*
5635          * Write the sign once the real number is about to be written to the
5636          * output.  Note: IS_PREDEC_SPACE() handles "9.9" --> " .1"
              *
              * The sign is written only if it has not been written yet, AND the
              * current item has reached out_pre_spaces (or the format has a zero
              * flag whose zero_start is the current item), AND either
              * IS_PREDEC_SPACE() is false or last_relevant points at '.'.
5637          */
5638         if (Np->sign_wrote == false &&
5639                 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
5640                 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
5641         {
5642                 if (IS_LSIGN(Np->Num))
5643                 {
                             /*
                              * Locale sign placed before the number.  For any other lsign
                              * value nothing is written here and sign_wrote stays false; a
                              * NUM_LSIGN_POST sign is written by the end-of-number check at
                              * the bottom of this function.
                              */
5644                         if (Np->Num->lsign == NUM_LSIGN_PRE)
5645                         {
5646                                 if (Np->sign == '-')
5647                                         strcpy(Np->inout_p, Np->L_negative_sign);
5648                                 else
5649                                         strcpy(Np->inout_p, Np->L_positive_sign);
                                     /* skip over the sign string just copied */
5650                                 Np->inout_p += strlen(Np->inout_p);
5651                                 Np->sign_wrote = true;
5652                         }
5653                 }
5654                 else if (IS_BRACKET(Np->Num))
5655                 {
                             /* opening bracket: blank for '+', '<' otherwise */
5656                         *Np->inout_p = Np->sign == '+' ? ' ' : '<';
5657                         ++Np->inout_p;
5658                         Np->sign_wrote = true;
5659                 }
5660                 else if (Np->sign == '+')
5661                 {
                             /* positive: a blank stands in for the sign, omitted in fill mode */
5662                         if (!IS_FILLMODE(Np->Num))
5663                         {
5664                                 *Np->inout_p = ' '; /* blank in place of '+' */
5665                                 ++Np->inout_p;
5666                         }
5667                         Np->sign_wrote = true;
5668                 }
5669                 else if (Np->sign == '-')
5670                 {                                               /* Write - */
5671                         *Np->inout_p = '-';
5672                         ++Np->inout_p;
5673                         Np->sign_wrote = true;
5674                 }
5675         }
5676
5677
5678         /*
5679          * digits / FM / Zero / Dec. point
5680          */
5681         if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
5682         {
5683                 if (Np->num_curr < Np->out_pre_spaces &&
5684                         (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
5685                 {
5686                         /*
5687                          * Write blank space (nothing at all in fill mode)
5688                          */
5689                         if (!IS_FILLMODE(Np->Num))
5690                         {
5691                                 *Np->inout_p = ' '; /* Write ' ' */
5692                                 ++Np->inout_p;
5693                         }
5694                 }
5695                 else if (IS_ZERO(Np->Num) &&
5696                                  Np->num_curr < Np->out_pre_spaces &&
5697                                  Np->Num->zero_start <= Np->num_curr)
5698                 {
5699                         /*
5700                          * Write ZERO: still before out_pre_spaces, but at or past
                              * zero_start
5701                          */
5702                         *Np->inout_p = '0'; /* Write '0' */
5703                         ++Np->inout_p;
5704                         Np->num_in = true;
5705                 }
5706                 else
5707                 {
                             /*
                              * Consume one character of the number string.  number_p is
                              * advanced only in this branch.
                              */

5708                         /*
5709                          * Write Decimal point
5710                          */
5711                         if (*Np->number_p == '.')
5712                         {
5713                                 if (!Np->last_relevant || *Np->last_relevant != '.')
5714                                 {
5715                                         strcpy(Np->inout_p, Np->decimal);       /* Write DEC/D */
5716                                         Np->inout_p += strlen(Np->inout_p);
5717                                 }
5718
5719                                 /*
5720                                  * Ora 'n' -- FM9.9 --> 'n.'
                                      *
                                      * Reached only when last_relevant points at '.'; the
                                      * separator is then written only in fill mode.
5721                                  */
5722                                 else if (IS_FILLMODE(Np->Num) &&
5723                                                  Np->last_relevant && *Np->last_relevant == '.')
5724                                 {
5725                                         strcpy(Np->inout_p, Np->decimal);       /* Write DEC/D */
5726                                         Np->inout_p += strlen(Np->inout_p);
5727                                 }
5728                         }
5729                         else
5730                         {
5731                                 /*
5732                                  * Write Digits
                                      *
                                      * A character beyond last_relevant is dropped (empty
                                      * statement) unless the item is NUM_0.
5733                                  */
5734                                 if (Np->last_relevant && Np->number_p > Np->last_relevant &&
5735                                         id != NUM_0)
5736                                         ;
5737
5738                                 /*
5739                                  * '0.1' -- 9.9 --> '  .1'
5740                                  */
5741                                 else if (IS_PREDEC_SPACE(Np))
5742                                 {
5743                                         if (!IS_FILLMODE(Np->Num))
5744                                         {
5745                                                 *Np->inout_p = ' ';
5746                                                 ++Np->inout_p;
5747                                         }
5748
5749                                         /*
5750                                          * '0' -- FM9.9 --> '0.'
5751                                          */
5752                                         else if (Np->last_relevant && *Np->last_relevant == '.')
5753                                         {
5754                                                 *Np->inout_p = '0';
5755                                                 ++Np->inout_p;
5756                                         }
5757                                 }
5758                                 else
5759                                 {
5760                                         *Np->inout_p = *Np->number_p;   /* Write DIGIT */
5761                                         ++Np->inout_p;
5762                                         Np->num_in = true;
5763                                 }
5764                         }
5765                         /* do not advance past the terminating '\0' of the number string */
5766                         if (*Np->number_p)
5767                                 ++Np->number_p;
5768                 }
5769
                     /*
                      * "end" is the value num_curr + 1 must equal for the closing
                      * bracket / trailing locale sign to be written on this call.
                      */
5770                 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
5771
                     /*
                      * When number_p now sits exactly on last_relevant, end becomes
                      * num_curr, so the num_curr + 1 == end test below cannot succeed on
                      * this call.  NOTE(review): confirm the intent of this override.
                      */
5772                 if (Np->last_relevant && Np->last_relevant == Np->number_p)
5773                         end = Np->num_curr;
5774
5775                 if (Np->num_curr + 1 == end)
5776                 {
5777                         if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
5778                         {
                                     /* closing bracket: blank for '+', '>' otherwise */
5779                                 *Np->inout_p = Np->sign == '+' ? ' ' : '>';
5780                                 ++Np->inout_p;
5781                         }
5782                         else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
5783                         {
                                     /* locale sign placed after the number */
5784                                 if (Np->sign == '-')
5785                                         strcpy(Np->inout_p, Np->L_negative_sign);
5786                                 else
5787                                         strcpy(Np->inout_p, Np->L_positive_sign);
5788                                 Np->inout_p += strlen(Np->inout_p);
5789                         }
5790                 }
5791         }
5792
             /* One more format-picture item has been processed. */
5793         ++Np->num_curr;
5794 }
5795
5796 /*
5797  * Skip over "n" input characters, but only if they aren't numeric data
5798  */
5799 static void
5800 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
5801 {
5802         while (n-- > 0)
5803         {
5804                 if (OVERLOAD_TEST)
5805                         break;                          /* end of input */
5806                 if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
5807                         break;                          /* it's a data character */
5808                 Np->inout_p += pg_mblen(Np->inout_p);
5809         }
5810 }
5811
5812 static char *
5813 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
5814                           char *number, int input_len, int to_char_out_pre_spaces,
5815                           int sign, bool is_to_char, Oid collid)
5816 {
5817         FormatNode *n;
5818         NUMProc         _Np,
5819                            *Np = &_Np;
5820         const char *pattern;
5821         int                     pattern_len;
5822
5823         MemSet(Np, 0, sizeof(NUMProc));
5824
5825         Np->Num = Num;
5826         Np->is_to_char = is_to_char;
5827         Np->number = number;
5828         Np->inout = inout;
5829         Np->last_relevant = NULL;
5830         Np->read_post = 0;
5831         Np->read_pre = 0;
5832         Np->read_dec = false;
5833
5834         if (Np->Num->zero_start)
5835                 --Np->Num->zero_start;
5836
5837         if (IS_EEEE(Np->Num))
5838         {
5839                 if (!Np->is_to_char)
5840                         ereport(ERROR,
5841                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5842                                          errmsg("\"EEEE\" not supported for input")));
5843                 return strcpy(inout, number);
5844         }
5845
5846         /*
5847          * Roman correction
5848          */
5849         if (IS_ROMAN(Np->Num))
5850         {
5851                 if (!Np->is_to_char)
5852                         ereport(ERROR,
5853                                         (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5854                                          errmsg("\"RN\" not supported for input")));
5855
5856                 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
5857                         Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
5858
5859                 if (IS_FILLMODE(Np->Num))
5860                 {
5861                         Np->Num->flag = 0;
5862                         Np->Num->flag |= NUM_F_FILLMODE;
5863                 }
5864                 else
5865                         Np->Num->flag = 0;
5866                 Np->Num->flag |= NUM_F_ROMAN;
5867         }
5868
5869         /*
5870          * Sign
5871          */
5872         if (is_to_char)
5873         {
5874                 Np->sign = sign;
5875
5876                 /* MI/PL/SG - write sign itself and not in number */
5877                 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
5878                 {
5879                         if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
5880                                 Np->sign_wrote = false; /* need sign */
5881                         else
5882                                 Np->sign_wrote = true;  /* needn't sign */
5883                 }
5884                 else
5885                 {
5886                         if (Np->sign != '-')
5887                         {
5888                                 if (IS_FILLMODE(Np->Num))
5889                                         Np->Num->flag &= ~NUM_F_BRACKET;
5890                         }
5891
5892                         if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
5893                                 Np->sign_wrote = true;  /* needn't sign */
5894                         else
5895                                 Np->sign_wrote = false; /* need sign */
5896
5897                         if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
5898                                 Np->Num->lsign = NUM_LSIGN_POST;
5899                 }
5900         }
5901         else
5902                 Np->sign = false;
5903
5904         /*
5905          * Count
5906          */
5907         Np->num_count = Np->Num->post + Np->Num->pre - 1;
5908
5909         if (is_to_char)
5910         {
5911                 Np->out_pre_spaces = to_char_out_pre_spaces;
5912
5913                 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
5914                 {
5915                         Np->last_relevant = get_last_relevant_decnum(Np->number);
5916
5917                         /*
5918                          * If any '0' specifiers are present, make sure we don't strip
5919                          * those digits.  But don't advance last_relevant beyond the last
5920                          * character of the Np->number string, which is a hazard if the
5921                          * number got shortened due to precision limitations.
5922                          */
5923                         if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
5924                         {
5925                                 int                     last_zero_pos;
5926                                 char       *last_zero;
5927
5928                                 /* note that Np->number cannot be zero-length here */
5929                                 last_zero_pos = strlen(Np->number) - 1;
5930                                 last_zero_pos = Min(last_zero_pos,
5931                                                                         Np->Num->zero_end - Np->out_pre_spaces);
5932                                 last_zero = Np->number + last_zero_pos;
5933                                 if (Np->last_relevant < last_zero)
5934                                         Np->last_relevant = last_zero;
5935                         }
5936                 }
5937
5938                 if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
5939                         ++Np->num_count;
5940         }
5941         else
5942         {
5943                 Np->out_pre_spaces = 0;
5944                 *Np->number = ' ';              /* sign space */
5945                 *(Np->number + 1) = '\0';
5946         }
5947
5948         Np->num_in = 0;
5949         Np->num_curr = 0;
5950
5951 #ifdef DEBUG_TO_FROM_CHAR
5952         elog(DEBUG_elog_output,
5953                  "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
5954                  Np->sign,
5955                  Np->number,
5956                  Np->Num->pre,
5957                  Np->Num->post,
5958                  Np->num_count,
5959                  Np->out_pre_spaces,
5960                  Np->sign_wrote ? "Yes" : "No",
5961                  IS_ZERO(Np->Num) ? "Yes" : "No",
5962                  Np->Num->zero_start,
5963                  Np->Num->zero_end,
5964                  Np->last_relevant ? Np->last_relevant : "<not set>",
5965                  IS_BRACKET(Np->Num) ? "Yes" : "No",
5966                  IS_PLUS(Np->Num) ? "Yes" : "No",
5967                  IS_MINUS(Np->Num) ? "Yes" : "No",
5968                  IS_FILLMODE(Np->Num) ? "Yes" : "No",
5969                  IS_ROMAN(Np->Num) ? "Yes" : "No",
5970                  IS_EEEE(Np->Num) ? "Yes" : "No"
5971                 );
5972 #endif
5973
5974         /*
5975          * Locale
5976          */
5977         NUM_prepare_locale(Np);
5978
5979         /*
5980          * Processor direct cycle
5981          */
5982         if (Np->is_to_char)
5983                 Np->number_p = Np->number;
5984         else
5985                 Np->number_p = Np->number + 1;  /* first char is space for sign */
5986
5987         for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
5988         {
5989                 if (!Np->is_to_char)
5990                 {
5991                         /*
5992                          * Check at least one byte remains to be scanned.  (In actions
5993                          * below, must use AMOUNT_TEST if we want to read more bytes than
5994                          * that.)
5995                          */
5996                         if (OVERLOAD_TEST)
5997                                 break;
5998                 }
5999
6000                 /*
6001                  * Format pictures actions
6002                  */
6003                 if (n->type == NODE_TYPE_ACTION)
6004                 {
6005                         /*
6006                          * Create/read digit/zero/blank/sign/special-case
6007                          *
6008                          * 'NUM_S' note: The locale sign is anchored to number and we
6009                          * read/write it when we work with first or last number
6010                          * (NUM_0/NUM_9).  This is why NUM_S is missing in switch().
6011                          *
6012                          * Notice the "Np->inout_p++" at the bottom of the loop.  This is
6013                          * why most of the actions advance inout_p one less than you might
6014                          * expect.  In cases where we don't want that increment to happen,
6015                          * a switch case ends with "continue" not "break".
6016                          */
6017                         switch (n->key->id)
6018                         {
6019                                 case NUM_9:
6020                                 case NUM_0:
6021                                 case NUM_DEC:
6022                                 case NUM_D:
6023                                         if (Np->is_to_char)
6024                                         {
6025                                                 NUM_numpart_to_char(Np, n->key->id);
6026                                                 continue;       /* for() */
6027                                         }
6028                                         else
6029                                         {
6030                                                 NUM_numpart_from_char(Np, n->key->id, input_len);
6031                                                 break;  /* switch() case: */
6032                                         }
6033
6034                                 case NUM_COMMA:
6035                                         if (Np->is_to_char)
6036                                         {
6037                                                 if (!Np->num_in)
6038                                                 {
6039                                                         if (IS_FILLMODE(Np->Num))
6040                                                                 continue;
6041                                                         else
6042                                                                 *Np->inout_p = ' ';
6043                                                 }
6044                                                 else
6045                                                         *Np->inout_p = ',';
6046                                         }
6047                                         else
6048                                         {
6049                                                 if (!Np->num_in)
6050                                                 {
6051                                                         if (IS_FILLMODE(Np->Num))
6052                                                                 continue;
6053                                                 }
6054                                                 if (*Np->inout_p != ',')
6055                                                         continue;
6056                                         }
6057                                         break;
6058
6059                                 case NUM_G:
6060                                         pattern = Np->L_thousands_sep;
6061                                         pattern_len = strlen(pattern);
6062                                         if (Np->is_to_char)
6063                                         {
6064                                                 if (!Np->num_in)
6065                                                 {
6066                                                         if (IS_FILLMODE(Np->Num))
6067                                                                 continue;
6068                                                         else
6069                                                         {
6070                                                                 /* just in case there are MB chars */
6071                                                                 pattern_len = pg_mbstrlen(pattern);
6072                                                                 memset(Np->inout_p, ' ', pattern_len);
6073                                                                 Np->inout_p += pattern_len - 1;
6074                                                         }
6075                                                 }
6076                                                 else
6077                                                 {
6078                                                         strcpy(Np->inout_p, pattern);
6079                                                         Np->inout_p += pattern_len - 1;
6080                                                 }
6081                                         }
6082                                         else
6083                                         {
6084                                                 if (!Np->num_in)
6085                                                 {
6086                                                         if (IS_FILLMODE(Np->Num))
6087                                                                 continue;
6088                                                 }
6089
6090                                                 /*
6091                                                  * Because L_thousands_sep typically contains data
6092                                                  * characters (either '.' or ','), we can't use
6093                                                  * NUM_eat_non_data_chars here.  Instead skip only if
6094                                                  * the input matches L_thousands_sep.
6095                                                  */
6096                                                 if (AMOUNT_TEST(pattern_len) &&
6097                                                         strncmp(Np->inout_p, pattern, pattern_len) == 0)
6098                                                         Np->inout_p += pattern_len - 1;
6099                                                 else
6100                                                         continue;
6101                                         }
6102                                         break;
6103
6104                                 case NUM_L:
6105                                         pattern = Np->L_currency_symbol;
6106                                         if (Np->is_to_char)
6107                                         {
6108                                                 strcpy(Np->inout_p, pattern);
6109                                                 Np->inout_p += strlen(pattern) - 1;
6110                                         }
6111                                         else
6112                                         {
6113                                                 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
6114                                                 continue;
6115                                         }
6116                                         break;
6117
6118                                 case NUM_RN:
6119                                         if (IS_FILLMODE(Np->Num))
6120                                         {
6121                                                 strcpy(Np->inout_p, Np->number_p);
6122                                                 Np->inout_p += strlen(Np->inout_p) - 1;
6123                                         }
6124                                         else
6125                                         {
6126                                                 sprintf(Np->inout_p, "%15s", Np->number_p);
6127                                                 Np->inout_p += strlen(Np->inout_p) - 1;
6128                                         }
6129                                         break;
6130
6131                                 case NUM_rn:
6132                                         if (IS_FILLMODE(Np->Num))
6133                                         {
6134                                                 strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
6135                                                 Np->inout_p += strlen(Np->inout_p) - 1;
6136                                         }
6137                                         else
6138                                         {
6139                                                 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
6140                                                 Np->inout_p += strlen(Np->inout_p) - 1;
6141                                         }
6142                                         break;
6143
6144                                 case NUM_th:
6145                                         if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
6146                                                 Np->sign == '-' || IS_DECIMAL(Np->Num))
6147                                                 continue;
6148
6149                                         if (Np->is_to_char)
6150                                         {
6151                                                 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
6152                                                 Np->inout_p += 1;
6153                                         }
6154                                         else
6155                                         {
6156                                                 /* All variants of 'th' occupy 2 characters */
6157                                                 NUM_eat_non_data_chars(Np, 2, input_len);
6158                                                 continue;
6159                                         }
6160                                         break;
6161
6162                                 case NUM_TH:
6163                                         if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
6164                                                 Np->sign == '-' || IS_DECIMAL(Np->Num))
6165                                                 continue;
6166
6167                                         if (Np->is_to_char)
6168                                         {
6169                                                 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
6170                                                 Np->inout_p += 1;
6171                                         }
6172                                         else
6173                                         {
6174                                                 /* All variants of 'TH' occupy 2 characters */
6175                                                 NUM_eat_non_data_chars(Np, 2, input_len);
6176                                                 continue;
6177                                         }
6178                                         break;
6179
6180                                 case NUM_MI:
6181                                         if (Np->is_to_char)
6182                                         {
6183                                                 if (Np->sign == '-')
6184                                                         *Np->inout_p = '-';
6185                                                 else if (IS_FILLMODE(Np->Num))
6186                                                         continue;
6187                                                 else
6188                                                         *Np->inout_p = ' ';
6189                                         }
6190                                         else
6191                                         {
6192                                                 if (*Np->inout_p == '-')
6193                                                         *Np->number = '-';
6194                                                 else
6195                                                 {
6196                                                         NUM_eat_non_data_chars(Np, 1, input_len);
6197                                                         continue;
6198                                                 }
6199                                         }
6200                                         break;
6201
6202                                 case NUM_PL:
6203                                         if (Np->is_to_char)
6204                                         {
6205                                                 if (Np->sign == '+')
6206                                                         *Np->inout_p = '+';
6207                                                 else if (IS_FILLMODE(Np->Num))
6208                                                         continue;
6209                                                 else
6210                                                         *Np->inout_p = ' ';
6211                                         }
6212                                         else
6213                                         {
6214                                                 if (*Np->inout_p == '+')
6215                                                         *Np->number = '+';
6216                                                 else
6217                                                 {
6218                                                         NUM_eat_non_data_chars(Np, 1, input_len);
6219                                                         continue;
6220                                                 }
6221                                         }
6222                                         break;
6223
6224                                 case NUM_SG:
6225                                         if (Np->is_to_char)
6226                                                 *Np->inout_p = Np->sign;
6227                                         else
6228                                         {
6229                                                 if (*Np->inout_p == '-')
6230                                                         *Np->number = '-';
6231                                                 else if (*Np->inout_p == '+')
6232                                                         *Np->number = '+';
6233                                                 else
6234                                                 {
6235                                                         NUM_eat_non_data_chars(Np, 1, input_len);
6236                                                         continue;
6237                                                 }
6238                                         }
6239                                         break;
6240
6241                                 default:
6242                                         continue;
6243                                         break;
6244                         }
6245                 }
6246                 else
6247                 {
6248                         /*
6249                          * In TO_CHAR, non-pattern characters in the format are copied to
6250                          * the output.  In TO_NUMBER, we skip one input character for each
6251                          * non-pattern format character, whether or not it matches the
6252                          * format character.
6253                          */
6254                         if (Np->is_to_char)
6255                         {
6256                                 strcpy(Np->inout_p, n->character);
6257                                 Np->inout_p += strlen(Np->inout_p);
6258                         }
6259                         else
6260                         {
6261                                 Np->inout_p += pg_mblen(Np->inout_p);
6262                         }
6263                         continue;
6264                 }
6265                 Np->inout_p++;
6266         }
6267
6268         if (Np->is_to_char)
6269         {
6270                 *Np->inout_p = '\0';
6271                 return Np->inout;
6272         }
6273         else
6274         {
6275                 if (*(Np->number_p - 1) == '.')
6276                         *(Np->number_p - 1) = '\0';
6277                 else
6278                         *Np->number_p = '\0';
6279
6280                 /*
6281                  * Correction - precision of dec. number
6282                  */
6283                 Np->Num->post = Np->read_post;
6284
6285 #ifdef DEBUG_TO_FROM_CHAR
6286                 elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
6287 #endif
6288                 return Np->number;
6289         }
6290 }
6291
/* ----------
 * MACRO: common prologue for all NUM to_char() variants
 *
 * Relies on the enclosing function having declared fmt, result, format,
 * Num and shouldFree.  If the format picture is empty, or so long that the
 * worst-case output size could not be represented, the enclosing function
 * returns an empty text value right away.  Otherwise a zero-filled result
 * buffer sized for the worst case (NUM_MAX_ITEM_SIZ bytes per format byte,
 * plus terminator and varlena header) is allocated and the format nodes
 * are obtained from NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int fmt_len = VARSIZE_ANY_EXHDR(fmt); \
	if (!(fmt_len > 0 && fmt_len < (INT_MAX - VARHDRSZ) / NUM_MAX_ITEM_SIZ)) \
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result = (text *) palloc0((fmt_len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ); \
	format = NUM_cache(fmt_len, &Num, fmt, &shouldFree); \
} while (0)
6305
/* ----------
 * MACRO: common epilogue for all NUM to_char() variants
 *
 * Relies on the enclosing function having declared format, Num, result,
 * numstr, out_pre_spaces, sign and shouldFree.  Runs NUM_processor() in
 * to_char mode to write the formatted text into the data area of result,
 * releases the format nodes when the caller owns them, and finally stamps
 * the varlena length onto result.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		out_len; \
	\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
	\
	if (shouldFree) \
		pfree(format); \
	\
	/* \
	 * The processor leaves a null-terminated string in the buffer; turn \
	 * it into a proper text datum by recording its length.  The buffer is \
	 * usually far larger than required, but shrinking it with a realloc \
	 * is not worth the trouble. \
	 */ \
	out_len = strlen(VARDATA(result)); \
	SET_VARSIZE(result, out_len + VARHDRSZ); \
} while (0)
6327
6328 /* -------------------
6329  * NUMERIC to_number() (convert string to numeric)
6330  * -------------------
6331  */
6332 Datum
6333 numeric_to_number(PG_FUNCTION_ARGS)
6334 {
6335         text       *value = PG_GETARG_TEXT_PP(0);
6336         text       *fmt = PG_GETARG_TEXT_PP(1);
6337         NUMDesc         Num;
6338         Datum           result;
6339         FormatNode *format;
6340         char       *numstr;
6341         bool            shouldFree;
6342         int                     len = 0;
6343         int                     scale,
6344                                 precision;
6345
6346         len = VARSIZE_ANY_EXHDR(fmt);
6347
6348         if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
6349                 PG_RETURN_NULL();
6350
6351         format = NUM_cache(len, &Num, fmt, &shouldFree);
6352
6353         numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
6354
6355         NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
6356                                   VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
6357
6358         scale = Num.post;
6359         precision = Num.pre + Num.multi + scale;
6360
6361         if (shouldFree)
6362                 pfree(format);
6363
6364         result = DirectFunctionCall3(numeric_in,
6365                                                                  CStringGetDatum(numstr),
6366                                                                  ObjectIdGetDatum(InvalidOid),
6367                                                                  Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
6368
6369         if (IS_MULTI(&Num))
6370         {
6371                 Numeric         x;
6372                 Numeric         a = int64_to_numeric(10);
6373                 Numeric         b = int64_to_numeric(-Num.multi);
6374
6375                 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6376                                                                                                 NumericGetDatum(a),
6377                                                                                                 NumericGetDatum(b)));
6378                 result = DirectFunctionCall2(numeric_mul,
6379                                                                          result,
6380                                                                          NumericGetDatum(x));
6381         }
6382
6383         pfree(numstr);
6384         return result;
6385 }
6386
6387 /* ------------------
6388  * NUMERIC to_char()
6389  * ------------------
6390  */
6391 Datum
6392 numeric_to_char(PG_FUNCTION_ARGS)
6393 {
6394         Numeric         value = PG_GETARG_NUMERIC(0);
6395         text       *fmt = PG_GETARG_TEXT_PP(1);
6396         NUMDesc         Num;
6397         FormatNode *format;
6398         text       *result;
6399         bool            shouldFree;
6400         int                     out_pre_spaces = 0,
6401                                 sign = 0;
6402         char       *numstr,
6403                            *orgnum,
6404                            *p;
6405
6406         NUM_TOCHAR_prepare;
6407
6408         /*
6409          * On DateType depend part (numeric)
6410          */
6411         if (IS_ROMAN(&Num))
6412         {
6413                 int32           intvalue;
6414                 bool            err;
6415
6416                 /* Round and convert to int */
6417                 intvalue = numeric_int4_opt_error(value, &err);
6418                 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6419                 if (err)
6420                         intvalue = PG_INT32_MAX;
6421                 numstr = int_to_roman(intvalue);
6422         }
6423         else if (IS_EEEE(&Num))
6424         {
6425                 orgnum = numeric_out_sci(value, Num.post);
6426
6427                 /*
6428                  * numeric_out_sci() does not emit a sign for positive numbers.  We
6429                  * need to add a space in this case so that positive and negative
6430                  * numbers are aligned.  Also must check for NaN/infinity cases, which
6431                  * we handle the same way as in float8_to_char.
6432                  */
6433                 if (strcmp(orgnum, "NaN") == 0 ||
6434                         strcmp(orgnum, "Infinity") == 0 ||
6435                         strcmp(orgnum, "-Infinity") == 0)
6436                 {
6437                         /*
6438                          * Allow 6 characters for the leading sign, the decimal point,
6439                          * "e", the exponent's sign and two exponent digits.
6440                          */
6441                         numstr = (char *) palloc(Num.pre + Num.post + 7);
6442                         fill_str(numstr, '#', Num.pre + Num.post + 6);
6443                         *numstr = ' ';
6444                         *(numstr + Num.pre + 1) = '.';
6445                 }
6446                 else if (*orgnum != '-')
6447                 {
6448                         numstr = (char *) palloc(strlen(orgnum) + 2);
6449                         *numstr = ' ';
6450                         strcpy(numstr + 1, orgnum);
6451                 }
6452                 else
6453                 {
6454                         numstr = orgnum;
6455                 }
6456         }
6457         else
6458         {
6459                 int                     numstr_pre_len;
6460                 Numeric         val = value;
6461                 Numeric         x;
6462
6463                 if (IS_MULTI(&Num))
6464                 {
6465                         Numeric         a = int64_to_numeric(10);
6466                         Numeric         b = int64_to_numeric(Num.multi);
6467
6468                         x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6469                                                                                                         NumericGetDatum(a),
6470                                                                                                         NumericGetDatum(b)));
6471                         val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
6472                                                                                                           NumericGetDatum(value),
6473                                                                                                           NumericGetDatum(x)));
6474                         Num.pre += Num.multi;
6475                 }
6476
6477                 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6478                                                                                                 NumericGetDatum(val),
6479                                                                                                 Int32GetDatum(Num.post)));
6480                 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
6481                                                                                                          NumericGetDatum(x)));
6482
6483                 if (*orgnum == '-')
6484                 {
6485                         sign = '-';
6486                         numstr = orgnum + 1;
6487                 }
6488                 else
6489                 {
6490                         sign = '+';
6491                         numstr = orgnum;
6492                 }
6493
6494                 if ((p = strchr(numstr, '.')))
6495                         numstr_pre_len = p - numstr;
6496                 else
6497                         numstr_pre_len = strlen(numstr);
6498
6499                 /* needs padding? */
6500                 if (numstr_pre_len < Num.pre)
6501                         out_pre_spaces = Num.pre - numstr_pre_len;
6502                 /* overflowed prefix digit format? */
6503                 else if (numstr_pre_len > Num.pre)
6504                 {
6505                         numstr = (char *) palloc(Num.pre + Num.post + 2);
6506                         fill_str(numstr, '#', Num.pre + Num.post + 1);
6507                         *(numstr + Num.pre) = '.';
6508                 }
6509         }
6510
6511         NUM_TOCHAR_finish;
6512         PG_RETURN_TEXT_P(result);
6513 }
6514
6515 /* ---------------
6516  * INT4 to_char()
6517  * ---------------
6518  */
6519 Datum
6520 int4_to_char(PG_FUNCTION_ARGS)
6521 {
6522         int32           value = PG_GETARG_INT32(0);
6523         text       *fmt = PG_GETARG_TEXT_PP(1);
6524         NUMDesc         Num;
6525         FormatNode *format;
6526         text       *result;
6527         bool            shouldFree;
6528         int                     out_pre_spaces = 0,
6529                                 sign = 0;
6530         char       *numstr,
6531                            *orgnum;
6532
6533         NUM_TOCHAR_prepare;
6534
6535         /*
6536          * On DateType depend part (int32)
6537          */
6538         if (IS_ROMAN(&Num))
6539                 numstr = int_to_roman(value);
6540         else if (IS_EEEE(&Num))
6541         {
6542                 /* we can do it easily because float8 won't lose any precision */
6543                 float8          val = (float8) value;
6544
6545                 orgnum = (char *) psprintf("%+.*e", Num.post, val);
6546
6547                 /*
6548                  * Swap a leading positive sign for a space.
6549                  */
6550                 if (*orgnum == '+')
6551                         *orgnum = ' ';
6552
6553                 numstr = orgnum;
6554         }
6555         else
6556         {
6557                 int                     numstr_pre_len;
6558
6559                 if (IS_MULTI(&Num))
6560                 {
6561                         orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6562                                                                                                                  Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
6563                         Num.pre += Num.multi;
6564                 }
6565                 else
6566                 {
6567                         orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6568                                                                                                                  Int32GetDatum(value)));
6569                 }
6570
6571                 if (*orgnum == '-')
6572                 {
6573                         sign = '-';
6574                         orgnum++;
6575                 }
6576                 else
6577                         sign = '+';
6578
6579                 numstr_pre_len = strlen(orgnum);
6580
6581                 /* post-decimal digits?  Pad out with zeros. */
6582                 if (Num.post)
6583                 {
6584                         numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6585                         strcpy(numstr, orgnum);
6586                         *(numstr + numstr_pre_len) = '.';
6587                         memset(numstr + numstr_pre_len + 1, '0', Num.post);
6588                         *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6589                 }
6590                 else
6591                         numstr = orgnum;
6592
6593                 /* needs padding? */
6594                 if (numstr_pre_len < Num.pre)
6595                         out_pre_spaces = Num.pre - numstr_pre_len;
6596                 /* overflowed prefix digit format? */
6597                 else if (numstr_pre_len > Num.pre)
6598                 {
6599                         numstr = (char *) palloc(Num.pre + Num.post + 2);
6600                         fill_str(numstr, '#', Num.pre + Num.post + 1);
6601                         *(numstr + Num.pre) = '.';
6602                 }
6603         }
6604
6605         NUM_TOCHAR_finish;
6606         PG_RETURN_TEXT_P(result);
6607 }
6608
6609 /* ---------------
6610  * INT8 to_char()
6611  * ---------------
6612  */
6613 Datum
6614 int8_to_char(PG_FUNCTION_ARGS)
6615 {
6616         int64           value = PG_GETARG_INT64(0);
6617         text       *fmt = PG_GETARG_TEXT_PP(1);
6618         NUMDesc         Num;
6619         FormatNode *format;
6620         text       *result;
6621         bool            shouldFree;
6622         int                     out_pre_spaces = 0,
6623                                 sign = 0;
6624         char       *numstr,
6625                            *orgnum;
6626
6627         NUM_TOCHAR_prepare;
6628
6629         /*
6630          * On DateType depend part (int64)
6631          */
6632         if (IS_ROMAN(&Num))
6633         {
6634                 int32           intvalue;
6635
6636                 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6637                 if (value <= PG_INT32_MAX && value >= PG_INT32_MIN)
6638                         intvalue = (int32) value;
6639                 else
6640                         intvalue = PG_INT32_MAX;
6641                 numstr = int_to_roman(intvalue);
6642         }
6643         else if (IS_EEEE(&Num))
6644         {
6645                 /* to avoid loss of precision, must go via numeric not float8 */
6646                 orgnum = numeric_out_sci(int64_to_numeric(value),
6647                                                                  Num.post);
6648
6649                 /*
6650                  * numeric_out_sci() does not emit a sign for positive numbers.  We
6651                  * need to add a space in this case so that positive and negative
6652                  * numbers are aligned.  We don't have to worry about NaN/inf here.
6653                  */
6654                 if (*orgnum != '-')
6655                 {
6656                         numstr = (char *) palloc(strlen(orgnum) + 2);
6657                         *numstr = ' ';
6658                         strcpy(numstr + 1, orgnum);
6659                 }
6660                 else
6661                 {
6662                         numstr = orgnum;
6663                 }
6664         }
6665         else
6666         {
6667                 int                     numstr_pre_len;
6668
6669                 if (IS_MULTI(&Num))
6670                 {
6671                         double          multi = pow((double) 10, (double) Num.multi);
6672
6673                         value = DatumGetInt64(DirectFunctionCall2(int8mul,
6674                                                                                                           Int64GetDatum(value),
6675                                                                                                           DirectFunctionCall1(dtoi8,
6676                                                                                                                                                   Float8GetDatum(multi))));
6677                         Num.pre += Num.multi;
6678                 }
6679
6680                 orgnum = DatumGetCString(DirectFunctionCall1(int8out,
6681                                                                                                          Int64GetDatum(value)));
6682
6683                 if (*orgnum == '-')
6684                 {
6685                         sign = '-';
6686                         orgnum++;
6687                 }
6688                 else
6689                         sign = '+';
6690
6691                 numstr_pre_len = strlen(orgnum);
6692
6693                 /* post-decimal digits?  Pad out with zeros. */
6694                 if (Num.post)
6695                 {
6696                         numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6697                         strcpy(numstr, orgnum);
6698                         *(numstr + numstr_pre_len) = '.';
6699                         memset(numstr + numstr_pre_len + 1, '0', Num.post);
6700                         *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6701                 }
6702                 else
6703                         numstr = orgnum;
6704
6705                 /* needs padding? */
6706                 if (numstr_pre_len < Num.pre)
6707                         out_pre_spaces = Num.pre - numstr_pre_len;
6708                 /* overflowed prefix digit format? */
6709                 else if (numstr_pre_len > Num.pre)
6710                 {
6711                         numstr = (char *) palloc(Num.pre + Num.post + 2);
6712                         fill_str(numstr, '#', Num.pre + Num.post + 1);
6713                         *(numstr + Num.pre) = '.';
6714                 }
6715         }
6716
6717         NUM_TOCHAR_finish;
6718         PG_RETURN_TEXT_P(result);
6719 }
6720
6721 /* -----------------
6722  * FLOAT4 to_char()
6723  * -----------------
6724  */
6725 Datum
6726 float4_to_char(PG_FUNCTION_ARGS)
6727 {
6728         float4          value = PG_GETARG_FLOAT4(0);
6729         text       *fmt = PG_GETARG_TEXT_PP(1);
6730         NUMDesc         Num;
6731         FormatNode *format;
6732         text       *result;
6733         bool            shouldFree;
6734         int                     out_pre_spaces = 0,
6735                                 sign = 0;
6736         char       *numstr,
6737                            *p;
6738
6739         NUM_TOCHAR_prepare;
6740
6741         if (IS_ROMAN(&Num))
6742         {
6743                 int32           intvalue;
6744
6745                 /* See notes in ftoi4() */
6746                 value = rint(value);
6747                 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6748                 if (!isnan(value) && FLOAT4_FITS_IN_INT32(value))
6749                         intvalue = (int32) value;
6750                 else
6751                         intvalue = PG_INT32_MAX;
6752                 numstr = int_to_roman(intvalue);
6753         }
6754         else if (IS_EEEE(&Num))
6755         {
6756                 if (isnan(value) || isinf(value))
6757                 {
6758                         /*
6759                          * Allow 6 characters for the leading sign, the decimal point,
6760                          * "e", the exponent's sign and two exponent digits.
6761                          */
6762                         numstr = (char *) palloc(Num.pre + Num.post + 7);
6763                         fill_str(numstr, '#', Num.pre + Num.post + 6);
6764                         *numstr = ' ';
6765                         *(numstr + Num.pre + 1) = '.';
6766                 }
6767                 else
6768                 {
6769                         numstr = psprintf("%+.*e", Num.post, value);
6770
6771                         /*
6772                          * Swap a leading positive sign for a space.
6773                          */
6774                         if (*numstr == '+')
6775                                 *numstr = ' ';
6776                 }
6777         }
6778         else
6779         {
6780                 float4          val = value;
6781                 char       *orgnum;
6782                 int                     numstr_pre_len;
6783
6784                 if (IS_MULTI(&Num))
6785                 {
6786                         float           multi = pow((double) 10, (double) Num.multi);
6787
6788                         val = value * multi;
6789                         Num.pre += Num.multi;
6790                 }
6791
6792                 orgnum = psprintf("%.0f", fabs(val));
6793                 numstr_pre_len = strlen(orgnum);
6794
6795                 /* adjust post digits to fit max float digits */
6796                 if (numstr_pre_len >= FLT_DIG)
6797                         Num.post = 0;
6798                 else if (numstr_pre_len + Num.post > FLT_DIG)
6799                         Num.post = FLT_DIG - numstr_pre_len;
6800                 orgnum = psprintf("%.*f", Num.post, val);
6801
6802                 if (*orgnum == '-')
6803                 {                                               /* < 0 */
6804                         sign = '-';
6805                         numstr = orgnum + 1;
6806                 }
6807                 else
6808                 {
6809                         sign = '+';
6810                         numstr = orgnum;
6811                 }
6812
6813                 if ((p = strchr(numstr, '.')))
6814                         numstr_pre_len = p - numstr;
6815                 else
6816                         numstr_pre_len = strlen(numstr);
6817
6818                 /* needs padding? */
6819                 if (numstr_pre_len < Num.pre)
6820                         out_pre_spaces = Num.pre - numstr_pre_len;
6821                 /* overflowed prefix digit format? */
6822                 else if (numstr_pre_len > Num.pre)
6823                 {
6824                         numstr = (char *) palloc(Num.pre + Num.post + 2);
6825                         fill_str(numstr, '#', Num.pre + Num.post + 1);
6826                         *(numstr + Num.pre) = '.';
6827                 }
6828         }
6829
6830         NUM_TOCHAR_finish;
6831         PG_RETURN_TEXT_P(result);
6832 }
6833
6834 /* -----------------
6835  * FLOAT8 to_char()
6836  * -----------------
6837  */
6838 Datum
6839 float8_to_char(PG_FUNCTION_ARGS)
6840 {
6841         float8          value = PG_GETARG_FLOAT8(0);
6842         text       *fmt = PG_GETARG_TEXT_PP(1);
6843         NUMDesc         Num;
6844         FormatNode *format;
6845         text       *result;
6846         bool            shouldFree;
6847         int                     out_pre_spaces = 0,
6848                                 sign = 0;
6849         char       *numstr,
6850                            *p;
6851
6852         NUM_TOCHAR_prepare;
6853
6854         if (IS_ROMAN(&Num))
6855         {
6856                 int32           intvalue;
6857
6858                 /* See notes in dtoi4() */
6859                 value = rint(value);
6860                 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6861                 if (!isnan(value) && FLOAT8_FITS_IN_INT32(value))
6862                         intvalue = (int32) value;
6863                 else
6864                         intvalue = PG_INT32_MAX;
6865                 numstr = int_to_roman(intvalue);
6866         }
6867         else if (IS_EEEE(&Num))
6868         {
6869                 if (isnan(value) || isinf(value))
6870                 {
6871                         /*
6872                          * Allow 6 characters for the leading sign, the decimal point,
6873                          * "e", the exponent's sign and two exponent digits.
6874                          */
6875                         numstr = (char *) palloc(Num.pre + Num.post + 7);
6876                         fill_str(numstr, '#', Num.pre + Num.post + 6);
6877                         *numstr = ' ';
6878                         *(numstr + Num.pre + 1) = '.';
6879                 }
6880                 else
6881                 {
6882                         numstr = psprintf("%+.*e", Num.post, value);
6883
6884                         /*
6885                          * Swap a leading positive sign for a space.
6886                          */
6887                         if (*numstr == '+')
6888                                 *numstr = ' ';
6889                 }
6890         }
6891         else
6892         {
6893                 float8          val = value;
6894                 char       *orgnum;
6895                 int                     numstr_pre_len;
6896
6897                 if (IS_MULTI(&Num))
6898                 {
6899                         double          multi = pow((double) 10, (double) Num.multi);
6900
6901                         val = value * multi;
6902                         Num.pre += Num.multi;
6903                 }
6904
6905                 orgnum = psprintf("%.0f", fabs(val));
6906                 numstr_pre_len = strlen(orgnum);
6907
6908                 /* adjust post digits to fit max double digits */
6909                 if (numstr_pre_len >= DBL_DIG)
6910                         Num.post = 0;
6911                 else if (numstr_pre_len + Num.post > DBL_DIG)
6912                         Num.post = DBL_DIG - numstr_pre_len;
6913                 orgnum = psprintf("%.*f", Num.post, val);
6914
6915                 if (*orgnum == '-')
6916                 {                                               /* < 0 */
6917                         sign = '-';
6918                         numstr = orgnum + 1;
6919                 }
6920                 else
6921                 {
6922                         sign = '+';
6923                         numstr = orgnum;
6924                 }
6925
6926                 if ((p = strchr(numstr, '.')))
6927                         numstr_pre_len = p - numstr;
6928                 else
6929                         numstr_pre_len = strlen(numstr);
6930
6931                 /* needs padding? */
6932                 if (numstr_pre_len < Num.pre)
6933                         out_pre_spaces = Num.pre - numstr_pre_len;
6934                 /* overflowed prefix digit format? */
6935                 else if (numstr_pre_len > Num.pre)
6936                 {
6937                         numstr = (char *) palloc(Num.pre + Num.post + 2);
6938                         fill_str(numstr, '#', Num.pre + Num.post + 1);
6939                         *(numstr + Num.pre) = '.';
6940                 }
6941         }
6942
6943         NUM_TOCHAR_finish;
6944         PG_RETURN_TEXT_P(result);
6945 }