Fix unintentional behavior change in commit e9931bfb75.
[pgsql.git] / src / backend / utils / adt / formatting.c
blob2bcc185708c79fcbcccdd8dc5132a4462b8ade56
1 /* -----------------------------------------------------------------------
2 * formatting.c
4 * src/backend/utils/adt/formatting.c
7 * Portions Copyright (c) 1999-2024, PostgreSQL Global Development Group
10 * TO_CHAR(); TO_TIMESTAMP(); TO_DATE(); TO_NUMBER();
12 * The PostgreSQL routines for a timestamp/int/float/numeric formatting,
13 * inspired by the Oracle TO_CHAR() / TO_DATE() / TO_NUMBER() routines.
16 * Cache & Memory:
17 * The routines use their own internal cache for format pictures.
19 * The cache uses a static buffer and is persistent across transactions. If
20 * the format picture is bigger than the cache buffer, the parser is always
21 * called.
23 * NOTE for Number version:
24 * Everything in this version is implemented as keywords (suffixes are
25 * not used), because a format picture describes *one* item (number)
26 * only. This differs from the timestamp version, where each keyword can
27 * have a suffix.
29 * NOTE for Timestamp routines:
30 * In this module the POSIX 'struct tm' type is *not* used, but rather
31 * PgSQL type, which has tm_mon based on one (*non* zero) and
32 * year *not* based on 1900, but is used full year number.
33 * Module supports AD / BC / AM / PM.
35 * Supported types for to_char():
37 * Timestamp, Numeric, int4, int8, float4, float8
39 * Supported types for reverse conversion:
41 * Timestamp - to_timestamp()
42 * Date - to_date()
43 * Numeric - to_number()
46 * Karel Zak
48 * TODO
49 * - better number building (formatting) / parsing, now it isn't
50 * ideal code
51 * - use Assert()
52 * - add support for roman number to standard number conversion
53 * - add support for number spelling
54 * - add support for string to string formatting (we must be better
55 * than Oracle :-),
56 * to_char('Hello', 'X X X X X') -> 'H e l l o'
58 * -----------------------------------------------------------------------
61 #ifdef DEBUG_TO_FROM_CHAR
62 #define DEBUG_elog_output DEBUG3
63 #endif
65 #include "postgres.h"
67 #include <ctype.h>
68 #include <unistd.h>
69 #include <math.h>
70 #include <float.h>
71 #include <limits.h>
72 #include <wctype.h>
74 #ifdef USE_ICU
75 #include <unicode/ustring.h>
76 #endif
78 #include "catalog/pg_collation.h"
79 #include "catalog/pg_type.h"
80 #include "common/unicode_case.h"
81 #include "common/unicode_category.h"
82 #include "mb/pg_wchar.h"
83 #include "nodes/miscnodes.h"
84 #include "parser/scansup.h"
85 #include "utils/builtins.h"
86 #include "utils/date.h"
87 #include "utils/datetime.h"
88 #include "utils/formatting.h"
89 #include "utils/memutils.h"
90 #include "utils/numeric.h"
91 #include "utils/pg_locale.h"
92 #include "varatt.h"
/* ----------
 * Routines flags
 * ----------
 */
#define DCH_FLAG			0x1 /* DATE-TIME flag */
#define NUM_FLAG			0x2 /* NUMBER flag */
#define STD_FLAG			0x4 /* STANDARD flag */

/* ----------
 * KeyWord Index (ascii from position 32 (' ') to 126 (~))
 *
 * KeyWord_INDEX_FILTER(c) yields 1 only for characters strictly between
 * ' ' and '~'; for those, (c - ' ') lies in 1 .. KeyWord_INDEX_SIZE - 1
 * and can be used as a subscript into an index array of
 * KeyWord_INDEX_SIZE entries.  Everything else yields 0.
 * ----------
 */
#define KeyWord_INDEX_SIZE		('~' - ' ')
#define KeyWord_INDEX_FILTER(_c)	((_c) <= ' ' || (_c) >= '~' ? 0 : 1)

/* ----------
 * Maximal length of one node
 * ----------
 */
#define DCH_MAX_ITEM_SIZ	12	/* max localized day name */
#define NUM_MAX_ITEM_SIZ	8	/* roman number (RN has 15 chars) */
/* ----------
 * Format parser structs
 * ----------
 */

/* One entry of a keyword-suffix table (name, length, code, position). */
typedef struct
{
	const char *name;			/* suffix string */
	int			len,			/* suffix length */
				id,				/* used in node->suffix */
				type;			/* prefix / postfix */
} KeySuffix;
/* ----------
 * FromCharDateMode
 * ----------
 *
 * This value is used to nominate one of several distinct (and mutually
 * exclusive) date conventions that a keyword can belong to.
 */
typedef enum
{
	FROM_CHAR_DATE_NONE = 0,	/* Value does not affect date mode. */
	FROM_CHAR_DATE_GREGORIAN,	/* Gregorian (day, month, year) style date */
	FROM_CHAR_DATE_ISOWEEK,		/* ISO 8601 week date */
} FromCharDateMode;
144 typedef struct
146 const char *name;
147 int len;
148 int id;
149 bool is_digit;
150 FromCharDateMode date_mode;
151 } KeyWord;
153 typedef struct
155 uint8 type; /* NODE_TYPE_XXX, see below */
156 char character[MAX_MULTIBYTE_CHAR_LEN + 1]; /* if type is CHAR */
157 uint8 suffix; /* keyword prefix/suffix code, if any */
158 const KeyWord *key; /* if type is ACTION */
159 } FormatNode;
161 #define NODE_TYPE_END 1
162 #define NODE_TYPE_ACTION 2
163 #define NODE_TYPE_CHAR 3
164 #define NODE_TYPE_SEPARATOR 4
165 #define NODE_TYPE_SPACE 5
167 #define SUFFTYPE_PREFIX 1
168 #define SUFFTYPE_POSTFIX 2
170 #define CLOCK_24_HOUR 0
171 #define CLOCK_12_HOUR 1
/* ----------
 * Full months
 * ----------
 */
static const char *const months_full[] = {
	"January", "February", "March", "April", "May", "June", "July",
	"August", "September", "October", "November", "December", NULL
};

/* Abbreviated English day names, Sunday first; NULL-terminated. */
static const char *const days_short[] = {
	"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", NULL
};
/* ----------
 * AD / BC
 * ----------
 *	There is no 0 AD.  Years go from 1 BC to 1 AD, so for display a
 *	non-positive year is turned into a positive BC year number:
 *	year 0 becomes 1, year -1 becomes 2, and so on.  Positive years are
 *	returned unchanged.  For interval years, we just return the year.
 */
#define ADJUST_YEAR(year, is_interval)	((is_interval) ? (year) : ((year) <= 0 ? -((year) - 1) : (year)))

#define A_D_STR		"A.D."
#define a_d_STR		"a.d."
#define AD_STR		"AD"
#define ad_STR		"ad"

#define B_C_STR		"B.C."
#define b_c_STR		"b.c."
#define BC_STR		"BC"
#define bc_STR		"bc"

/*
 * AD / BC strings for seq_search.
 *
 * These are given in two variants, a long form with periods and a standard
 * form without.
 *
 * The array is laid out such that matches for AD have an even index, and
 * matches for BC have an odd index.  So the boolean value for BC is given by
 * taking the array index of the match, modulo 2.
 */
static const char *const adbc_strings[] = {ad_STR, bc_STR, AD_STR, BC_STR, NULL};
static const char *const adbc_strings_long[] = {a_d_STR, b_c_STR, A_D_STR, B_C_STR, NULL};
/* ----------
 * AM / PM
 * ----------
 */
#define A_M_STR		"A.M."
#define a_m_STR		"a.m."
#define AM_STR		"AM"
#define am_STR		"am"

#define P_M_STR		"P.M."
#define p_m_STR		"p.m."
#define PM_STR		"PM"
#define pm_STR		"pm"

/*
 * AM / PM strings for seq_search.
 *
 * These are given in two variants, a long form with periods and a standard
 * form without.
 *
 * The array is laid out such that matches for AM have an even index, and
 * matches for PM have an odd index.  So the boolean value for PM is given by
 * taking the array index of the match, modulo 2.
 */
static const char *const ampm_strings[] = {am_STR, pm_STR, AM_STR, PM_STR, NULL};
static const char *const ampm_strings_long[] = {a_m_STR, p_m_STR, A_M_STR, P_M_STR, NULL};
/* ----------
 * Months in roman-numeral
 * (Must be in reverse order for seq_search (in FROM_CHAR), because
 *	'VIII' must have higher precedence than 'V')
 * ----------
 */
static const char *const rm_months_upper[] =
{"XII", "XI", "X", "IX", "VIII", "VII", "VI", "V", "IV", "III", "II", "I", NULL};

static const char *const rm_months_lower[] =
{"xii", "xi", "x", "ix", "viii", "vii", "vi", "v", "iv", "iii", "ii", "i", NULL};

/* ----------
 * Roman numbers
 *
 * Entry [n - 1] of each table is the numeral for n ones, n tens and
 * n hundreds respectively (n = 1..9); each table is NULL-terminated.
 * ----------
 */
static const char *const rm1[] = {"I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", NULL};
static const char *const rm10[] = {"X", "XX", "XXX", "XL", "L", "LX", "LXX", "LXXX", "XC", NULL};
static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "DCCC", "CM", NULL};
/* ----------
 * Ordinal postfixes
 * ----------
 */
static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL};
static const char *const numth[] = {"st", "nd", "rd", "th", NULL};

/* ----------
 * Flags & Options:
 * ----------
 */
#define TH_UPPER		1
#define TH_LOWER		2
/* ----------
 * Number description struct
 * ----------
 */
typedef struct
{
	int			pre,			/* (count) numbers before decimal */
				post,			/* (count) numbers after decimal */
				lsign,			/* want locales sign */
				flag,			/* number parameters (NUM_F_xxx bits) */
				pre_lsign_num,	/* tmp value for lsign */
				multi,			/* multiplier for 'V' */
				zero_start,		/* position of first zero */
				zero_end,		/* position of last zero */
				need_locale;	/* needs it locale */
} NUMDesc;
/* ----------
 * Flags for NUMBER version
 * ----------
 */
#define NUM_F_DECIMAL		(1 << 1)
#define NUM_F_LDECIMAL		(1 << 2)
#define NUM_F_ZERO			(1 << 3)
#define NUM_F_BLANK			(1 << 4)
#define NUM_F_FILLMODE		(1 << 5)
#define NUM_F_LSIGN			(1 << 6)
#define NUM_F_BRACKET		(1 << 7)
#define NUM_F_MINUS			(1 << 8)
#define NUM_F_PLUS			(1 << 9)
#define NUM_F_ROMAN			(1 << 10)
#define NUM_F_MULTI			(1 << 11)
#define NUM_F_PLUS_POST		(1 << 12)
#define NUM_F_MINUS_POST	(1 << 13)
#define NUM_F_EEEE			(1 << 14)

#define NUM_LSIGN_PRE	(-1)
#define NUM_LSIGN_POST	1
#define NUM_LSIGN_NONE	0

/* ----------
 * Tests
 *
 * Each macro takes a pointer to a struct with a "flag" member and yields
 * the masked bit (nonzero if set), not a normalized 0/1 value.
 * ----------
 */
#define IS_DECIMAL(_f)	((_f)->flag & NUM_F_DECIMAL)
#define IS_LDECIMAL(_f) ((_f)->flag & NUM_F_LDECIMAL)
#define IS_ZERO(_f) ((_f)->flag & NUM_F_ZERO)
#define IS_BLANK(_f)	((_f)->flag & NUM_F_BLANK)
#define IS_FILLMODE(_f) ((_f)->flag & NUM_F_FILLMODE)
#define IS_BRACKET(_f)	((_f)->flag & NUM_F_BRACKET)
#define IS_MINUS(_f)	((_f)->flag & NUM_F_MINUS)
#define IS_LSIGN(_f)	((_f)->flag & NUM_F_LSIGN)
#define IS_PLUS(_f) ((_f)->flag & NUM_F_PLUS)
#define IS_ROMAN(_f)	((_f)->flag & NUM_F_ROMAN)
#define IS_MULTI(_f)	((_f)->flag & NUM_F_MULTI)
#define IS_EEEE(_f)		((_f)->flag & NUM_F_EEEE)
337 /* ----------
338 * Format picture cache
340 * We will cache datetime format pictures up to DCH_CACHE_SIZE bytes long;
341 * likewise number format pictures up to NUM_CACHE_SIZE bytes long.
343 * For simplicity, the cache entries are fixed-size, so they allow for the
344 * worst case of a FormatNode for each byte in the picture string.
346 * The CACHE_SIZE constants are computed to make sizeof(DCHCacheEntry) and
347 * sizeof(NUMCacheEntry) be powers of 2, or just less than that, so that
348 * we don't waste too much space by palloc'ing them individually. Be sure
349 * to adjust those macros if you add fields to those structs.
351 * The max number of entries in each cache is DCH_CACHE_ENTRIES
352 * resp. NUM_CACHE_ENTRIES.
353 * ----------
355 #define DCH_CACHE_OVERHEAD \
356 MAXALIGN(sizeof(bool) + sizeof(int))
357 #define NUM_CACHE_OVERHEAD \
358 MAXALIGN(sizeof(bool) + sizeof(int) + sizeof(NUMDesc))
360 #define DCH_CACHE_SIZE \
361 ((2048 - DCH_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
362 #define NUM_CACHE_SIZE \
363 ((1024 - NUM_CACHE_OVERHEAD) / (sizeof(FormatNode) + sizeof(char)) - 1)
365 #define DCH_CACHE_ENTRIES 20
366 #define NUM_CACHE_ENTRIES 20
368 typedef struct
370 FormatNode format[DCH_CACHE_SIZE + 1];
371 char str[DCH_CACHE_SIZE + 1];
372 bool std;
373 bool valid;
374 int age;
375 } DCHCacheEntry;
377 typedef struct
379 FormatNode format[NUM_CACHE_SIZE + 1];
380 char str[NUM_CACHE_SIZE + 1];
381 bool valid;
382 int age;
383 NUMDesc Num;
384 } NUMCacheEntry;
386 /* global cache for date/time format pictures */
387 static DCHCacheEntry *DCHCache[DCH_CACHE_ENTRIES];
388 static int n_DCHCache = 0; /* current number of entries */
389 static int DCHCounter = 0; /* aging-event counter */
391 /* global cache for number format pictures */
392 static NUMCacheEntry *NUMCache[NUM_CACHE_ENTRIES];
393 static int n_NUMCache = 0; /* current number of entries */
394 static int NUMCounter = 0; /* aging-event counter */
396 /* ----------
397 * For char->date/time conversion
398 * ----------
400 typedef struct
402 FromCharDateMode mode;
403 int hh,
407 ssss,
408 d, /* stored as 1-7, Sunday = 1, 0 means missing */
410 ddd,
413 year,
420 yysz, /* is it YY or YYYY ? */
421 clock, /* 12 or 24 hour clock? */
422 tzsign, /* +1, -1, or 0 if no TZH/TZM fields */
423 tzh,
424 tzm,
425 ff; /* fractional precision */
426 bool has_tz; /* was there a TZ field? */
427 int gmtoffset; /* GMT offset of fixed-offset zone abbrev */
428 pg_tz *tzp; /* pg_tz for dynamic abbrev */
429 char *abbrev; /* dynamic abbrev */
430 } TmFromChar;
432 #define ZERO_tmfc(_X) memset(_X, 0, sizeof(TmFromChar))
434 struct fmt_tz /* do_to_timestamp's timezone info output */
436 bool has_tz; /* was there any TZ/TZH/TZM field? */
437 int gmtoffset; /* GMT offset in seconds */
/* ----------
 * Debug
 *
 * DEBUG_TMFC() dumps the fields of a TmFromChar, DEBUG_TM() those of a tm
 * struct, via elog() at level DEBUG_elog_output.  Both expand to nothing
 * unless DEBUG_TO_FROM_CHAR is defined.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR
#define DEBUG_TMFC(_X) \
		elog(DEBUG_elog_output, "TMFC:\nmode %d\nhh %d\npm %d\nmi %d\nss %d\nssss %d\nd %d\ndd %d\nddd %d\nmm %d\nms: %d\nyear %d\nbc %d\nww %d\nw %d\ncc %d\nj %d\nus: %d\nyysz: %d\nclock: %d", \
			(_X)->mode, (_X)->hh, (_X)->pm, (_X)->mi, (_X)->ss, (_X)->ssss, \
			(_X)->d, (_X)->dd, (_X)->ddd, (_X)->mm, (_X)->ms, (_X)->year, \
			(_X)->bc, (_X)->ww, (_X)->w, (_X)->cc, (_X)->j, (_X)->us, \
			(_X)->yysz, (_X)->clock)
/* The label for tm_isdst used to read "nisdst" (stray 'n'); fixed here. */
#define DEBUG_TM(_X) \
		elog(DEBUG_elog_output, "TM:\nsec %d\nyear %d\nmin %d\nwday %d\nhour %d\nyday %d\nmday %d\nisdst %d\nmon %d\n",\
			(_X)->tm_sec, (_X)->tm_year,\
			(_X)->tm_min, (_X)->tm_wday, (_X)->tm_hour, (_X)->tm_yday,\
			(_X)->tm_mday, (_X)->tm_isdst, (_X)->tm_mon)
#else
#define DEBUG_TMFC(_X)
#define DEBUG_TM(_X)
#endif
461 /* ----------
462 * Datetime to char conversion
464 * To support intervals as well as timestamps, we use a custom "tm" struct
465 * that is almost like struct pg_tm, but has a 64-bit tm_hour field.
466 * We omit the tm_isdst and tm_zone fields, which are not used here.
467 * ----------
469 struct fmt_tm
471 int tm_sec;
472 int tm_min;
473 int64 tm_hour;
474 int tm_mday;
475 int tm_mon;
476 int tm_year;
477 int tm_wday;
478 int tm_yday;
479 long int tm_gmtoff;
482 typedef struct TmToChar
484 struct fmt_tm tm; /* almost the classic 'tm' struct */
485 fsec_t fsec; /* fractional seconds */
486 const char *tzn; /* timezone */
487 } TmToChar;
489 #define tmtcTm(_X) (&(_X)->tm)
490 #define tmtcTzn(_X) ((_X)->tzn)
491 #define tmtcFsec(_X) ((_X)->fsec)
/* Note: this is used to copy pg_tm to fmt_tm, so not quite a bitwise copy */
#define COPY_tm(_DST, _SRC) \
do {	\
	(_DST)->tm_sec = (_SRC)->tm_sec; \
	(_DST)->tm_min = (_SRC)->tm_min; \
	(_DST)->tm_hour = (_SRC)->tm_hour; \
	(_DST)->tm_mday = (_SRC)->tm_mday; \
	(_DST)->tm_mon = (_SRC)->tm_mon; \
	(_DST)->tm_year = (_SRC)->tm_year; \
	(_DST)->tm_wday = (_SRC)->tm_wday; \
	(_DST)->tm_yday = (_SRC)->tm_yday; \
	(_DST)->tm_gmtoff = (_SRC)->tm_gmtoff; \
} while(0)

/*
 * Caution: this is used to zero both pg_tm and fmt_tm structs.
 * After clearing, tm_mday and tm_mon are set to 1 rather than 0.
 */
#define ZERO_tm(_X) \
do {	\
	memset(_X, 0, sizeof(*(_X))); \
	(_X)->tm_mday = (_X)->tm_mon = 1; \
} while(0)

/* Reset a TmToChar: ZERO_tm() on its tm part, fsec = 0, tzn = NULL. */
#define ZERO_tmtc(_X) \
do { \
	ZERO_tm( tmtcTm(_X) ); \
	tmtcFsec(_X) = 0; \
	tmtcTzn(_X) = NULL; \
} while(0)
/*
 *	to_char(time) appears to to_char() as an interval, so this check
 *	is really for interval and time data types.
 *
 *	The macro reads a variable named is_interval from the scope in which
 *	it is expanded and reports an ERROR when that variable is true.
 */
#define INVALID_FOR_INTERVAL  \
do { \
	if (is_interval) \
		ereport(ERROR, \
				(errcode(ERRCODE_INVALID_DATETIME_FORMAT), \
				 errmsg("invalid format specification for an interval value"), \
				 errhint("Intervals are not tied to specific calendar dates."))); \
} while(0)
/*****************************************************************************
 *			KeyWord definitions
 *****************************************************************************/

/* ----------
 * Suffixes (FormatNode.suffix is an OR of these codes)
 * ----------
 */
#define DCH_S_FM	0x01
#define DCH_S_TH	0x02
#define DCH_S_th	0x04
#define DCH_S_SP	0x08
#define DCH_S_TM	0x10

/* ----------
 * Suffix tests
 *
 * Each test yields 1 if the given suffix bitmask has the code set, else 0.
 * S_TH_TYPE() yields TH_UPPER when DCH_S_TH is set and TH_LOWER otherwise.
 * ----------
 */
#define S_THth(_s)	((((_s) & DCH_S_TH) || ((_s) & DCH_S_th)) ? 1 : 0)
#define S_TH(_s)	(((_s) & DCH_S_TH) ? 1 : 0)
#define S_th(_s)	(((_s) & DCH_S_th) ? 1 : 0)
#define S_TH_TYPE(_s)	(((_s) & DCH_S_TH) ? TH_UPPER : TH_LOWER)

/* Oracle toggles FM behavior, we don't; see docs. */
#define S_FM(_s)	(((_s) & DCH_S_FM) ? 1 : 0)
#define S_SP(_s)	(((_s) & DCH_S_SP) ? 1 : 0)
#define S_TM(_s)	(((_s) & DCH_S_TM) ? 1 : 0)
562 /* ----------
563 * Suffixes definition for DATE-TIME TO/FROM CHAR
564 * ----------
566 #define TM_SUFFIX_LEN 2
568 static const KeySuffix DCH_suff[] = {
569 {"FM", 2, DCH_S_FM, SUFFTYPE_PREFIX},
570 {"fm", 2, DCH_S_FM, SUFFTYPE_PREFIX},
571 {"TM", TM_SUFFIX_LEN, DCH_S_TM, SUFFTYPE_PREFIX},
572 {"tm", 2, DCH_S_TM, SUFFTYPE_PREFIX},
573 {"TH", 2, DCH_S_TH, SUFFTYPE_POSTFIX},
574 {"th", 2, DCH_S_th, SUFFTYPE_POSTFIX},
575 {"SP", 2, DCH_S_SP, SUFFTYPE_POSTFIX},
576 /* last */
577 {NULL, 0, 0, 0}
/* ----------
 * Format-pictures (KeyWord).
 *
 * The KeyWord tables are sorted alphabetically, *BUT* keywords that share
 * a prefix are ordered from the most complicated (longest) to the
 * simplest:
 *
 *		(example: "DDD","DD","Day","D" )
 *
 * This ordering is required by the sequential search, which matches
 * keywords against a string that has no explicit end.  Which keyword
 * starts "HH12blabla": "HH" or "HH12"?  "HH12" must be tried first,
 * because "HH" also matches but is the wrong answer.
 *
 * (!)
 *	 - The position of a keyword in its table must correspond to the
 *	   position of its entry in the DCH_poz / NUM_poz enum.
 * (!)
 *
 * For fast search an 'int index[]' is used.  The index covers the ASCII
 * table from position 32 (' ') to 126 (~); each slot holds the DCH_ / NUM_
 * enum value of the first keyword starting with that character, or -1 if
 * no keyword starts with it.  Search example for string "MM":
 *	1) look at index['M' - 32],
 *	2) take the keyword position found there (enum DCH_MI),
 *	3) run a sequential search in keywords[] from this position.
 *
 * ----------
 */

typedef enum
{
	DCH_A_D,
	DCH_A_M,
	DCH_AD,
	DCH_AM,
	DCH_B_C,
	DCH_BC,
	DCH_CC,
	DCH_DAY,
	DCH_DDD,
	DCH_DD,
	DCH_DY,
	DCH_Day,
	DCH_Dy,
	DCH_D,
	DCH_FF1,
	DCH_FF2,
	DCH_FF3,
	DCH_FF4,
	DCH_FF5,
	DCH_FF6,
	DCH_FX,						/* global suffix */
	DCH_HH24,
	DCH_HH12,
	DCH_HH,
	DCH_IDDD,
	DCH_ID,
	DCH_IW,
	DCH_IYYY,
	DCH_IYY,
	DCH_IY,
	DCH_I,
	DCH_J,
	DCH_MI,
	DCH_MM,
	DCH_MONTH,
	DCH_MON,
	DCH_MS,
	DCH_Month,
	DCH_Mon,
	DCH_OF,
	DCH_P_M,
	DCH_PM,
	DCH_Q,
	DCH_RM,
	DCH_SSSSS,
	DCH_SSSS,
	DCH_SS,
	DCH_TZH,
	DCH_TZM,
	DCH_TZ,
	DCH_US,
	DCH_WW,
	DCH_W,
	DCH_Y_YYY,
	DCH_YYYY,
	DCH_YYY,
	DCH_YY,
	DCH_Y,
	DCH_a_d,
	DCH_a_m,
	DCH_ad,
	DCH_am,
	DCH_b_c,
	DCH_bc,
	DCH_cc,
	DCH_day,
	DCH_ddd,
	DCH_dd,
	DCH_dy,
	DCH_d,
	DCH_ff1,
	DCH_ff2,
	DCH_ff3,
	DCH_ff4,
	DCH_ff5,
	DCH_ff6,
	DCH_fx,
	DCH_hh24,
	DCH_hh12,
	DCH_hh,
	DCH_iddd,
	DCH_id,
	DCH_iw,
	DCH_iyyy,
	DCH_iyy,
	DCH_iy,
	DCH_i,
	DCH_j,
	DCH_mi,
	DCH_mm,
	DCH_month,
	DCH_mon,
	DCH_ms,
	DCH_of,
	DCH_p_m,
	DCH_pm,
	DCH_q,
	DCH_rm,
	DCH_sssss,
	DCH_ssss,
	DCH_ss,
	DCH_tzh,
	DCH_tzm,
	DCH_tz,
	DCH_us,
	DCH_ww,
	DCH_w,
	DCH_y_yyy,
	DCH_yyyy,
	DCH_yyy,
	DCH_yy,
	DCH_y,

	/* last */
	_DCH_last_
}			DCH_poz;
/*
 * Keyword codes for the NUMBER version.  The enumerators are listed in the
 * same order as the entries of the NUM_keywords table.
 */
typedef enum
{
	NUM_COMMA,
	NUM_DEC,
	NUM_0,
	NUM_9,
	NUM_B,
	NUM_C,
	NUM_D,
	NUM_E,
	NUM_FM,
	NUM_G,
	NUM_L,
	NUM_MI,
	NUM_PL,
	NUM_PR,
	NUM_RN,
	NUM_SG,
	NUM_SP,
	NUM_S,
	NUM_TH,
	NUM_V,
	NUM_b,
	NUM_c,
	NUM_d,
	NUM_e,
	NUM_fm,
	NUM_g,
	NUM_l,
	NUM_mi,
	NUM_pl,
	NUM_pr,
	NUM_rn,
	NUM_sg,
	NUM_sp,
	NUM_s,
	NUM_th,
	NUM_v,

	/* last */
	_NUM_last_
}			NUM_poz;
771 /* ----------
772 * KeyWords for DATE-TIME version
773 * ----------
775 static const KeyWord DCH_keywords[] = {
776 /* name, len, id, is_digit, date_mode */
777 {"A.D.", 4, DCH_A_D, false, FROM_CHAR_DATE_NONE}, /* A */
778 {"A.M.", 4, DCH_A_M, false, FROM_CHAR_DATE_NONE},
779 {"AD", 2, DCH_AD, false, FROM_CHAR_DATE_NONE},
780 {"AM", 2, DCH_AM, false, FROM_CHAR_DATE_NONE},
781 {"B.C.", 4, DCH_B_C, false, FROM_CHAR_DATE_NONE}, /* B */
782 {"BC", 2, DCH_BC, false, FROM_CHAR_DATE_NONE},
783 {"CC", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* C */
784 {"DAY", 3, DCH_DAY, false, FROM_CHAR_DATE_NONE}, /* D */
785 {"DDD", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
786 {"DD", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
787 {"DY", 2, DCH_DY, false, FROM_CHAR_DATE_NONE},
788 {"Day", 3, DCH_Day, false, FROM_CHAR_DATE_NONE},
789 {"Dy", 2, DCH_Dy, false, FROM_CHAR_DATE_NONE},
790 {"D", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
791 {"FF1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* F */
792 {"FF2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
793 {"FF3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
794 {"FF4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
795 {"FF5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
796 {"FF6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
797 {"FX", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
798 {"HH24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* H */
799 {"HH12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
800 {"HH", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
801 {"IDDD", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* I */
802 {"ID", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
803 {"IW", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
804 {"IYYY", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
805 {"IYY", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
806 {"IY", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
807 {"I", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
808 {"J", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* J */
809 {"MI", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* M */
810 {"MM", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
811 {"MONTH", 5, DCH_MONTH, false, FROM_CHAR_DATE_GREGORIAN},
812 {"MON", 3, DCH_MON, false, FROM_CHAR_DATE_GREGORIAN},
813 {"MS", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
814 {"Month", 5, DCH_Month, false, FROM_CHAR_DATE_GREGORIAN},
815 {"Mon", 3, DCH_Mon, false, FROM_CHAR_DATE_GREGORIAN},
816 {"OF", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* O */
817 {"P.M.", 4, DCH_P_M, false, FROM_CHAR_DATE_NONE}, /* P */
818 {"PM", 2, DCH_PM, false, FROM_CHAR_DATE_NONE},
819 {"Q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* Q */
820 {"RM", 2, DCH_RM, false, FROM_CHAR_DATE_GREGORIAN}, /* R */
821 {"SSSSS", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* S */
822 {"SSSS", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
823 {"SS", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
824 {"TZH", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* T */
825 {"TZM", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
826 {"TZ", 2, DCH_TZ, false, FROM_CHAR_DATE_NONE},
827 {"US", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* U */
828 {"WW", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* W */
829 {"W", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
830 {"Y,YYY", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* Y */
831 {"YYYY", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
832 {"YYY", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
833 {"YY", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
834 {"Y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
835 {"a.d.", 4, DCH_a_d, false, FROM_CHAR_DATE_NONE}, /* a */
836 {"a.m.", 4, DCH_a_m, false, FROM_CHAR_DATE_NONE},
837 {"ad", 2, DCH_ad, false, FROM_CHAR_DATE_NONE},
838 {"am", 2, DCH_am, false, FROM_CHAR_DATE_NONE},
839 {"b.c.", 4, DCH_b_c, false, FROM_CHAR_DATE_NONE}, /* b */
840 {"bc", 2, DCH_bc, false, FROM_CHAR_DATE_NONE},
841 {"cc", 2, DCH_CC, true, FROM_CHAR_DATE_NONE}, /* c */
842 {"day", 3, DCH_day, false, FROM_CHAR_DATE_NONE}, /* d */
843 {"ddd", 3, DCH_DDD, true, FROM_CHAR_DATE_GREGORIAN},
844 {"dd", 2, DCH_DD, true, FROM_CHAR_DATE_GREGORIAN},
845 {"dy", 2, DCH_dy, false, FROM_CHAR_DATE_NONE},
846 {"d", 1, DCH_D, true, FROM_CHAR_DATE_GREGORIAN},
847 {"ff1", 3, DCH_FF1, false, FROM_CHAR_DATE_NONE}, /* f */
848 {"ff2", 3, DCH_FF2, false, FROM_CHAR_DATE_NONE},
849 {"ff3", 3, DCH_FF3, false, FROM_CHAR_DATE_NONE},
850 {"ff4", 3, DCH_FF4, false, FROM_CHAR_DATE_NONE},
851 {"ff5", 3, DCH_FF5, false, FROM_CHAR_DATE_NONE},
852 {"ff6", 3, DCH_FF6, false, FROM_CHAR_DATE_NONE},
853 {"fx", 2, DCH_FX, false, FROM_CHAR_DATE_NONE},
854 {"hh24", 4, DCH_HH24, true, FROM_CHAR_DATE_NONE}, /* h */
855 {"hh12", 4, DCH_HH12, true, FROM_CHAR_DATE_NONE},
856 {"hh", 2, DCH_HH, true, FROM_CHAR_DATE_NONE},
857 {"iddd", 4, DCH_IDDD, true, FROM_CHAR_DATE_ISOWEEK}, /* i */
858 {"id", 2, DCH_ID, true, FROM_CHAR_DATE_ISOWEEK},
859 {"iw", 2, DCH_IW, true, FROM_CHAR_DATE_ISOWEEK},
860 {"iyyy", 4, DCH_IYYY, true, FROM_CHAR_DATE_ISOWEEK},
861 {"iyy", 3, DCH_IYY, true, FROM_CHAR_DATE_ISOWEEK},
862 {"iy", 2, DCH_IY, true, FROM_CHAR_DATE_ISOWEEK},
863 {"i", 1, DCH_I, true, FROM_CHAR_DATE_ISOWEEK},
864 {"j", 1, DCH_J, true, FROM_CHAR_DATE_NONE}, /* j */
865 {"mi", 2, DCH_MI, true, FROM_CHAR_DATE_NONE}, /* m */
866 {"mm", 2, DCH_MM, true, FROM_CHAR_DATE_GREGORIAN},
867 {"month", 5, DCH_month, false, FROM_CHAR_DATE_GREGORIAN},
868 {"mon", 3, DCH_mon, false, FROM_CHAR_DATE_GREGORIAN},
869 {"ms", 2, DCH_MS, true, FROM_CHAR_DATE_NONE},
870 {"of", 2, DCH_OF, false, FROM_CHAR_DATE_NONE}, /* o */
871 {"p.m.", 4, DCH_p_m, false, FROM_CHAR_DATE_NONE}, /* p */
872 {"pm", 2, DCH_pm, false, FROM_CHAR_DATE_NONE},
873 {"q", 1, DCH_Q, true, FROM_CHAR_DATE_NONE}, /* q */
874 {"rm", 2, DCH_rm, false, FROM_CHAR_DATE_GREGORIAN}, /* r */
875 {"sssss", 5, DCH_SSSS, true, FROM_CHAR_DATE_NONE}, /* s */
876 {"ssss", 4, DCH_SSSS, true, FROM_CHAR_DATE_NONE},
877 {"ss", 2, DCH_SS, true, FROM_CHAR_DATE_NONE},
878 {"tzh", 3, DCH_TZH, false, FROM_CHAR_DATE_NONE}, /* t */
879 {"tzm", 3, DCH_TZM, true, FROM_CHAR_DATE_NONE},
880 {"tz", 2, DCH_tz, false, FROM_CHAR_DATE_NONE},
881 {"us", 2, DCH_US, true, FROM_CHAR_DATE_NONE}, /* u */
882 {"ww", 2, DCH_WW, true, FROM_CHAR_DATE_GREGORIAN}, /* w */
883 {"w", 1, DCH_W, true, FROM_CHAR_DATE_GREGORIAN},
884 {"y,yyy", 5, DCH_Y_YYY, true, FROM_CHAR_DATE_GREGORIAN}, /* y */
885 {"yyyy", 4, DCH_YYYY, true, FROM_CHAR_DATE_GREGORIAN},
886 {"yyy", 3, DCH_YYY, true, FROM_CHAR_DATE_GREGORIAN},
887 {"yy", 2, DCH_YY, true, FROM_CHAR_DATE_GREGORIAN},
888 {"y", 1, DCH_Y, true, FROM_CHAR_DATE_GREGORIAN},
890 /* last */
891 {NULL, 0, 0, 0, 0}
894 /* ----------
895 * KeyWords for NUMBER version
897 * The is_digit and date_mode fields are not relevant here.
898 * ----------
900 static const KeyWord NUM_keywords[] = {
901 /* name, len, id is in Index */
902 {",", 1, NUM_COMMA}, /* , */
903 {".", 1, NUM_DEC}, /* . */
904 {"0", 1, NUM_0}, /* 0 */
905 {"9", 1, NUM_9}, /* 9 */
906 {"B", 1, NUM_B}, /* B */
907 {"C", 1, NUM_C}, /* C */
908 {"D", 1, NUM_D}, /* D */
909 {"EEEE", 4, NUM_E}, /* E */
910 {"FM", 2, NUM_FM}, /* F */
911 {"G", 1, NUM_G}, /* G */
912 {"L", 1, NUM_L}, /* L */
913 {"MI", 2, NUM_MI}, /* M */
914 {"PL", 2, NUM_PL}, /* P */
915 {"PR", 2, NUM_PR},
916 {"RN", 2, NUM_RN}, /* R */
917 {"SG", 2, NUM_SG}, /* S */
918 {"SP", 2, NUM_SP},
919 {"S", 1, NUM_S},
920 {"TH", 2, NUM_TH}, /* T */
921 {"V", 1, NUM_V}, /* V */
922 {"b", 1, NUM_B}, /* b */
923 {"c", 1, NUM_C}, /* c */
924 {"d", 1, NUM_D}, /* d */
925 {"eeee", 4, NUM_E}, /* e */
926 {"fm", 2, NUM_FM}, /* f */
927 {"g", 1, NUM_G}, /* g */
928 {"l", 1, NUM_L}, /* l */
929 {"mi", 2, NUM_MI}, /* m */
930 {"pl", 2, NUM_PL}, /* p */
931 {"pr", 2, NUM_PR},
932 {"rn", 2, NUM_rn}, /* r */
933 {"sg", 2, NUM_SG}, /* s */
934 {"sp", 2, NUM_SP},
935 {"s", 1, NUM_S},
936 {"th", 2, NUM_th}, /* t */
937 {"v", 1, NUM_V}, /* v */
939 /* last */
940 {NULL, 0, 0}
944 /* ----------
945 * KeyWords index for DATE-TIME version
946 * ----------
948 static const int DCH_index[KeyWord_INDEX_SIZE] = {
950 0 1 2 3 4 5 6 7 8 9
952 /*---- first 0..31 chars are skipped ----*/
954 -1, -1, -1, -1, -1, -1, -1, -1,
955 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
956 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
957 -1, -1, -1, -1, -1, DCH_A_D, DCH_B_C, DCH_CC, DCH_DAY, -1,
958 DCH_FF1, -1, DCH_HH24, DCH_IDDD, DCH_J, -1, -1, DCH_MI, -1, DCH_OF,
959 DCH_P_M, DCH_Q, DCH_RM, DCH_SSSSS, DCH_TZH, DCH_US, -1, DCH_WW, -1, DCH_Y_YYY,
960 -1, -1, -1, -1, -1, -1, -1, DCH_a_d, DCH_b_c, DCH_cc,
961 DCH_day, -1, DCH_ff1, -1, DCH_hh24, DCH_iddd, DCH_j, -1, -1, DCH_mi,
962 -1, DCH_of, DCH_p_m, DCH_q, DCH_rm, DCH_sssss, DCH_tzh, DCH_us, -1, DCH_ww,
963 -1, DCH_y_yyy, -1, -1, -1, -1
965 /*---- chars over 126 are skipped ----*/
968 /* ----------
969 * KeyWords index for NUMBER version
970 * ----------
972 static const int NUM_index[KeyWord_INDEX_SIZE] = {
974 0 1 2 3 4 5 6 7 8 9
976 /*---- first 0..31 chars are skipped ----*/
978 -1, -1, -1, -1, -1, -1, -1, -1,
979 -1, -1, -1, -1, NUM_COMMA, -1, NUM_DEC, -1, NUM_0, -1,
980 -1, -1, -1, -1, -1, -1, -1, NUM_9, -1, -1,
981 -1, -1, -1, -1, -1, -1, NUM_B, NUM_C, NUM_D, NUM_E,
982 NUM_FM, NUM_G, -1, -1, -1, -1, NUM_L, NUM_MI, -1, -1,
983 NUM_PL, -1, NUM_RN, NUM_SG, NUM_TH, -1, NUM_V, -1, -1, -1,
984 -1, -1, -1, -1, -1, -1, -1, -1, NUM_b, NUM_c,
985 NUM_d, NUM_e, NUM_fm, NUM_g, -1, -1, -1, -1, NUM_l, NUM_mi,
986 -1, -1, NUM_pl, -1, NUM_rn, NUM_sg, NUM_th, -1, NUM_v, -1,
987 -1, -1, -1, -1, -1, -1
989 /*---- chars over 126 are skipped ----*/
992 /* ----------
993 * Number processor struct
994 * ----------
996 typedef struct NUMProc
998 bool is_to_char;
999 NUMDesc *Num; /* number description */
1001 int sign, /* '-' or '+' */
1002 sign_wrote, /* was sign write */
1003 num_count, /* number of write digits */
1004 num_in, /* is inside number */
1005 num_curr, /* current position in number */
1006 out_pre_spaces, /* spaces before first digit */
1008 read_dec, /* to_number - was read dec. point */
1009 read_post, /* to_number - number of dec. digit */
1010 read_pre; /* to_number - number non-dec. digit */
1012 char *number, /* string with number */
1013 *number_p, /* pointer to current number position */
1014 *inout, /* in / out buffer */
1015 *inout_p, /* pointer to current inout position */
1016 *last_relevant, /* last relevant number after decimal point */
1018 *L_negative_sign, /* Locale */
1019 *L_positive_sign,
1020 *decimal,
1021 *L_thousands_sep,
1022 *L_currency_symbol;
1023 } NUMProc;
/* Return flags for DCH_from_char() */
#define DCH_DATED	0x01
#define DCH_TIMED	0x02
#define DCH_ZONED	0x04
1030 /* ----------
1031 * Functions
1032 * ----------
1034 static const KeyWord *index_seq_search(const char *str, const KeyWord *kw,
1035 const int *index);
1036 static const KeySuffix *suff_search(const char *str, const KeySuffix *suf, int type);
1037 static bool is_separator_char(const char *str);
1038 static void NUMDesc_prepare(NUMDesc *num, FormatNode *n);
1039 static void parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1040 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num);
1042 static void DCH_to_char(FormatNode *node, bool is_interval,
1043 TmToChar *in, char *out, Oid collid);
1044 static void DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
1045 Oid collid, bool std, Node *escontext);
1047 #ifdef DEBUG_TO_FROM_CHAR
1048 static void dump_index(const KeyWord *k, const int *index);
1049 static void dump_node(FormatNode *node, int max);
1050 #endif
1052 static const char *get_th(char *num, int type);
1053 static char *str_numth(char *dest, char *num, int type);
1054 static int adjust_partial_year_to_2020(int year);
1055 static int strspace_len(const char *str);
1056 static bool from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
1057 Node *escontext);
1058 static bool from_char_set_int(int *dest, const int value, const FormatNode *node,
1059 Node *escontext);
1060 static int from_char_parse_int_len(int *dest, const char **src, const int len,
1061 FormatNode *node, Node *escontext);
1062 static int from_char_parse_int(int *dest, const char **src, FormatNode *node,
1063 Node *escontext);
1064 static int seq_search_ascii(const char *name, const char *const *array, int *len);
1065 static int seq_search_localized(const char *name, char **array, int *len,
1066 Oid collid);
1067 static bool from_char_seq_search(int *dest, const char **src,
1068 const char *const *array,
1069 char **localized_array, Oid collid,
1070 FormatNode *node, Node *escontext);
1071 static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
1072 struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
1073 int *fprec, uint32 *flags, Node *escontext);
1074 static char *fill_str(char *str, int c, int max);
1075 static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree);
1076 static char *int_to_roman(int number);
1077 static void NUM_prepare_locale(NUMProc *Np);
1078 static char *get_last_relevant_decnum(char *num);
1079 static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len);
1080 static void NUM_numpart_to_char(NUMProc *Np, int id);
1081 static char *NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
1082 char *number, int input_len, int to_char_out_pre_spaces,
1083 int sign, bool is_to_char, Oid collid);
1084 static DCHCacheEntry *DCH_cache_getnew(const char *str, bool std);
1085 static DCHCacheEntry *DCH_cache_search(const char *str, bool std);
1086 static DCHCacheEntry *DCH_cache_fetch(const char *str, bool std);
1087 static NUMCacheEntry *NUM_cache_getnew(const char *str);
1088 static NUMCacheEntry *NUM_cache_search(const char *str);
1089 static NUMCacheEntry *NUM_cache_fetch(const char *str);
1092 /* ----------
1093 * Fast sequential search, use index for data selection which
1094 * go to seq. cycle (it is very fast for unwanted strings)
1095 * (can't be used binary search in format parsing)
1096 * ----------
1098 static const KeyWord *
1099 index_seq_search(const char *str, const KeyWord *kw, const int *index)
1101 int poz;
1103 if (!KeyWord_INDEX_FILTER(*str))
1104 return NULL;
1106 if ((poz = *(index + (*str - ' '))) > -1)
1108 const KeyWord *k = kw + poz;
1112 if (strncmp(str, k->name, k->len) == 0)
1113 return k;
1114 k++;
1115 if (!k->name)
1116 return NULL;
1117 } while (*str == *k->name);
1119 return NULL;
1122 static const KeySuffix *
1123 suff_search(const char *str, const KeySuffix *suf, int type)
1125 const KeySuffix *s;
1127 for (s = suf; s->name != NULL; s++)
1129 if (s->type != type)
1130 continue;
1132 if (strncmp(str, s->name, s->len) == 0)
1133 return s;
1135 return NULL;
/*
 * Report whether the first byte of 'str' is a separator character, i.e. a
 * printable ASCII character (above space, below DEL) that is neither a
 * letter nor a digit.
 */
static bool
is_separator_char(const char *str)
{
	const char	c = *str;

	/* outside the printable ASCII range (space itself is excluded) */
	if (c <= 0x20 || c >= 0x7F)
		return false;

	/* letters */
	if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'))
		return false;

	/* digits */
	if (c >= '0' && c <= '9')
		return false;

	return true;
}
1148 /* ----------
1149 * Prepare NUMDesc (number description struct) via FormatNode struct
1150 * ----------
1152 static void
1153 NUMDesc_prepare(NUMDesc *num, FormatNode *n)
1155 if (n->type != NODE_TYPE_ACTION)
1156 return;
1158 if (IS_EEEE(num) && n->key->id != NUM_E)
1159 ereport(ERROR,
1160 (errcode(ERRCODE_SYNTAX_ERROR),
1161 errmsg("\"EEEE\" must be the last pattern used")));
1163 switch (n->key->id)
1165 case NUM_9:
1166 if (IS_BRACKET(num))
1167 ereport(ERROR,
1168 (errcode(ERRCODE_SYNTAX_ERROR),
1169 errmsg("\"9\" must be ahead of \"PR\"")));
1170 if (IS_MULTI(num))
1172 ++num->multi;
1173 break;
1175 if (IS_DECIMAL(num))
1176 ++num->post;
1177 else
1178 ++num->pre;
1179 break;
1181 case NUM_0:
1182 if (IS_BRACKET(num))
1183 ereport(ERROR,
1184 (errcode(ERRCODE_SYNTAX_ERROR),
1185 errmsg("\"0\" must be ahead of \"PR\"")));
1186 if (!IS_ZERO(num) && !IS_DECIMAL(num))
1188 num->flag |= NUM_F_ZERO;
1189 num->zero_start = num->pre + 1;
1191 if (!IS_DECIMAL(num))
1192 ++num->pre;
1193 else
1194 ++num->post;
1196 num->zero_end = num->pre + num->post;
1197 break;
1199 case NUM_B:
1200 if (num->pre == 0 && num->post == 0 && (!IS_ZERO(num)))
1201 num->flag |= NUM_F_BLANK;
1202 break;
1204 case NUM_D:
1205 num->flag |= NUM_F_LDECIMAL;
1206 num->need_locale = true;
1207 /* FALLTHROUGH */
1208 case NUM_DEC:
1209 if (IS_DECIMAL(num))
1210 ereport(ERROR,
1211 (errcode(ERRCODE_SYNTAX_ERROR),
1212 errmsg("multiple decimal points")));
1213 if (IS_MULTI(num))
1214 ereport(ERROR,
1215 (errcode(ERRCODE_SYNTAX_ERROR),
1216 errmsg("cannot use \"V\" and decimal point together")));
1217 num->flag |= NUM_F_DECIMAL;
1218 break;
1220 case NUM_FM:
1221 num->flag |= NUM_F_FILLMODE;
1222 break;
1224 case NUM_S:
1225 if (IS_LSIGN(num))
1226 ereport(ERROR,
1227 (errcode(ERRCODE_SYNTAX_ERROR),
1228 errmsg("cannot use \"S\" twice")));
1229 if (IS_PLUS(num) || IS_MINUS(num) || IS_BRACKET(num))
1230 ereport(ERROR,
1231 (errcode(ERRCODE_SYNTAX_ERROR),
1232 errmsg("cannot use \"S\" and \"PL\"/\"MI\"/\"SG\"/\"PR\" together")));
1233 if (!IS_DECIMAL(num))
1235 num->lsign = NUM_LSIGN_PRE;
1236 num->pre_lsign_num = num->pre;
1237 num->need_locale = true;
1238 num->flag |= NUM_F_LSIGN;
1240 else if (num->lsign == NUM_LSIGN_NONE)
1242 num->lsign = NUM_LSIGN_POST;
1243 num->need_locale = true;
1244 num->flag |= NUM_F_LSIGN;
1246 break;
1248 case NUM_MI:
1249 if (IS_LSIGN(num))
1250 ereport(ERROR,
1251 (errcode(ERRCODE_SYNTAX_ERROR),
1252 errmsg("cannot use \"S\" and \"MI\" together")));
1253 num->flag |= NUM_F_MINUS;
1254 if (IS_DECIMAL(num))
1255 num->flag |= NUM_F_MINUS_POST;
1256 break;
1258 case NUM_PL:
1259 if (IS_LSIGN(num))
1260 ereport(ERROR,
1261 (errcode(ERRCODE_SYNTAX_ERROR),
1262 errmsg("cannot use \"S\" and \"PL\" together")));
1263 num->flag |= NUM_F_PLUS;
1264 if (IS_DECIMAL(num))
1265 num->flag |= NUM_F_PLUS_POST;
1266 break;
1268 case NUM_SG:
1269 if (IS_LSIGN(num))
1270 ereport(ERROR,
1271 (errcode(ERRCODE_SYNTAX_ERROR),
1272 errmsg("cannot use \"S\" and \"SG\" together")));
1273 num->flag |= NUM_F_MINUS;
1274 num->flag |= NUM_F_PLUS;
1275 break;
1277 case NUM_PR:
1278 if (IS_LSIGN(num) || IS_PLUS(num) || IS_MINUS(num))
1279 ereport(ERROR,
1280 (errcode(ERRCODE_SYNTAX_ERROR),
1281 errmsg("cannot use \"PR\" and \"S\"/\"PL\"/\"MI\"/\"SG\" together")));
1282 num->flag |= NUM_F_BRACKET;
1283 break;
1285 case NUM_rn:
1286 case NUM_RN:
1287 num->flag |= NUM_F_ROMAN;
1288 break;
1290 case NUM_L:
1291 case NUM_G:
1292 num->need_locale = true;
1293 break;
1295 case NUM_V:
1296 if (IS_DECIMAL(num))
1297 ereport(ERROR,
1298 (errcode(ERRCODE_SYNTAX_ERROR),
1299 errmsg("cannot use \"V\" and decimal point together")));
1300 num->flag |= NUM_F_MULTI;
1301 break;
1303 case NUM_E:
1304 if (IS_EEEE(num))
1305 ereport(ERROR,
1306 (errcode(ERRCODE_SYNTAX_ERROR),
1307 errmsg("cannot use \"EEEE\" twice")));
1308 if (IS_BLANK(num) || IS_FILLMODE(num) || IS_LSIGN(num) ||
1309 IS_BRACKET(num) || IS_MINUS(num) || IS_PLUS(num) ||
1310 IS_ROMAN(num) || IS_MULTI(num))
1311 ereport(ERROR,
1312 (errcode(ERRCODE_SYNTAX_ERROR),
1313 errmsg("\"EEEE\" is incompatible with other formats"),
1314 errdetail("\"EEEE\" may only be used together with digit and decimal point patterns.")));
1315 num->flag |= NUM_F_EEEE;
1316 break;
1320 /* ----------
1321 * Format parser, search small keywords and keyword's suffixes, and make
1322 * format-node tree.
1324 * for DATE-TIME & NUMBER version
1325 * ----------
1327 static void
1328 parse_format(FormatNode *node, const char *str, const KeyWord *kw,
1329 const KeySuffix *suf, const int *index, uint32 flags, NUMDesc *Num)
1331 FormatNode *n;
1333 #ifdef DEBUG_TO_FROM_CHAR
1334 elog(DEBUG_elog_output, "to_char/number(): run parser");
1335 #endif
1337 n = node;
1339 while (*str)
1341 int suffix = 0;
1342 const KeySuffix *s;
1345 * Prefix
1347 if ((flags & DCH_FLAG) &&
1348 (s = suff_search(str, suf, SUFFTYPE_PREFIX)) != NULL)
1350 suffix |= s->id;
1351 if (s->len)
1352 str += s->len;
1356 * Keyword
1358 if (*str && (n->key = index_seq_search(str, kw, index)) != NULL)
1360 n->type = NODE_TYPE_ACTION;
1361 n->suffix = suffix;
1362 if (n->key->len)
1363 str += n->key->len;
1366 * NUM version: Prepare global NUMDesc struct
1368 if (flags & NUM_FLAG)
1369 NUMDesc_prepare(Num, n);
1372 * Postfix
1374 if ((flags & DCH_FLAG) && *str &&
1375 (s = suff_search(str, suf, SUFFTYPE_POSTFIX)) != NULL)
1377 n->suffix |= s->id;
1378 if (s->len)
1379 str += s->len;
1382 n++;
1384 else if (*str)
1386 int chlen;
1388 if ((flags & STD_FLAG) && *str != '"')
1391 * Standard mode, allow only following separators: "-./,':; ".
1392 * However, we support double quotes even in standard mode
1393 * (see below). This is our extension of standard mode.
1395 if (strchr("-./,':; ", *str) == NULL)
1396 ereport(ERROR,
1397 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
1398 errmsg("invalid datetime format separator: \"%s\"",
1399 pnstrdup(str, pg_mblen(str)))));
1401 if (*str == ' ')
1402 n->type = NODE_TYPE_SPACE;
1403 else
1404 n->type = NODE_TYPE_SEPARATOR;
1406 n->character[0] = *str;
1407 n->character[1] = '\0';
1408 n->key = NULL;
1409 n->suffix = 0;
1410 n++;
1411 str++;
1413 else if (*str == '"')
1416 * Process double-quoted literal string, if any
1418 str++;
1419 while (*str)
1421 if (*str == '"')
1423 str++;
1424 break;
1426 /* backslash quotes the next character, if any */
1427 if (*str == '\\' && *(str + 1))
1428 str++;
1429 chlen = pg_mblen(str);
1430 n->type = NODE_TYPE_CHAR;
1431 memcpy(n->character, str, chlen);
1432 n->character[chlen] = '\0';
1433 n->key = NULL;
1434 n->suffix = 0;
1435 n++;
1436 str += chlen;
1439 else
1442 * Outside double-quoted strings, backslash is only special if
1443 * it immediately precedes a double quote.
1445 if (*str == '\\' && *(str + 1) == '"')
1446 str++;
1447 chlen = pg_mblen(str);
1449 if ((flags & DCH_FLAG) && is_separator_char(str))
1450 n->type = NODE_TYPE_SEPARATOR;
1451 else if (isspace((unsigned char) *str))
1452 n->type = NODE_TYPE_SPACE;
1453 else
1454 n->type = NODE_TYPE_CHAR;
1456 memcpy(n->character, str, chlen);
1457 n->character[chlen] = '\0';
1458 n->key = NULL;
1459 n->suffix = 0;
1460 n++;
1461 str += chlen;
1466 n->type = NODE_TYPE_END;
1467 n->suffix = 0;
/* ----------
 * DEBUG: Dump the FormatNode array (debug builds only)
 *
 * Logs one line per node, starting at 'node' and stopping at the first
 * NODE_TYPE_END node or after index 'max', whichever comes first.
 * ----------
 */
#ifdef DEBUG_TO_FROM_CHAR

#define DUMP_THth(_suf) (S_TH(_suf) ? "TH" : (S_th(_suf) ? "th" : " "))
#define DUMP_FM(_suf)	(S_FM(_suf) ? "FM" : " ")

static void
dump_node(FormatNode *node, int max)
{
	FormatNode *n;
	int			a;

	elog(DEBUG_elog_output, "to_from-char(): DUMP FORMAT");

	for (a = 0, n = node; a <= max; n++, a++)
	{
		switch (n->type)
		{
			case NODE_TYPE_ACTION:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_ACTION '%s'\t(%s,%s)",
					 a, n->key->name, DUMP_THth(n->suffix), DUMP_FM(n->suffix));
				break;

			case NODE_TYPE_CHAR:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_CHAR '%s'",
					 a, n->character);
				break;

				/*
				 * The parser also produces separator and space nodes.  Like
				 * CHAR nodes they carry their text in n->character, so show
				 * them instead of reporting them as unknown.
				 */
			case NODE_TYPE_SEPARATOR:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SEPARATOR '%s'",
					 a, n->character);
				break;

			case NODE_TYPE_SPACE:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_SPACE '%s'",
					 a, n->character);
				break;

			case NODE_TYPE_END:
				elog(DEBUG_elog_output, "%d:\t NODE_TYPE_END", a);
				return;

			default:
				elog(DEBUG_elog_output, "%d:\t unknown NODE!", a);
				break;
		}
	}
}
#endif							/* DEBUG */
1506 /*****************************************************************************
1507 * Private utils
1508 *****************************************************************************/
1510 /* ----------
1511 * Return ST/ND/RD/TH for simple (1..9) numbers
1512 * type --> 0 upper, 1 lower
1513 * ----------
1515 static const char *
1516 get_th(char *num, int type)
1518 int len = strlen(num),
1519 last;
1521 last = *(num + (len - 1));
1522 if (!isdigit((unsigned char) last))
1523 ereport(ERROR,
1524 (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
1525 errmsg("\"%s\" is not a number", num)));
1528 * All "teens" (<x>1[0-9]) get 'TH/th', while <x>[02-9][123] still get
1529 * 'ST/st', 'ND/nd', 'RD/rd', respectively
1531 if ((len > 1) && (num[len - 2] == '1'))
1532 last = 0;
1534 switch (last)
1536 case '1':
1537 if (type == TH_UPPER)
1538 return numTH[0];
1539 return numth[0];
1540 case '2':
1541 if (type == TH_UPPER)
1542 return numTH[1];
1543 return numth[1];
1544 case '3':
1545 if (type == TH_UPPER)
1546 return numTH[2];
1547 return numth[2];
1548 default:
1549 if (type == TH_UPPER)
1550 return numTH[3];
1551 return numth[3];
/* ----------
 * Turn the number string 'num' into an ordinal by appending its suffix.
 *
 * The result is built in 'dest', which may be the same buffer as 'num' and
 * must have room for the appended suffix.  'type' is passed on to get_th()
 * to select the letter case.  Returns 'dest'.
 * ----------
 */
static char *
str_numth(char *dest, char *num, int type)
{
	const char *ordinal;

	/* copy the digits unless the caller converts in place */
	if (num != dest)
		strcpy(dest, num);

	ordinal = get_th(num, type);

	return strcat(dest, ordinal);
}
1569 /*****************************************************************************
1570 * upper/lower/initcap functions
1571 *****************************************************************************/
1573 #ifdef USE_ICU
1575 typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
1576 const UChar *src, int32_t srcLength,
1577 const char *locale,
1578 UErrorCode *pErrorCode);
1580 static int32_t
1581 icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
1582 UChar **buff_dest, UChar *buff_source, int32_t len_source)
1584 UErrorCode status;
1585 int32_t len_dest;
1587 len_dest = len_source; /* try first with same length */
1588 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1589 status = U_ZERO_ERROR;
1590 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1591 mylocale->info.icu.locale, &status);
1592 if (status == U_BUFFER_OVERFLOW_ERROR)
1594 /* try again with adjusted length */
1595 pfree(*buff_dest);
1596 *buff_dest = palloc(len_dest * sizeof(**buff_dest));
1597 status = U_ZERO_ERROR;
1598 len_dest = func(*buff_dest, len_dest, buff_source, len_source,
1599 mylocale->info.icu.locale, &status);
1601 if (U_FAILURE(status))
1602 ereport(ERROR,
1603 (errmsg("case conversion failed: %s", u_errorName(status))));
1604 return len_dest;
1607 static int32_t
1608 u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
1609 const UChar *src, int32_t srcLength,
1610 const char *locale,
1611 UErrorCode *pErrorCode)
1613 return u_strToTitle(dest, destCapacity, src, srcLength,
1614 NULL, locale, pErrorCode);
1617 #endif /* USE_ICU */
/*
 * If the system provides the needed functions for wide-character manipulation
 * (which are all standardized by C99), then we implement upper/lower/initcap
 * using wide-character functions, if necessary.  Otherwise we use the
 * traditional <ctype.h> functions, which of course will not work as desired
 * in multibyte character sets.  Note that in either case we are effectively
 * assuming that the database character encoding matches the encoding implied
 * by LC_CTYPE.
 */
1630 * collation-aware, wide-character-aware lower function
1632 * We pass the number of bytes so we can pass varlena and char*
1633 * to this function. The result is a palloc'd, null-terminated string.
1635 char *
1636 str_tolower(const char *buff, size_t nbytes, Oid collid)
1638 char *result;
1639 pg_locale_t mylocale;
1641 if (!buff)
1642 return NULL;
1644 if (!OidIsValid(collid))
1647 * This typically means that the parser could not resolve a conflict
1648 * of implicit collations, so report it that way.
1650 ereport(ERROR,
1651 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1652 errmsg("could not determine which collation to use for %s function",
1653 "lower()"),
1654 errhint("Use the COLLATE clause to set the collation explicitly.")));
1657 mylocale = pg_newlocale_from_collation(collid);
1659 /* C/POSIX collations use this path regardless of database encoding */
1660 if (mylocale->ctype_is_c)
1662 result = asc_tolower(buff, nbytes);
1664 else
1666 #ifdef USE_ICU
1667 if (mylocale->provider == COLLPROVIDER_ICU)
1669 int32_t len_uchar;
1670 int32_t len_conv;
1671 UChar *buff_uchar;
1672 UChar *buff_conv;
1674 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1675 len_conv = icu_convert_case(u_strToLower, mylocale,
1676 &buff_conv, buff_uchar, len_uchar);
1677 icu_from_uchar(&result, buff_conv, len_conv);
1678 pfree(buff_uchar);
1679 pfree(buff_conv);
1681 else
1682 #endif
1683 if (mylocale->provider == COLLPROVIDER_BUILTIN)
1685 const char *src = buff;
1686 size_t srclen = nbytes;
1687 size_t dstsize;
1688 char *dst;
1689 size_t needed;
1691 Assert(GetDatabaseEncoding() == PG_UTF8);
1693 /* first try buffer of equal size plus terminating NUL */
1694 dstsize = srclen + 1;
1695 dst = palloc(dstsize);
1697 needed = unicode_strlower(dst, dstsize, src, srclen);
1698 if (needed + 1 > dstsize)
1700 /* grow buffer if needed and retry */
1701 dstsize = needed + 1;
1702 dst = repalloc(dst, dstsize);
1703 needed = unicode_strlower(dst, dstsize, src, srclen);
1704 Assert(needed + 1 == dstsize);
1707 Assert(dst[needed] == '\0');
1708 result = dst;
1710 else
1712 Assert(mylocale->provider == COLLPROVIDER_LIBC);
1714 if (pg_database_encoding_max_length() > 1)
1716 wchar_t *workspace;
1717 size_t curr_char;
1718 size_t result_size;
1720 /* Overflow paranoia */
1721 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1722 ereport(ERROR,
1723 (errcode(ERRCODE_OUT_OF_MEMORY),
1724 errmsg("out of memory")));
1726 /* Output workspace cannot have more codes than input bytes */
1727 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1729 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1731 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1732 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
1735 * Make result large enough; case change might change number
1736 * of bytes
1738 result_size = curr_char * pg_database_encoding_max_length() + 1;
1739 result = palloc(result_size);
1741 wchar2char(result, workspace, result_size, mylocale);
1742 pfree(workspace);
1744 else
1746 char *p;
1748 result = pnstrdup(buff, nbytes);
1751 * Note: we assume that tolower_l() will not be so broken as
1752 * to need an isupper_l() guard test. When using the default
1753 * collation, we apply the traditional Postgres behavior that
1754 * forces ASCII-style treatment of I/i, but in non-default
1755 * collations you get exactly what the collation says.
1757 for (p = result; *p; p++)
1759 if (mylocale->is_default)
1760 *p = pg_tolower((unsigned char) *p);
1761 else
1762 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
1768 return result;
1772 * collation-aware, wide-character-aware upper function
1774 * We pass the number of bytes so we can pass varlena and char*
1775 * to this function. The result is a palloc'd, null-terminated string.
1777 char *
1778 str_toupper(const char *buff, size_t nbytes, Oid collid)
1780 char *result;
1781 pg_locale_t mylocale;
1783 if (!buff)
1784 return NULL;
1786 if (!OidIsValid(collid))
1789 * This typically means that the parser could not resolve a conflict
1790 * of implicit collations, so report it that way.
1792 ereport(ERROR,
1793 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1794 errmsg("could not determine which collation to use for %s function",
1795 "upper()"),
1796 errhint("Use the COLLATE clause to set the collation explicitly.")));
1799 mylocale = pg_newlocale_from_collation(collid);
1801 /* C/POSIX collations use this path regardless of database encoding */
1802 if (mylocale->ctype_is_c)
1804 result = asc_toupper(buff, nbytes);
1806 else
1808 #ifdef USE_ICU
1809 if (mylocale->provider == COLLPROVIDER_ICU)
1811 int32_t len_uchar,
1812 len_conv;
1813 UChar *buff_uchar;
1814 UChar *buff_conv;
1816 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
1817 len_conv = icu_convert_case(u_strToUpper, mylocale,
1818 &buff_conv, buff_uchar, len_uchar);
1819 icu_from_uchar(&result, buff_conv, len_conv);
1820 pfree(buff_uchar);
1821 pfree(buff_conv);
1823 else
1824 #endif
1825 if (mylocale->provider == COLLPROVIDER_BUILTIN)
1827 const char *src = buff;
1828 size_t srclen = nbytes;
1829 size_t dstsize;
1830 char *dst;
1831 size_t needed;
1833 Assert(GetDatabaseEncoding() == PG_UTF8);
1835 /* first try buffer of equal size plus terminating NUL */
1836 dstsize = srclen + 1;
1837 dst = palloc(dstsize);
1839 needed = unicode_strupper(dst, dstsize, src, srclen);
1840 if (needed + 1 > dstsize)
1842 /* grow buffer if needed and retry */
1843 dstsize = needed + 1;
1844 dst = repalloc(dst, dstsize);
1845 needed = unicode_strupper(dst, dstsize, src, srclen);
1846 Assert(needed + 1 == dstsize);
1849 Assert(dst[needed] == '\0');
1850 result = dst;
1852 else
1854 Assert(mylocale->provider == COLLPROVIDER_LIBC);
1856 if (pg_database_encoding_max_length() > 1)
1858 wchar_t *workspace;
1859 size_t curr_char;
1860 size_t result_size;
1862 /* Overflow paranoia */
1863 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
1864 ereport(ERROR,
1865 (errcode(ERRCODE_OUT_OF_MEMORY),
1866 errmsg("out of memory")));
1868 /* Output workspace cannot have more codes than input bytes */
1869 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
1871 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
1873 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
1874 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
1877 * Make result large enough; case change might change number
1878 * of bytes
1880 result_size = curr_char * pg_database_encoding_max_length() + 1;
1881 result = palloc(result_size);
1883 wchar2char(result, workspace, result_size, mylocale);
1884 pfree(workspace);
1886 else
1888 char *p;
1890 result = pnstrdup(buff, nbytes);
1893 * Note: we assume that toupper_l() will not be so broken as
1894 * to need an islower_l() guard test. When using the default
1895 * collation, we apply the traditional Postgres behavior that
1896 * forces ASCII-style treatment of I/i, but in non-default
1897 * collations you get exactly what the collation says.
1899 for (p = result; *p; p++)
1901 if (mylocale->is_default)
1902 *p = pg_toupper((unsigned char) *p);
1903 else
1904 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
1910 return result;
/* Iteration state for initcap_wbnext(). */
struct WordBoundaryState
{
	const char *str;			/* text being scanned */
	size_t		len;			/* scanning stops at this byte offset */
	size_t		offset;			/* byte offset of the next character to look at */
	bool		init;			/* has the first boundary been reported yet? */
	bool		prev_alnum;		/* alnum status recorded at the last boundary */
};
1923 * Simple word boundary iterator that draws boundaries each time the result of
1924 * pg_u_isalnum() changes.
1926 static size_t
1927 initcap_wbnext(void *state)
1929 struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
1931 while (wbstate->offset < wbstate->len &&
1932 wbstate->str[wbstate->offset] != '\0')
1934 pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
1935 wbstate->offset);
1936 bool curr_alnum = pg_u_isalnum(u, true);
1938 if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
1940 size_t prev_offset = wbstate->offset;
1942 wbstate->init = true;
1943 wbstate->offset += unicode_utf8len(u);
1944 wbstate->prev_alnum = curr_alnum;
1945 return prev_offset;
1948 wbstate->offset += unicode_utf8len(u);
1951 return wbstate->len;
1955 * collation-aware, wide-character-aware initcap function
1957 * We pass the number of bytes so we can pass varlena and char*
1958 * to this function. The result is a palloc'd, null-terminated string.
1960 char *
1961 str_initcap(const char *buff, size_t nbytes, Oid collid)
1963 char *result;
1964 int wasalnum = false;
1965 pg_locale_t mylocale;
1967 if (!buff)
1968 return NULL;
1970 if (!OidIsValid(collid))
1973 * This typically means that the parser could not resolve a conflict
1974 * of implicit collations, so report it that way.
1976 ereport(ERROR,
1977 (errcode(ERRCODE_INDETERMINATE_COLLATION),
1978 errmsg("could not determine which collation to use for %s function",
1979 "initcap()"),
1980 errhint("Use the COLLATE clause to set the collation explicitly.")));
1983 mylocale = pg_newlocale_from_collation(collid);
1985 /* C/POSIX collations use this path regardless of database encoding */
1986 if (mylocale->ctype_is_c)
1988 result = asc_initcap(buff, nbytes);
1990 else
1992 #ifdef USE_ICU
1993 if (mylocale->provider == COLLPROVIDER_ICU)
1995 int32_t len_uchar,
1996 len_conv;
1997 UChar *buff_uchar;
1998 UChar *buff_conv;
2000 len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
2001 len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
2002 &buff_conv, buff_uchar, len_uchar);
2003 icu_from_uchar(&result, buff_conv, len_conv);
2004 pfree(buff_uchar);
2005 pfree(buff_conv);
2007 else
2008 #endif
2009 if (mylocale->provider == COLLPROVIDER_BUILTIN)
2011 const char *src = buff;
2012 size_t srclen = nbytes;
2013 size_t dstsize;
2014 char *dst;
2015 size_t needed;
2016 struct WordBoundaryState wbstate = {
2017 .str = src,
2018 .len = srclen,
2019 .offset = 0,
2020 .init = false,
2021 .prev_alnum = false,
2024 Assert(GetDatabaseEncoding() == PG_UTF8);
2026 /* first try buffer of equal size plus terminating NUL */
2027 dstsize = srclen + 1;
2028 dst = palloc(dstsize);
2030 needed = unicode_strtitle(dst, dstsize, src, srclen,
2031 initcap_wbnext, &wbstate);
2032 if (needed + 1 > dstsize)
2034 /* reset iterator */
2035 wbstate.offset = 0;
2036 wbstate.init = false;
2038 /* grow buffer if needed and retry */
2039 dstsize = needed + 1;
2040 dst = repalloc(dst, dstsize);
2041 needed = unicode_strtitle(dst, dstsize, src, srclen,
2042 initcap_wbnext, &wbstate);
2043 Assert(needed + 1 == dstsize);
2046 result = dst;
2048 else
2050 Assert(mylocale->provider == COLLPROVIDER_LIBC);
2052 if (pg_database_encoding_max_length() > 1)
2054 wchar_t *workspace;
2055 size_t curr_char;
2056 size_t result_size;
2058 /* Overflow paranoia */
2059 if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
2060 ereport(ERROR,
2061 (errcode(ERRCODE_OUT_OF_MEMORY),
2062 errmsg("out of memory")));
2064 /* Output workspace cannot have more codes than input bytes */
2065 workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
2067 char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
2069 for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
2071 if (wasalnum)
2072 workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
2073 else
2074 workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
2075 wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
2079 * Make result large enough; case change might change number
2080 * of bytes
2082 result_size = curr_char * pg_database_encoding_max_length() + 1;
2083 result = palloc(result_size);
2085 wchar2char(result, workspace, result_size, mylocale);
2086 pfree(workspace);
2088 else
2090 char *p;
2092 result = pnstrdup(buff, nbytes);
2095 * Note: we assume that toupper_l()/tolower_l() will not be so
2096 * broken as to need guard tests. When using the default
2097 * collation, we apply the traditional Postgres behavior that
2098 * forces ASCII-style treatment of I/i, but in non-default
2099 * collations you get exactly what the collation says.
2101 for (p = result; *p; p++)
2103 if (mylocale->is_default)
2105 if (wasalnum)
2106 *p = pg_tolower((unsigned char) *p);
2107 else
2108 *p = pg_toupper((unsigned char) *p);
2110 else
2112 if (wasalnum)
2113 *p = tolower_l((unsigned char) *p, mylocale->info.lt);
2114 else
2115 *p = toupper_l((unsigned char) *p, mylocale->info.lt);
2117 wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
2123 return result;
/*
 * ASCII-only lower function
 *
 * The input is passed as pointer plus byte count, so both varlena data and
 * plain char* can be handed in.  The result is a palloc'd, null-terminated
 * string; a NULL input yields NULL.
 */
char *
asc_tolower(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_tolower((unsigned char) result[i]);

	return result;
}
/*
 * ASCII-only upper function
 *
 * The input is passed as pointer plus byte count, so both varlena data and
 * plain char* can be handed in.  The result is a palloc'd, null-terminated
 * string; a NULL input yields NULL.
 */
char *
asc_toupper(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
		result[i] = pg_ascii_toupper((unsigned char) result[i]);

	return result;
}
/*
 * ASCII-only initcap function
 *
 * Each character that follows an ASCII letter or digit is lower-cased;
 * every other character (including the first) is upper-cased.
 *
 * The input is passed as pointer plus byte count, so both varlena data and
 * plain char* can be handed in.  The result is a palloc'd, null-terminated
 * string; a NULL input yields NULL.
 */
char *
asc_initcap(const char *buff, size_t nbytes)
{
	char	   *result;
	size_t		i;
	int			wasalnum = false;

	if (buff == NULL)
		return NULL;

	result = pnstrdup(buff, nbytes);

	for (i = 0; result[i] != '\0'; i++)
	{
		char		c;

		c = wasalnum
			? pg_ascii_tolower((unsigned char) result[i])
			: pg_ascii_toupper((unsigned char) result[i]);
		result[i] = c;

		/* we don't trust isalnum() here */
		wasalnum = ((c >= '0' && c <= '9') ||
					(c >= 'A' && c <= 'Z') ||
					(c >= 'a' && c <= 'z'));
	}

	return result;
}
2207 /* convenience routines for when the input is null-terminated */
2209 static char *
2210 str_tolower_z(const char *buff, Oid collid)
2212 return str_tolower(buff, strlen(buff), collid);
2215 static char *
2216 str_toupper_z(const char *buff, Oid collid)
2218 return str_toupper(buff, strlen(buff), collid);
2221 static char *
2222 str_initcap_z(const char *buff, Oid collid)
2224 return str_initcap(buff, strlen(buff), collid);
/* asc_tolower() for a null-terminated string */
static char *
asc_tolower_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_tolower(buff, nbytes);
}
/* asc_toupper() for a null-terminated string */
static char *
asc_toupper_z(const char *buff)
{
	size_t		nbytes = strlen(buff);

	return asc_toupper(buff, nbytes);
}

/* asc_initcap_z is not currently needed */
/* ----------
 * Skip TH / th in FROM_CHAR
 *
 * If S_THth is on for the suffix, advance ptr over up to two characters
 * (multibyte-aware), stopping early at the end of the string.
 * ----------
 */
#define SKIP_THth(ptr, _suf) \
	do { \
		if (S_THth(_suf)) \
		{ \
			int		skip_thth_left_; \
			for (skip_thth_left_ = 2; \
				 skip_thth_left_ > 0 && *(ptr); \
				 skip_thth_left_--) \
				(ptr) += pg_mblen(ptr); \
		} \
	} while (0)
#ifdef DEBUG_TO_FROM_CHAR
/* -----------
 * DEBUG: Call for debug and for index checking (shows the ASCII char
 * and the defined keyword for each used position)
 *
 * Logs one line per index slot, then a summary with the number of used
 * and free slots.
 * ----------
 */
static void
dump_index(const KeyWord *k, const int *index)
{
	int			used = 0;
	int			unused = 0;
	int			slot;

	elog(DEBUG_elog_output, "TO-FROM_CHAR: Dump KeyWord Index:");

	for (slot = 0; slot < KeyWord_INDEX_SIZE; slot++)
	{
		if (index[slot] == -1)
		{
			/* free slot */
			unused++;
			elog(DEBUG_elog_output, "\t(%d) %c %d", slot, slot + 32, index[slot]);
			continue;
		}

		/* slot + 32 is the character this slot stands for */
		elog(DEBUG_elog_output, "\t%c: %s, ", slot + 32, k[index[slot]].name);
		used++;
	}

	elog(DEBUG_elog_output, "\n\t\tUsed positions: %d,\n\t\tFree positions: %d",
		 used, unused);
}
#endif							/* DEBUG */
2291 /* ----------
2292 * Return true if next format picture is not digit value
2293 * ----------
2295 static bool
2296 is_next_separator(FormatNode *n)
2298 if (n->type == NODE_TYPE_END)
2299 return false;
2301 if (n->type == NODE_TYPE_ACTION && S_THth(n->suffix))
2302 return true;
2305 * Next node
2307 n++;
2309 /* end of format string is treated like a non-digit separator */
2310 if (n->type == NODE_TYPE_END)
2311 return true;
2313 if (n->type == NODE_TYPE_ACTION)
2315 if (n->key->is_digit)
2316 return false;
2318 return true;
2320 else if (n->character[1] == '\0' &&
2321 isdigit((unsigned char) n->character[0]))
2322 return false;
2324 return true; /* some non-digit input (separator) */
/*
 * Expand a partially specified year to a full one, adjusting all dates
 * toward 2020; this is effectively what happens when we assume '70' is 1970
 * and '69' is 2069.  Years of 1000 and above are returned unchanged.
 */
static int
adjust_partial_year_to_2020(int year)
{
	int			base;

	if (year < 70)
		base = 2000;			/* Force 0-69 into the 2000's */
	else if (year < 100)
		base = 1900;			/* Force 70-99 into the 1900's */
	else if (year < 520)
		base = 2000;			/* Force 100-519 into the 2000's */
	else if (year < 1000)
		base = 1000;			/* Force 520-999 into the 1000's */
	else
		base = 0;				/* already a full year */

	return year + base;
}
/*
 * Return the number of whitespace bytes (per isspace()) at the start of
 * 'str'.
 */
static int
strspace_len(const char *str)
{
	int			n;

	for (n = 0; str[n] != '\0' && isspace((unsigned char) str[n]); n++)
		;

	return n;
}
2366 * Set the date mode of a from-char conversion.
2368 * Puke if the date mode has already been set, and the caller attempts to set
2369 * it to a conflicting mode.
2371 * Returns true on success, false on failure (if escontext points to an
2372 * ErrorSaveContext; otherwise errors are thrown).
2374 static bool
2375 from_char_set_mode(TmFromChar *tmfc, const FromCharDateMode mode,
2376 Node *escontext)
2378 if (mode != FROM_CHAR_DATE_NONE)
2380 if (tmfc->mode == FROM_CHAR_DATE_NONE)
2381 tmfc->mode = mode;
2382 else if (tmfc->mode != mode)
2383 ereturn(escontext, false,
2384 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2385 errmsg("invalid combination of date conventions"),
2386 errhint("Do not mix Gregorian and ISO week date "
2387 "conventions in a formatting template.")));
2389 return true;
2393 * Set the integer pointed to by 'dest' to the given value.
2395 * Puke if the destination integer has previously been set to some other
2396 * non-zero value.
2398 * Returns true on success, false on failure (if escontext points to an
2399 * ErrorSaveContext; otherwise errors are thrown).
2401 static bool
2402 from_char_set_int(int *dest, const int value, const FormatNode *node,
2403 Node *escontext)
2405 if (*dest != 0 && *dest != value)
2406 ereturn(escontext, false,
2407 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2408 errmsg("conflicting values for \"%s\" field in formatting string",
2409 node->key->name),
2410 errdetail("This value contradicts a previous setting "
2411 "for the same field type.")));
2412 *dest = value;
2413 return true;
2417 * Read a single integer from the source string, into the int pointed to by
2418 * 'dest'. If 'dest' is NULL, the result is discarded.
2420 * In fixed-width mode (the node does not have the FM suffix), consume at most
2421 * 'len' characters. However, any leading whitespace isn't counted in 'len'.
2423 * We use strtol() to recover the integer value from the source string, in
2424 * accordance with the given FormatNode.
2426 * If the conversion completes successfully, src will have been advanced to
2427 * point at the character immediately following the last character used in the
2428 * conversion.
2430 * Returns the number of characters consumed, or -1 on error (if escontext
2431 * points to an ErrorSaveContext; otherwise errors are thrown).
2433 * Note that from_char_parse_int() provides a more convenient wrapper where
2434 * the length of the field is the same as the length of the format keyword (as
2435 * with DD and MI).
2437 static int
2438 from_char_parse_int_len(int *dest, const char **src, const int len, FormatNode *node,
2439 Node *escontext)
2441 long result;
2442 char copy[DCH_MAX_ITEM_SIZ + 1];
2443 const char *init = *src;
2444 int used;
2447 * Skip any whitespace before parsing the integer.
2449 *src += strspace_len(*src);
2451 Assert(len <= DCH_MAX_ITEM_SIZ);
2452 used = (int) strlcpy(copy, *src, len + 1);
2454 if (S_FM(node->suffix) || is_next_separator(node))
2457 * This node is in Fill Mode, or the next node is known to be a
2458 * non-digit value, so we just slurp as many characters as we can get.
2460 char *endptr;
2462 errno = 0;
2463 result = strtol(init, &endptr, 10);
2464 *src = endptr;
2466 else
2469 * We need to pull exactly the number of characters given in 'len' out
2470 * of the string, and convert those.
2472 char *last;
2474 if (used < len)
2475 ereturn(escontext, -1,
2476 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2477 errmsg("source string too short for \"%s\" formatting field",
2478 node->key->name),
2479 errdetail("Field requires %d characters, but only %d remain.",
2480 len, used),
2481 errhint("If your source string is not fixed-width, "
2482 "try using the \"FM\" modifier.")));
2484 errno = 0;
2485 result = strtol(copy, &last, 10);
2486 used = last - copy;
2488 if (used > 0 && used < len)
2489 ereturn(escontext, -1,
2490 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2491 errmsg("invalid value \"%s\" for \"%s\"",
2492 copy, node->key->name),
2493 errdetail("Field requires %d characters, but only %d could be parsed.",
2494 len, used),
2495 errhint("If your source string is not fixed-width, "
2496 "try using the \"FM\" modifier.")));
2498 *src += used;
2501 if (*src == init)
2502 ereturn(escontext, -1,
2503 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2504 errmsg("invalid value \"%s\" for \"%s\"",
2505 copy, node->key->name),
2506 errdetail("Value must be an integer.")));
2508 if (errno == ERANGE || result < INT_MIN || result > INT_MAX)
2509 ereturn(escontext, -1,
2510 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2511 errmsg("value for \"%s\" in source string is out of range",
2512 node->key->name),
2513 errdetail("Value must be in the range %d to %d.",
2514 INT_MIN, INT_MAX)));
2516 if (dest != NULL)
2518 if (!from_char_set_int(dest, (int) result, node, escontext))
2519 return -1;
2522 return *src - init;
2526 * Call from_char_parse_int_len(), using the length of the format keyword as
2527 * the expected length of the field.
2529 * Don't call this function if the field differs in length from the format
2530 * keyword (as with HH24; the keyword length is 4, but the field length is 2).
2531 * In such cases, call from_char_parse_int_len() instead to specify the
2532 * required length explicitly.
2534 static int
2535 from_char_parse_int(int *dest, const char **src, FormatNode *node,
2536 Node *escontext)
2538 return from_char_parse_int_len(dest, src, node->key->len, node, escontext);
/*
 * Walk the NULL-terminated "array" looking for the first entry that is a
 * case-insensitive prefix of "name".
 *
 * Returns the array index of the match, or -1 for no match.
 *
 * *len is set to the length of the match, or 0 for no match.
 *
 * Case-insensitivity is defined per pg_ascii_tolower, so this is only
 * suitable for comparisons to ASCII strings.
 */
static int
seq_search_ascii(const char *name, const char *const *array, int *len)
{
	unsigned char first;
	int			idx;

	*len = 0;

	/* empty string can't match anything */
	if (name[0] == '\0')
		return -1;

	/* fold the first character once; it is tested against every entry */
	first = pg_ascii_tolower((unsigned char) name[0]);

	for (idx = 0; array[idx] != NULL; idx++)
	{
		const char *entry = array[idx];
		int			pos;

		/* cheap rejection on the first character */
		if (pg_ascii_tolower((unsigned char) entry[0]) != first)
			continue;

		/* advance while both strings continue and still agree */
		pos = 1;
		while (entry[pos] != '\0' &&
			   name[pos] != '\0' &&
			   pg_ascii_tolower((unsigned char) entry[pos]) ==
			   pg_ascii_tolower((unsigned char) name[pos]))
			pos++;

		/* success only if the whole array entry was matched */
		if (entry[pos] == '\0')
		{
			*len = pos;
			return idx;
		}
	}

	return -1;
}
2599 * Sequentially search an array of possibly non-English words for
2600 * a case-insensitive match to the initial character(s) of "name".
2602 * This has the same API as seq_search_ascii(), but we use a more general
2603 * case-folding transformation to achieve case-insensitivity. Case folding
2604 * is done per the rules of the collation identified by "collid".
2606 * The array is treated as const, but we don't declare it that way because
2607 * the arrays exported by pg_locale.c aren't const.
2609 static int
2610 seq_search_localized(const char *name, char **array, int *len, Oid collid)
2612 char **a;
2613 char *upper_name;
2614 char *lower_name;
2616 *len = 0;
2618 /* empty string can't match anything */
2619 if (!*name)
2620 return -1;
2623 * The case-folding processing done below is fairly expensive, so before
2624 * doing that, make a quick pass to see if there is an exact match.
2626 for (a = array; *a != NULL; a++)
2628 int element_len = strlen(*a);
2630 if (strncmp(name, *a, element_len) == 0)
2632 *len = element_len;
2633 return a - array;
2638 * Fold to upper case, then to lower case, so that we can match reliably
2639 * even in languages in which case conversions are not injective.
2641 upper_name = str_toupper(name, strlen(name), collid);
2642 lower_name = str_tolower(upper_name, strlen(upper_name), collid);
2643 pfree(upper_name);
2645 for (a = array; *a != NULL; a++)
2647 char *upper_element;
2648 char *lower_element;
2649 int element_len;
2651 /* Likewise upper/lower-case array element */
2652 upper_element = str_toupper(*a, strlen(*a), collid);
2653 lower_element = str_tolower(upper_element, strlen(upper_element),
2654 collid);
2655 pfree(upper_element);
2656 element_len = strlen(lower_element);
2658 /* Match? */
2659 if (strncmp(lower_name, lower_element, element_len) == 0)
2661 *len = element_len;
2662 pfree(lower_element);
2663 pfree(lower_name);
2664 return a - array;
2666 pfree(lower_element);
2669 pfree(lower_name);
2670 return -1;
2674 * Perform a sequential search in 'array' (or 'localized_array', if that's
2675 * not NULL) for an entry matching the first character(s) of the 'src'
2676 * string case-insensitively.
2678 * The 'array' is presumed to be English words (all-ASCII), but
2679 * if 'localized_array' is supplied, that might be non-English
2680 * so we need a more expensive case-folding transformation
2681 * (which will follow the rules of the collation 'collid').
2683 * If a match is found, copy the array index of the match into the integer
2684 * pointed to by 'dest' and advance 'src' to the end of the part of the string
2685 * which matched.
2687 * Returns true on match, false on failure (if escontext points to an
2688 * ErrorSaveContext; otherwise errors are thrown).
2690 * 'node' is used only for error reports: node->key->name identifies the
2691 * field type we were searching for.
2693 static bool
2694 from_char_seq_search(int *dest, const char **src, const char *const *array,
2695 char **localized_array, Oid collid,
2696 FormatNode *node, Node *escontext)
2698 int len;
2700 if (localized_array == NULL)
2701 *dest = seq_search_ascii(*src, array, &len);
2702 else
2703 *dest = seq_search_localized(*src, localized_array, &len, collid);
2705 if (len <= 0)
2708 * In the error report, truncate the string at the next whitespace (if
2709 * any) to avoid including irrelevant data.
2711 char *copy = pstrdup(*src);
2712 char *c;
2714 for (c = copy; *c; c++)
2716 if (scanner_isspace(*c))
2718 *c = '\0';
2719 break;
2723 ereturn(escontext, false,
2724 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
2725 errmsg("invalid value \"%s\" for \"%s\"",
2726 copy, node->key->name),
2727 errdetail("The given value did not match any of "
2728 "the allowed values for this field.")));
2730 *src += len;
2731 return true;
2734 /* ----------
2735 * Process a TmToChar struct as denoted by a list of FormatNodes.
2736 * The formatted data is written to the string pointed to by 'out'.
2737 * ----------
2739 static void
2740 DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out, Oid collid)
2742 FormatNode *n;
2743 char *s;
2744 struct fmt_tm *tm = &in->tm;
2745 int i;
2747 /* cache localized days and months */
2748 cache_locale_time();
2750 s = out;
2751 for (n = node; n->type != NODE_TYPE_END; n++)
2753 if (n->type != NODE_TYPE_ACTION)
2755 strcpy(s, n->character);
2756 s += strlen(s);
2757 continue;
2760 switch (n->key->id)
2762 case DCH_A_M:
2763 case DCH_P_M:
2764 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2765 ? P_M_STR : A_M_STR);
2766 s += strlen(s);
2767 break;
2768 case DCH_AM:
2769 case DCH_PM:
2770 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2771 ? PM_STR : AM_STR);
2772 s += strlen(s);
2773 break;
2774 case DCH_a_m:
2775 case DCH_p_m:
2776 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2777 ? p_m_STR : a_m_STR);
2778 s += strlen(s);
2779 break;
2780 case DCH_am:
2781 case DCH_pm:
2782 strcpy(s, (tm->tm_hour % HOURS_PER_DAY >= HOURS_PER_DAY / 2)
2783 ? pm_STR : am_STR);
2784 s += strlen(s);
2785 break;
2786 case DCH_HH:
2787 case DCH_HH12:
2790 * display time as shown on a 12-hour clock, even for
2791 * intervals
2793 sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2794 tm->tm_hour % (HOURS_PER_DAY / 2) == 0 ?
2795 (long long) (HOURS_PER_DAY / 2) :
2796 (long long) (tm->tm_hour % (HOURS_PER_DAY / 2)));
2797 if (S_THth(n->suffix))
2798 str_numth(s, s, S_TH_TYPE(n->suffix));
2799 s += strlen(s);
2800 break;
2801 case DCH_HH24:
2802 sprintf(s, "%0*lld", S_FM(n->suffix) ? 0 : (tm->tm_hour >= 0) ? 2 : 3,
2803 (long long) tm->tm_hour);
2804 if (S_THth(n->suffix))
2805 str_numth(s, s, S_TH_TYPE(n->suffix));
2806 s += strlen(s);
2807 break;
2808 case DCH_MI:
2809 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_min >= 0) ? 2 : 3,
2810 tm->tm_min);
2811 if (S_THth(n->suffix))
2812 str_numth(s, s, S_TH_TYPE(n->suffix));
2813 s += strlen(s);
2814 break;
2815 case DCH_SS:
2816 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_sec >= 0) ? 2 : 3,
2817 tm->tm_sec);
2818 if (S_THth(n->suffix))
2819 str_numth(s, s, S_TH_TYPE(n->suffix));
2820 s += strlen(s);
2821 break;
2823 #define DCH_to_char_fsec(frac_fmt, frac_val) \
2824 sprintf(s, frac_fmt, (int) (frac_val)); \
2825 if (S_THth(n->suffix)) \
2826 str_numth(s, s, S_TH_TYPE(n->suffix)); \
2827 s += strlen(s)
2829 case DCH_FF1: /* tenth of second */
2830 DCH_to_char_fsec("%01d", in->fsec / 100000);
2831 break;
2832 case DCH_FF2: /* hundredth of second */
2833 DCH_to_char_fsec("%02d", in->fsec / 10000);
2834 break;
2835 case DCH_FF3:
2836 case DCH_MS: /* millisecond */
2837 DCH_to_char_fsec("%03d", in->fsec / 1000);
2838 break;
2839 case DCH_FF4: /* tenth of a millisecond */
2840 DCH_to_char_fsec("%04d", in->fsec / 100);
2841 break;
2842 case DCH_FF5: /* hundredth of a millisecond */
2843 DCH_to_char_fsec("%05d", in->fsec / 10);
2844 break;
2845 case DCH_FF6:
2846 case DCH_US: /* microsecond */
2847 DCH_to_char_fsec("%06d", in->fsec);
2848 break;
2849 #undef DCH_to_char_fsec
2850 case DCH_SSSS:
2851 sprintf(s, "%lld",
2852 (long long) (tm->tm_hour * SECS_PER_HOUR +
2853 tm->tm_min * SECS_PER_MINUTE +
2854 tm->tm_sec));
2855 if (S_THth(n->suffix))
2856 str_numth(s, s, S_TH_TYPE(n->suffix));
2857 s += strlen(s);
2858 break;
2859 case DCH_tz:
2860 INVALID_FOR_INTERVAL;
2861 if (tmtcTzn(in))
2863 /* We assume here that timezone names aren't localized */
2864 char *p = asc_tolower_z(tmtcTzn(in));
2866 strcpy(s, p);
2867 pfree(p);
2868 s += strlen(s);
2870 break;
2871 case DCH_TZ:
2872 INVALID_FOR_INTERVAL;
2873 if (tmtcTzn(in))
2875 strcpy(s, tmtcTzn(in));
2876 s += strlen(s);
2878 break;
2879 case DCH_TZH:
2880 INVALID_FOR_INTERVAL;
2881 sprintf(s, "%c%02d",
2882 (tm->tm_gmtoff >= 0) ? '+' : '-',
2883 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2884 s += strlen(s);
2885 break;
2886 case DCH_TZM:
2887 INVALID_FOR_INTERVAL;
2888 sprintf(s, "%02d",
2889 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2890 s += strlen(s);
2891 break;
2892 case DCH_OF:
2893 INVALID_FOR_INTERVAL;
2894 sprintf(s, "%c%0*d",
2895 (tm->tm_gmtoff >= 0) ? '+' : '-',
2896 S_FM(n->suffix) ? 0 : 2,
2897 abs((int) tm->tm_gmtoff) / SECS_PER_HOUR);
2898 s += strlen(s);
2899 if (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR != 0)
2901 sprintf(s, ":%02d",
2902 (abs((int) tm->tm_gmtoff) % SECS_PER_HOUR) / SECS_PER_MINUTE);
2903 s += strlen(s);
2905 break;
2906 case DCH_A_D:
2907 case DCH_B_C:
2908 INVALID_FOR_INTERVAL;
2909 strcpy(s, (tm->tm_year <= 0 ? B_C_STR : A_D_STR));
2910 s += strlen(s);
2911 break;
2912 case DCH_AD:
2913 case DCH_BC:
2914 INVALID_FOR_INTERVAL;
2915 strcpy(s, (tm->tm_year <= 0 ? BC_STR : AD_STR));
2916 s += strlen(s);
2917 break;
2918 case DCH_a_d:
2919 case DCH_b_c:
2920 INVALID_FOR_INTERVAL;
2921 strcpy(s, (tm->tm_year <= 0 ? b_c_STR : a_d_STR));
2922 s += strlen(s);
2923 break;
2924 case DCH_ad:
2925 case DCH_bc:
2926 INVALID_FOR_INTERVAL;
2927 strcpy(s, (tm->tm_year <= 0 ? bc_STR : ad_STR));
2928 s += strlen(s);
2929 break;
2930 case DCH_MONTH:
2931 INVALID_FOR_INTERVAL;
2932 if (!tm->tm_mon)
2933 break;
2934 if (S_TM(n->suffix))
2936 char *str = str_toupper_z(localized_full_months[tm->tm_mon - 1], collid);
2938 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2939 strcpy(s, str);
2940 else
2941 ereport(ERROR,
2942 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2943 errmsg("localized string format value too long")));
2945 else
2946 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2947 asc_toupper_z(months_full[tm->tm_mon - 1]));
2948 s += strlen(s);
2949 break;
2950 case DCH_Month:
2951 INVALID_FOR_INTERVAL;
2952 if (!tm->tm_mon)
2953 break;
2954 if (S_TM(n->suffix))
2956 char *str = str_initcap_z(localized_full_months[tm->tm_mon - 1], collid);
2958 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2959 strcpy(s, str);
2960 else
2961 ereport(ERROR,
2962 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2963 errmsg("localized string format value too long")));
2965 else
2966 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2967 months_full[tm->tm_mon - 1]);
2968 s += strlen(s);
2969 break;
2970 case DCH_month:
2971 INVALID_FOR_INTERVAL;
2972 if (!tm->tm_mon)
2973 break;
2974 if (S_TM(n->suffix))
2976 char *str = str_tolower_z(localized_full_months[tm->tm_mon - 1], collid);
2978 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2979 strcpy(s, str);
2980 else
2981 ereport(ERROR,
2982 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
2983 errmsg("localized string format value too long")));
2985 else
2986 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
2987 asc_tolower_z(months_full[tm->tm_mon - 1]));
2988 s += strlen(s);
2989 break;
2990 case DCH_MON:
2991 INVALID_FOR_INTERVAL;
2992 if (!tm->tm_mon)
2993 break;
2994 if (S_TM(n->suffix))
2996 char *str = str_toupper_z(localized_abbrev_months[tm->tm_mon - 1], collid);
2998 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
2999 strcpy(s, str);
3000 else
3001 ereport(ERROR,
3002 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3003 errmsg("localized string format value too long")));
3005 else
3006 strcpy(s, asc_toupper_z(months[tm->tm_mon - 1]));
3007 s += strlen(s);
3008 break;
3009 case DCH_Mon:
3010 INVALID_FOR_INTERVAL;
3011 if (!tm->tm_mon)
3012 break;
3013 if (S_TM(n->suffix))
3015 char *str = str_initcap_z(localized_abbrev_months[tm->tm_mon - 1], collid);
3017 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3018 strcpy(s, str);
3019 else
3020 ereport(ERROR,
3021 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3022 errmsg("localized string format value too long")));
3024 else
3025 strcpy(s, months[tm->tm_mon - 1]);
3026 s += strlen(s);
3027 break;
3028 case DCH_mon:
3029 INVALID_FOR_INTERVAL;
3030 if (!tm->tm_mon)
3031 break;
3032 if (S_TM(n->suffix))
3034 char *str = str_tolower_z(localized_abbrev_months[tm->tm_mon - 1], collid);
3036 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3037 strcpy(s, str);
3038 else
3039 ereport(ERROR,
3040 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3041 errmsg("localized string format value too long")));
3043 else
3044 strcpy(s, asc_tolower_z(months[tm->tm_mon - 1]));
3045 s += strlen(s);
3046 break;
3047 case DCH_MM:
3048 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (tm->tm_mon >= 0) ? 2 : 3,
3049 tm->tm_mon);
3050 if (S_THth(n->suffix))
3051 str_numth(s, s, S_TH_TYPE(n->suffix));
3052 s += strlen(s);
3053 break;
3054 case DCH_DAY:
3055 INVALID_FOR_INTERVAL;
3056 if (S_TM(n->suffix))
3058 char *str = str_toupper_z(localized_full_days[tm->tm_wday], collid);
3060 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3061 strcpy(s, str);
3062 else
3063 ereport(ERROR,
3064 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3065 errmsg("localized string format value too long")));
3067 else
3068 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3069 asc_toupper_z(days[tm->tm_wday]));
3070 s += strlen(s);
3071 break;
3072 case DCH_Day:
3073 INVALID_FOR_INTERVAL;
3074 if (S_TM(n->suffix))
3076 char *str = str_initcap_z(localized_full_days[tm->tm_wday], collid);
3078 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3079 strcpy(s, str);
3080 else
3081 ereport(ERROR,
3082 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3083 errmsg("localized string format value too long")));
3085 else
3086 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3087 days[tm->tm_wday]);
3088 s += strlen(s);
3089 break;
3090 case DCH_day:
3091 INVALID_FOR_INTERVAL;
3092 if (S_TM(n->suffix))
3094 char *str = str_tolower_z(localized_full_days[tm->tm_wday], collid);
3096 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3097 strcpy(s, str);
3098 else
3099 ereport(ERROR,
3100 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3101 errmsg("localized string format value too long")));
3103 else
3104 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
3105 asc_tolower_z(days[tm->tm_wday]));
3106 s += strlen(s);
3107 break;
3108 case DCH_DY:
3109 INVALID_FOR_INTERVAL;
3110 if (S_TM(n->suffix))
3112 char *str = str_toupper_z(localized_abbrev_days[tm->tm_wday], collid);
3114 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3115 strcpy(s, str);
3116 else
3117 ereport(ERROR,
3118 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3119 errmsg("localized string format value too long")));
3121 else
3122 strcpy(s, asc_toupper_z(days_short[tm->tm_wday]));
3123 s += strlen(s);
3124 break;
3125 case DCH_Dy:
3126 INVALID_FOR_INTERVAL;
3127 if (S_TM(n->suffix))
3129 char *str = str_initcap_z(localized_abbrev_days[tm->tm_wday], collid);
3131 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3132 strcpy(s, str);
3133 else
3134 ereport(ERROR,
3135 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3136 errmsg("localized string format value too long")));
3138 else
3139 strcpy(s, days_short[tm->tm_wday]);
3140 s += strlen(s);
3141 break;
3142 case DCH_dy:
3143 INVALID_FOR_INTERVAL;
3144 if (S_TM(n->suffix))
3146 char *str = str_tolower_z(localized_abbrev_days[tm->tm_wday], collid);
3148 if (strlen(str) <= (n->key->len + TM_SUFFIX_LEN) * DCH_MAX_ITEM_SIZ)
3149 strcpy(s, str);
3150 else
3151 ereport(ERROR,
3152 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
3153 errmsg("localized string format value too long")));
3155 else
3156 strcpy(s, asc_tolower_z(days_short[tm->tm_wday]));
3157 s += strlen(s);
3158 break;
3159 case DCH_DDD:
3160 case DCH_IDDD:
3161 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 3,
3162 (n->key->id == DCH_DDD) ?
3163 tm->tm_yday :
3164 date2isoyearday(tm->tm_year, tm->tm_mon, tm->tm_mday));
3165 if (S_THth(n->suffix))
3166 str_numth(s, s, S_TH_TYPE(n->suffix));
3167 s += strlen(s);
3168 break;
3169 case DCH_DD:
3170 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2, tm->tm_mday);
3171 if (S_THth(n->suffix))
3172 str_numth(s, s, S_TH_TYPE(n->suffix));
3173 s += strlen(s);
3174 break;
3175 case DCH_D:
3176 INVALID_FOR_INTERVAL;
3177 sprintf(s, "%d", tm->tm_wday + 1);
3178 if (S_THth(n->suffix))
3179 str_numth(s, s, S_TH_TYPE(n->suffix));
3180 s += strlen(s);
3181 break;
3182 case DCH_ID:
3183 INVALID_FOR_INTERVAL;
3184 sprintf(s, "%d", (tm->tm_wday == 0) ? 7 : tm->tm_wday);
3185 if (S_THth(n->suffix))
3186 str_numth(s, s, S_TH_TYPE(n->suffix));
3187 s += strlen(s);
3188 break;
3189 case DCH_WW:
3190 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3191 (tm->tm_yday - 1) / 7 + 1);
3192 if (S_THth(n->suffix))
3193 str_numth(s, s, S_TH_TYPE(n->suffix));
3194 s += strlen(s);
3195 break;
3196 case DCH_IW:
3197 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : 2,
3198 date2isoweek(tm->tm_year, tm->tm_mon, tm->tm_mday));
3199 if (S_THth(n->suffix))
3200 str_numth(s, s, S_TH_TYPE(n->suffix));
3201 s += strlen(s);
3202 break;
3203 case DCH_Q:
3204 if (!tm->tm_mon)
3205 break;
3206 sprintf(s, "%d", (tm->tm_mon - 1) / 3 + 1);
3207 if (S_THth(n->suffix))
3208 str_numth(s, s, S_TH_TYPE(n->suffix));
3209 s += strlen(s);
3210 break;
3211 case DCH_CC:
3212 if (is_interval) /* straight calculation */
3213 i = tm->tm_year / 100;
3214 else
3216 if (tm->tm_year > 0)
3217 /* Century 20 == 1901 - 2000 */
3218 i = (tm->tm_year - 1) / 100 + 1;
3219 else
3220 /* Century 6BC == 600BC - 501BC */
3221 i = tm->tm_year / 100 - 1;
3223 if (i <= 99 && i >= -99)
3224 sprintf(s, "%0*d", S_FM(n->suffix) ? 0 : (i >= 0) ? 2 : 3, i);
3225 else
3226 sprintf(s, "%d", i);
3227 if (S_THth(n->suffix))
3228 str_numth(s, s, S_TH_TYPE(n->suffix));
3229 s += strlen(s);
3230 break;
3231 case DCH_Y_YYY:
3232 i = ADJUST_YEAR(tm->tm_year, is_interval) / 1000;
3233 sprintf(s, "%d,%03d", i,
3234 ADJUST_YEAR(tm->tm_year, is_interval) - (i * 1000));
3235 if (S_THth(n->suffix))
3236 str_numth(s, s, S_TH_TYPE(n->suffix));
3237 s += strlen(s);
3238 break;
3239 case DCH_YYYY:
3240 case DCH_IYYY:
3241 sprintf(s, "%0*d",
3242 S_FM(n->suffix) ? 0 :
3243 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 4 : 5,
3244 (n->key->id == DCH_YYYY ?
3245 ADJUST_YEAR(tm->tm_year, is_interval) :
3246 ADJUST_YEAR(date2isoyear(tm->tm_year,
3247 tm->tm_mon,
3248 tm->tm_mday),
3249 is_interval)));
3250 if (S_THth(n->suffix))
3251 str_numth(s, s, S_TH_TYPE(n->suffix));
3252 s += strlen(s);
3253 break;
3254 case DCH_YYY:
3255 case DCH_IYY:
3256 sprintf(s, "%0*d",
3257 S_FM(n->suffix) ? 0 :
3258 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 3 : 4,
3259 (n->key->id == DCH_YYY ?
3260 ADJUST_YEAR(tm->tm_year, is_interval) :
3261 ADJUST_YEAR(date2isoyear(tm->tm_year,
3262 tm->tm_mon,
3263 tm->tm_mday),
3264 is_interval)) % 1000);
3265 if (S_THth(n->suffix))
3266 str_numth(s, s, S_TH_TYPE(n->suffix));
3267 s += strlen(s);
3268 break;
3269 case DCH_YY:
3270 case DCH_IY:
3271 sprintf(s, "%0*d",
3272 S_FM(n->suffix) ? 0 :
3273 (ADJUST_YEAR(tm->tm_year, is_interval) >= 0) ? 2 : 3,
3274 (n->key->id == DCH_YY ?
3275 ADJUST_YEAR(tm->tm_year, is_interval) :
3276 ADJUST_YEAR(date2isoyear(tm->tm_year,
3277 tm->tm_mon,
3278 tm->tm_mday),
3279 is_interval)) % 100);
3280 if (S_THth(n->suffix))
3281 str_numth(s, s, S_TH_TYPE(n->suffix));
3282 s += strlen(s);
3283 break;
3284 case DCH_Y:
3285 case DCH_I:
3286 sprintf(s, "%1d",
3287 (n->key->id == DCH_Y ?
3288 ADJUST_YEAR(tm->tm_year, is_interval) :
3289 ADJUST_YEAR(date2isoyear(tm->tm_year,
3290 tm->tm_mon,
3291 tm->tm_mday),
3292 is_interval)) % 10);
3293 if (S_THth(n->suffix))
3294 str_numth(s, s, S_TH_TYPE(n->suffix));
3295 s += strlen(s);
3296 break;
3297 case DCH_RM:
3298 /* FALLTHROUGH */
3299 case DCH_rm:
3302 * For intervals, values like '12 month' will be reduced to 0
3303 * month and some years. These should be processed.
3305 if (!tm->tm_mon && !tm->tm_year)
3306 break;
3307 else
3309 int mon = 0;
3310 const char *const *months;
3312 if (n->key->id == DCH_RM)
3313 months = rm_months_upper;
3314 else
3315 months = rm_months_lower;
3318 * Compute the position in the roman-numeral array. Note
3319 * that the contents of the array are reversed, December
3320 * being first and January last.
3322 if (tm->tm_mon == 0)
3325 * This case is special, and tracks the case of full
3326 * interval years.
3328 mon = tm->tm_year >= 0 ? 0 : MONTHS_PER_YEAR - 1;
3330 else if (tm->tm_mon < 0)
3333 * Negative case. In this case, the calculation is
3334 * reversed, where -1 means December, -2 November,
3335 * etc.
3337 mon = -1 * (tm->tm_mon + 1);
3339 else
3342 * Common case, with a strictly positive value. The
3343 * position in the array matches with the value of
3344 * tm_mon.
3346 mon = MONTHS_PER_YEAR - tm->tm_mon;
3349 sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -4,
3350 months[mon]);
3351 s += strlen(s);
3353 break;
3354 case DCH_W:
3355 sprintf(s, "%d", (tm->tm_mday - 1) / 7 + 1);
3356 if (S_THth(n->suffix))
3357 str_numth(s, s, S_TH_TYPE(n->suffix));
3358 s += strlen(s);
3359 break;
3360 case DCH_J:
3361 sprintf(s, "%d", date2j(tm->tm_year, tm->tm_mon, tm->tm_mday));
3362 if (S_THth(n->suffix))
3363 str_numth(s, s, S_TH_TYPE(n->suffix));
3364 s += strlen(s);
3365 break;
3369 *s = '\0';
3373 * Process the string 'in' as denoted by the array of FormatNodes 'node[]'.
3374 * The TmFromChar struct pointed to by 'out' is populated with the results.
3376 * 'collid' identifies the collation to use, if needed.
3377 * 'std' specifies standard parsing mode.
3379 * If escontext points to an ErrorSaveContext, data errors will be reported
3380 * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
3381 * whether an error occurred. Otherwise, errors are thrown.
3383 * Note: we currently don't have any to_interval() function, so there
3384 * is no need here for INVALID_FOR_INTERVAL checks.
3386 static void
3387 DCH_from_char(FormatNode *node, const char *in, TmFromChar *out,
3388 Oid collid, bool std, Node *escontext)
3390 FormatNode *n;
3391 const char *s;
3392 int len,
3393 value;
3394 bool fx_mode = std;
3396 /* number of extra skipped characters (more than given in format string) */
3397 int extra_skip = 0;
3399 /* cache localized days and months */
3400 cache_locale_time();
3402 for (n = node, s = in; n->type != NODE_TYPE_END && *s != '\0'; n++)
3405 * Ignore spaces at the beginning of the string and before fields when
3406 * not in FX (fixed width) mode.
3408 if (!fx_mode && (n->type != NODE_TYPE_ACTION || n->key->id != DCH_FX) &&
3409 (n->type == NODE_TYPE_ACTION || n == node))
3411 while (*s != '\0' && isspace((unsigned char) *s))
3413 s++;
3414 extra_skip++;
3418 if (n->type == NODE_TYPE_SPACE || n->type == NODE_TYPE_SEPARATOR)
3420 if (std)
3423 * Standard mode requires strict matching between format
3424 * string separators/spaces and input string.
3426 Assert(n->character[0] && !n->character[1]);
3428 if (*s == n->character[0])
3429 s++;
3430 else
3431 ereturn(escontext,,
3432 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3433 errmsg("unmatched format separator \"%c\"",
3434 n->character[0])));
3436 else if (!fx_mode)
3439 * In non FX (fixed format) mode one format string space or
3440 * separator match to one space or separator in input string.
3441 * Or match nothing if there is no space or separator in the
3442 * current position of input string.
3444 extra_skip--;
3445 if (isspace((unsigned char) *s) || is_separator_char(s))
3447 s++;
3448 extra_skip++;
3451 else
3454 * In FX mode, on format string space or separator we consume
3455 * exactly one character from input string. Notice we don't
3456 * insist that the consumed character match the format's
3457 * character.
3459 s += pg_mblen(s);
3461 continue;
3463 else if (n->type != NODE_TYPE_ACTION)
3466 * Text character, so consume one character from input string.
3467 * Notice we don't insist that the consumed character match the
3468 * format's character.
3470 if (!fx_mode)
3473 * In non FX mode we might have skipped some extra characters
3474 * (more than specified in format string) before. In this
3475 * case we don't skip input string character, because it might
3476 * be part of field.
3478 if (extra_skip > 0)
3479 extra_skip--;
3480 else
3481 s += pg_mblen(s);
3483 else
3485 int chlen = pg_mblen(s);
3488 * Standard mode requires strict match of format characters.
3490 if (std && n->type == NODE_TYPE_CHAR &&
3491 strncmp(s, n->character, chlen) != 0)
3492 ereturn(escontext,,
3493 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3494 errmsg("unmatched format character \"%s\"",
3495 n->character)));
3497 s += chlen;
3499 continue;
3502 if (!from_char_set_mode(out, n->key->date_mode, escontext))
3503 return;
3505 switch (n->key->id)
3507 case DCH_FX:
3508 fx_mode = true;
3509 break;
3510 case DCH_A_M:
3511 case DCH_P_M:
3512 case DCH_a_m:
3513 case DCH_p_m:
3514 if (!from_char_seq_search(&value, &s, ampm_strings_long,
3515 NULL, InvalidOid,
3516 n, escontext))
3517 return;
3518 if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3519 return;
3520 out->clock = CLOCK_12_HOUR;
3521 break;
3522 case DCH_AM:
3523 case DCH_PM:
3524 case DCH_am:
3525 case DCH_pm:
3526 if (!from_char_seq_search(&value, &s, ampm_strings,
3527 NULL, InvalidOid,
3528 n, escontext))
3529 return;
3530 if (!from_char_set_int(&out->pm, value % 2, n, escontext))
3531 return;
3532 out->clock = CLOCK_12_HOUR;
3533 break;
3534 case DCH_HH:
3535 case DCH_HH12:
3536 if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3537 return;
3538 out->clock = CLOCK_12_HOUR;
3539 SKIP_THth(s, n->suffix);
3540 break;
3541 case DCH_HH24:
3542 if (from_char_parse_int_len(&out->hh, &s, 2, n, escontext) < 0)
3543 return;
3544 SKIP_THth(s, n->suffix);
3545 break;
3546 case DCH_MI:
3547 if (from_char_parse_int(&out->mi, &s, n, escontext) < 0)
3548 return;
3549 SKIP_THth(s, n->suffix);
3550 break;
3551 case DCH_SS:
3552 if (from_char_parse_int(&out->ss, &s, n, escontext) < 0)
3553 return;
3554 SKIP_THth(s, n->suffix);
3555 break;
3556 case DCH_MS: /* millisecond */
3557 len = from_char_parse_int_len(&out->ms, &s, 3, n, escontext);
3558 if (len < 0)
3559 return;
3562 * 25 is 0.25 and 250 is 0.25 too; 025 is 0.025 and not 0.25
3564 out->ms *= len == 1 ? 100 :
3565 len == 2 ? 10 : 1;
3567 SKIP_THth(s, n->suffix);
3568 break;
3569 case DCH_FF1:
3570 case DCH_FF2:
3571 case DCH_FF3:
3572 case DCH_FF4:
3573 case DCH_FF5:
3574 case DCH_FF6:
3575 out->ff = n->key->id - DCH_FF1 + 1;
3576 /* FALLTHROUGH */
3577 case DCH_US: /* microsecond */
3578 len = from_char_parse_int_len(&out->us, &s,
3579 n->key->id == DCH_US ? 6 :
3580 out->ff, n, escontext);
3581 if (len < 0)
3582 return;
3584 out->us *= len == 1 ? 100000 :
3585 len == 2 ? 10000 :
3586 len == 3 ? 1000 :
3587 len == 4 ? 100 :
3588 len == 5 ? 10 : 1;
3590 SKIP_THth(s, n->suffix);
3591 break;
3592 case DCH_SSSS:
3593 if (from_char_parse_int(&out->ssss, &s, n, escontext) < 0)
3594 return;
3595 SKIP_THth(s, n->suffix);
3596 break;
3597 case DCH_tz:
3598 case DCH_TZ:
3600 int tzlen;
3602 tzlen = DecodeTimezoneAbbrevPrefix(s,
3603 &out->gmtoffset,
3604 &out->tzp);
3605 if (tzlen > 0)
3607 out->has_tz = true;
3608 /* we only need the zone abbrev for DYNTZ case */
3609 if (out->tzp)
3610 out->abbrev = pnstrdup(s, tzlen);
3611 out->tzsign = 0; /* drop any earlier TZH/TZM info */
3612 s += tzlen;
3613 break;
3615 else if (isalpha((unsigned char) *s))
3618 * It doesn't match any abbreviation, but it starts
3619 * with a letter. OF format certainly won't succeed;
3620 * assume it's a misspelled abbreviation and complain
3621 * accordingly.
3623 ereturn(escontext,,
3624 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3625 errmsg("invalid value \"%s\" for \"%s\"",
3626 s, n->key->name),
3627 errdetail("Time zone abbreviation is not recognized.")));
3629 /* otherwise parse it like OF */
3631 /* FALLTHROUGH */
3632 case DCH_OF:
3633 /* OF is equivalent to TZH or TZH:TZM */
3634 /* see TZH comments below */
3635 if (*s == '+' || *s == '-' || *s == ' ')
3637 out->tzsign = *s == '-' ? -1 : +1;
3638 s++;
3640 else
3642 if (extra_skip > 0 && *(s - 1) == '-')
3643 out->tzsign = -1;
3644 else
3645 out->tzsign = +1;
3647 if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3648 return;
3649 if (*s == ':')
3651 s++;
3652 if (from_char_parse_int_len(&out->tzm, &s, 2, n,
3653 escontext) < 0)
3654 return;
3656 break;
3657 case DCH_TZH:
3660 * Value of TZH might be negative. And the issue is that we
3661 * might swallow minus sign as the separator. So, if we have
3662 * skipped more characters than specified in the format
3663 * string, then we consider prepending last skipped minus to
3664 * TZH.
3666 if (*s == '+' || *s == '-' || *s == ' ')
3668 out->tzsign = *s == '-' ? -1 : +1;
3669 s++;
3671 else
3673 if (extra_skip > 0 && *(s - 1) == '-')
3674 out->tzsign = -1;
3675 else
3676 out->tzsign = +1;
3679 if (from_char_parse_int_len(&out->tzh, &s, 2, n, escontext) < 0)
3680 return;
3681 break;
3682 case DCH_TZM:
3683 /* assign positive timezone sign if TZH was not seen before */
3684 if (!out->tzsign)
3685 out->tzsign = +1;
3686 if (from_char_parse_int_len(&out->tzm, &s, 2, n, escontext) < 0)
3687 return;
3688 break;
3689 case DCH_A_D:
3690 case DCH_B_C:
3691 case DCH_a_d:
3692 case DCH_b_c:
3693 if (!from_char_seq_search(&value, &s, adbc_strings_long,
3694 NULL, InvalidOid,
3695 n, escontext))
3696 return;
3697 if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3698 return;
3699 break;
3700 case DCH_AD:
3701 case DCH_BC:
3702 case DCH_ad:
3703 case DCH_bc:
3704 if (!from_char_seq_search(&value, &s, adbc_strings,
3705 NULL, InvalidOid,
3706 n, escontext))
3707 return;
3708 if (!from_char_set_int(&out->bc, value % 2, n, escontext))
3709 return;
3710 break;
3711 case DCH_MONTH:
3712 case DCH_Month:
3713 case DCH_month:
3714 if (!from_char_seq_search(&value, &s, months_full,
3715 S_TM(n->suffix) ? localized_full_months : NULL,
3716 collid,
3717 n, escontext))
3718 return;
3719 if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3720 return;
3721 break;
3722 case DCH_MON:
3723 case DCH_Mon:
3724 case DCH_mon:
3725 if (!from_char_seq_search(&value, &s, months,
3726 S_TM(n->suffix) ? localized_abbrev_months : NULL,
3727 collid,
3728 n, escontext))
3729 return;
3730 if (!from_char_set_int(&out->mm, value + 1, n, escontext))
3731 return;
3732 break;
3733 case DCH_MM:
3734 if (from_char_parse_int(&out->mm, &s, n, escontext) < 0)
3735 return;
3736 SKIP_THth(s, n->suffix);
3737 break;
3738 case DCH_DAY:
3739 case DCH_Day:
3740 case DCH_day:
3741 if (!from_char_seq_search(&value, &s, days,
3742 S_TM(n->suffix) ? localized_full_days : NULL,
3743 collid,
3744 n, escontext))
3745 return;
3746 if (!from_char_set_int(&out->d, value, n, escontext))
3747 return;
3748 out->d++;
3749 break;
3750 case DCH_DY:
3751 case DCH_Dy:
3752 case DCH_dy:
3753 if (!from_char_seq_search(&value, &s, days_short,
3754 S_TM(n->suffix) ? localized_abbrev_days : NULL,
3755 collid,
3756 n, escontext))
3757 return;
3758 if (!from_char_set_int(&out->d, value, n, escontext))
3759 return;
3760 out->d++;
3761 break;
3762 case DCH_DDD:
3763 if (from_char_parse_int(&out->ddd, &s, n, escontext) < 0)
3764 return;
3765 SKIP_THth(s, n->suffix);
3766 break;
3767 case DCH_IDDD:
3768 if (from_char_parse_int_len(&out->ddd, &s, 3, n, escontext) < 0)
3769 return;
3770 SKIP_THth(s, n->suffix);
3771 break;
3772 case DCH_DD:
3773 if (from_char_parse_int(&out->dd, &s, n, escontext) < 0)
3774 return;
3775 SKIP_THth(s, n->suffix);
3776 break;
3777 case DCH_D:
3778 if (from_char_parse_int(&out->d, &s, n, escontext) < 0)
3779 return;
3780 SKIP_THth(s, n->suffix);
3781 break;
3782 case DCH_ID:
3783 if (from_char_parse_int_len(&out->d, &s, 1, n, escontext) < 0)
3784 return;
3785 /* Shift numbering to match Gregorian where Sunday = 1 */
3786 if (++out->d > 7)
3787 out->d = 1;
3788 SKIP_THth(s, n->suffix);
3789 break;
3790 case DCH_WW:
3791 case DCH_IW:
3792 if (from_char_parse_int(&out->ww, &s, n, escontext) < 0)
3793 return;
3794 SKIP_THth(s, n->suffix);
3795 break;
3796 case DCH_Q:
3799 * We ignore 'Q' when converting to date because it is unclear
3800 * which date in the quarter to use, and some people specify
3801 * both quarter and month, so if it was honored it might
3802 * conflict with the supplied month. That is also why we don't
3803 * throw an error.
3805 * We still parse the source string for an integer, but it
3806 * isn't stored anywhere in 'out'.
3808 if (from_char_parse_int((int *) NULL, &s, n, escontext) < 0)
3809 return;
3810 SKIP_THth(s, n->suffix);
3811 break;
3812 case DCH_CC:
3813 if (from_char_parse_int(&out->cc, &s, n, escontext) < 0)
3814 return;
3815 SKIP_THth(s, n->suffix);
3816 break;
3817 case DCH_Y_YYY:
3819 int matched,
3820 years,
3821 millennia,
3822 nch;
3824 matched = sscanf(s, "%d,%03d%n", &millennia, &years, &nch);
3825 if (matched < 2)
3826 ereturn(escontext,,
3827 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3828 errmsg("invalid input string for \"Y,YYY\"")));
3829 years += (millennia * 1000);
3830 if (!from_char_set_int(&out->year, years, n, escontext))
3831 return;
3832 out->yysz = 4;
3833 s += nch;
3834 SKIP_THth(s, n->suffix);
3836 break;
3837 case DCH_YYYY:
3838 case DCH_IYYY:
3839 if (from_char_parse_int(&out->year, &s, n, escontext) < 0)
3840 return;
3841 out->yysz = 4;
3842 SKIP_THth(s, n->suffix);
3843 break;
3844 case DCH_YYY:
3845 case DCH_IYY:
3846 len = from_char_parse_int(&out->year, &s, n, escontext);
3847 if (len < 0)
3848 return;
3849 if (len < 4)
3850 out->year = adjust_partial_year_to_2020(out->year);
3851 out->yysz = 3;
3852 SKIP_THth(s, n->suffix);
3853 break;
3854 case DCH_YY:
3855 case DCH_IY:
3856 len = from_char_parse_int(&out->year, &s, n, escontext);
3857 if (len < 0)
3858 return;
3859 if (len < 4)
3860 out->year = adjust_partial_year_to_2020(out->year);
3861 out->yysz = 2;
3862 SKIP_THth(s, n->suffix);
3863 break;
3864 case DCH_Y:
3865 case DCH_I:
3866 len = from_char_parse_int(&out->year, &s, n, escontext);
3867 if (len < 0)
3868 return;
3869 if (len < 4)
3870 out->year = adjust_partial_year_to_2020(out->year);
3871 out->yysz = 1;
3872 SKIP_THth(s, n->suffix);
3873 break;
3874 case DCH_RM:
3875 case DCH_rm:
3876 if (!from_char_seq_search(&value, &s, rm_months_lower,
3877 NULL, InvalidOid,
3878 n, escontext))
3879 return;
3880 if (!from_char_set_int(&out->mm, MONTHS_PER_YEAR - value, n,
3881 escontext))
3882 return;
3883 break;
3884 case DCH_W:
3885 if (from_char_parse_int(&out->w, &s, n, escontext) < 0)
3886 return;
3887 SKIP_THth(s, n->suffix);
3888 break;
3889 case DCH_J:
3890 if (from_char_parse_int(&out->j, &s, n, escontext) < 0)
3891 return;
3892 SKIP_THth(s, n->suffix);
3893 break;
3896 /* Ignore all spaces after fields */
3897 if (!fx_mode)
3899 extra_skip = 0;
3900 while (*s != '\0' && isspace((unsigned char) *s))
3902 s++;
3903 extra_skip++;
3909 * Standard parsing mode doesn't allow unmatched format patterns or
3910 * trailing characters in the input string.
3912 if (std)
3914 if (n->type != NODE_TYPE_END)
3915 ereturn(escontext,,
3916 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3917 errmsg("input string is too short for datetime format")));
3919 while (*s != '\0' && isspace((unsigned char) *s))
3920 s++;
3922 if (*s != '\0')
3923 ereturn(escontext,,
3924 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
3925 errmsg("trailing characters remain in input string after datetime format")));
3930 * The invariant for DCH cache entry management is that DCHCounter is equal
3931 * to the maximum age value among the existing entries, and we increment it
3932 * whenever an access occurs. If we approach overflow, deal with that by
3933 * halving all the age values, so that we retain a fairly accurate idea of
3934 * which entries are oldest.
3936 static inline void
3937 DCH_prevent_counter_overflow(void)
3939 if (DCHCounter >= (INT_MAX - 1))
3941 for (int i = 0; i < n_DCHCache; i++)
3942 DCHCache[i]->age >>= 1;
3943 DCHCounter >>= 1;
3948 * Get mask of date/time/zone components present in format nodes.
3950 static int
3951 DCH_datetime_type(FormatNode *node)
3953 FormatNode *n;
3954 int flags = 0;
3956 for (n = node; n->type != NODE_TYPE_END; n++)
3958 if (n->type != NODE_TYPE_ACTION)
3959 continue;
3961 switch (n->key->id)
3963 case DCH_FX:
3964 break;
3965 case DCH_A_M:
3966 case DCH_P_M:
3967 case DCH_a_m:
3968 case DCH_p_m:
3969 case DCH_AM:
3970 case DCH_PM:
3971 case DCH_am:
3972 case DCH_pm:
3973 case DCH_HH:
3974 case DCH_HH12:
3975 case DCH_HH24:
3976 case DCH_MI:
3977 case DCH_SS:
3978 case DCH_MS: /* millisecond */
3979 case DCH_US: /* microsecond */
3980 case DCH_FF1:
3981 case DCH_FF2:
3982 case DCH_FF3:
3983 case DCH_FF4:
3984 case DCH_FF5:
3985 case DCH_FF6:
3986 case DCH_SSSS:
3987 flags |= DCH_TIMED;
3988 break;
3989 case DCH_tz:
3990 case DCH_TZ:
3991 case DCH_OF:
3992 case DCH_TZH:
3993 case DCH_TZM:
3994 flags |= DCH_ZONED;
3995 break;
3996 case DCH_A_D:
3997 case DCH_B_C:
3998 case DCH_a_d:
3999 case DCH_b_c:
4000 case DCH_AD:
4001 case DCH_BC:
4002 case DCH_ad:
4003 case DCH_bc:
4004 case DCH_MONTH:
4005 case DCH_Month:
4006 case DCH_month:
4007 case DCH_MON:
4008 case DCH_Mon:
4009 case DCH_mon:
4010 case DCH_MM:
4011 case DCH_DAY:
4012 case DCH_Day:
4013 case DCH_day:
4014 case DCH_DY:
4015 case DCH_Dy:
4016 case DCH_dy:
4017 case DCH_DDD:
4018 case DCH_IDDD:
4019 case DCH_DD:
4020 case DCH_D:
4021 case DCH_ID:
4022 case DCH_WW:
4023 case DCH_Q:
4024 case DCH_CC:
4025 case DCH_Y_YYY:
4026 case DCH_YYYY:
4027 case DCH_IYYY:
4028 case DCH_YYY:
4029 case DCH_IYY:
4030 case DCH_YY:
4031 case DCH_IY:
4032 case DCH_Y:
4033 case DCH_I:
4034 case DCH_RM:
4035 case DCH_rm:
4036 case DCH_W:
4037 case DCH_J:
4038 flags |= DCH_DATED;
4039 break;
4043 return flags;
4046 /* select a DCHCacheEntry to hold the given format picture */
4047 static DCHCacheEntry *
4048 DCH_cache_getnew(const char *str, bool std)
4050 DCHCacheEntry *ent;
4052 /* Ensure we can advance DCHCounter below */
4053 DCH_prevent_counter_overflow();
4056 * If cache is full, remove oldest entry (or recycle first not-valid one)
4058 if (n_DCHCache >= DCH_CACHE_ENTRIES)
4060 DCHCacheEntry *old = DCHCache[0];
4062 #ifdef DEBUG_TO_FROM_CHAR
4063 elog(DEBUG_elog_output, "cache is full (%d)", n_DCHCache);
4064 #endif
4065 if (old->valid)
4067 for (int i = 1; i < DCH_CACHE_ENTRIES; i++)
4069 ent = DCHCache[i];
4070 if (!ent->valid)
4072 old = ent;
4073 break;
4075 if (ent->age < old->age)
4076 old = ent;
4079 #ifdef DEBUG_TO_FROM_CHAR
4080 elog(DEBUG_elog_output, "OLD: '%s' AGE: %d", old->str, old->age);
4081 #endif
4082 old->valid = false;
4083 strlcpy(old->str, str, DCH_CACHE_SIZE + 1);
4084 old->age = (++DCHCounter);
4085 /* caller is expected to fill format, then set valid */
4086 return old;
4088 else
4090 #ifdef DEBUG_TO_FROM_CHAR
4091 elog(DEBUG_elog_output, "NEW (%d)", n_DCHCache);
4092 #endif
4093 Assert(DCHCache[n_DCHCache] == NULL);
4094 DCHCache[n_DCHCache] = ent = (DCHCacheEntry *)
4095 MemoryContextAllocZero(TopMemoryContext, sizeof(DCHCacheEntry));
4096 ent->valid = false;
4097 strlcpy(ent->str, str, DCH_CACHE_SIZE + 1);
4098 ent->std = std;
4099 ent->age = (++DCHCounter);
4100 /* caller is expected to fill format, then set valid */
4101 ++n_DCHCache;
4102 return ent;
4106 /* look for an existing DCHCacheEntry matching the given format picture */
4107 static DCHCacheEntry *
4108 DCH_cache_search(const char *str, bool std)
4110 /* Ensure we can advance DCHCounter below */
4111 DCH_prevent_counter_overflow();
4113 for (int i = 0; i < n_DCHCache; i++)
4115 DCHCacheEntry *ent = DCHCache[i];
4117 if (ent->valid && strcmp(ent->str, str) == 0 && ent->std == std)
4119 ent->age = (++DCHCounter);
4120 return ent;
4124 return NULL;
4127 /* Find or create a DCHCacheEntry for the given format picture */
4128 static DCHCacheEntry *
4129 DCH_cache_fetch(const char *str, bool std)
4131 DCHCacheEntry *ent;
4133 if ((ent = DCH_cache_search(str, std)) == NULL)
4136 * Not in the cache, must run parser and save a new format-picture to
4137 * the cache. Do not mark the cache entry valid until parsing
4138 * succeeds.
4140 ent = DCH_cache_getnew(str, std);
4142 parse_format(ent->format, str, DCH_keywords, DCH_suff, DCH_index,
4143 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4145 ent->valid = true;
4147 return ent;
4151 * Format a date/time or interval into a string according to fmt.
4152 * We parse fmt into a list of FormatNodes. This is then passed to DCH_to_char
4153 * for formatting.
4155 static text *
4156 datetime_to_char_body(TmToChar *tmtc, text *fmt, bool is_interval, Oid collid)
4158 FormatNode *format;
4159 char *fmt_str,
4160 *result;
4161 bool incache;
4162 int fmt_len;
4163 text *res;
4166 * Convert fmt to C string
4168 fmt_str = text_to_cstring(fmt);
4169 fmt_len = strlen(fmt_str);
4172 * Allocate workspace for result as C string
4174 result = palloc((fmt_len * DCH_MAX_ITEM_SIZ) + 1);
4175 *result = '\0';
4177 if (fmt_len > DCH_CACHE_SIZE)
4180 * Allocate new memory if format picture is bigger than static cache
4181 * and do not use cache (call parser always)
4183 incache = false;
4185 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4187 parse_format(format, fmt_str, DCH_keywords,
4188 DCH_suff, DCH_index, DCH_FLAG, NULL);
4190 else
4193 * Use cache buffers
4195 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4197 incache = true;
4198 format = ent->format;
4201 /* The real work is here */
4202 DCH_to_char(format, is_interval, tmtc, result, collid);
4204 if (!incache)
4205 pfree(format);
4207 pfree(fmt_str);
4209 /* convert C-string result to TEXT format */
4210 res = cstring_to_text(result);
4212 pfree(result);
4213 return res;
4216 /****************************************************************************
4217 * Public routines
4218 ***************************************************************************/
4220 /* -------------------
4221 * TIMESTAMP to_char()
4222 * -------------------
4224 Datum
4225 timestamp_to_char(PG_FUNCTION_ARGS)
4227 Timestamp dt = PG_GETARG_TIMESTAMP(0);
4228 text *fmt = PG_GETARG_TEXT_PP(1),
4229 *res;
4230 TmToChar tmtc;
4231 struct pg_tm tt;
4232 struct fmt_tm *tm;
4233 int thisdate;
4235 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4236 PG_RETURN_NULL();
4238 ZERO_tmtc(&tmtc);
4239 tm = tmtcTm(&tmtc);
4241 if (timestamp2tm(dt, NULL, &tt, &tmtcFsec(&tmtc), NULL, NULL) != 0)
4242 ereport(ERROR,
4243 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4244 errmsg("timestamp out of range")));
4246 /* calculate wday and yday, because timestamp2tm doesn't */
4247 thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4248 tt.tm_wday = (thisdate + 1) % 7;
4249 tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4251 COPY_tm(tm, &tt);
4253 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4254 PG_RETURN_NULL();
4256 PG_RETURN_TEXT_P(res);
4259 Datum
4260 timestamptz_to_char(PG_FUNCTION_ARGS)
4262 TimestampTz dt = PG_GETARG_TIMESTAMP(0);
4263 text *fmt = PG_GETARG_TEXT_PP(1),
4264 *res;
4265 TmToChar tmtc;
4266 int tz;
4267 struct pg_tm tt;
4268 struct fmt_tm *tm;
4269 int thisdate;
4271 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || TIMESTAMP_NOT_FINITE(dt))
4272 PG_RETURN_NULL();
4274 ZERO_tmtc(&tmtc);
4275 tm = tmtcTm(&tmtc);
4277 if (timestamp2tm(dt, &tz, &tt, &tmtcFsec(&tmtc), &tmtcTzn(&tmtc), NULL) != 0)
4278 ereport(ERROR,
4279 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4280 errmsg("timestamp out of range")));
4282 /* calculate wday and yday, because timestamp2tm doesn't */
4283 thisdate = date2j(tt.tm_year, tt.tm_mon, tt.tm_mday);
4284 tt.tm_wday = (thisdate + 1) % 7;
4285 tt.tm_yday = thisdate - date2j(tt.tm_year, 1, 1) + 1;
4287 COPY_tm(tm, &tt);
4289 if (!(res = datetime_to_char_body(&tmtc, fmt, false, PG_GET_COLLATION())))
4290 PG_RETURN_NULL();
4292 PG_RETURN_TEXT_P(res);
4296 /* -------------------
4297 * INTERVAL to_char()
4298 * -------------------
4300 Datum
4301 interval_to_char(PG_FUNCTION_ARGS)
4303 Interval *it = PG_GETARG_INTERVAL_P(0);
4304 text *fmt = PG_GETARG_TEXT_PP(1),
4305 *res;
4306 TmToChar tmtc;
4307 struct fmt_tm *tm;
4308 struct pg_itm tt,
4309 *itm = &tt;
4311 if (VARSIZE_ANY_EXHDR(fmt) <= 0 || INTERVAL_NOT_FINITE(it))
4312 PG_RETURN_NULL();
4314 ZERO_tmtc(&tmtc);
4315 tm = tmtcTm(&tmtc);
4317 interval2itm(*it, itm);
4318 tmtc.fsec = itm->tm_usec;
4319 tm->tm_sec = itm->tm_sec;
4320 tm->tm_min = itm->tm_min;
4321 tm->tm_hour = itm->tm_hour;
4322 tm->tm_mday = itm->tm_mday;
4323 tm->tm_mon = itm->tm_mon;
4324 tm->tm_year = itm->tm_year;
4326 /* wday is meaningless, yday approximates the total span in days */
4327 tm->tm_yday = (tm->tm_year * MONTHS_PER_YEAR + tm->tm_mon) * DAYS_PER_MONTH + tm->tm_mday;
4329 if (!(res = datetime_to_char_body(&tmtc, fmt, true, PG_GET_COLLATION())))
4330 PG_RETURN_NULL();
4332 PG_RETURN_TEXT_P(res);
4335 /* ---------------------
4336 * TO_TIMESTAMP()
4338 * Make Timestamp from date_str which is formatted at argument 'fmt'
4339 * ( to_timestamp is reverse to_char() )
4340 * ---------------------
4342 Datum
4343 to_timestamp(PG_FUNCTION_ARGS)
4345 text *date_txt = PG_GETARG_TEXT_PP(0);
4346 text *fmt = PG_GETARG_TEXT_PP(1);
4347 Oid collid = PG_GET_COLLATION();
4348 Timestamp result;
4349 int tz;
4350 struct pg_tm tm;
4351 struct fmt_tz ftz;
4352 fsec_t fsec;
4353 int fprec;
4355 do_to_timestamp(date_txt, fmt, collid, false,
4356 &tm, &fsec, &ftz, &fprec, NULL, NULL);
4358 /* Use the specified time zone, if any. */
4359 if (ftz.has_tz)
4360 tz = ftz.gmtoffset;
4361 else
4362 tz = DetermineTimeZoneOffset(&tm, session_timezone);
4364 if (tm2timestamp(&tm, fsec, &tz, &result) != 0)
4365 ereport(ERROR,
4366 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4367 errmsg("timestamp out of range")));
4369 /* Use the specified fractional precision, if any. */
4370 if (fprec)
4371 AdjustTimestampForTypmod(&result, fprec, NULL);
4373 PG_RETURN_TIMESTAMP(result);
4376 /* ----------
4377 * TO_DATE
4378 * Make Date from date_str which is formatted at argument 'fmt'
4379 * ----------
4381 Datum
4382 to_date(PG_FUNCTION_ARGS)
4384 text *date_txt = PG_GETARG_TEXT_PP(0);
4385 text *fmt = PG_GETARG_TEXT_PP(1);
4386 Oid collid = PG_GET_COLLATION();
4387 DateADT result;
4388 struct pg_tm tm;
4389 struct fmt_tz ftz;
4390 fsec_t fsec;
4392 do_to_timestamp(date_txt, fmt, collid, false,
4393 &tm, &fsec, &ftz, NULL, NULL, NULL);
4395 /* Prevent overflow in Julian-day routines */
4396 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4397 ereport(ERROR,
4398 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4399 errmsg("date out of range: \"%s\"",
4400 text_to_cstring(date_txt))));
4402 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) - POSTGRES_EPOCH_JDATE;
4404 /* Now check for just-out-of-range dates */
4405 if (!IS_VALID_DATE(result))
4406 ereport(ERROR,
4407 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4408 errmsg("date out of range: \"%s\"",
4409 text_to_cstring(date_txt))));
4411 PG_RETURN_DATEADT(result);
4415 * Convert the 'date_txt' input to a datetime type using argument 'fmt'
4416 * as a format string. The collation 'collid' may be used for case-folding
4417 * rules in some cases. 'strict' specifies standard parsing mode.
4419 * The actual data type (returned in 'typid', 'typmod') is determined by
4420 * the presence of date/time/zone components in the format string.
4422 * When a timezone component is present, the corresponding offset is
4423 * returned in '*tz'.
4425 * If escontext points to an ErrorSaveContext, data errors will be reported
4426 * by filling that struct; the caller must test SOFT_ERROR_OCCURRED() to see
4427 * whether an error occurred. Otherwise, errors are thrown.
4429 Datum
4430 parse_datetime(text *date_txt, text *fmt, Oid collid, bool strict,
4431 Oid *typid, int32 *typmod, int *tz,
4432 Node *escontext)
4434 struct pg_tm tm;
4435 struct fmt_tz ftz;
4436 fsec_t fsec;
4437 int fprec;
4438 uint32 flags;
4440 if (!do_to_timestamp(date_txt, fmt, collid, strict,
4441 &tm, &fsec, &ftz, &fprec, &flags, escontext))
4442 return (Datum) 0;
4444 *typmod = fprec ? fprec : -1; /* fractional part precision */
4446 if (flags & DCH_DATED)
4448 if (flags & DCH_TIMED)
4450 if (flags & DCH_ZONED)
4452 TimestampTz result;
4454 if (ftz.has_tz)
4456 *tz = ftz.gmtoffset;
4458 else
4461 * Time zone is present in format string, but not in input
4462 * string. Assuming do_to_timestamp() triggers no error
4463 * this should be possible only in non-strict case.
4465 Assert(!strict);
4467 ereturn(escontext, (Datum) 0,
4468 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4469 errmsg("missing time zone in input string for type timestamptz")));
4472 if (tm2timestamp(&tm, fsec, tz, &result) != 0)
4473 ereturn(escontext, (Datum) 0,
4474 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4475 errmsg("timestamptz out of range")));
4477 AdjustTimestampForTypmod(&result, *typmod, escontext);
4479 *typid = TIMESTAMPTZOID;
4480 return TimestampTzGetDatum(result);
4482 else
4484 Timestamp result;
4486 if (tm2timestamp(&tm, fsec, NULL, &result) != 0)
4487 ereturn(escontext, (Datum) 0,
4488 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4489 errmsg("timestamp out of range")));
4491 AdjustTimestampForTypmod(&result, *typmod, escontext);
4493 *typid = TIMESTAMPOID;
4494 return TimestampGetDatum(result);
4497 else
4499 if (flags & DCH_ZONED)
4501 ereturn(escontext, (Datum) 0,
4502 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4503 errmsg("datetime format is zoned but not timed")));
4505 else
4507 DateADT result;
4509 /* Prevent overflow in Julian-day routines */
4510 if (!IS_VALID_JULIAN(tm.tm_year, tm.tm_mon, tm.tm_mday))
4511 ereturn(escontext, (Datum) 0,
4512 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4513 errmsg("date out of range: \"%s\"",
4514 text_to_cstring(date_txt))));
4516 result = date2j(tm.tm_year, tm.tm_mon, tm.tm_mday) -
4517 POSTGRES_EPOCH_JDATE;
4519 /* Now check for just-out-of-range dates */
4520 if (!IS_VALID_DATE(result))
4521 ereturn(escontext, (Datum) 0,
4522 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4523 errmsg("date out of range: \"%s\"",
4524 text_to_cstring(date_txt))));
4526 *typid = DATEOID;
4527 return DateADTGetDatum(result);
4531 else if (flags & DCH_TIMED)
4533 if (flags & DCH_ZONED)
4535 TimeTzADT *result = palloc(sizeof(TimeTzADT));
4537 if (ftz.has_tz)
4539 *tz = ftz.gmtoffset;
4541 else
4544 * Time zone is present in format string, but not in input
4545 * string. Assuming do_to_timestamp() triggers no error this
4546 * should be possible only in non-strict case.
4548 Assert(!strict);
4550 ereturn(escontext, (Datum) 0,
4551 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4552 errmsg("missing time zone in input string for type timetz")));
4555 if (tm2timetz(&tm, fsec, *tz, result) != 0)
4556 ereturn(escontext, (Datum) 0,
4557 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4558 errmsg("timetz out of range")));
4560 AdjustTimeForTypmod(&result->time, *typmod);
4562 *typid = TIMETZOID;
4563 return TimeTzADTPGetDatum(result);
4565 else
4567 TimeADT result;
4569 if (tm2time(&tm, fsec, &result) != 0)
4570 ereturn(escontext, (Datum) 0,
4571 (errcode(ERRCODE_DATETIME_VALUE_OUT_OF_RANGE),
4572 errmsg("time out of range")));
4574 AdjustTimeForTypmod(&result, *typmod);
4576 *typid = TIMEOID;
4577 return TimeADTGetDatum(result);
4580 else
4582 ereturn(escontext, (Datum) 0,
4583 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4584 errmsg("datetime format is not dated and not timed")));
4589 * Parses the datetime format string in 'fmt_str' and returns true if it
4590 * contains a timezone specifier, false if not.
4592 bool
4593 datetime_format_has_tz(const char *fmt_str)
4595 bool incache;
4596 int fmt_len = strlen(fmt_str);
4597 int result;
4598 FormatNode *format;
4600 if (fmt_len > DCH_CACHE_SIZE)
4603 * Allocate new memory if format picture is bigger than static cache
4604 * and do not use cache (call parser always)
4606 incache = false;
4608 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4610 parse_format(format, fmt_str, DCH_keywords,
4611 DCH_suff, DCH_index, DCH_FLAG, NULL);
4613 else
4616 * Use cache buffers
4618 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, false);
4620 incache = true;
4621 format = ent->format;
4624 result = DCH_datetime_type(format);
4626 if (!incache)
4627 pfree(format);
4629 return result & DCH_ZONED;
4633 * do_to_timestamp: shared code for to_timestamp and to_date
4635 * Parse the 'date_txt' according to 'fmt', return results as a struct pg_tm,
4636 * fractional seconds, struct fmt_tz, and fractional precision.
4638 * 'collid' identifies the collation to use, if needed.
4639 * 'std' specifies standard parsing mode.
4641 * Bit mask of date/time/zone components found in 'fmt' is returned in 'flags',
4642 * if that is not NULL.
4644 * Returns true on success, false on failure (if escontext points to an
4645 * ErrorSaveContext; otherwise errors are thrown). Note that currently,
4646 * soft-error behavior is provided for bad data but not bad format.
4648 * We parse 'fmt' into a list of FormatNodes, which is then passed to
4649 * DCH_from_char to populate a TmFromChar with the parsed contents of
4650 * 'date_txt'.
4652 * The TmFromChar is then analysed and converted into the final results in
4653 * struct 'tm', 'fsec', struct 'tz', and 'fprec'.
4655 static bool
4656 do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std,
4657 struct pg_tm *tm, fsec_t *fsec, struct fmt_tz *tz,
4658 int *fprec, uint32 *flags, Node *escontext)
4660 FormatNode *format = NULL;
4661 TmFromChar tmfc;
4662 int fmt_len;
4663 char *date_str;
4664 int fmask;
4665 bool incache = false;
4667 Assert(tm != NULL);
4668 Assert(fsec != NULL);
4670 date_str = text_to_cstring(date_txt);
4672 ZERO_tmfc(&tmfc);
4673 ZERO_tm(tm);
4674 *fsec = 0;
4675 tz->has_tz = false;
4676 if (fprec)
4677 *fprec = 0;
4678 if (flags)
4679 *flags = 0;
4680 fmask = 0; /* bit mask for ValidateDate() */
4682 fmt_len = VARSIZE_ANY_EXHDR(fmt);
4684 if (fmt_len)
4686 char *fmt_str;
4688 fmt_str = text_to_cstring(fmt);
4690 if (fmt_len > DCH_CACHE_SIZE)
4693 * Allocate new memory if format picture is bigger than static
4694 * cache and do not use cache (call parser always)
4696 format = (FormatNode *) palloc((fmt_len + 1) * sizeof(FormatNode));
4698 parse_format(format, fmt_str, DCH_keywords, DCH_suff, DCH_index,
4699 DCH_FLAG | (std ? STD_FLAG : 0), NULL);
4701 else
4704 * Use cache buffers
4706 DCHCacheEntry *ent = DCH_cache_fetch(fmt_str, std);
4708 incache = true;
4709 format = ent->format;
4712 #ifdef DEBUG_TO_FROM_CHAR
4713 /* dump_node(format, fmt_len); */
4714 /* dump_index(DCH_keywords, DCH_index); */
4715 #endif
4717 DCH_from_char(format, date_str, &tmfc, collid, std, escontext);
4718 pfree(fmt_str);
4719 if (SOFT_ERROR_OCCURRED(escontext))
4720 goto fail;
4722 if (flags)
4723 *flags = DCH_datetime_type(format);
4725 if (!incache)
4727 pfree(format);
4728 format = NULL;
4732 DEBUG_TMFC(&tmfc);
4735 * Convert to_date/to_timestamp input fields to standard 'tm'
4737 if (tmfc.ssss)
4739 int x = tmfc.ssss;
4741 tm->tm_hour = x / SECS_PER_HOUR;
4742 x %= SECS_PER_HOUR;
4743 tm->tm_min = x / SECS_PER_MINUTE;
4744 x %= SECS_PER_MINUTE;
4745 tm->tm_sec = x;
4748 if (tmfc.ss)
4749 tm->tm_sec = tmfc.ss;
4750 if (tmfc.mi)
4751 tm->tm_min = tmfc.mi;
4752 if (tmfc.hh)
4753 tm->tm_hour = tmfc.hh;
4755 if (tmfc.clock == CLOCK_12_HOUR)
4757 if (tm->tm_hour < 1 || tm->tm_hour > HOURS_PER_DAY / 2)
4759 errsave(escontext,
4760 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4761 errmsg("hour \"%d\" is invalid for the 12-hour clock",
4762 tm->tm_hour),
4763 errhint("Use the 24-hour clock, or give an hour between 1 and 12.")));
4764 goto fail;
4767 if (tmfc.pm && tm->tm_hour < HOURS_PER_DAY / 2)
4768 tm->tm_hour += HOURS_PER_DAY / 2;
4769 else if (!tmfc.pm && tm->tm_hour == HOURS_PER_DAY / 2)
4770 tm->tm_hour = 0;
4773 if (tmfc.year)
4776 * If CC and YY (or Y) are provided, use YY as 2 low-order digits for
4777 * the year in the given century. Keep in mind that the 21st century
4778 * AD runs from 2001-2100, not 2000-2099; 6th century BC runs from
4779 * 600BC to 501BC.
4781 if (tmfc.cc && tmfc.yysz <= 2)
4783 if (tmfc.bc)
4784 tmfc.cc = -tmfc.cc;
4785 tm->tm_year = tmfc.year % 100;
4786 if (tm->tm_year)
4788 if (tmfc.cc >= 0)
4789 tm->tm_year += (tmfc.cc - 1) * 100;
4790 else
4791 tm->tm_year = (tmfc.cc + 1) * 100 - tm->tm_year + 1;
4793 else
4795 /* find century year for dates ending in "00" */
4796 tm->tm_year = tmfc.cc * 100 + ((tmfc.cc >= 0) ? 0 : 1);
4799 else
4801 /* If a 4-digit year is provided, we use that and ignore CC. */
4802 tm->tm_year = tmfc.year;
4803 if (tmfc.bc)
4804 tm->tm_year = -tm->tm_year;
4805 /* correct for our representation of BC years */
4806 if (tm->tm_year < 0)
4807 tm->tm_year++;
4809 fmask |= DTK_M(YEAR);
4811 else if (tmfc.cc)
4813 /* use first year of century */
4814 if (tmfc.bc)
4815 tmfc.cc = -tmfc.cc;
4816 if (tmfc.cc >= 0)
4817 /* +1 because 21st century started in 2001 */
4818 tm->tm_year = (tmfc.cc - 1) * 100 + 1;
4819 else
4820 /* +1 because year == 599 is 600 BC */
4821 tm->tm_year = tmfc.cc * 100 + 1;
4822 fmask |= DTK_M(YEAR);
4825 if (tmfc.j)
4827 j2date(tmfc.j, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4828 fmask |= DTK_DATE_M;
4831 if (tmfc.ww)
4833 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4836 * If tmfc.d is not set, then the date is left at the beginning of
4837 * the ISO week (Monday).
4839 if (tmfc.d)
4840 isoweekdate2date(tmfc.ww, tmfc.d, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4841 else
4842 isoweek2date(tmfc.ww, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4843 fmask |= DTK_DATE_M;
4845 else
4846 tmfc.ddd = (tmfc.ww - 1) * 7 + 1;
4849 if (tmfc.w)
4850 tmfc.dd = (tmfc.w - 1) * 7 + 1;
4851 if (tmfc.dd)
4853 tm->tm_mday = tmfc.dd;
4854 fmask |= DTK_M(DAY);
4856 if (tmfc.mm)
4858 tm->tm_mon = tmfc.mm;
4859 fmask |= DTK_M(MONTH);
4862 if (tmfc.ddd && (tm->tm_mon <= 1 || tm->tm_mday <= 1))
4865 * The month and day field have not been set, so we use the
4866 * day-of-year field to populate them. Depending on the date mode,
4867 * this field may be interpreted as a Gregorian day-of-year, or an ISO
4868 * week date day-of-year.
4871 if (!tm->tm_year && !tmfc.bc)
4873 errsave(escontext,
4874 (errcode(ERRCODE_INVALID_DATETIME_FORMAT),
4875 errmsg("cannot calculate day of year without year information")));
4876 goto fail;
4879 if (tmfc.mode == FROM_CHAR_DATE_ISOWEEK)
4881 int j0; /* zeroth day of the ISO year, in Julian */
4883 j0 = isoweek2j(tm->tm_year, 1) - 1;
4885 j2date(j0 + tmfc.ddd, &tm->tm_year, &tm->tm_mon, &tm->tm_mday);
4886 fmask |= DTK_DATE_M;
4888 else
4890 const int *y;
4891 int i;
4893 static const int ysum[2][13] = {
4894 {0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
4895 {0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}};
4897 y = ysum[isleap(tm->tm_year)];
4899 for (i = 1; i <= MONTHS_PER_YEAR; i++)
4901 if (tmfc.ddd <= y[i])
4902 break;
4904 if (tm->tm_mon <= 1)
4905 tm->tm_mon = i;
4907 if (tm->tm_mday <= 1)
4908 tm->tm_mday = tmfc.ddd - y[i - 1];
4910 fmask |= DTK_M(MONTH) | DTK_M(DAY);
4914 if (tmfc.ms)
4915 *fsec += tmfc.ms * 1000;
4916 if (tmfc.us)
4917 *fsec += tmfc.us;
4918 if (fprec)
4919 *fprec = tmfc.ff; /* fractional precision, if specified */
4921 /* Range-check date fields according to bit mask computed above */
4922 if (fmask != 0)
4924 /* We already dealt with AD/BC, so pass isjulian = true */
4925 int dterr = ValidateDate(fmask, true, false, false, tm);
4927 if (dterr != 0)
4930 * Force the error to be DTERR_FIELD_OVERFLOW even if ValidateDate
4931 * said DTERR_MD_FIELD_OVERFLOW, because we don't want to print an
4932 * irrelevant hint about datestyle.
4934 DateTimeParseError(DTERR_FIELD_OVERFLOW, NULL,
4935 date_str, "timestamp", escontext);
4936 goto fail;
4940 /* Range-check time fields too */
4941 if (tm->tm_hour < 0 || tm->tm_hour >= HOURS_PER_DAY ||
4942 tm->tm_min < 0 || tm->tm_min >= MINS_PER_HOUR ||
4943 tm->tm_sec < 0 || tm->tm_sec >= SECS_PER_MINUTE ||
4944 *fsec < INT64CONST(0) || *fsec >= USECS_PER_SEC)
4946 DateTimeParseError(DTERR_FIELD_OVERFLOW, NULL,
4947 date_str, "timestamp", escontext);
4948 goto fail;
4952 * If timezone info was present, reduce it to a GMT offset. (We cannot do
4953 * this until we've filled all of the tm struct, since the zone's offset
4954 * might be time-varying.)
4956 if (tmfc.tzsign)
4958 /* TZH and/or TZM fields */
4959 if (tmfc.tzh < 0 || tmfc.tzh > MAX_TZDISP_HOUR ||
4960 tmfc.tzm < 0 || tmfc.tzm >= MINS_PER_HOUR)
4962 DateTimeParseError(DTERR_TZDISP_OVERFLOW, NULL,
4963 date_str, "timestamp", escontext);
4964 goto fail;
4967 tz->has_tz = true;
4968 tz->gmtoffset = (tmfc.tzh * MINS_PER_HOUR + tmfc.tzm) * SECS_PER_MINUTE;
4969 /* note we are flipping the sign convention here */
4970 if (tmfc.tzsign > 0)
4971 tz->gmtoffset = -tz->gmtoffset;
4973 else if (tmfc.has_tz)
4975 /* TZ field */
4976 tz->has_tz = true;
4977 if (tmfc.tzp == NULL)
4979 /* fixed-offset abbreviation; flip the sign convention */
4980 tz->gmtoffset = -tmfc.gmtoffset;
4982 else
4984 /* dynamic-offset abbreviation, resolve using specified time */
4985 tz->gmtoffset = DetermineTimeZoneAbbrevOffset(tm, tmfc.abbrev,
4986 tmfc.tzp);
4990 DEBUG_TM(tm);
4992 if (format && !incache)
4993 pfree(format);
4994 pfree(date_str);
4996 return true;
4998 fail:
4999 if (format && !incache)
5000 pfree(format);
5001 pfree(date_str);
5003 return false;
5007 /**********************************************************************
5008 * the NUMBER version part
5009 *********************************************************************/
/*
 * Fill the first "max" bytes of str with the character c and append a
 * terminating NUL.  The caller must supply a buffer of at least max + 1
 * bytes.  Returns str.
 */
static char *
fill_str(char *str, int c, int max)
{
	memset(str, c, max);
	*(str + max) = '\0';
	return str;
}
/*
 * Reset all fields of the number descriptor pointed to by _n to zero, so
 * that the format parser starts from a clean state.
 */
#define zeroize_NUM(_n) \
do { \
	(_n)->flag		= 0;	\
	(_n)->lsign		= 0;	\
	(_n)->pre		= 0;	\
	(_n)->post		= 0;	\
	(_n)->pre_lsign_num = 0;	\
	(_n)->need_locale	= 0;	\
	(_n)->multi		= 0;	\
	(_n)->zero_start	= 0;	\
	(_n)->zero_end		= 0;	\
} while(0)
5033 /* This works the same as DCH_prevent_counter_overflow */
5034 static inline void
5035 NUM_prevent_counter_overflow(void)
5037 if (NUMCounter >= (INT_MAX - 1))
5039 for (int i = 0; i < n_NUMCache; i++)
5040 NUMCache[i]->age >>= 1;
5041 NUMCounter >>= 1;
5045 /* select a NUMCacheEntry to hold the given format picture */
5046 static NUMCacheEntry *
5047 NUM_cache_getnew(const char *str)
5049 NUMCacheEntry *ent;
5051 /* Ensure we can advance NUMCounter below */
5052 NUM_prevent_counter_overflow();
5055 * If cache is full, remove oldest entry (or recycle first not-valid one)
5057 if (n_NUMCache >= NUM_CACHE_ENTRIES)
5059 NUMCacheEntry *old = NUMCache[0];
5061 #ifdef DEBUG_TO_FROM_CHAR
5062 elog(DEBUG_elog_output, "Cache is full (%d)", n_NUMCache);
5063 #endif
5064 if (old->valid)
5066 for (int i = 1; i < NUM_CACHE_ENTRIES; i++)
5068 ent = NUMCache[i];
5069 if (!ent->valid)
5071 old = ent;
5072 break;
5074 if (ent->age < old->age)
5075 old = ent;
5078 #ifdef DEBUG_TO_FROM_CHAR
5079 elog(DEBUG_elog_output, "OLD: \"%s\" AGE: %d", old->str, old->age);
5080 #endif
5081 old->valid = false;
5082 strlcpy(old->str, str, NUM_CACHE_SIZE + 1);
5083 old->age = (++NUMCounter);
5084 /* caller is expected to fill format and Num, then set valid */
5085 return old;
5087 else
5089 #ifdef DEBUG_TO_FROM_CHAR
5090 elog(DEBUG_elog_output, "NEW (%d)", n_NUMCache);
5091 #endif
5092 Assert(NUMCache[n_NUMCache] == NULL);
5093 NUMCache[n_NUMCache] = ent = (NUMCacheEntry *)
5094 MemoryContextAllocZero(TopMemoryContext, sizeof(NUMCacheEntry));
5095 ent->valid = false;
5096 strlcpy(ent->str, str, NUM_CACHE_SIZE + 1);
5097 ent->age = (++NUMCounter);
5098 /* caller is expected to fill format and Num, then set valid */
5099 ++n_NUMCache;
5100 return ent;
5104 /* look for an existing NUMCacheEntry matching the given format picture */
5105 static NUMCacheEntry *
5106 NUM_cache_search(const char *str)
5108 /* Ensure we can advance NUMCounter below */
5109 NUM_prevent_counter_overflow();
5111 for (int i = 0; i < n_NUMCache; i++)
5113 NUMCacheEntry *ent = NUMCache[i];
5115 if (ent->valid && strcmp(ent->str, str) == 0)
5117 ent->age = (++NUMCounter);
5118 return ent;
5122 return NULL;
5125 /* Find or create a NUMCacheEntry for the given format picture */
5126 static NUMCacheEntry *
5127 NUM_cache_fetch(const char *str)
5129 NUMCacheEntry *ent;
5131 if ((ent = NUM_cache_search(str)) == NULL)
5134 * Not in the cache, must run parser and save a new format-picture to
5135 * the cache. Do not mark the cache entry valid until parsing
5136 * succeeds.
5138 ent = NUM_cache_getnew(str);
5140 zeroize_NUM(&ent->Num);
5142 parse_format(ent->format, str, NUM_keywords,
5143 NULL, NUM_index, NUM_FLAG, &ent->Num);
5145 ent->valid = true;
5147 return ent;
5150 /* ----------
5151 * Cache routine for NUM to_char version
5152 * ----------
5154 static FormatNode *
5155 NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree)
5157 FormatNode *format = NULL;
5158 char *str;
5160 str = text_to_cstring(pars_str);
5162 if (len > NUM_CACHE_SIZE)
5165 * Allocate new memory if format picture is bigger than static cache
5166 * and do not use cache (call parser always)
5168 format = (FormatNode *) palloc((len + 1) * sizeof(FormatNode));
5170 *shouldFree = true;
5172 zeroize_NUM(Num);
5174 parse_format(format, str, NUM_keywords,
5175 NULL, NUM_index, NUM_FLAG, Num);
5177 else
5180 * Use cache buffers
5182 NUMCacheEntry *ent = NUM_cache_fetch(str);
5184 *shouldFree = false;
5186 format = ent->format;
5189 * Copy cache to used struct
5191 Num->flag = ent->Num.flag;
5192 Num->lsign = ent->Num.lsign;
5193 Num->pre = ent->Num.pre;
5194 Num->post = ent->Num.post;
5195 Num->pre_lsign_num = ent->Num.pre_lsign_num;
5196 Num->need_locale = ent->Num.need_locale;
5197 Num->multi = ent->Num.multi;
5198 Num->zero_start = ent->Num.zero_start;
5199 Num->zero_end = ent->Num.zero_end;
5202 #ifdef DEBUG_TO_FROM_CHAR
5203 /* dump_node(format, len); */
5204 dump_index(NUM_keywords, NUM_index);
5205 #endif
5207 pfree(str);
5208 return format;
5213 * Convert integer to Roman numerals
5214 * Result is upper-case and not blank-padded (NUM_processor converts as needed)
5215 * If input is out-of-range, produce '###############'
5217 static char *
5218 int_to_roman(int number)
5220 int len,
5221 num;
5222 char *p,
5223 *result,
5224 numstr[12];
5226 result = (char *) palloc(16);
5227 *result = '\0';
5230 * This range limit is the same as in Oracle(TM). The difficulty with
5231 * handling 4000 or more is that we'd need to use more than 3 "M"'s, and
5232 * more than 3 of the same digit isn't considered a valid Roman string.
5234 if (number > 3999 || number < 1)
5236 fill_str(result, '#', 15);
5237 return result;
5240 /* Convert to decimal, then examine each digit */
5241 len = snprintf(numstr, sizeof(numstr), "%d", number);
5242 Assert(len > 0 && len <= 4);
5244 for (p = numstr; *p != '\0'; p++, --len)
5246 num = *p - ('0' + 1);
5247 if (num < 0)
5248 continue; /* ignore zeroes */
5249 /* switch on current column position */
5250 switch (len)
5252 case 4:
5253 while (num-- >= 0)
5254 strcat(result, "M");
5255 break;
5256 case 3:
5257 strcat(result, rm100[num]);
5258 break;
5259 case 2:
5260 strcat(result, rm10[num]);
5261 break;
5262 case 1:
5263 strcat(result, rm1[num]);
5264 break;
5267 return result;
5272 /* ----------
5273 * Locale
5274 * ----------
5276 static void
5277 NUM_prepare_locale(NUMProc *Np)
5279 if (Np->Num->need_locale)
5281 struct lconv *lconv;
5284 * Get locales
5286 lconv = PGLC_localeconv();
5289 * Positive / Negative number sign
5291 if (lconv->negative_sign && *lconv->negative_sign)
5292 Np->L_negative_sign = lconv->negative_sign;
5293 else
5294 Np->L_negative_sign = "-";
5296 if (lconv->positive_sign && *lconv->positive_sign)
5297 Np->L_positive_sign = lconv->positive_sign;
5298 else
5299 Np->L_positive_sign = "+";
5302 * Number decimal point
5304 if (lconv->decimal_point && *lconv->decimal_point)
5305 Np->decimal = lconv->decimal_point;
5307 else
5308 Np->decimal = ".";
5310 if (!IS_LDECIMAL(Np->Num))
5311 Np->decimal = ".";
5314 * Number thousands separator
5316 * Some locales (e.g. broken glibc pt_BR), have a comma for decimal,
5317 * but "" for thousands_sep, so we set the thousands_sep too.
5318 * http://archives.postgresql.org/pgsql-hackers/2007-11/msg00772.php
5320 if (lconv->thousands_sep && *lconv->thousands_sep)
5321 Np->L_thousands_sep = lconv->thousands_sep;
5322 /* Make sure thousands separator doesn't match decimal point symbol. */
5323 else if (strcmp(Np->decimal, ",") != 0)
5324 Np->L_thousands_sep = ",";
5325 else
5326 Np->L_thousands_sep = ".";
5329 * Currency symbol
5331 if (lconv->currency_symbol && *lconv->currency_symbol)
5332 Np->L_currency_symbol = lconv->currency_symbol;
5333 else
5334 Np->L_currency_symbol = " ";
5336 else
5339 * Default values
5341 Np->L_negative_sign = "-";
5342 Np->L_positive_sign = "+";
5343 Np->decimal = ".";
5345 Np->L_thousands_sep = ",";
5346 Np->L_currency_symbol = " ";
/* ----------
 * Return pointer of last relevant number after decimal point
 *	12.0500 --> last relevant is '5'
 *	12.0000 --> last relevant is '.'
 * If there is no decimal point, return NULL (which will result in same
 * behavior as if FM hadn't been specified).
 *
 * The returned pointer points into the caller's string "num".
 * ----------
 */
static char *
get_last_relevant_decnum(char *num)
{
	char	   *result,
			   *p = strchr(num, '.');

#ifdef DEBUG_TO_FROM_CHAR
	elog(DEBUG_elog_output, "get_last_relevant_decnum()");
#endif

	if (!p)
		return NULL;

	/* start at the decimal point, then advance to each non-'0' character */
	result = p;

	while (*(++p))
	{
		if (*p != '0')
			result = p;
	}

	return result;
}
/*
 * These macros are used in NUM_processor() and its subsidiary routines.
 * OVERLOAD_TEST: true if we've reached end of input string
 * AMOUNT_TEST(s): true if at least s bytes remain in string
 *
 * Both rely on a NUMProc pointer named "Np" and an int named "input_len"
 * being in scope at the point of use.
 */
#define OVERLOAD_TEST	(Np->inout_p >= Np->inout + input_len)
#define AMOUNT_TEST(s)	(Np->inout_p <= Np->inout + (input_len - (s)))
5390 /* ----------
5391 * Number extraction for TO_NUMBER()
5392 * ----------
5394 static void
5395 NUM_numpart_from_char(NUMProc *Np, int id, int input_len)
5397 bool isread = false;
5399 #ifdef DEBUG_TO_FROM_CHAR
5400 elog(DEBUG_elog_output, " --- scan start --- id=%s",
5401 (id == NUM_0 || id == NUM_9) ? "NUM_0/9" : id == NUM_DEC ? "NUM_DEC" : "???");
5402 #endif
5404 if (OVERLOAD_TEST)
5405 return;
5407 if (*Np->inout_p == ' ')
5408 Np->inout_p++;
5410 if (OVERLOAD_TEST)
5411 return;
5414 * read sign before number
5416 if (*Np->number == ' ' && (id == NUM_0 || id == NUM_9) &&
5417 (Np->read_pre + Np->read_post) == 0)
5419 #ifdef DEBUG_TO_FROM_CHAR
5420 elog(DEBUG_elog_output, "Try read sign (%c), locale positive: %s, negative: %s",
5421 *Np->inout_p, Np->L_positive_sign, Np->L_negative_sign);
5422 #endif
5425 * locale sign
5427 if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_PRE)
5429 int x = 0;
5431 #ifdef DEBUG_TO_FROM_CHAR
5432 elog(DEBUG_elog_output, "Try read locale pre-sign (%c)", *Np->inout_p);
5433 #endif
5434 if ((x = strlen(Np->L_negative_sign)) &&
5435 AMOUNT_TEST(x) &&
5436 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5438 Np->inout_p += x;
5439 *Np->number = '-';
5441 else if ((x = strlen(Np->L_positive_sign)) &&
5442 AMOUNT_TEST(x) &&
5443 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5445 Np->inout_p += x;
5446 *Np->number = '+';
5449 else
5451 #ifdef DEBUG_TO_FROM_CHAR
5452 elog(DEBUG_elog_output, "Try read simple sign (%c)", *Np->inout_p);
5453 #endif
5456 * simple + - < >
5458 if (*Np->inout_p == '-' || (IS_BRACKET(Np->Num) &&
5459 *Np->inout_p == '<'))
5461 *Np->number = '-'; /* set - */
5462 Np->inout_p++;
5464 else if (*Np->inout_p == '+')
5466 *Np->number = '+'; /* set + */
5467 Np->inout_p++;
5472 if (OVERLOAD_TEST)
5473 return;
5475 #ifdef DEBUG_TO_FROM_CHAR
5476 elog(DEBUG_elog_output, "Scan for numbers (%c), current number: '%s'", *Np->inout_p, Np->number);
5477 #endif
5480 * read digit or decimal point
5482 if (isdigit((unsigned char) *Np->inout_p))
5484 if (Np->read_dec && Np->read_post == Np->Num->post)
5485 return;
5487 *Np->number_p = *Np->inout_p;
5488 Np->number_p++;
5490 if (Np->read_dec)
5491 Np->read_post++;
5492 else
5493 Np->read_pre++;
5495 isread = true;
5497 #ifdef DEBUG_TO_FROM_CHAR
5498 elog(DEBUG_elog_output, "Read digit (%c)", *Np->inout_p);
5499 #endif
5501 else if (IS_DECIMAL(Np->Num) && Np->read_dec == false)
5504 * We need not test IS_LDECIMAL(Np->Num) explicitly here, because
5505 * Np->decimal is always just "." if we don't have a D format token.
5506 * So we just unconditionally match to Np->decimal.
5508 int x = strlen(Np->decimal);
5510 #ifdef DEBUG_TO_FROM_CHAR
5511 elog(DEBUG_elog_output, "Try read decimal point (%c)",
5512 *Np->inout_p);
5513 #endif
5514 if (x && AMOUNT_TEST(x) && strncmp(Np->inout_p, Np->decimal, x) == 0)
5516 Np->inout_p += x - 1;
5517 *Np->number_p = '.';
5518 Np->number_p++;
5519 Np->read_dec = true;
5520 isread = true;
5524 if (OVERLOAD_TEST)
5525 return;
5528 * Read sign behind "last" number
5530 * We need sign detection because determine exact position of post-sign is
5531 * difficult:
5533 * FM9999.9999999S -> 123.001- 9.9S -> .5- FM9.999999MI ->
5534 * 5.01-
5536 if (*Np->number == ' ' && Np->read_pre + Np->read_post > 0)
5539 * locale sign (NUM_S) is always anchored behind a last number, if: -
5540 * locale sign expected - last read char was NUM_0/9 or NUM_DEC - and
5541 * next char is not digit
5543 if (IS_LSIGN(Np->Num) && isread &&
5544 (Np->inout_p + 1) < Np->inout + input_len &&
5545 !isdigit((unsigned char) *(Np->inout_p + 1)))
5547 int x;
5548 char *tmp = Np->inout_p++;
5550 #ifdef DEBUG_TO_FROM_CHAR
5551 elog(DEBUG_elog_output, "Try read locale post-sign (%c)", *Np->inout_p);
5552 #endif
5553 if ((x = strlen(Np->L_negative_sign)) &&
5554 AMOUNT_TEST(x) &&
5555 strncmp(Np->inout_p, Np->L_negative_sign, x) == 0)
5557 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5558 *Np->number = '-';
5560 else if ((x = strlen(Np->L_positive_sign)) &&
5561 AMOUNT_TEST(x) &&
5562 strncmp(Np->inout_p, Np->L_positive_sign, x) == 0)
5564 Np->inout_p += x - 1; /* -1 .. NUM_processor() do inout_p++ */
5565 *Np->number = '+';
5567 if (*Np->number == ' ')
5568 /* no sign read */
5569 Np->inout_p = tmp;
5573 * try read non-locale sign, it's happen only if format is not exact
5574 * and we cannot determine sign position of MI/PL/SG, an example:
5576 * FM9.999999MI -> 5.01-
5578 * if (.... && IS_LSIGN(Np->Num)==false) prevents read wrong formats
5579 * like to_number('1 -', '9S') where sign is not anchored to last
5580 * number.
5582 else if (isread == false && IS_LSIGN(Np->Num) == false &&
5583 (IS_PLUS(Np->Num) || IS_MINUS(Np->Num)))
5585 #ifdef DEBUG_TO_FROM_CHAR
5586 elog(DEBUG_elog_output, "Try read simple post-sign (%c)", *Np->inout_p);
5587 #endif
5590 * simple + -
5592 if (*Np->inout_p == '-' || *Np->inout_p == '+')
5593 /* NUM_processor() do inout_p++ */
5594 *Np->number = *Np->inout_p;
/*
 * True when the number processor _n is still at the first character of its
 * number string, that character is '0', the format has no zero flag, and
 * the format has digits after the decimal point.  NUM_numpart_to_char()
 * uses this to emit a blank instead of a leading "0" (see its comments).
 */
#define IS_PREDEC_SPACE(_n) \
		(IS_ZERO((_n)->Num)==false && \
		 (_n)->number == (_n)->number_p && \
		 *(_n)->number == '0' && \
				 (_n)->Num->post != 0)
5605 /* ----------
5606 * Add digit or sign to number-string
5607 * ----------
5609 static void
5610 NUM_numpart_to_char(NUMProc *Np, int id)
5612 int end;
5614 if (IS_ROMAN(Np->Num))
5615 return;
5617 /* Note: in this elog() output not set '\0' in 'inout' */
5619 #ifdef DEBUG_TO_FROM_CHAR
5622 * Np->num_curr is number of current item in format-picture, it is not
5623 * current position in inout!
5625 elog(DEBUG_elog_output,
5626 "SIGN_WROTE: %d, CURRENT: %d, NUMBER_P: \"%s\", INOUT: \"%s\"",
5627 Np->sign_wrote,
5628 Np->num_curr,
5629 Np->number_p,
5630 Np->inout);
5631 #endif
5632 Np->num_in = false;
5635 * Write sign if real number will write to output Note: IS_PREDEC_SPACE()
5636 * handle "9.9" --> " .1"
5638 if (Np->sign_wrote == false &&
5639 (Np->num_curr >= Np->out_pre_spaces || (IS_ZERO(Np->Num) && Np->Num->zero_start == Np->num_curr)) &&
5640 (IS_PREDEC_SPACE(Np) == false || (Np->last_relevant && *Np->last_relevant == '.')))
5642 if (IS_LSIGN(Np->Num))
5644 if (Np->Num->lsign == NUM_LSIGN_PRE)
5646 if (Np->sign == '-')
5647 strcpy(Np->inout_p, Np->L_negative_sign);
5648 else
5649 strcpy(Np->inout_p, Np->L_positive_sign);
5650 Np->inout_p += strlen(Np->inout_p);
5651 Np->sign_wrote = true;
5654 else if (IS_BRACKET(Np->Num))
5656 *Np->inout_p = Np->sign == '+' ? ' ' : '<';
5657 ++Np->inout_p;
5658 Np->sign_wrote = true;
5660 else if (Np->sign == '+')
5662 if (!IS_FILLMODE(Np->Num))
5664 *Np->inout_p = ' '; /* Write + */
5665 ++Np->inout_p;
5667 Np->sign_wrote = true;
5669 else if (Np->sign == '-')
5670 { /* Write - */
5671 *Np->inout_p = '-';
5672 ++Np->inout_p;
5673 Np->sign_wrote = true;
5679 * digits / FM / Zero / Dec. point
5681 if (id == NUM_9 || id == NUM_0 || id == NUM_D || id == NUM_DEC)
5683 if (Np->num_curr < Np->out_pre_spaces &&
5684 (Np->Num->zero_start > Np->num_curr || !IS_ZERO(Np->Num)))
5687 * Write blank space
5689 if (!IS_FILLMODE(Np->Num))
5691 *Np->inout_p = ' '; /* Write ' ' */
5692 ++Np->inout_p;
5695 else if (IS_ZERO(Np->Num) &&
5696 Np->num_curr < Np->out_pre_spaces &&
5697 Np->Num->zero_start <= Np->num_curr)
5700 * Write ZERO
5702 *Np->inout_p = '0'; /* Write '0' */
5703 ++Np->inout_p;
5704 Np->num_in = true;
5706 else
5709 * Write Decimal point
5711 if (*Np->number_p == '.')
5713 if (!Np->last_relevant || *Np->last_relevant != '.')
5715 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
5716 Np->inout_p += strlen(Np->inout_p);
5720 * Ora 'n' -- FM9.9 --> 'n.'
5722 else if (IS_FILLMODE(Np->Num) &&
5723 Np->last_relevant && *Np->last_relevant == '.')
5725 strcpy(Np->inout_p, Np->decimal); /* Write DEC/D */
5726 Np->inout_p += strlen(Np->inout_p);
5729 else
5732 * Write Digits
5734 if (Np->last_relevant && Np->number_p > Np->last_relevant &&
5735 id != NUM_0)
5739 * '0.1' -- 9.9 --> ' .1'
5741 else if (IS_PREDEC_SPACE(Np))
5743 if (!IS_FILLMODE(Np->Num))
5745 *Np->inout_p = ' ';
5746 ++Np->inout_p;
5750 * '0' -- FM9.9 --> '0.'
5752 else if (Np->last_relevant && *Np->last_relevant == '.')
5754 *Np->inout_p = '0';
5755 ++Np->inout_p;
5758 else
5760 *Np->inout_p = *Np->number_p; /* Write DIGIT */
5761 ++Np->inout_p;
5762 Np->num_in = true;
5765 /* do no exceed string length */
5766 if (*Np->number_p)
5767 ++Np->number_p;
5770 end = Np->num_count + (Np->out_pre_spaces ? 1 : 0) + (IS_DECIMAL(Np->Num) ? 1 : 0);
5772 if (Np->last_relevant && Np->last_relevant == Np->number_p)
5773 end = Np->num_curr;
5775 if (Np->num_curr + 1 == end)
5777 if (Np->sign_wrote == true && IS_BRACKET(Np->Num))
5779 *Np->inout_p = Np->sign == '+' ? ' ' : '>';
5780 ++Np->inout_p;
5782 else if (IS_LSIGN(Np->Num) && Np->Num->lsign == NUM_LSIGN_POST)
5784 if (Np->sign == '-')
5785 strcpy(Np->inout_p, Np->L_negative_sign);
5786 else
5787 strcpy(Np->inout_p, Np->L_positive_sign);
5788 Np->inout_p += strlen(Np->inout_p);
5793 ++Np->num_curr;
5797 * Skip over "n" input characters, but only if they aren't numeric data
5799 static void
5800 NUM_eat_non_data_chars(NUMProc *Np, int n, int input_len)
5802 while (n-- > 0)
5804 if (OVERLOAD_TEST)
5805 break; /* end of input */
5806 if (strchr("0123456789.,+-", *Np->inout_p) != NULL)
5807 break; /* it's a data character */
5808 Np->inout_p += pg_mblen(Np->inout_p);
5812 static char *
5813 NUM_processor(FormatNode *node, NUMDesc *Num, char *inout,
5814 char *number, int input_len, int to_char_out_pre_spaces,
5815 int sign, bool is_to_char, Oid collid)
5817 FormatNode *n;
5818 NUMProc _Np,
5819 *Np = &_Np;
5820 const char *pattern;
5821 int pattern_len;
5823 MemSet(Np, 0, sizeof(NUMProc));
5825 Np->Num = Num;
5826 Np->is_to_char = is_to_char;
5827 Np->number = number;
5828 Np->inout = inout;
5829 Np->last_relevant = NULL;
5830 Np->read_post = 0;
5831 Np->read_pre = 0;
5832 Np->read_dec = false;
5834 if (Np->Num->zero_start)
5835 --Np->Num->zero_start;
5837 if (IS_EEEE(Np->Num))
5839 if (!Np->is_to_char)
5840 ereport(ERROR,
5841 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5842 errmsg("\"EEEE\" not supported for input")));
5843 return strcpy(inout, number);
5847 * Roman correction
5849 if (IS_ROMAN(Np->Num))
5851 if (!Np->is_to_char)
5852 ereport(ERROR,
5853 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
5854 errmsg("\"RN\" not supported for input")));
5856 Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post =
5857 Np->Num->pre = Np->out_pre_spaces = Np->sign = 0;
5859 if (IS_FILLMODE(Np->Num))
5861 Np->Num->flag = 0;
5862 Np->Num->flag |= NUM_F_FILLMODE;
5864 else
5865 Np->Num->flag = 0;
5866 Np->Num->flag |= NUM_F_ROMAN;
5870 * Sign
5872 if (is_to_char)
5874 Np->sign = sign;
5876 /* MI/PL/SG - write sign itself and not in number */
5877 if (IS_PLUS(Np->Num) || IS_MINUS(Np->Num))
5879 if (IS_PLUS(Np->Num) && IS_MINUS(Np->Num) == false)
5880 Np->sign_wrote = false; /* need sign */
5881 else
5882 Np->sign_wrote = true; /* needn't sign */
5884 else
5886 if (Np->sign != '-')
5888 if (IS_FILLMODE(Np->Num))
5889 Np->Num->flag &= ~NUM_F_BRACKET;
5892 if (Np->sign == '+' && IS_FILLMODE(Np->Num) && IS_LSIGN(Np->Num) == false)
5893 Np->sign_wrote = true; /* needn't sign */
5894 else
5895 Np->sign_wrote = false; /* need sign */
5897 if (Np->Num->lsign == NUM_LSIGN_PRE && Np->Num->pre == Np->Num->pre_lsign_num)
5898 Np->Num->lsign = NUM_LSIGN_POST;
5901 else
5902 Np->sign = false;
5905 * Count
5907 Np->num_count = Np->Num->post + Np->Num->pre - 1;
5909 if (is_to_char)
5911 Np->out_pre_spaces = to_char_out_pre_spaces;
5913 if (IS_FILLMODE(Np->Num) && IS_DECIMAL(Np->Num))
5915 Np->last_relevant = get_last_relevant_decnum(Np->number);
5918 * If any '0' specifiers are present, make sure we don't strip
5919 * those digits. But don't advance last_relevant beyond the last
5920 * character of the Np->number string, which is a hazard if the
5921 * number got shortened due to precision limitations.
5923 if (Np->last_relevant && Np->Num->zero_end > Np->out_pre_spaces)
5925 int last_zero_pos;
5926 char *last_zero;
5928 /* note that Np->number cannot be zero-length here */
5929 last_zero_pos = strlen(Np->number) - 1;
5930 last_zero_pos = Min(last_zero_pos,
5931 Np->Num->zero_end - Np->out_pre_spaces);
5932 last_zero = Np->number + last_zero_pos;
5933 if (Np->last_relevant < last_zero)
5934 Np->last_relevant = last_zero;
5938 if (Np->sign_wrote == false && Np->out_pre_spaces == 0)
5939 ++Np->num_count;
5941 else
5943 Np->out_pre_spaces = 0;
5944 *Np->number = ' '; /* sign space */
5945 *(Np->number + 1) = '\0';
5948 Np->num_in = 0;
5949 Np->num_curr = 0;
5951 #ifdef DEBUG_TO_FROM_CHAR
5952 elog(DEBUG_elog_output,
5953 "\n\tSIGN: '%c'\n\tNUM: '%s'\n\tPRE: %d\n\tPOST: %d\n\tNUM_COUNT: %d\n\tNUM_PRE: %d\n\tSIGN_WROTE: %s\n\tZERO: %s\n\tZERO_START: %d\n\tZERO_END: %d\n\tLAST_RELEVANT: %s\n\tBRACKET: %s\n\tPLUS: %s\n\tMINUS: %s\n\tFILLMODE: %s\n\tROMAN: %s\n\tEEEE: %s",
5954 Np->sign,
5955 Np->number,
5956 Np->Num->pre,
5957 Np->Num->post,
5958 Np->num_count,
5959 Np->out_pre_spaces,
5960 Np->sign_wrote ? "Yes" : "No",
5961 IS_ZERO(Np->Num) ? "Yes" : "No",
5962 Np->Num->zero_start,
5963 Np->Num->zero_end,
5964 Np->last_relevant ? Np->last_relevant : "<not set>",
5965 IS_BRACKET(Np->Num) ? "Yes" : "No",
5966 IS_PLUS(Np->Num) ? "Yes" : "No",
5967 IS_MINUS(Np->Num) ? "Yes" : "No",
5968 IS_FILLMODE(Np->Num) ? "Yes" : "No",
5969 IS_ROMAN(Np->Num) ? "Yes" : "No",
5970 IS_EEEE(Np->Num) ? "Yes" : "No"
5972 #endif
5975 * Locale
5977 NUM_prepare_locale(Np);
5980 * Processor direct cycle
5982 if (Np->is_to_char)
5983 Np->number_p = Np->number;
5984 else
5985 Np->number_p = Np->number + 1; /* first char is space for sign */
5987 for (n = node, Np->inout_p = Np->inout; n->type != NODE_TYPE_END; n++)
5989 if (!Np->is_to_char)
5992 * Check at least one byte remains to be scanned. (In actions
5993 * below, must use AMOUNT_TEST if we want to read more bytes than
5994 * that.)
5996 if (OVERLOAD_TEST)
5997 break;
6001 * Format pictures actions
6003 if (n->type == NODE_TYPE_ACTION)
6006 * Create/read digit/zero/blank/sign/special-case
6008 * 'NUM_S' note: The locale sign is anchored to number and we
6009 * read/write it when we work with first or last number
6010 * (NUM_0/NUM_9). This is why NUM_S is missing in switch().
6012 * Notice the "Np->inout_p++" at the bottom of the loop. This is
6013 * why most of the actions advance inout_p one less than you might
6014 * expect. In cases where we don't want that increment to happen,
6015 * a switch case ends with "continue" not "break".
6017 switch (n->key->id)
6019 case NUM_9:
6020 case NUM_0:
6021 case NUM_DEC:
6022 case NUM_D:
6023 if (Np->is_to_char)
6025 NUM_numpart_to_char(Np, n->key->id);
6026 continue; /* for() */
6028 else
6030 NUM_numpart_from_char(Np, n->key->id, input_len);
6031 break; /* switch() case: */
6034 case NUM_COMMA:
6035 if (Np->is_to_char)
6037 if (!Np->num_in)
6039 if (IS_FILLMODE(Np->Num))
6040 continue;
6041 else
6042 *Np->inout_p = ' ';
6044 else
6045 *Np->inout_p = ',';
6047 else
6049 if (!Np->num_in)
6051 if (IS_FILLMODE(Np->Num))
6052 continue;
6054 if (*Np->inout_p != ',')
6055 continue;
6057 break;
6059 case NUM_G:
6060 pattern = Np->L_thousands_sep;
6061 pattern_len = strlen(pattern);
6062 if (Np->is_to_char)
6064 if (!Np->num_in)
6066 if (IS_FILLMODE(Np->Num))
6067 continue;
6068 else
6070 /* just in case there are MB chars */
6071 pattern_len = pg_mbstrlen(pattern);
6072 memset(Np->inout_p, ' ', pattern_len);
6073 Np->inout_p += pattern_len - 1;
6076 else
6078 strcpy(Np->inout_p, pattern);
6079 Np->inout_p += pattern_len - 1;
6082 else
6084 if (!Np->num_in)
6086 if (IS_FILLMODE(Np->Num))
6087 continue;
6091 * Because L_thousands_sep typically contains data
6092 * characters (either '.' or ','), we can't use
6093 * NUM_eat_non_data_chars here. Instead skip only if
6094 * the input matches L_thousands_sep.
6096 if (AMOUNT_TEST(pattern_len) &&
6097 strncmp(Np->inout_p, pattern, pattern_len) == 0)
6098 Np->inout_p += pattern_len - 1;
6099 else
6100 continue;
6102 break;
6104 case NUM_L:
6105 pattern = Np->L_currency_symbol;
6106 if (Np->is_to_char)
6108 strcpy(Np->inout_p, pattern);
6109 Np->inout_p += strlen(pattern) - 1;
6111 else
6113 NUM_eat_non_data_chars(Np, pg_mbstrlen(pattern), input_len);
6114 continue;
6116 break;
6118 case NUM_RN:
6119 if (IS_FILLMODE(Np->Num))
6121 strcpy(Np->inout_p, Np->number_p);
6122 Np->inout_p += strlen(Np->inout_p) - 1;
6124 else
6126 sprintf(Np->inout_p, "%15s", Np->number_p);
6127 Np->inout_p += strlen(Np->inout_p) - 1;
6129 break;
6131 case NUM_rn:
6132 if (IS_FILLMODE(Np->Num))
6134 strcpy(Np->inout_p, asc_tolower_z(Np->number_p));
6135 Np->inout_p += strlen(Np->inout_p) - 1;
6137 else
6139 sprintf(Np->inout_p, "%15s", asc_tolower_z(Np->number_p));
6140 Np->inout_p += strlen(Np->inout_p) - 1;
6142 break;
6144 case NUM_th:
6145 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
6146 Np->sign == '-' || IS_DECIMAL(Np->Num))
6147 continue;
6149 if (Np->is_to_char)
6151 strcpy(Np->inout_p, get_th(Np->number, TH_LOWER));
6152 Np->inout_p += 1;
6154 else
6156 /* All variants of 'th' occupy 2 characters */
6157 NUM_eat_non_data_chars(Np, 2, input_len);
6158 continue;
6160 break;
6162 case NUM_TH:
6163 if (IS_ROMAN(Np->Num) || *Np->number == '#' ||
6164 Np->sign == '-' || IS_DECIMAL(Np->Num))
6165 continue;
6167 if (Np->is_to_char)
6169 strcpy(Np->inout_p, get_th(Np->number, TH_UPPER));
6170 Np->inout_p += 1;
6172 else
6174 /* All variants of 'TH' occupy 2 characters */
6175 NUM_eat_non_data_chars(Np, 2, input_len);
6176 continue;
6178 break;
6180 case NUM_MI:
6181 if (Np->is_to_char)
6183 if (Np->sign == '-')
6184 *Np->inout_p = '-';
6185 else if (IS_FILLMODE(Np->Num))
6186 continue;
6187 else
6188 *Np->inout_p = ' ';
6190 else
6192 if (*Np->inout_p == '-')
6193 *Np->number = '-';
6194 else
6196 NUM_eat_non_data_chars(Np, 1, input_len);
6197 continue;
6200 break;
6202 case NUM_PL:
6203 if (Np->is_to_char)
6205 if (Np->sign == '+')
6206 *Np->inout_p = '+';
6207 else if (IS_FILLMODE(Np->Num))
6208 continue;
6209 else
6210 *Np->inout_p = ' ';
6212 else
6214 if (*Np->inout_p == '+')
6215 *Np->number = '+';
6216 else
6218 NUM_eat_non_data_chars(Np, 1, input_len);
6219 continue;
6222 break;
6224 case NUM_SG:
6225 if (Np->is_to_char)
6226 *Np->inout_p = Np->sign;
6227 else
6229 if (*Np->inout_p == '-')
6230 *Np->number = '-';
6231 else if (*Np->inout_p == '+')
6232 *Np->number = '+';
6233 else
6235 NUM_eat_non_data_chars(Np, 1, input_len);
6236 continue;
6239 break;
6241 default:
6242 continue;
6243 break;
6246 else
6249 * In TO_CHAR, non-pattern characters in the format are copied to
6250 * the output. In TO_NUMBER, we skip one input character for each
6251 * non-pattern format character, whether or not it matches the
6252 * format character.
6254 if (Np->is_to_char)
6256 strcpy(Np->inout_p, n->character);
6257 Np->inout_p += strlen(Np->inout_p);
6259 else
6261 Np->inout_p += pg_mblen(Np->inout_p);
6263 continue;
6265 Np->inout_p++;
6268 if (Np->is_to_char)
6270 *Np->inout_p = '\0';
6271 return Np->inout;
6273 else
6275 if (*(Np->number_p - 1) == '.')
6276 *(Np->number_p - 1) = '\0';
6277 else
6278 *Np->number_p = '\0';
6281 * Correction - precision of dec. number
6283 Np->Num->post = Np->read_post;
6285 #ifdef DEBUG_TO_FROM_CHAR
6286 elog(DEBUG_elog_output, "TO_NUMBER (number): '%s'", Np->number);
6287 #endif
6288 return Np->number;
/* ----------
 * MACRO: Start part of NUM - for all NUM's to_char variants
 *	(sorry, but I hate copy same code - macro is better..)
 *
 * Expects variables named fmt, result, format, Num and shouldFree in the
 * enclosing function.  If the format length is not positive, or is too large
 * for the result size computation, the enclosing function returns an empty
 * text value.  Otherwise a zeroed result buffer is allocated and the format
 * picture is looked up or parsed through NUM_cache().
 * ----------
 */
#define NUM_TOCHAR_prepare \
do { \
	int len = VARSIZE_ANY_EXHDR(fmt); \
	if (len <= 0 || len >= (INT_MAX-VARHDRSZ)/NUM_MAX_ITEM_SIZ)		\
		PG_RETURN_TEXT_P(cstring_to_text("")); \
	result	= (text *) palloc0((len * NUM_MAX_ITEM_SIZ) + 1 + VARHDRSZ);	\
	format	= NUM_cache(len, &Num, fmt, &shouldFree);		\
} while (0)
/* ----------
 * MACRO: Finish part of NUM
 *
 * Expects variables named format, Num, result, numstr, out_pre_spaces, sign
 * and shouldFree in the enclosing function.  Runs NUM_processor() in to_char
 * mode, frees the format if it was not taken from the cache, and sets the
 * varlena size of result from the length of the produced string.
 * ----------
 */
#define NUM_TOCHAR_finish \
do { \
	int		len; \
									\
	NUM_processor(format, &Num, VARDATA(result), numstr, 0, out_pre_spaces, sign, true, PG_GET_COLLATION()); \
									\
	if (shouldFree)					\
		pfree(format);				\
									\
	/*								\
	 * Convert null-terminated representation of result to standard text. \
	 * The result is usually much bigger than it needs to be, but there \
	 * seems little point in realloc'ing it smaller. \
	 */								\
	len = strlen(VARDATA(result));	\
	SET_VARSIZE(result, len + VARHDRSZ); \
} while (0)
6328 /* -------------------
6329 * NUMERIC to_number() (convert string to numeric)
6330 * -------------------
6332 Datum
6333 numeric_to_number(PG_FUNCTION_ARGS)
6335 text *value = PG_GETARG_TEXT_PP(0);
6336 text *fmt = PG_GETARG_TEXT_PP(1);
6337 NUMDesc Num;
6338 Datum result;
6339 FormatNode *format;
6340 char *numstr;
6341 bool shouldFree;
6342 int len = 0;
6343 int scale,
6344 precision;
6346 len = VARSIZE_ANY_EXHDR(fmt);
6348 if (len <= 0 || len >= INT_MAX / NUM_MAX_ITEM_SIZ)
6349 PG_RETURN_NULL();
6351 format = NUM_cache(len, &Num, fmt, &shouldFree);
6353 numstr = (char *) palloc((len * NUM_MAX_ITEM_SIZ) + 1);
6355 NUM_processor(format, &Num, VARDATA_ANY(value), numstr,
6356 VARSIZE_ANY_EXHDR(value), 0, 0, false, PG_GET_COLLATION());
6358 scale = Num.post;
6359 precision = Num.pre + Num.multi + scale;
6361 if (shouldFree)
6362 pfree(format);
6364 result = DirectFunctionCall3(numeric_in,
6365 CStringGetDatum(numstr),
6366 ObjectIdGetDatum(InvalidOid),
6367 Int32GetDatum(((precision << 16) | scale) + VARHDRSZ));
6369 if (IS_MULTI(&Num))
6371 Numeric x;
6372 Numeric a = int64_to_numeric(10);
6373 Numeric b = int64_to_numeric(-Num.multi);
6375 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6376 NumericGetDatum(a),
6377 NumericGetDatum(b)));
6378 result = DirectFunctionCall2(numeric_mul,
6379 result,
6380 NumericGetDatum(x));
6383 pfree(numstr);
6384 return result;
6387 /* ------------------
6388 * NUMERIC to_char()
6389 * ------------------
6391 Datum
6392 numeric_to_char(PG_FUNCTION_ARGS)
6394 Numeric value = PG_GETARG_NUMERIC(0);
6395 text *fmt = PG_GETARG_TEXT_PP(1);
6396 NUMDesc Num;
6397 FormatNode *format;
6398 text *result;
6399 bool shouldFree;
6400 int out_pre_spaces = 0,
6401 sign = 0;
6402 char *numstr,
6403 *orgnum,
6406 NUM_TOCHAR_prepare;
6409 * On DateType depend part (numeric)
6411 if (IS_ROMAN(&Num))
6413 int32 intvalue;
6414 bool err;
6416 /* Round and convert to int */
6417 intvalue = numeric_int4_opt_error(value, &err);
6418 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6419 if (err)
6420 intvalue = PG_INT32_MAX;
6421 numstr = int_to_roman(intvalue);
6423 else if (IS_EEEE(&Num))
6425 orgnum = numeric_out_sci(value, Num.post);
6428 * numeric_out_sci() does not emit a sign for positive numbers. We
6429 * need to add a space in this case so that positive and negative
6430 * numbers are aligned. Also must check for NaN/infinity cases, which
6431 * we handle the same way as in float8_to_char.
6433 if (strcmp(orgnum, "NaN") == 0 ||
6434 strcmp(orgnum, "Infinity") == 0 ||
6435 strcmp(orgnum, "-Infinity") == 0)
6438 * Allow 6 characters for the leading sign, the decimal point,
6439 * "e", the exponent's sign and two exponent digits.
6441 numstr = (char *) palloc(Num.pre + Num.post + 7);
6442 fill_str(numstr, '#', Num.pre + Num.post + 6);
6443 *numstr = ' ';
6444 *(numstr + Num.pre + 1) = '.';
6446 else if (*orgnum != '-')
6448 numstr = (char *) palloc(strlen(orgnum) + 2);
6449 *numstr = ' ';
6450 strcpy(numstr + 1, orgnum);
6452 else
6454 numstr = orgnum;
6457 else
6459 int numstr_pre_len;
6460 Numeric val = value;
6461 Numeric x;
6463 if (IS_MULTI(&Num))
6465 Numeric a = int64_to_numeric(10);
6466 Numeric b = int64_to_numeric(Num.multi);
6468 x = DatumGetNumeric(DirectFunctionCall2(numeric_power,
6469 NumericGetDatum(a),
6470 NumericGetDatum(b)));
6471 val = DatumGetNumeric(DirectFunctionCall2(numeric_mul,
6472 NumericGetDatum(value),
6473 NumericGetDatum(x)));
6474 Num.pre += Num.multi;
6477 x = DatumGetNumeric(DirectFunctionCall2(numeric_round,
6478 NumericGetDatum(val),
6479 Int32GetDatum(Num.post)));
6480 orgnum = DatumGetCString(DirectFunctionCall1(numeric_out,
6481 NumericGetDatum(x)));
6483 if (*orgnum == '-')
6485 sign = '-';
6486 numstr = orgnum + 1;
6488 else
6490 sign = '+';
6491 numstr = orgnum;
6494 if ((p = strchr(numstr, '.')))
6495 numstr_pre_len = p - numstr;
6496 else
6497 numstr_pre_len = strlen(numstr);
6499 /* needs padding? */
6500 if (numstr_pre_len < Num.pre)
6501 out_pre_spaces = Num.pre - numstr_pre_len;
6502 /* overflowed prefix digit format? */
6503 else if (numstr_pre_len > Num.pre)
6505 numstr = (char *) palloc(Num.pre + Num.post + 2);
6506 fill_str(numstr, '#', Num.pre + Num.post + 1);
6507 *(numstr + Num.pre) = '.';
6511 NUM_TOCHAR_finish;
6512 PG_RETURN_TEXT_P(result);
6515 /* ---------------
6516 * INT4 to_char()
6517 * ---------------
6519 Datum
6520 int4_to_char(PG_FUNCTION_ARGS)
6522 int32 value = PG_GETARG_INT32(0);
6523 text *fmt = PG_GETARG_TEXT_PP(1);
6524 NUMDesc Num;
6525 FormatNode *format;
6526 text *result;
6527 bool shouldFree;
6528 int out_pre_spaces = 0,
6529 sign = 0;
6530 char *numstr,
6531 *orgnum;
6533 NUM_TOCHAR_prepare;
6536 * On DateType depend part (int32)
6538 if (IS_ROMAN(&Num))
6539 numstr = int_to_roman(value);
6540 else if (IS_EEEE(&Num))
6542 /* we can do it easily because float8 won't lose any precision */
6543 float8 val = (float8) value;
6545 orgnum = (char *) psprintf("%+.*e", Num.post, val);
6548 * Swap a leading positive sign for a space.
6550 if (*orgnum == '+')
6551 *orgnum = ' ';
6553 numstr = orgnum;
6555 else
6557 int numstr_pre_len;
6559 if (IS_MULTI(&Num))
6561 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6562 Int32GetDatum(value * ((int32) pow((double) 10, (double) Num.multi)))));
6563 Num.pre += Num.multi;
6565 else
6567 orgnum = DatumGetCString(DirectFunctionCall1(int4out,
6568 Int32GetDatum(value)));
6571 if (*orgnum == '-')
6573 sign = '-';
6574 orgnum++;
6576 else
6577 sign = '+';
6579 numstr_pre_len = strlen(orgnum);
6581 /* post-decimal digits? Pad out with zeros. */
6582 if (Num.post)
6584 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6585 strcpy(numstr, orgnum);
6586 *(numstr + numstr_pre_len) = '.';
6587 memset(numstr + numstr_pre_len + 1, '0', Num.post);
6588 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6590 else
6591 numstr = orgnum;
6593 /* needs padding? */
6594 if (numstr_pre_len < Num.pre)
6595 out_pre_spaces = Num.pre - numstr_pre_len;
6596 /* overflowed prefix digit format? */
6597 else if (numstr_pre_len > Num.pre)
6599 numstr = (char *) palloc(Num.pre + Num.post + 2);
6600 fill_str(numstr, '#', Num.pre + Num.post + 1);
6601 *(numstr + Num.pre) = '.';
6605 NUM_TOCHAR_finish;
6606 PG_RETURN_TEXT_P(result);
6609 /* ---------------
6610 * INT8 to_char()
6611 * ---------------
6613 Datum
6614 int8_to_char(PG_FUNCTION_ARGS)
6616 int64 value = PG_GETARG_INT64(0);
6617 text *fmt = PG_GETARG_TEXT_PP(1);
6618 NUMDesc Num;
6619 FormatNode *format;
6620 text *result;
6621 bool shouldFree;
6622 int out_pre_spaces = 0,
6623 sign = 0;
6624 char *numstr,
6625 *orgnum;
6627 NUM_TOCHAR_prepare;
6630 * On DateType depend part (int64)
6632 if (IS_ROMAN(&Num))
6634 int32 intvalue;
6636 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6637 if (value <= PG_INT32_MAX && value >= PG_INT32_MIN)
6638 intvalue = (int32) value;
6639 else
6640 intvalue = PG_INT32_MAX;
6641 numstr = int_to_roman(intvalue);
6643 else if (IS_EEEE(&Num))
6645 /* to avoid loss of precision, must go via numeric not float8 */
6646 orgnum = numeric_out_sci(int64_to_numeric(value),
6647 Num.post);
6650 * numeric_out_sci() does not emit a sign for positive numbers. We
6651 * need to add a space in this case so that positive and negative
6652 * numbers are aligned. We don't have to worry about NaN/inf here.
6654 if (*orgnum != '-')
6656 numstr = (char *) palloc(strlen(orgnum) + 2);
6657 *numstr = ' ';
6658 strcpy(numstr + 1, orgnum);
6660 else
6662 numstr = orgnum;
6665 else
6667 int numstr_pre_len;
6669 if (IS_MULTI(&Num))
6671 double multi = pow((double) 10, (double) Num.multi);
6673 value = DatumGetInt64(DirectFunctionCall2(int8mul,
6674 Int64GetDatum(value),
6675 DirectFunctionCall1(dtoi8,
6676 Float8GetDatum(multi))));
6677 Num.pre += Num.multi;
6680 orgnum = DatumGetCString(DirectFunctionCall1(int8out,
6681 Int64GetDatum(value)));
6683 if (*orgnum == '-')
6685 sign = '-';
6686 orgnum++;
6688 else
6689 sign = '+';
6691 numstr_pre_len = strlen(orgnum);
6693 /* post-decimal digits? Pad out with zeros. */
6694 if (Num.post)
6696 numstr = (char *) palloc(numstr_pre_len + Num.post + 2);
6697 strcpy(numstr, orgnum);
6698 *(numstr + numstr_pre_len) = '.';
6699 memset(numstr + numstr_pre_len + 1, '0', Num.post);
6700 *(numstr + numstr_pre_len + Num.post + 1) = '\0';
6702 else
6703 numstr = orgnum;
6705 /* needs padding? */
6706 if (numstr_pre_len < Num.pre)
6707 out_pre_spaces = Num.pre - numstr_pre_len;
6708 /* overflowed prefix digit format? */
6709 else if (numstr_pre_len > Num.pre)
6711 numstr = (char *) palloc(Num.pre + Num.post + 2);
6712 fill_str(numstr, '#', Num.pre + Num.post + 1);
6713 *(numstr + Num.pre) = '.';
6717 NUM_TOCHAR_finish;
6718 PG_RETURN_TEXT_P(result);
6721 /* -----------------
6722 * FLOAT4 to_char()
6723 * -----------------
6725 Datum
6726 float4_to_char(PG_FUNCTION_ARGS)
6728 float4 value = PG_GETARG_FLOAT4(0);
6729 text *fmt = PG_GETARG_TEXT_PP(1);
6730 NUMDesc Num;
6731 FormatNode *format;
6732 text *result;
6733 bool shouldFree;
6734 int out_pre_spaces = 0,
6735 sign = 0;
6736 char *numstr,
6739 NUM_TOCHAR_prepare;
6741 if (IS_ROMAN(&Num))
6743 int32 intvalue;
6745 /* See notes in ftoi4() */
6746 value = rint(value);
6747 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6748 if (!isnan(value) && FLOAT4_FITS_IN_INT32(value))
6749 intvalue = (int32) value;
6750 else
6751 intvalue = PG_INT32_MAX;
6752 numstr = int_to_roman(intvalue);
6754 else if (IS_EEEE(&Num))
6756 if (isnan(value) || isinf(value))
6759 * Allow 6 characters for the leading sign, the decimal point,
6760 * "e", the exponent's sign and two exponent digits.
6762 numstr = (char *) palloc(Num.pre + Num.post + 7);
6763 fill_str(numstr, '#', Num.pre + Num.post + 6);
6764 *numstr = ' ';
6765 *(numstr + Num.pre + 1) = '.';
6767 else
6769 numstr = psprintf("%+.*e", Num.post, value);
6772 * Swap a leading positive sign for a space.
6774 if (*numstr == '+')
6775 *numstr = ' ';
6778 else
6780 float4 val = value;
6781 char *orgnum;
6782 int numstr_pre_len;
6784 if (IS_MULTI(&Num))
6786 float multi = pow((double) 10, (double) Num.multi);
6788 val = value * multi;
6789 Num.pre += Num.multi;
6792 orgnum = psprintf("%.0f", fabs(val));
6793 numstr_pre_len = strlen(orgnum);
6795 /* adjust post digits to fit max float digits */
6796 if (numstr_pre_len >= FLT_DIG)
6797 Num.post = 0;
6798 else if (numstr_pre_len + Num.post > FLT_DIG)
6799 Num.post = FLT_DIG - numstr_pre_len;
6800 orgnum = psprintf("%.*f", Num.post, val);
6802 if (*orgnum == '-')
6803 { /* < 0 */
6804 sign = '-';
6805 numstr = orgnum + 1;
6807 else
6809 sign = '+';
6810 numstr = orgnum;
6813 if ((p = strchr(numstr, '.')))
6814 numstr_pre_len = p - numstr;
6815 else
6816 numstr_pre_len = strlen(numstr);
6818 /* needs padding? */
6819 if (numstr_pre_len < Num.pre)
6820 out_pre_spaces = Num.pre - numstr_pre_len;
6821 /* overflowed prefix digit format? */
6822 else if (numstr_pre_len > Num.pre)
6824 numstr = (char *) palloc(Num.pre + Num.post + 2);
6825 fill_str(numstr, '#', Num.pre + Num.post + 1);
6826 *(numstr + Num.pre) = '.';
6830 NUM_TOCHAR_finish;
6831 PG_RETURN_TEXT_P(result);
6834 /* -----------------
6835 * FLOAT8 to_char()
6836 * -----------------
6838 Datum
6839 float8_to_char(PG_FUNCTION_ARGS)
6841 float8 value = PG_GETARG_FLOAT8(0);
6842 text *fmt = PG_GETARG_TEXT_PP(1);
6843 NUMDesc Num;
6844 FormatNode *format;
6845 text *result;
6846 bool shouldFree;
6847 int out_pre_spaces = 0,
6848 sign = 0;
6849 char *numstr,
6852 NUM_TOCHAR_prepare;
6854 if (IS_ROMAN(&Num))
6856 int32 intvalue;
6858 /* See notes in dtoi4() */
6859 value = rint(value);
6860 /* On overflow, just use PG_INT32_MAX; int_to_roman will cope */
6861 if (!isnan(value) && FLOAT8_FITS_IN_INT32(value))
6862 intvalue = (int32) value;
6863 else
6864 intvalue = PG_INT32_MAX;
6865 numstr = int_to_roman(intvalue);
6867 else if (IS_EEEE(&Num))
6869 if (isnan(value) || isinf(value))
6872 * Allow 6 characters for the leading sign, the decimal point,
6873 * "e", the exponent's sign and two exponent digits.
6875 numstr = (char *) palloc(Num.pre + Num.post + 7);
6876 fill_str(numstr, '#', Num.pre + Num.post + 6);
6877 *numstr = ' ';
6878 *(numstr + Num.pre + 1) = '.';
6880 else
6882 numstr = psprintf("%+.*e", Num.post, value);
6885 * Swap a leading positive sign for a space.
6887 if (*numstr == '+')
6888 *numstr = ' ';
6891 else
6893 float8 val = value;
6894 char *orgnum;
6895 int numstr_pre_len;
6897 if (IS_MULTI(&Num))
6899 double multi = pow((double) 10, (double) Num.multi);
6901 val = value * multi;
6902 Num.pre += Num.multi;
6905 orgnum = psprintf("%.0f", fabs(val));
6906 numstr_pre_len = strlen(orgnum);
6908 /* adjust post digits to fit max double digits */
6909 if (numstr_pre_len >= DBL_DIG)
6910 Num.post = 0;
6911 else if (numstr_pre_len + Num.post > DBL_DIG)
6912 Num.post = DBL_DIG - numstr_pre_len;
6913 orgnum = psprintf("%.*f", Num.post, val);
6915 if (*orgnum == '-')
6916 { /* < 0 */
6917 sign = '-';
6918 numstr = orgnum + 1;
6920 else
6922 sign = '+';
6923 numstr = orgnum;
6926 if ((p = strchr(numstr, '.')))
6927 numstr_pre_len = p - numstr;
6928 else
6929 numstr_pre_len = strlen(numstr);
6931 /* needs padding? */
6932 if (numstr_pre_len < Num.pre)
6933 out_pre_spaces = Num.pre - numstr_pre_len;
6934 /* overflowed prefix digit format? */
6935 else if (numstr_pre_len > Num.pre)
6937 numstr = (char *) palloc(Num.pre + Num.post + 2);
6938 fill_str(numstr, '#', Num.pre + Num.post + 1);
6939 *(numstr + Num.pre) = '.';
6943 NUM_TOCHAR_finish;
6944 PG_RETURN_TEXT_P(result);