src/backend/utils/misc/tzparser.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * tzparser.c
   4  *        Functions for parsing timezone offset files
   5  *
   6  * Note: we generally should not throw any errors in this file, but instead
   7  * try to return an error code.  This is not completely bulletproof at
   8  * present --- in particular out-of-memory will throw an error.  Could
   9  * probably fix with PG_TRY if necessary.
  10  *
  11  *
  12  * Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
  13  * Portions Copyright (c) 1994, Regents of the University of California
  14  *
  15  * IDENTIFICATION
  16  *        $PostgreSQL$
  17  *
  18  *-------------------------------------------------------------------------
  19  */
  20
  21 #include "postgres.h"
  22
  23 #include <ctype.h>
  24
  25 #include "miscadmin.h"
  26 #include "storage/fd.h"
  27 #include "utils/datetime.h"
  28 #include "utils/memutils.h"
  29 #include "utils/tzparser.h"
  30
  31
  32 #define WHITESPACE " \t\n\r"
  33
  34 static int      tz_elevel;                      /* to avoid passing this around a lot */
  35
  36 static bool validateTzEntry(tzEntry *tzentry);
  37 static bool splitTzLine(const char *filename, int lineno,
  38                         char *line, tzEntry *tzentry);
  39 static int addToArray(tzEntry **base, int *arraysize, int n,
  40                    tzEntry *entry, bool override);
  41 static int ParseTzFile(const char *filename, int depth,
  42                         tzEntry **base, int *arraysize, int n);
  43
  44
  45 /*
  46  * Apply additional validation checks to a tzEntry
  47  *
  48  * Returns TRUE if OK, else false
  49  */
  50 static bool
  51 validateTzEntry(tzEntry *tzentry)
  52 {
  53         unsigned char *p;
  54
  55         /*
  56          * Check restrictions imposed by datetkntbl storage format (see
  57          * datetime.c)
  58          */
  59         if (strlen(tzentry->abbrev) > TOKMAXLEN)
  60         {
  61                 ereport(tz_elevel,
  62                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  63                                  errmsg("time zone abbreviation \"%s\" is too long (maximum %d characters) in time zone file \"%s\", line %d",
  64                                                 tzentry->abbrev, TOKMAXLEN,
  65                                                 tzentry->filename, tzentry->lineno)));
  66                 return false;
  67         }
  68         if (tzentry->offset % 900 != 0)
  69         {
  70                 ereport(tz_elevel,
  71                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  72                                  errmsg("time zone offset %d is not a multiple of 900 sec (15 min) in time zone file \"%s\", line %d",
  73                                                 tzentry->offset,
  74                                                 tzentry->filename, tzentry->lineno)));
  75                 return false;
  76         }
  77
  78         /*
  79          * Sanity-check the offset: shouldn't exceed 14 hours
  80          */
  81         if (tzentry->offset > 14 * 60 * 60 ||
  82                 tzentry->offset < -14 * 60 * 60)
  83         {
  84                 ereport(tz_elevel,
  85                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
  86                                  errmsg("time zone offset %d is out of range in time zone file \"%s\", line %d",
  87                                                 tzentry->offset,
  88                                                 tzentry->filename, tzentry->lineno)));
  89                 return false;
  90         }
  91
  92         /*
  93          * Convert abbrev to lowercase (must match datetime.c's conversion)
  94          */
  95         for (p = (unsigned char *) tzentry->abbrev; *p; p++)
  96                 *p = pg_tolower(*p);
  97
  98         return true;
  99 }
 100
 101 /*
 102  * Attempt to parse the line as a timezone abbrev spec (name, offset, dst)
 103  *
 104  * Returns TRUE if OK, else false; data is stored in *tzentry
 105  */
 106 static bool
 107 splitTzLine(const char *filename, int lineno, char *line, tzEntry *tzentry)
 108 {
 109         char       *abbrev;
 110         char       *offset;
 111         char       *offset_endptr;
 112         char       *remain;
 113         char       *is_dst;
 114
 115         tzentry->lineno = lineno;
 116         tzentry->filename = filename;
 117
 118         abbrev = strtok(line, WHITESPACE);
 119         if (!abbrev)
 120         {
 121                 ereport(tz_elevel,
 122                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 123                                  errmsg("missing time zone abbreviation in time zone file \"%s\", line %d",
 124                                                 filename, lineno)));
 125                 return false;
 126         }
 127         tzentry->abbrev = abbrev;
 128
 129         offset = strtok(NULL, WHITESPACE);
 130         if (!offset)
 131         {
 132                 ereport(tz_elevel,
 133                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 134                  errmsg("missing time zone offset in time zone file \"%s\", line %d",
 135                                 filename, lineno)));
 136                 return false;
 137         }
 138         tzentry->offset = strtol(offset, &offset_endptr, 10);
 139         if (offset_endptr == offset || *offset_endptr != '\0')
 140         {
 141                 ereport(tz_elevel,
 142                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 143                                  errmsg("invalid number for time zone offset in time zone file \"%s\", line %d",
 144                                                 filename, lineno)));
 145                 return false;
 146         }
 147
 148         is_dst = strtok(NULL, WHITESPACE);
 149         if (is_dst && pg_strcasecmp(is_dst, "D") == 0)
 150         {
 151                 tzentry->is_dst = true;
 152                 remain = strtok(NULL, WHITESPACE);
 153         }
 154         else
 155         {
 156                 /* there was no 'D' dst specifier */
 157                 tzentry->is_dst = false;
 158                 remain = is_dst;
 159         }
 160
 161         if (!remain)                            /* no more non-whitespace chars */
 162                 return true;
 163
 164         if (remain[0] != '#')           /* must be a comment */
 165         {
 166                 ereport(tz_elevel,
 167                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 168                                  errmsg("invalid syntax in time zone file \"%s\", line %d",
 169                                                 filename, lineno)));
 170                 return false;
 171         }
 172         return true;
 173 }
 174
 175 /*
 176  * Insert entry into sorted array
 177  *
 178  * *base: base address of array (changeable if must enlarge array)
 179  * *arraysize: allocated length of array (changeable if must enlarge array)
 180  * n: current number of valid elements in array
 181  * entry: new data to insert
 182  * override: TRUE if OK to override
 183  *
 184  * Returns the new array length (new value for n), or -1 if error
 185  */
 186 static int
 187 addToArray(tzEntry **base, int *arraysize, int n,
 188                    tzEntry *entry, bool override)
 189 {
 190         tzEntry    *arrayptr;
 191         int                     low;
 192         int                     high;
 193
 194         /*
 195          * Search the array for a duplicate; as a useful side effect, the array is
 196          * maintained in sorted order.  We use strcmp() to ensure we match the
 197          * sort order datetime.c expects.
 198          */
 199         arrayptr = *base;
 200         low = 0;
 201         high = n - 1;
 202         while (low <= high)
 203         {
 204                 int                     mid = (low + high) >> 1;
 205                 tzEntry    *midptr = arrayptr + mid;
 206                 int                     cmp;
 207
 208                 cmp = strcmp(entry->abbrev, midptr->abbrev);
 209                 if (cmp < 0)
 210                         high = mid - 1;
 211                 else if (cmp > 0)
 212                         low = mid + 1;
 213                 else
 214                 {
 215                         /*
 216                          * Found a duplicate entry; complain unless it's the same.
 217                          */
 218                         if (midptr->offset == entry->offset &&
 219                                 midptr->is_dst == entry->is_dst)
 220                         {
 221                                 /* return unchanged array */
 222                                 return n;
 223                         }
 224                         if (override)
 225                         {
 226                                 /* same abbrev but something is different, override */
 227                                 midptr->offset = entry->offset;
 228                                 midptr->is_dst = entry->is_dst;
 229                                 return n;
 230                         }
 231                         /* same abbrev but something is different, complain */
 232                         ereport(tz_elevel,
 233                                         (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 234                                   errmsg("time zone abbreviation \"%s\" is multiply defined",
 235                                                  entry->abbrev),
 236                                          errdetail("Entry in time zone file \"%s\", line %d, conflicts with entry in file \"%s\", line %d.",
 237                                                            midptr->filename, midptr->lineno,
 238                                                            entry->filename, entry->lineno)));
 239                         return -1;
 240                 }
 241         }
 242
 243         /*
 244          * No match, insert at position "low".
 245          */
 246         if (n >= *arraysize)
 247         {
 248                 *arraysize *= 2;
 249                 *base = (tzEntry *) repalloc(*base, *arraysize * sizeof(tzEntry));
 250         }
 251
 252         arrayptr = *base + low;
 253
 254         memmove(arrayptr + 1, arrayptr, (n - low) * sizeof(tzEntry));
 255
 256         memcpy(arrayptr, entry, sizeof(tzEntry));
 257
 258         /* Must dup the abbrev to ensure it survives */
 259         arrayptr->abbrev = pstrdup(entry->abbrev);
 260
 261         return n + 1;
 262 }
 263
 264 /*
 265  * Parse a single timezone abbrev file --- can recurse to handle @INCLUDE
 266  *
 267  * filename: user-specified file name (does not include path)
 268  * depth: current recursion depth
 269  * *base: array for results (changeable if must enlarge array)
 270  * *arraysize: allocated length of array (changeable if must enlarge array)
 271  * n: current number of valid elements in array
 272  *
 273  * Returns the new array length (new value for n), or -1 if error
 274  */
 275 static int
 276 ParseTzFile(const char *filename, int depth,
 277                         tzEntry **base, int *arraysize, int n)
 278 {
 279         char            share_path[MAXPGPATH];
 280         char            file_path[MAXPGPATH];
 281         FILE       *tzFile;
 282         char            tzbuf[1024];
 283         char       *line;
 284         tzEntry         tzentry;
 285         int                     lineno = 0;
 286         bool            override = false;
 287         const char *p;
 288
 289         /*
 290          * We enforce that the filename is all alpha characters.  This may be
 291          * overly restrictive, but we don't want to allow access to anything
 292          * outside the timezonesets directory, so for instance '/' *must* be
 293          * rejected.
 294          */
 295         for (p = filename; *p; p++)
 296         {
 297                 if (!isalpha((unsigned char) *p))
 298                 {
 299                         /* at level 0, we need no ereport since guc.c will say enough */
 300                         if (depth > 0)
 301                                 ereport(tz_elevel,
 302                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 303                                                  errmsg("invalid time zone file name \"%s\"",
 304                                                                 filename)));
 305                         return -1;
 306                 }
 307         }
 308
 309         /*
 310          * The maximal recursion depth is a pretty arbitrary setting. It is hard
 311          * to imagine that someone needs more than 3 levels so stick with this
 312          * conservative setting until someone complains.
 313          */
 314         if (depth > 3)
 315         {
 316                 ereport(tz_elevel,
 317                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 318                          errmsg("time zone file recursion limit exceeded in file \"%s\"",
 319                                         filename)));
 320                 return -1;
 321         }
 322
 323         get_share_path(my_exec_path, share_path);
 324         snprintf(file_path, sizeof(file_path), "%s/timezonesets/%s",
 325                          share_path, filename);
 326         tzFile = AllocateFile(file_path, "r");
 327         if (!tzFile)
 328         {
 329                 /* at level 0, if file doesn't exist, guc.c's complaint is enough */
 330                 if (errno != ENOENT || depth > 0)
 331                         ereport(tz_elevel,
 332                                         (errcode_for_file_access(),
 333                                          errmsg("could not read time zone file \"%s\": %m",
 334                                                         filename)));
 335                 return -1;
 336         }
 337
 338         while (!feof(tzFile))
 339         {
 340                 lineno++;
 341                 if (fgets(tzbuf, sizeof(tzbuf), tzFile) == NULL)
 342                 {
 343                         if (ferror(tzFile))
 344                         {
 345                                 ereport(tz_elevel,
 346                                                 (errcode_for_file_access(),
 347                                                  errmsg("could not read time zone file \"%s\": %m",
 348                                                                 filename)));
 349                                 return -1;
 350                         }
 351                         /* else we're at EOF after all */
 352                         break;
 353                 }
 354                 if (strlen(tzbuf) == sizeof(tzbuf) - 1)
 355                 {
 356                         /* the line is too long for tzbuf */
 357                         ereport(tz_elevel,
 358                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
 359                                  errmsg("line is too long in time zone file \"%s\", line %d",
 360                                                 filename, lineno)));
 361                         return -1;
 362                 }
 363
 364                 /* skip over whitespace */
 365                 line = tzbuf;
 366                 while (*line && isspace((unsigned char) *line))
 367                         line++;
 368
 369                 if (*line == '\0')              /* empty line */
 370                         continue;
 371                 if (*line == '#')               /* comment line */
 372                         continue;
 373
 374                 if (pg_strncasecmp(line, "@INCLUDE", strlen("@INCLUDE")) == 0)
 375                 {
 376                         /* pstrdup so we can use filename in result data structure */
 377                         char       *includeFile = pstrdup(line + strlen("@INCLUDE"));
 378
 379                         includeFile = strtok(includeFile, WHITESPACE);
 380                         if (!includeFile || !*includeFile)
 381                         {
 382                                 ereport(tz_elevel,
 383                                                 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 384                                                  errmsg("@INCLUDE without file name in time zone file \"%s\", line %d",
 385                                                                 filename, lineno)));
 386                                 return -1;
 387                         }
 388                         n = ParseTzFile(includeFile, depth + 1,
 389                                                         base, arraysize, n);
 390                         if (n < 0)
 391                                 return -1;
 392                         continue;
 393                 }
 394
 395                 if (pg_strncasecmp(line, "@OVERRIDE", strlen("@OVERRIDE")) == 0)
 396                 {
 397                         override = true;
 398                         continue;
 399                 }
 400
 401                 if (!splitTzLine(filename, lineno, line, &tzentry))
 402                         return -1;
 403                 if (!validateTzEntry(&tzentry))
 404                         return -1;
 405                 n = addToArray(base, arraysize, n, &tzentry, override);
 406                 if (n < 0)
 407                         return -1;
 408         }
 409
 410         FreeFile(tzFile);
 411
 412         return n;
 413 }
 414
 415 /*
 416  * load_tzoffsets --- read and parse the specified timezone offset file
 417  *
 418  * filename: name specified by user
 419  * doit: whether to actually apply the new values, or just check
 420  * elevel: elog reporting level (will be less than ERROR)
 421  *
 422  * Returns TRUE if OK, FALSE if not; should avoid erroring out
 423  */
 424 bool
 425 load_tzoffsets(const char *filename, bool doit, int elevel)
 426 {
 427         MemoryContext tmpContext;
 428         MemoryContext oldContext;
 429         tzEntry    *array;
 430         int                     arraysize;
 431         int                     n;
 432
 433         tz_elevel = elevel;
 434
 435         /*
 436          * Create a temp memory context to work in.  This makes it easy to clean
 437          * up afterwards.
 438          */
 439         tmpContext = AllocSetContextCreate(CurrentMemoryContext,
 440                                                                            "TZParserMemory",
 441                                                                            ALLOCSET_SMALL_MINSIZE,
 442                                                                            ALLOCSET_SMALL_INITSIZE,
 443                                                                            ALLOCSET_SMALL_MAXSIZE);
 444         oldContext = MemoryContextSwitchTo(tmpContext);
 445
 446         /* Initialize array at a reasonable size */
 447         arraysize = 128;
 448         array = (tzEntry *) palloc(arraysize * sizeof(tzEntry));
 449
 450         /* Parse the file(s) */
 451         n = ParseTzFile(filename, 0, &array, &arraysize, 0);
 452
 453         /* If no errors and we should apply the result, pass it to datetime.c */
 454         if (n >= 0 && doit)
 455                 InstallTimeZoneAbbrevs(array, n);
 456
 457         /* Clean up */
 458         MemoryContextSwitchTo(oldContext);
 459         MemoryContextDelete(tmpContext);
 460
 461         return (n >= 0);
 462 }