src/backend/utils/cache/inval.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * inval.c
   4  *        POSTGRES cache invalidation dispatcher code.
   5  *
   6  *      This is subtle stuff, so pay attention:
   7  *
   8  *      When a tuple is updated or deleted, our standard time qualification rules
   9  *      consider that it is *still valid* so long as we are in the same command,
  10  *      ie, until the next CommandCounterIncrement() or transaction commit.
  11  *      (See utils/time/tqual.c, and note that system catalogs are generally
  12  *      scanned under SnapshotNow rules by the system, or plain user snapshots
  13  *      for user queries.)      At the command boundary, the old tuple stops
  14  *      being valid and the new version, if any, becomes valid.  Therefore,
  15  *      we cannot simply flush a tuple from the system caches during heap_update()
  16  *      or heap_delete().  The tuple is still good at that point; what's more,
  17  *      even if we did flush it, it might be reloaded into the caches by a later
  18  *      request in the same command.  So the correct behavior is to keep a list
  19  *      of outdated (updated/deleted) tuples and then do the required cache
  20  *      flushes at the next command boundary.  We must also keep track of
  21  *      inserted tuples so that we can flush "negative" cache entries that match
  22  *      the new tuples; again, that mustn't happen until end of command.
  23  *
  24  *      Once we have finished the command, we still need to remember inserted
  25  *      tuples (including new versions of updated tuples), so that we can flush
  26  *      them from the caches if we abort the transaction.  Similarly, we'd better
  27  *      be able to flush "negative" cache entries that may have been loaded in
  28  *      place of deleted tuples, so we still need the deleted ones too.
  29  *
  30  *      If we successfully complete the transaction, we have to broadcast all
  31  *      these invalidation events to other backends (via the SI message queue)
  32  *      so that they can flush obsolete entries from their caches.      Note we have
  33  *      to record the transaction commit before sending SI messages, otherwise
  34  *      the other backends won't see our updated tuples as good.
  35  *
  36  *      When a subtransaction aborts, we can process and discard any events
  37  *      it has queued.  When a subtransaction commits, we just add its events
  38  *      to the pending lists of the parent transaction.
  39  *
  40  *      In short, we need to remember until xact end every insert or delete
  41  *      of a tuple that might be in the system caches.  Updates are treated as
  42  *      two events, delete + insert, for simplicity.  (There are cases where
  43  *      it'd be possible to record just one event, but we don't currently try.)
  44  *
  45  *      We do not need to register EVERY tuple operation in this way, just those
  46  *      on tuples in relations that have associated catcaches.  We do, however,
  47  *      have to register every operation on every tuple that *could* be in a
  48  *      catcache, whether or not it currently is in our cache.  Also, if the
  49  *      tuple is in a relation that has multiple catcaches, we need to register
  50  *      an invalidation message for each such catcache.  catcache.c's
  51  *      PrepareToInvalidateCacheTuple() routine provides the knowledge of which
  52  *      catcaches may need invalidation for a given tuple.
  53  *
  54  *      Also, whenever we see an operation on a pg_class or pg_attribute tuple,
  55  *      we register a relcache flush operation for the relation described by that
  56  *      tuple.  pg_class updates trigger an smgr flush operation as well.
  57  *
  58  *      We keep the relcache and smgr flush requests in lists separate from the
  59  *      catcache tuple flush requests.  This allows us to issue all the pending
  60  *      catcache flushes before we issue relcache flushes, which saves us from
  61  *      loading a catcache tuple during relcache load only to flush it again
  62  *      right away.  Also, we avoid queuing multiple relcache flush requests for
  63  *      the same relation, since a relcache flush is relatively expensive to do.
  64  *      (XXX is it worth testing likewise for duplicate catcache flush entries?
  65  *      Probably not.)
  66  *
  67  *      If a relcache flush is issued for a system relation that we preload
  68  *      from the relcache init file, we must also delete the init file so that
  69  *      it will be rebuilt during the next backend restart.  The actual work of
  70  *      manipulating the init file is in relcache.c, but we keep track of the
  71  *      need for it here.
  72  *
  73  *      The request lists proper are kept in CurTransactionContext of their
  74  *      creating (sub)transaction, since they can be forgotten on abort of that
  75  *      transaction but must be kept till top-level commit otherwise.  For
  76  *      simplicity we keep the controlling list-of-lists in TopTransactionContext.
  77  *
  78  *
  79  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  80  * Portions Copyright (c) 1994, Regents of the University of California
  81  *
  82  * IDENTIFICATION
  83  *        $PostgreSQL$
  84  *
  85  *-------------------------------------------------------------------------
  86  */
  87 #include "postgres.h"
  88
  89 #include "access/twophase_rmgr.h"
  90 #include "access/xact.h"
  91 #include "catalog/catalog.h"
  92 #include "miscadmin.h"
  93 #include "storage/sinval.h"
  94 #include "storage/smgr.h"
  95 #include "utils/inval.h"
  96 #include "utils/memutils.h"
  97 #include "utils/rel.h"
  98 #include "utils/syscache.h"
  99
 100
/*
 * To minimize palloc traffic, we keep pending requests in successively-
 * larger chunks (a slightly more sophisticated version of an expansible
 * array).  All request types can be stored as SharedInvalidationMessage
 * records.  The ordering of requests within a list is never significant.
 *
 * Chunks are chained through "next"; AddInvalidationMessage() pushes each
 * newly allocated chunk onto the head of its list, so only the head chunk
 * ever receives new messages.
 *
 * msgs[] is declared with a single element, but each chunk is allocated
 * with room for "maxitems" messages (see the size computation in
 * AddInvalidationMessage()).
 */
typedef struct InvalidationChunk
{
	struct InvalidationChunk *next;		/* list link */
	int			nitems;			/* # items currently stored in chunk */
	int			maxitems;		/* size of allocated array in this chunk */
	SharedInvalidationMessage msgs[1];	/* VARIABLE LENGTH ARRAY */
} InvalidationChunk;			/* VARIABLE LENGTH STRUCTURE */
 114
/*
 * A logical invalidation list, physically split into two chunk lists so
 * that catcache messages can be processed before relcache/smgr messages
 * (see ProcessInvalidationMessages()).  Either pointer is NULL while the
 * corresponding list is empty.
 */
typedef struct InvalidationListHeader
{
	InvalidationChunk *cclist;	/* list of chunks holding catcache msgs */
	InvalidationChunk *rclist;	/* list of chunks holding relcache/smgr msgs */
} InvalidationListHeader;
 120
 121 /*----------------
 122  * Invalidation info is divided into two lists:
 123  *      1) events so far in current command, not yet reflected to caches.
 124  *      2) events in previous commands of current transaction; these have
 125  *         been reflected to local caches, and must be either broadcast to
 126  *         other backends or rolled back from local cache when we commit
 127  *         or abort the transaction.
 128  * Actually, we need two such lists for each level of nested transaction,
 129  * so that we can discard events from an aborted subtransaction.  When
 130  * a subtransaction commits, we append its lists to the parent's lists.
 131  *
 132  * The relcache-file-invalidated flag can just be a simple boolean,
 133  * since we only act on it at transaction commit; we don't care which
 134  * command of the transaction set it.
 135  *----------------
 136  */
 137
/*
 * Per-(sub)transaction invalidation state.  One of these is allocated,
 * zero-filled, by AtStart_Inval() for the top-level transaction.
 */
typedef struct TransInvalidationInfo
{
	/* Back link to parent transaction's info */
	struct TransInvalidationInfo *parent;

	/* Subtransaction nesting depth */
	/* (set from GetCurrentTransactionNestLevel() in AtStart_Inval) */
	int			my_level;

	/* head of current-command event list */
	InvalidationListHeader CurrentCmdInvalidMsgs;

	/* head of previous-commands event list */
	InvalidationListHeader PriorCmdInvalidMsgs;

	/* init file must be invalidated? */
	/* (set by RegisterRelcacheInvalidation for rels in the init file) */
	bool		RelcacheInitFileInval;
} TransInvalidationInfo;

/*
 * Invalidation state of the current (sub)transaction.  AtStart_Inval()
 * asserts that this is NULL before allocating a fresh entry.
 */
static TransInvalidationInfo *transInvalInfo = NULL;
 157
/*
 * Dynamically-registered callback functions.  Current implementation
 * assumes there won't be very many of these at once; could improve if needed.
 *
 * Both registries are fixed-size arrays paired with a count of the slots
 * currently in use.
 */

/* Capacities of the two callback arrays below */
#define MAX_SYSCACHE_CALLBACKS 20
#define MAX_RELCACHE_CALLBACKS 5

/*
 * Syscache callbacks.  LocalExecuteInvalidationMessage() calls an entry when
 * a catcache message whose cache id equals "id" is processed, passing the
 * affected tuple's item pointer; InvalidateSystemCaches() calls every entry
 * with a NULL item pointer.
 */
static struct SYSCACHECALLBACK
{
	int16		id;				/* cache number */
	SyscacheCallbackFunction function;
	Datum		arg;
}	syscache_callback_list[MAX_SYSCACHE_CALLBACKS];

/* Number of entries of syscache_callback_list currently in use */
static int	syscache_callback_count = 0;

/*
 * Relcache callbacks.  LocalExecuteInvalidationMessage() calls every entry
 * with the invalidated relation's OID; InvalidateSystemCaches() calls every
 * entry with InvalidOid.
 */
static struct RELCACHECALLBACK
{
	RelcacheCallbackFunction function;
	Datum		arg;
}	relcache_callback_list[MAX_RELCACHE_CALLBACKS];

/* Number of entries of relcache_callback_list currently in use */
static int	relcache_callback_count = 0;
 182
/*
 * info values for 2PC callback
 *
 * AtPrepare_Inval() registers a TWOPHASE_INFO_FILE_BEFORE record ahead of
 * the per-message records when the relcache init file must be invalidated.
 * NOTE(review): TWOPHASE_INFO_FILE_AFTER is presumably registered after the
 * message records; confirm against the rest of AtPrepare_Inval().
 */
#define TWOPHASE_INFO_MSG			0	/* SharedInvalidationMessage */
#define TWOPHASE_INFO_FILE_BEFORE	1	/* relcache file inval */
#define TWOPHASE_INFO_FILE_AFTER	2	/* relcache file inval */

/*
 * Forward declaration: AtPrepare_Inval() passes this function to
 * ProcessInvalidationMessages() as the per-message callback.
 */
static void PersistInvalidationMessage(SharedInvalidationMessage *msg);
 189
 190
 191 /* ----------------------------------------------------------------
 192  *                              Invalidation list support functions
 193  *
 * These routines and macros encapsulate processing of the "chunked"
 * representation of what is logically just a list of messages.
 196  * ----------------------------------------------------------------
 197  */
 198
 199 /*
 200  * AddInvalidationMessage
 201  *              Add an invalidation message to a list (of chunks).
 202  *
 203  * Note that we do not pay any great attention to maintaining the original
 204  * ordering of the messages.
 205  */
 206 static void
 207 AddInvalidationMessage(InvalidationChunk **listHdr,
 208                                            SharedInvalidationMessage *msg)
 209 {
 210         InvalidationChunk *chunk = *listHdr;
 211
 212         if (chunk == NULL)
 213         {
 214                 /* First time through; create initial chunk */
 215 #define FIRSTCHUNKSIZE 32
 216                 chunk = (InvalidationChunk *)
 217                         MemoryContextAlloc(CurTransactionContext,
 218                                                            sizeof(InvalidationChunk) +
 219                                         (FIRSTCHUNKSIZE - 1) *sizeof(SharedInvalidationMessage));
 220                 chunk->nitems = 0;
 221                 chunk->maxitems = FIRSTCHUNKSIZE;
 222                 chunk->next = *listHdr;
 223                 *listHdr = chunk;
 224         }
 225         else if (chunk->nitems >= chunk->maxitems)
 226         {
 227                 /* Need another chunk; double size of last chunk */
 228                 int                     chunksize = 2 * chunk->maxitems;
 229
 230                 chunk = (InvalidationChunk *)
 231                         MemoryContextAlloc(CurTransactionContext,
 232                                                            sizeof(InvalidationChunk) +
 233                                                  (chunksize - 1) *sizeof(SharedInvalidationMessage));
 234                 chunk->nitems = 0;
 235                 chunk->maxitems = chunksize;
 236                 chunk->next = *listHdr;
 237                 *listHdr = chunk;
 238         }
 239         /* Okay, add message to current chunk */
 240         chunk->msgs[chunk->nitems] = *msg;
 241         chunk->nitems++;
 242 }
 243
 244 /*
 245  * Append one list of invalidation message chunks to another, resetting
 246  * the source chunk-list pointer to NULL.
 247  */
 248 static void
 249 AppendInvalidationMessageList(InvalidationChunk **destHdr,
 250                                                           InvalidationChunk **srcHdr)
 251 {
 252         InvalidationChunk *chunk = *srcHdr;
 253
 254         if (chunk == NULL)
 255                 return;                                 /* nothing to do */
 256
 257         while (chunk->next != NULL)
 258                 chunk = chunk->next;
 259
 260         chunk->next = *destHdr;
 261
 262         *destHdr = *srcHdr;
 263
 264         *srcHdr = NULL;
 265 }
 266
/*
 * Process a list of invalidation messages.
 *
 * This is a macro that executes the given code fragment for each message in
 * a message chunk list.  The fragment should refer to the message as *msg.
 *
 * listHdr is the head chunk of the list; a NULL head means nothing is
 * executed.  Within the fragment, "msg" is a SharedInvalidationMessage
 * pointer declared by the macro itself, so it hides any variable named
 * "msg" in the calling function.  Because the fragment is expanded inline,
 * a "return" inside it returns from the calling function (the duplicate
 * checks in AddRelcacheInvalidationMessage() and
 * AddSmgrInvalidationMessage() depend on this).
 */
#define ProcessMessageList(listHdr, codeFragment) \
	do { \
		InvalidationChunk *_chunk; \
		for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \
		{ \
			int		_cindex; \
			for (_cindex = 0; _cindex < _chunk->nitems; _cindex++) \
			{ \
				SharedInvalidationMessage *msg = &_chunk->msgs[_cindex]; \
				codeFragment; \
			} \
		} \
	} while (0)
 286
/*
 * Process a list of invalidation messages group-wise.
 *
 * As above, but the code fragment can handle an array of messages.
 * The fragment should refer to the messages as msgs[], with n entries.
 *
 * The fragment is executed once per chunk: "msgs" points at the chunk's
 * message array and "n" is the chunk's current item count.  Both names are
 * declared by the macro.
 */
#define ProcessMessageListMulti(listHdr, codeFragment) \
	do { \
		InvalidationChunk *_chunk; \
		for (_chunk = (listHdr); _chunk != NULL; _chunk = _chunk->next) \
		{ \
			SharedInvalidationMessage *msgs = _chunk->msgs; \
			int		n = _chunk->nitems; \
			codeFragment; \
		} \
	} while (0)
 303
 304
 305 /* ----------------------------------------------------------------
 306  *                              Invalidation set support functions
 307  *
 308  * These routines understand about the division of a logical invalidation
 309  * list into separate physical lists for catcache and relcache/smgr entries.
 310  * ----------------------------------------------------------------
 311  */
 312
 313 /*
 314  * Add a catcache inval entry
 315  */
 316 static void
 317 AddCatcacheInvalidationMessage(InvalidationListHeader *hdr,
 318                                                            int id, uint32 hashValue,
 319                                                            ItemPointer tuplePtr, Oid dbId)
 320 {
 321         SharedInvalidationMessage msg;
 322
 323         msg.cc.id = (int16) id;
 324         msg.cc.tuplePtr = *tuplePtr;
 325         msg.cc.dbId = dbId;
 326         msg.cc.hashValue = hashValue;
 327         AddInvalidationMessage(&hdr->cclist, &msg);
 328 }
 329
 330 /*
 331  * Add a relcache inval entry
 332  */
 333 static void
 334 AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
 335                                                            Oid dbId, Oid relId)
 336 {
 337         SharedInvalidationMessage msg;
 338
 339         /* Don't add a duplicate item */
 340         /* We assume dbId need not be checked because it will never change */
 341         ProcessMessageList(hdr->rclist,
 342                                            if (msg->rc.id == SHAREDINVALRELCACHE_ID &&
 343                                                    msg->rc.relId == relId)
 344                                            return);
 345
 346         /* OK, add the item */
 347         msg.rc.id = SHAREDINVALRELCACHE_ID;
 348         msg.rc.dbId = dbId;
 349         msg.rc.relId = relId;
 350         AddInvalidationMessage(&hdr->rclist, &msg);
 351 }
 352
 353 /*
 354  * Add an smgr inval entry
 355  */
 356 static void
 357 AddSmgrInvalidationMessage(InvalidationListHeader *hdr,
 358                                                    RelFileNode rnode)
 359 {
 360         SharedInvalidationMessage msg;
 361
 362         /* Don't add a duplicate item */
 363         ProcessMessageList(hdr->rclist,
 364                                            if (msg->sm.id == SHAREDINVALSMGR_ID &&
 365                                                    RelFileNodeEquals(msg->sm.rnode, rnode))
 366                                            return);
 367
 368         /* OK, add the item */
 369         msg.sm.id = SHAREDINVALSMGR_ID;
 370         msg.sm.rnode = rnode;
 371         AddInvalidationMessage(&hdr->rclist, &msg);
 372 }
 373
 374 /*
 375  * Append one list of invalidation messages to another, resetting
 376  * the source list to empty.
 377  */
 378 static void
 379 AppendInvalidationMessages(InvalidationListHeader *dest,
 380                                                    InvalidationListHeader *src)
 381 {
 382         AppendInvalidationMessageList(&dest->cclist, &src->cclist);
 383         AppendInvalidationMessageList(&dest->rclist, &src->rclist);
 384 }
 385
 386 /*
 387  * Execute the given function for all the messages in an invalidation list.
 388  * The list is not altered.
 389  *
 390  * catcache entries are processed first, for reasons mentioned above.
 391  */
 392 static void
 393 ProcessInvalidationMessages(InvalidationListHeader *hdr,
 394                                                         void (*func) (SharedInvalidationMessage *msg))
 395 {
 396         ProcessMessageList(hdr->cclist, func(msg));
 397         ProcessMessageList(hdr->rclist, func(msg));
 398 }
 399
 400 /*
 401  * As above, but the function is able to process an array of messages
 402  * rather than just one at a time.
 403  */
 404 static void
 405 ProcessInvalidationMessagesMulti(InvalidationListHeader *hdr,
 406                                  void (*func) (const SharedInvalidationMessage *msgs, int n))
 407 {
 408         ProcessMessageListMulti(hdr->cclist, func(msgs, n));
 409         ProcessMessageListMulti(hdr->rclist, func(msgs, n));
 410 }
 411
 412 /* ----------------------------------------------------------------
 413  *                                        private support functions
 414  * ----------------------------------------------------------------
 415  */
 416
 417 /*
 418  * RegisterCatcacheInvalidation
 419  *
 420  * Register an invalidation event for a catcache tuple entry.
 421  */
 422 static void
 423 RegisterCatcacheInvalidation(int cacheId,
 424                                                          uint32 hashValue,
 425                                                          ItemPointer tuplePtr,
 426                                                          Oid dbId)
 427 {
 428         AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
 429                                                                    cacheId, hashValue, tuplePtr, dbId);
 430 }
 431
 432 /*
 433  * RegisterRelcacheInvalidation
 434  *
 435  * As above, but register a relcache invalidation event.
 436  */
 437 static void
 438 RegisterRelcacheInvalidation(Oid dbId, Oid relId)
 439 {
 440         AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
 441                                                                    dbId, relId);
 442
 443         /*
 444          * Most of the time, relcache invalidation is associated with system
 445          * catalog updates, but there are a few cases where it isn't.  Quick hack
 446          * to ensure that the next CommandCounterIncrement() will think that we
 447          * need to do CommandEndInvalidationMessages().
 448          */
 449         (void) GetCurrentCommandId(true);
 450
 451         /*
 452          * If the relation being invalidated is one of those cached in the
 453          * relcache init file, mark that we need to zap that file at commit.
 454          */
 455         if (RelationIdIsInInitFile(relId))
 456                 transInvalInfo->RelcacheInitFileInval = true;
 457 }
 458
 459 /*
 460  * RegisterSmgrInvalidation
 461  *
 462  * As above, but register an smgr invalidation event.
 463  */
 464 static void
 465 RegisterSmgrInvalidation(RelFileNode rnode)
 466 {
 467         AddSmgrInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
 468                                                            rnode);
 469
 470         /*
 471          * As above, just in case there is not an associated catalog change.
 472          */
 473         (void) GetCurrentCommandId(true);
 474 }
 475
 476 /*
 477  * LocalExecuteInvalidationMessage
 478  *
 479  * Process a single invalidation message (which could be of any type).
 480  * Only the local caches are flushed; this does not transmit the message
 481  * to other backends.
 482  */
 483 static void
 484 LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
 485 {
 486         int                     i;
 487
 488         if (msg->id >= 0)
 489         {
 490                 if (msg->cc.dbId == MyDatabaseId || msg->cc.dbId == 0)
 491                 {
 492                         CatalogCacheIdInvalidate(msg->cc.id,
 493                                                                          msg->cc.hashValue,
 494                                                                          &msg->cc.tuplePtr);
 495
 496                         for (i = 0; i < syscache_callback_count; i++)
 497                         {
 498                                 struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i;
 499
 500                                 if (ccitem->id == msg->cc.id)
 501                                         (*ccitem->function) (ccitem->arg,
 502                                                                                  msg->cc.id, &msg->cc.tuplePtr);
 503                         }
 504                 }
 505         }
 506         else if (msg->id == SHAREDINVALRELCACHE_ID)
 507         {
 508                 if (msg->rc.dbId == MyDatabaseId || msg->rc.dbId == InvalidOid)
 509                 {
 510                         RelationCacheInvalidateEntry(msg->rc.relId);
 511
 512                         for (i = 0; i < relcache_callback_count; i++)
 513                         {
 514                                 struct RELCACHECALLBACK *ccitem = relcache_callback_list + i;
 515
 516                                 (*ccitem->function) (ccitem->arg, msg->rc.relId);
 517                         }
 518                 }
 519         }
 520         else if (msg->id == SHAREDINVALSMGR_ID)
 521         {
 522                 /*
 523                  * We could have smgr entries for relations of other databases, so no
 524                  * short-circuit test is possible here.
 525                  */
 526                 smgrclosenode(msg->sm.rnode);
 527         }
 528         else
 529                 elog(FATAL, "unrecognized SI message id: %d", msg->id);
 530 }
 531
 532 /*
 533  *              InvalidateSystemCaches
 534  *
 535  *              This blows away all tuples in the system catalog caches and
 536  *              all the cached relation descriptors and smgr cache entries.
 537  *              Relation descriptors that have positive refcounts are then rebuilt.
 538  *
 539  *              We call this when we see a shared-inval-queue overflow signal,
 540  *              since that tells us we've lost some shared-inval messages and hence
 541  *              don't know what needs to be invalidated.
 542  */
 543 static void
 544 InvalidateSystemCaches(void)
 545 {
 546         int                     i;
 547
 548         ResetCatalogCaches();
 549         RelationCacheInvalidate();      /* gets smgr cache too */
 550
 551         for (i = 0; i < syscache_callback_count; i++)
 552         {
 553                 struct SYSCACHECALLBACK *ccitem = syscache_callback_list + i;
 554
 555                 (*ccitem->function) (ccitem->arg, ccitem->id, NULL);
 556         }
 557
 558         for (i = 0; i < relcache_callback_count; i++)
 559         {
 560                 struct RELCACHECALLBACK *ccitem = relcache_callback_list + i;
 561
 562                 (*ccitem->function) (ccitem->arg, InvalidOid);
 563         }
 564 }
 565
 566 /*
 567  * PrepareForTupleInvalidation
 568  *              Detect whether invalidation of this tuple implies invalidation
 569  *              of catalog/relation cache entries; if so, register inval events.
 570  */
 571 static void
 572 PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
 573 {
 574         Oid                     tupleRelId;
 575         Oid                     databaseId;
 576         Oid                     relationId;
 577
 578         /* Do nothing during bootstrap */
 579         if (IsBootstrapProcessingMode())
 580                 return;
 581
 582         /*
 583          * We only need to worry about invalidation for tuples that are in system
 584          * relations; user-relation tuples are never in catcaches and can't affect
 585          * the relcache either.
 586          */
 587         if (!IsSystemRelation(relation))
 588                 return;
 589
 590         /*
 591          * TOAST tuples can likewise be ignored here. Note that TOAST tables are
 592          * considered system relations so they are not filtered by the above test.
 593          */
 594         if (IsToastRelation(relation))
 595                 return;
 596
 597         /*
 598          * First let the catcache do its thing
 599          */
 600         PrepareToInvalidateCacheTuple(relation, tuple,
 601                                                                   RegisterCatcacheInvalidation);
 602
 603         /*
 604          * Now, is this tuple one of the primary definers of a relcache entry?
 605          */
 606         tupleRelId = RelationGetRelid(relation);
 607
 608         if (tupleRelId == RelationRelationId)
 609         {
 610                 Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);
 611                 RelFileNode rnode;
 612
 613                 relationId = HeapTupleGetOid(tuple);
 614                 if (classtup->relisshared)
 615                         databaseId = InvalidOid;
 616                 else
 617                         databaseId = MyDatabaseId;
 618
 619                 /*
 620                  * We need to send out an smgr inval as well as a relcache inval. This
 621                  * is needed because other backends might possibly possess smgr cache
 622                  * but not relcache entries for the target relation.
 623                  *
 624                  * Note: during a pg_class row update that assigns a new relfilenode
 625                  * or reltablespace value, we will be called on both the old and new
 626                  * tuples, and thus will broadcast invalidation messages showing both
 627                  * the old and new RelFileNode values.  This ensures that other
 628                  * backends will close smgr references to the old file.
 629                  *
 630                  * XXX possible future cleanup: it might be better to trigger smgr
 631                  * flushes explicitly, rather than indirectly from pg_class updates.
 632                  */
 633                 if (classtup->reltablespace)
 634                         rnode.spcNode = classtup->reltablespace;
 635                 else
 636                         rnode.spcNode = MyDatabaseTableSpace;
 637                 rnode.dbNode = databaseId;
 638                 rnode.relNode = classtup->relfilenode;
 639                 RegisterSmgrInvalidation(rnode);
 640         }
 641         else if (tupleRelId == AttributeRelationId)
 642         {
 643                 Form_pg_attribute atttup = (Form_pg_attribute) GETSTRUCT(tuple);
 644
 645                 relationId = atttup->attrelid;
 646
 647                 /*
 648                  * KLUGE ALERT: we always send the relcache event with MyDatabaseId,
 649                  * even if the rel in question is shared (which we can't easily tell).
 650                  * This essentially means that only backends in this same database
 651                  * will react to the relcache flush request.  This is in fact
 652                  * appropriate, since only those backends could see our pg_attribute
 653                  * change anyway.  It looks a bit ugly though.  (In practice, shared
 654                  * relations can't have schema changes after bootstrap, so we should
 655                  * never come here for a shared rel anyway.)
 656                  */
 657                 databaseId = MyDatabaseId;
 658         }
 659         else if (tupleRelId == IndexRelationId)
 660         {
 661                 Form_pg_index indextup = (Form_pg_index) GETSTRUCT(tuple);
 662
 663                 /*
 664                  * When a pg_index row is updated, we should send out a relcache inval
 665                  * for the index relation.      As above, we don't know the shared status
 666                  * of the index, but in practice it doesn't matter since indexes of
 667                  * shared catalogs can't have such updates.
 668                  */
 669                 relationId = indextup->indexrelid;
 670                 databaseId = MyDatabaseId;
 671         }
 672         else
 673                 return;
 674
 675         /*
 676          * Yes.  We need to register a relcache invalidation event.
 677          */
 678         RegisterRelcacheInvalidation(databaseId, relationId);
 679 }
 680
 681
 682 /* ----------------------------------------------------------------
 683  *                                        public functions
 684  * ----------------------------------------------------------------
 685  */
 686
 687 /*
 688  * AcceptInvalidationMessages
 689  *              Read and process invalidation messages from the shared invalidation
 690  *              message queue.
 691  *
 692  * Note:
 693  *              This should be called as the first step in processing a transaction.
 694  */
 695 void
 696 AcceptInvalidationMessages(void)
 697 {
 698         ReceiveSharedInvalidMessages(LocalExecuteInvalidationMessage,
 699                                                                  InvalidateSystemCaches);
 700
 701         /*
 702          * Test code to force cache flushes anytime a flush could happen.
 703          *
 704          * If used with CLOBBER_FREED_MEMORY, CLOBBER_CACHE_ALWAYS provides a
 705          * fairly thorough test that the system contains no cache-flush hazards.
 706          * However, it also makes the system unbelievably slow --- the regression
 707          * tests take about 100 times longer than normal.
 708          *
 709          * If you're a glutton for punishment, try CLOBBER_CACHE_RECURSIVELY. This
 710          * slows things by at least a factor of 10000, so I wouldn't suggest
 711          * trying to run the entire regression tests that way.  It's useful to try
 712          * a few simple tests, to make sure that cache reload isn't subject to
 713          * internal cache-flush hazards, but after you've done a few thousand
 714          * recursive reloads it's unlikely you'll learn more.
 715          */
 716 #if defined(CLOBBER_CACHE_ALWAYS)
 717         {
 718                 static bool in_recursion = false;
 719
 720                 if (!in_recursion)
 721                 {
 722                         in_recursion = true;
 723                         InvalidateSystemCaches();
 724                         in_recursion = false;
 725                 }
 726         }
 727 #elif defined(CLOBBER_CACHE_RECURSIVELY)
 728         InvalidateSystemCaches();
 729 #endif
 730 }
 731
 732 /*
 733  * AtStart_Inval
 734  *              Initialize inval lists at start of a main transaction.
 735  */
 736 void
 737 AtStart_Inval(void)
 738 {
 739         Assert(transInvalInfo == NULL);
 740         transInvalInfo = (TransInvalidationInfo *)
 741                 MemoryContextAllocZero(TopTransactionContext,
 742                                                            sizeof(TransInvalidationInfo));
 743         transInvalInfo->my_level = GetCurrentTransactionNestLevel();
 744 }
 745
 746 /*
 747  * AtPrepare_Inval
 748  *              Save the inval lists state at 2PC transaction prepare.
 749  *
 750  * In this phase we just generate 2PC records for all the pending invalidation
 751  * work.
 752  */
 753 void
 754 AtPrepare_Inval(void)
 755 {
 756         /* Must be at top of stack */
 757         Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
 758
 759         /*
 760          * Relcache init file invalidation requires processing both before and
 761          * after we send the SI messages.
 762          */
 763         if (transInvalInfo->RelcacheInitFileInval)
 764                 RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_BEFORE,
 765                                                            NULL, 0);
 766
 767         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 768                                                            &transInvalInfo->CurrentCmdInvalidMsgs);
 769
 770         ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 771                                                                 PersistInvalidationMessage);
 772
 773         if (transInvalInfo->RelcacheInitFileInval)
 774                 RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_FILE_AFTER,
 775                                                            NULL, 0);
 776 }
 777
 778 /*
 779  * PostPrepare_Inval
 780  *              Clean up after successful PREPARE.
 781  *
 782  * Here, we want to act as though the transaction aborted, so that we will
 783  * undo any syscache changes it made, thereby bringing us into sync with the
 784  * outside world, which doesn't believe the transaction committed yet.
 785  *
 786  * If the prepared transaction is later aborted, there is nothing more to
 787  * do; if it commits, we will receive the consequent inval messages just
 788  * like everyone else.
 789  */
 790 void
 791 PostPrepare_Inval(void)
 792 {
 793         AtEOXact_Inval(false);
 794 }
 795
 796 /*
 797  * AtSubStart_Inval
 798  *              Initialize inval lists at start of a subtransaction.
 799  */
 800 void
 801 AtSubStart_Inval(void)
 802 {
 803         TransInvalidationInfo *myInfo;
 804
 805         Assert(transInvalInfo != NULL);
 806         myInfo = (TransInvalidationInfo *)
 807                 MemoryContextAllocZero(TopTransactionContext,
 808                                                            sizeof(TransInvalidationInfo));
 809         myInfo->parent = transInvalInfo;
 810         myInfo->my_level = GetCurrentTransactionNestLevel();
 811         transInvalInfo = myInfo;
 812 }
 813
 814 /*
 815  * PersistInvalidationMessage
 816  *              Write an invalidation message to the 2PC state file.
 817  */
 818 static void
 819 PersistInvalidationMessage(SharedInvalidationMessage *msg)
 820 {
 821         RegisterTwoPhaseRecord(TWOPHASE_RM_INVAL_ID, TWOPHASE_INFO_MSG,
 822                                                    msg, sizeof(SharedInvalidationMessage));
 823 }
 824
 825 /*
 826  * inval_twophase_postcommit
 827  *              Process an invalidation message from the 2PC state file.
 828  */
 829 void
 830 inval_twophase_postcommit(TransactionId xid, uint16 info,
 831                                                   void *recdata, uint32 len)
 832 {
 833         SharedInvalidationMessage *msg;
 834
 835         switch (info)
 836         {
 837                 case TWOPHASE_INFO_MSG:
 838                         msg = (SharedInvalidationMessage *) recdata;
 839                         Assert(len == sizeof(SharedInvalidationMessage));
 840                         SendSharedInvalidMessages(msg, 1);
 841                         break;
 842                 case TWOPHASE_INFO_FILE_BEFORE:
 843                         RelationCacheInitFileInvalidate(true);
 844                         break;
 845                 case TWOPHASE_INFO_FILE_AFTER:
 846                         RelationCacheInitFileInvalidate(false);
 847                         break;
 848                 default:
 849                         Assert(false);
 850                         break;
 851         }
 852 }
 853
 854
 855 /*
 856  * AtEOXact_Inval
 857  *              Process queued-up invalidation messages at end of main transaction.
 858  *
 859  * If isCommit, we must send out the messages in our PriorCmdInvalidMsgs list
 860  * to the shared invalidation message queue.  Note that these will be read
 861  * not only by other backends, but also by our own backend at the next
 862  * transaction start (via AcceptInvalidationMessages).  This means that
 863  * we can skip immediate local processing of anything that's still in
 864  * CurrentCmdInvalidMsgs, and just send that list out too.
 865  *
 866  * If not isCommit, we are aborting, and must locally process the messages
 867  * in PriorCmdInvalidMsgs.      No messages need be sent to other backends,
 868  * since they'll not have seen our changed tuples anyway.  We can forget
 869  * about CurrentCmdInvalidMsgs too, since those changes haven't touched
 870  * the caches yet.
 871  *
 872  * In any case, reset the various lists to empty.  We need not physically
 873  * free memory here, since TopTransactionContext is about to be emptied
 874  * anyway.
 875  *
 876  * Note:
 877  *              This should be called as the last step in processing a transaction.
 878  */
 879 void
 880 AtEOXact_Inval(bool isCommit)
 881 {
 882         if (isCommit)
 883         {
 884                 /* Must be at top of stack */
 885                 Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
 886
 887                 /*
 888                  * Relcache init file invalidation requires processing both before and
 889                  * after we send the SI messages.  However, we need not do anything
 890                  * unless we committed.
 891                  */
 892                 if (transInvalInfo->RelcacheInitFileInval)
 893                         RelationCacheInitFileInvalidate(true);
 894
 895                 AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 896                                                                    &transInvalInfo->CurrentCmdInvalidMsgs);
 897
 898                 ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs,
 899                                                                                  SendSharedInvalidMessages);
 900
 901                 if (transInvalInfo->RelcacheInitFileInval)
 902                         RelationCacheInitFileInvalidate(false);
 903         }
 904         else if (transInvalInfo != NULL)
 905         {
 906                 /* Must be at top of stack */
 907                 Assert(transInvalInfo->parent == NULL);
 908
 909                 ProcessInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
 910                                                                         LocalExecuteInvalidationMessage);
 911         }
 912
 913         /* Need not free anything explicitly */
 914         transInvalInfo = NULL;
 915 }
 916
 917 /*
 918  * AtEOSubXact_Inval
 919  *              Process queued-up invalidation messages at end of subtransaction.
 920  *
 921  * If isCommit, process CurrentCmdInvalidMsgs if any (there probably aren't),
 922  * and then attach both CurrentCmdInvalidMsgs and PriorCmdInvalidMsgs to the
 923  * parent's PriorCmdInvalidMsgs list.
 924  *
 925  * If not isCommit, we are aborting, and must locally process the messages
 926  * in PriorCmdInvalidMsgs.      No messages need be sent to other backends.
 927  * We can forget about CurrentCmdInvalidMsgs too, since those changes haven't
 928  * touched the caches yet.
 929  *
 930  * In any case, pop the transaction stack.      We need not physically free memory
 931  * here, since CurTransactionContext is about to be emptied anyway
 932  * (if aborting).  Beware of the possibility of aborting the same nesting
 933  * level twice, though.
 934  */
 935 void
 936 AtEOSubXact_Inval(bool isCommit)
 937 {
 938         int                     my_level = GetCurrentTransactionNestLevel();
 939         TransInvalidationInfo *myInfo = transInvalInfo;
 940
 941         if (isCommit)
 942         {
 943                 /* Must be at non-top of stack */
 944                 Assert(myInfo != NULL && myInfo->parent != NULL);
 945                 Assert(myInfo->my_level == my_level);
 946
 947                 /* If CurrentCmdInvalidMsgs still has anything, fix it */
 948                 CommandEndInvalidationMessages();
 949
 950                 /* Pass up my inval messages to parent */
 951                 AppendInvalidationMessages(&myInfo->parent->PriorCmdInvalidMsgs,
 952                                                                    &myInfo->PriorCmdInvalidMsgs);
 953
 954                 /* Pending relcache inval becomes parent's problem too */
 955                 if (myInfo->RelcacheInitFileInval)
 956                         myInfo->parent->RelcacheInitFileInval = true;
 957
 958                 /* Pop the transaction state stack */
 959                 transInvalInfo = myInfo->parent;
 960
 961                 /* Need not free anything else explicitly */
 962                 pfree(myInfo);
 963         }
 964         else if (myInfo != NULL && myInfo->my_level == my_level)
 965         {
 966                 /* Must be at non-top of stack */
 967                 Assert(myInfo->parent != NULL);
 968
 969                 ProcessInvalidationMessages(&myInfo->PriorCmdInvalidMsgs,
 970                                                                         LocalExecuteInvalidationMessage);
 971
 972                 /* Pop the transaction state stack */
 973                 transInvalInfo = myInfo->parent;
 974
 975                 /* Need not free anything else explicitly */
 976                 pfree(myInfo);
 977         }
 978 }
 979
 980 /*
 981  * CommandEndInvalidationMessages
 982  *              Process queued-up invalidation messages at end of one command
 983  *              in a transaction.
 984  *
 985  * Here, we send no messages to the shared queue, since we don't know yet if
 986  * we will commit.      We do need to locally process the CurrentCmdInvalidMsgs
 987  * list, so as to flush our caches of any entries we have outdated in the
 988  * current command.  We then move the current-cmd list over to become part
 989  * of the prior-cmds list.
 990  *
 991  * Note:
 992  *              This should be called during CommandCounterIncrement(),
 993  *              after we have advanced the command ID.
 994  */
 995 void
 996 CommandEndInvalidationMessages(void)
 997 {
 998         /*
 999          * You might think this shouldn't be called outside any transaction, but
1000          * bootstrap does it, and also ABORT issued when not in a transaction. So
1001          * just quietly return if no state to work on.
1002          */
1003         if (transInvalInfo == NULL)
1004                 return;
1005
1006         ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
1007                                                                 LocalExecuteInvalidationMessage);
1008         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
1009                                                            &transInvalInfo->CurrentCmdInvalidMsgs);
1010 }
1011
1012
1013 /*
1014  * BeginNonTransactionalInvalidation
1015  *              Prepare for invalidation messages for nontransactional updates.
1016  *
1017  * A nontransactional invalidation is one that must be sent whether or not
1018  * the current transaction eventually commits.  We arrange for all invals
1019  * queued between this call and EndNonTransactionalInvalidation() to be sent
1020  * immediately when the latter is called.
1021  *
1022  * Currently, this is only used by heap_page_prune(), and only when it is
1023  * invoked during VACUUM FULL's first pass over a table.  We expect therefore
1024  * that we are not inside a subtransaction and there are no already-pending
1025  * invalidations.  This could be relaxed by setting up a new nesting level of
1026  * invalidation data, but for now there's no need.  Note that heap_page_prune
1027  * knows that this function does not change any state, and therefore there's
1028  * no need to worry about cleaning up if there's an elog(ERROR) before
1029  * reaching EndNonTransactionalInvalidation (the invals will just be thrown
1030  * away if that happens).
1031  */
1032 void
1033 BeginNonTransactionalInvalidation(void)
1034 {
1035         /* Must be at top of stack */
1036         Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
1037
1038         /* Must not have any previously-queued activity */
1039         Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
1040         Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
1041         Assert(transInvalInfo->CurrentCmdInvalidMsgs.cclist == NULL);
1042         Assert(transInvalInfo->CurrentCmdInvalidMsgs.rclist == NULL);
1043         Assert(transInvalInfo->RelcacheInitFileInval == false);
1044 }
1045
1046 /*
1047  * EndNonTransactionalInvalidation
1048  *              Process queued-up invalidation messages for nontransactional updates.
1049  *
1050  * We expect to find messages in CurrentCmdInvalidMsgs only (else there
1051  * was a CommandCounterIncrement within the "nontransactional" update).
1052  * We must process them locally and send them out to the shared invalidation
1053  * message queue.
1054  *
1055  * We must also reset the lists to empty and explicitly free memory (we can't
1056  * rely on end-of-transaction cleanup for that).
1057  */
1058 void
1059 EndNonTransactionalInvalidation(void)
1060 {
1061         InvalidationChunk *chunk;
1062         InvalidationChunk *next;
1063
1064         /* Must be at top of stack */
1065         Assert(transInvalInfo != NULL && transInvalInfo->parent == NULL);
1066
1067         /* Must not have any prior-command messages */
1068         Assert(transInvalInfo->PriorCmdInvalidMsgs.cclist == NULL);
1069         Assert(transInvalInfo->PriorCmdInvalidMsgs.rclist == NULL);
1070
1071         /*
1072          * At present, this function is only used for CTID-changing updates; since
1073          * the relcache init file doesn't store any tuple CTIDs, we don't have to
1074          * invalidate it.  That might not be true forever though, in which case
1075          * we'd need code similar to AtEOXact_Inval.
1076          */
1077
1078         /* Send out the invals */
1079         ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
1080                                                                 LocalExecuteInvalidationMessage);
1081         ProcessInvalidationMessagesMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
1082                                                                          SendSharedInvalidMessages);
1083
1084         /* Clean up and release memory */
1085         for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.cclist;
1086                  chunk != NULL;
1087                  chunk = next)
1088         {
1089                 next = chunk->next;
1090                 pfree(chunk);
1091         }
1092         for (chunk = transInvalInfo->CurrentCmdInvalidMsgs.rclist;
1093                  chunk != NULL;
1094                  chunk = next)
1095         {
1096                 next = chunk->next;
1097                 pfree(chunk);
1098         }
1099         transInvalInfo->CurrentCmdInvalidMsgs.cclist = NULL;
1100         transInvalInfo->CurrentCmdInvalidMsgs.rclist = NULL;
1101         transInvalInfo->RelcacheInitFileInval = false;
1102 }
1103
1104
1105 /*
1106  * CacheInvalidateHeapTuple
1107  *              Register the given tuple for invalidation at end of command
1108  *              (ie, current command is creating or outdating this tuple).
1109  */
1110 void
1111 CacheInvalidateHeapTuple(Relation relation, HeapTuple tuple)
1112 {
1113         PrepareForTupleInvalidation(relation, tuple);
1114 }
1115
1116 /*
1117  * CacheInvalidateRelcache
1118  *              Register invalidation of the specified relation's relcache entry
1119  *              at end of command.
1120  *
1121  * This is used in places that need to force relcache rebuild but aren't
1122  * changing any of the tuples recognized as contributors to the relcache
1123  * entry by PrepareForTupleInvalidation.  (An example is dropping an index.)
1124  * We assume in particular that relfilenode/reltablespace aren't changing
1125  * (so the rd_node value is still good).
1126  *
1127  * XXX most callers of this probably don't need to force an smgr flush.
1128  */
1129 void
1130 CacheInvalidateRelcache(Relation relation)
1131 {
1132         Oid                     databaseId;
1133         Oid                     relationId;
1134
1135         relationId = RelationGetRelid(relation);
1136         if (relation->rd_rel->relisshared)
1137                 databaseId = InvalidOid;
1138         else
1139                 databaseId = MyDatabaseId;
1140
1141         RegisterRelcacheInvalidation(databaseId, relationId);
1142         RegisterSmgrInvalidation(relation->rd_node);
1143 }
1144
1145 /*
1146  * CacheInvalidateRelcacheByTuple
1147  *              As above, but relation is identified by passing its pg_class tuple.
1148  */
1149 void
1150 CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
1151 {
1152         Form_pg_class classtup = (Form_pg_class) GETSTRUCT(classTuple);
1153         Oid                     databaseId;
1154         Oid                     relationId;
1155         RelFileNode rnode;
1156
1157         relationId = HeapTupleGetOid(classTuple);
1158         if (classtup->relisshared)
1159                 databaseId = InvalidOid;
1160         else
1161                 databaseId = MyDatabaseId;
1162         if (classtup->reltablespace)
1163                 rnode.spcNode = classtup->reltablespace;
1164         else
1165                 rnode.spcNode = MyDatabaseTableSpace;
1166         rnode.dbNode = databaseId;
1167         rnode.relNode = classtup->relfilenode;
1168
1169         RegisterRelcacheInvalidation(databaseId, relationId);
1170         RegisterSmgrInvalidation(rnode);
1171 }
1172
1173 /*
1174  * CacheInvalidateRelcacheByRelid
1175  *              As above, but relation is identified by passing its OID.
1176  *              This is the least efficient of the three options; use one of
1177  *              the above routines if you have a Relation or pg_class tuple.
1178  */
1179 void
1180 CacheInvalidateRelcacheByRelid(Oid relid)
1181 {
1182         HeapTuple       tup;
1183
1184         tup = SearchSysCache(RELOID,
1185                                                  ObjectIdGetDatum(relid),
1186                                                  0, 0, 0);
1187         if (!HeapTupleIsValid(tup))
1188                 elog(ERROR, "cache lookup failed for relation %u", relid);
1189         CacheInvalidateRelcacheByTuple(tup);
1190         ReleaseSysCache(tup);
1191 }
1192
1193 /*
1194  * CacheRegisterSyscacheCallback
1195  *              Register the specified function to be called for all future
1196  *              invalidation events in the specified cache.  The cache ID and the
1197  *              TID of the tuple being invalidated will be passed to the function.
1198  *
1199  * NOTE: NULL will be passed for the TID if a cache reset request is received.
1200  * In this case the called routines should flush all cached state.
1201  */
1202 void
1203 CacheRegisterSyscacheCallback(int cacheid,
1204                                                           SyscacheCallbackFunction func,
1205                                                           Datum arg)
1206 {
1207         if (syscache_callback_count >= MAX_SYSCACHE_CALLBACKS)
1208                 elog(FATAL, "out of syscache_callback_list slots");
1209
1210         syscache_callback_list[syscache_callback_count].id = cacheid;
1211         syscache_callback_list[syscache_callback_count].function = func;
1212         syscache_callback_list[syscache_callback_count].arg = arg;
1213
1214         ++syscache_callback_count;
1215 }
1216
1217 /*
1218  * CacheRegisterRelcacheCallback
1219  *              Register the specified function to be called for all future
1220  *              relcache invalidation events.  The OID of the relation being
1221  *              invalidated will be passed to the function.
1222  *
1223  * NOTE: InvalidOid will be passed if a cache reset request is received.
1224  * In this case the called routines should flush all cached state.
1225  */
1226 void
1227 CacheRegisterRelcacheCallback(RelcacheCallbackFunction func,
1228                                                           Datum arg)
1229 {
1230         if (relcache_callback_count >= MAX_RELCACHE_CALLBACKS)
1231                 elog(FATAL, "out of relcache_callback_list slots");
1232
1233         relcache_callback_list[relcache_callback_count].function = func;
1234         relcache_callback_list[relcache_callback_count].arg = arg;
1235
1236         ++relcache_callback_count;
1237 }