src/backend/storage/lmgr/deadlock.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * deadlock.c
   4  *        POSTGRES deadlock detection code
   5  *
   6  * See src/backend/storage/lmgr/README for a description of the deadlock
   7  * detection and resolution algorithms.
   8  *
   9  *
  10  * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
  11  * Portions Copyright (c) 1994, Regents of the University of California
  12  *
  13  *
  14  * IDENTIFICATION
  15  *        src/backend/storage/lmgr/deadlock.c
  16  *
  17  *      Interface:
  18  *
  19  *      DeadLockCheck()
  20  *      DeadLockReport()
  21  *      RememberSimpleDeadLock()
  22  *      InitDeadLockChecking()
  23  *
  24  *-------------------------------------------------------------------------
  25  */
  26 #include "postgres.h"
  27
  28 #include "miscadmin.h"
  29 #include "pg_trace.h"
  30 #include "pgstat.h"
  31 #include "storage/lmgr.h"
  32 #include "storage/proc.h"
  33 #include "utils/memutils.h"
  34
  35
  36 /*
  37  * One edge in the waits-for graph.
  38  *
  39  * waiter and blocker may or may not be members of a lock group, but if either
  40  * is, it will be the leader rather than any other member of the lock group.
  41  * The group leaders act as representatives of the whole group even though
  42  * those particular processes need not be waiting at all.  There will be at
  43  * least one member of the waiter's lock group on the wait queue for the given
  44  * lock, maybe more.
  45  */
  46 typedef struct
  47 {
  48         PGPROC     *waiter;                     /* the leader of the waiting lock group */
  49         PGPROC     *blocker;            /* the leader of the group it is waiting for */
  50         LOCK       *lock;                       /* the lock being waited for */
  51         int                     pred;                   /* workspace for TopoSort */
  52         int                     link;                   /* workspace for TopoSort */
  53 } EDGE;
  54
  55 /* One potential reordering of a lock's wait queue */
  56 typedef struct
  57 {
  58         LOCK       *lock;                       /* the lock whose wait queue is described */
  59         PGPROC    **procs;                      /* array of PGPROC *'s in new wait order */
  60         int                     nProcs;
  61 } WAIT_ORDER;
  62
  63 /*
  64  * Information saved about each edge in a detected deadlock cycle.  This
  65  * is used to print a diagnostic message upon failure.
  66  *
  67  * Note: because we want to examine this info after releasing the lock
  68  * manager's partition locks, we can't just store LOCK and PGPROC pointers;
  69  * we must extract out all the info we want to be able to print.
  70  */
  71 typedef struct
  72 {
  73         LOCKTAG         locktag;                /* ID of awaited lock object */
  74         LOCKMODE        lockmode;               /* type of lock we're waiting for */
  75         int                     pid;                    /* PID of blocked backend */
  76 } DEADLOCK_INFO;
  77
  78
  79 static bool DeadLockCheckRecurse(PGPROC *proc);
  80 static int      TestConfiguration(PGPROC *startProc);
  81 static bool FindLockCycle(PGPROC *checkProc,
  82                                                   EDGE *softEdges, int *nSoftEdges);
  83 static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
  84                                                                  EDGE *softEdges, int *nSoftEdges);
  85 static bool FindLockCycleRecurseMember(PGPROC *checkProc,
  86                                                                            PGPROC *checkProcLeader,
  87                                                                            int depth, EDGE *softEdges, int *nSoftEdges);
  88 static bool ExpandConstraints(EDGE *constraints, int nConstraints);
  89 static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
  90                                          PGPROC **ordering);
  91
  92 #ifdef DEBUG_DEADLOCK
  93 static void PrintLockQueue(LOCK *lock, const char *info);
  94 #endif
  95
  96
  97 /*
  98  * Working space for the deadlock detector
  99  */
 100
 101 /* Workspace for FindLockCycle */
 102 static PGPROC **visitedProcs;   /* Array of visited procs */
 103 static int      nVisitedProcs;
 104
 105 /* Workspace for TopoSort */
 106 static PGPROC **topoProcs;              /* Array of not-yet-output procs */
 107 static int *beforeConstraints;  /* Counts of remaining before-constraints */
 108 static int *afterConstraints;   /* List head for after-constraints */
 109
 110 /* Output area for ExpandConstraints */
 111 static WAIT_ORDER *waitOrders;  /* Array of proposed queue rearrangements */
 112 static int      nWaitOrders;
 113 static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */
 114
 115 /* Current list of constraints being considered */
 116 static EDGE *curConstraints;
 117 static int      nCurConstraints;
 118 static int      maxCurConstraints;
 119
 120 /* Storage space for results from FindLockCycle */
 121 static EDGE *possibleConstraints;
 122 static int      nPossibleConstraints;
 123 static int      maxPossibleConstraints;
 124 static DEADLOCK_INFO *deadlockDetails;
 125 static int      nDeadlockDetails;
 126
 127 /* PGPROC pointer of any blocking autovacuum worker found */
 128 static PGPROC *blocking_autovacuum_proc = NULL;
 129
 130
 131 /*
 132  * InitDeadLockChecking -- initialize deadlock checker during backend startup
 133  *
 134  * This does per-backend initialization of the deadlock checker; primarily,
 135  * allocation of working memory for DeadLockCheck.  We do this per-backend
 136  * since there's no percentage in making the kernel do copy-on-write
 137  * inheritance of workspace from the postmaster.  We want to allocate the
 138  * space at startup because (a) the deadlock checker might be invoked when
 139  * there's no free memory left, and (b) the checker is normally run inside a
 140  * signal handler, which is a very dangerous place to invoke palloc from.
 141  */
 142 void
 143 InitDeadLockChecking(void)
 144 {
 145         MemoryContext oldcxt;
 146
 147         /* Make sure allocations are permanent */
 148         oldcxt = MemoryContextSwitchTo(TopMemoryContext);
 149
 150         /*
 151          * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and
 152          * deadlockDetails[].
 153          */
 154         visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
 155         deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));
 156
 157         /*
 158          * TopoSort needs to consider at most MaxBackends wait-queue entries, and
 159          * it needn't run concurrently with FindLockCycle.
 160          */
 161         topoProcs = visitedProcs;       /* re-use this space */
 162         beforeConstraints = (int *) palloc(MaxBackends * sizeof(int));
 163         afterConstraints = (int *) palloc(MaxBackends * sizeof(int));
 164
 165         /*
 166          * We need to consider rearranging at most MaxBackends/2 wait queues
 167          * (since it takes at least two waiters in a queue to create a soft edge),
 168          * and the expanded form of the wait queues can't involve more than
 169          * MaxBackends total waiters.
 170          */
 171         waitOrders = (WAIT_ORDER *)
 172                 palloc((MaxBackends / 2) * sizeof(WAIT_ORDER));
 173         waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
 174
 175         /*
 176          * Allow at most MaxBackends distinct constraints in a configuration. (Is
 177          * this enough?  In practice it seems it should be, but I don't quite see
 178          * how to prove it.  If we run out, we might fail to find a workable wait
 179          * queue rearrangement even though one exists.)  NOTE that this number
 180          * limits the maximum recursion depth of DeadLockCheckRecurse. Making it
 181          * really big might potentially allow a stack-overflow problem.
 182          */
 183         maxCurConstraints = MaxBackends;
 184         curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE));
 185
 186         /*
 187          * Allow up to 3*MaxBackends constraints to be saved without having to
 188          * re-run TestConfiguration.  (This is probably more than enough, but we
 189          * can survive if we run low on space by doing excess runs of
 190          * TestConfiguration to re-compute constraint lists each time needed.) The
 191          * last MaxBackends entries in possibleConstraints[] are reserved as
 192          * output workspace for FindLockCycle.
 193          */
 194         maxPossibleConstraints = MaxBackends * 4;
 195         possibleConstraints =
 196                 (EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));
 197
 198         MemoryContextSwitchTo(oldcxt);
 199 }
 200
 201 /*
 202  * DeadLockCheck -- Checks for deadlocks for a given process
 203  *
 204  * This code looks for deadlocks involving the given process.  If any
 205  * are found, it tries to rearrange lock wait queues to resolve the
 206  * deadlock.  If resolution is impossible, return DS_HARD_DEADLOCK ---
 207  * the caller is then expected to abort the given proc's transaction.
 208  *
 209  * Caller must already have locked all partitions of the lock tables.
 210  *
 211  * On failure, deadlock details are recorded in deadlockDetails[] for
 212  * subsequent printing by DeadLockReport().  That activity is separate
 213  * because (a) we don't want to do it while holding all those LWLocks,
 214  * and (b) we are typically invoked inside a signal handler.
 215  */
 216 DeadLockState
 217 DeadLockCheck(PGPROC *proc)
 218 {
 219         /* Initialize to "no constraints" */
 220         nCurConstraints = 0;
 221         nPossibleConstraints = 0;
 222         nWaitOrders = 0;
 223
 224         /* Initialize to not blocked by an autovacuum worker */
 225         blocking_autovacuum_proc = NULL;
 226
 227         /* Search for deadlocks and possible fixes */
 228         if (DeadLockCheckRecurse(proc))
 229         {
 230                 /*
 231                  * Call FindLockCycle one more time, to record the correct
 232                  * deadlockDetails[] for the basic state with no rearrangements.
 233                  */
 234                 int                     nSoftEdges;
 235
 236                 TRACE_POSTGRESQL_DEADLOCK_FOUND();
 237
 238                 nWaitOrders = 0;
 239                 if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
 240                         elog(FATAL, "deadlock seems to have disappeared");
 241
 242                 return DS_HARD_DEADLOCK;        /* cannot find a non-deadlocked state */
 243         }
 244
 245         /* Apply any needed rearrangements of wait queues */
 246         for (int i = 0; i < nWaitOrders; i++)
 247         {
 248                 LOCK       *lock = waitOrders[i].lock;
 249                 PGPROC    **procs = waitOrders[i].procs;
 250                 int                     nProcs = waitOrders[i].nProcs;
 251                 dclist_head *waitQueue = &lock->waitProcs;
 252
 253                 Assert(nProcs == dclist_count(waitQueue));
 254
 255 #ifdef DEBUG_DEADLOCK
 256                 PrintLockQueue(lock, "DeadLockCheck:");
 257 #endif
 258
 259                 /* Reset the queue and re-add procs in the desired order */
 260                 dclist_init(waitQueue);
 261                 for (int j = 0; j < nProcs; j++)
 262                         dclist_push_tail(waitQueue, &procs[j]->links);
 263
 264 #ifdef DEBUG_DEADLOCK
 265                 PrintLockQueue(lock, "rearranged to:");
 266 #endif
 267
 268                 /* See if any waiters for the lock can be woken up now */
 269                 ProcLockWakeup(GetLocksMethodTable(lock), lock);
 270         }
 271
 272         /* Return code tells caller if we had to escape a deadlock or not */
 273         if (nWaitOrders > 0)
 274                 return DS_SOFT_DEADLOCK;
 275         else if (blocking_autovacuum_proc != NULL)
 276                 return DS_BLOCKED_BY_AUTOVACUUM;
 277         else
 278                 return DS_NO_DEADLOCK;
 279 }
 280
 281 /*
 282  * Return the PGPROC of the autovacuum that's blocking a process.
 283  *
 284  * We reset the saved pointer as soon as we pass it back.
 285  */
 286 PGPROC *
 287 GetBlockingAutoVacuumPgproc(void)
 288 {
 289         PGPROC     *ptr;
 290
 291         ptr = blocking_autovacuum_proc;
 292         blocking_autovacuum_proc = NULL;
 293
 294         return ptr;
 295 }
 296
 297 /*
 298  * DeadLockCheckRecurse -- recursively search for valid orderings
 299  *
 300  * curConstraints[] holds the current set of constraints being considered
 301  * by an outer level of recursion.  Add to this each possible solution
 302  * constraint for any cycle detected at this level.
 303  *
 304  * Returns true if no solution exists.  Returns false if a deadlock-free
 305  * state is attainable, in which case waitOrders[] shows the required
 306  * rearrangements of lock wait queues (if any).
 307  */
 308 static bool
 309 DeadLockCheckRecurse(PGPROC *proc)
 310 {
 311         int                     nEdges;
 312         int                     oldPossibleConstraints;
 313         bool            savedList;
 314         int                     i;
 315
 316         nEdges = TestConfiguration(proc);
 317         if (nEdges < 0)
 318                 return true;                    /* hard deadlock --- no solution */
 319         if (nEdges == 0)
 320                 return false;                   /* good configuration found */
 321         if (nCurConstraints >= maxCurConstraints)
 322                 return true;                    /* out of room for active constraints? */
 323         oldPossibleConstraints = nPossibleConstraints;
 324         if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints)
 325         {
 326                 /* We can save the edge list in possibleConstraints[] */
 327                 nPossibleConstraints += nEdges;
 328                 savedList = true;
 329         }
 330         else
 331         {
 332                 /* Not room; will need to regenerate the edges on-the-fly */
 333                 savedList = false;
 334         }
 335
 336         /*
 337          * Try each available soft edge as an addition to the configuration.
 338          */
 339         for (i = 0; i < nEdges; i++)
 340         {
 341                 if (!savedList && i > 0)
 342                 {
 343                         /* Regenerate the list of possible added constraints */
 344                         if (nEdges != TestConfiguration(proc))
 345                                 elog(FATAL, "inconsistent results during deadlock check");
 346                 }
 347                 curConstraints[nCurConstraints] =
 348                         possibleConstraints[oldPossibleConstraints + i];
 349                 nCurConstraints++;
 350                 if (!DeadLockCheckRecurse(proc))
 351                         return false;           /* found a valid solution! */
 352                 /* give up on that added constraint, try again */
 353                 nCurConstraints--;
 354         }
 355         nPossibleConstraints = oldPossibleConstraints;
 356         return true;                            /* no solution found */
 357 }
 358
 359
 360 /*--------------------
 361  * Test a configuration (current set of constraints) for validity.
 362  *
 363  * Returns:
 364  *              0: the configuration is good (no deadlocks)
 365  *         -1: the configuration has a hard deadlock or is not self-consistent
 366  *              >0: the configuration has one or more soft deadlocks
 367  *
 368  * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily
 369  * and a list of its soft edges is returned beginning at
 370  * possibleConstraints+nPossibleConstraints.  The return value is the
 371  * number of soft edges.
 372  *--------------------
 373  */
 374 static int
 375 TestConfiguration(PGPROC *startProc)
 376 {
 377         int                     softFound = 0;
 378         EDGE       *softEdges = possibleConstraints + nPossibleConstraints;
 379         int                     nSoftEdges;
 380         int                     i;
 381
 382         /*
 383          * Make sure we have room for FindLockCycle's output.
 384          */
 385         if (nPossibleConstraints + MaxBackends > maxPossibleConstraints)
 386                 return -1;
 387
 388         /*
 389          * Expand current constraint set into wait orderings.  Fail if the
 390          * constraint set is not self-consistent.
 391          */
 392         if (!ExpandConstraints(curConstraints, nCurConstraints))
 393                 return -1;
 394
 395         /*
 396          * Check for cycles involving startProc or any of the procs mentioned in
 397          * constraints.  We check startProc last because if it has a soft cycle
 398          * still to be dealt with, we want to deal with that first.
 399          */
 400         for (i = 0; i < nCurConstraints; i++)
 401         {
 402                 if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges))
 403                 {
 404                         if (nSoftEdges == 0)
 405                                 return -1;              /* hard deadlock detected */
 406                         softFound = nSoftEdges;
 407                 }
 408                 if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges))
 409                 {
 410                         if (nSoftEdges == 0)
 411                                 return -1;              /* hard deadlock detected */
 412                         softFound = nSoftEdges;
 413                 }
 414         }
 415         if (FindLockCycle(startProc, softEdges, &nSoftEdges))
 416         {
 417                 if (nSoftEdges == 0)
 418                         return -1;                      /* hard deadlock detected */
 419                 softFound = nSoftEdges;
 420         }
 421         return softFound;
 422 }
 423
 424
 425 /*
 426  * FindLockCycle -- basic check for deadlock cycles
 427  *
 428  * Scan outward from the given proc to see if there is a cycle in the
 429  * waits-for graph that includes this proc.  Return true if a cycle
 430  * is found, else false.  If a cycle is found, we return a list of
 431  * the "soft edges", if any, included in the cycle.  These edges could
 432  * potentially be eliminated by rearranging wait queues.  We also fill
 433  * deadlockDetails[] with information about the detected cycle; this info
 434  * is not used by the deadlock algorithm itself, only to print a useful
 435  * message after failing.
 436  *
 437  * Since we need to be able to check hypothetical configurations that would
 438  * exist after wait queue rearrangement, the routine pays attention to the
 439  * table of hypothetical queue orders in waitOrders[].  These orders will
 440  * be believed in preference to the actual ordering seen in the locktable.
 441  */
 442 static bool
 443 FindLockCycle(PGPROC *checkProc,
 444                           EDGE *softEdges,      /* output argument */
 445                           int *nSoftEdges)      /* output argument */
 446 {
 447         nVisitedProcs = 0;
 448         nDeadlockDetails = 0;
 449         *nSoftEdges = 0;
 450         return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
 451 }
 452
 453 static bool
 454 FindLockCycleRecurse(PGPROC *checkProc,
 455                                          int depth,
 456                                          EDGE *softEdges,       /* output argument */
 457                                          int *nSoftEdges)       /* output argument */
 458 {
 459         int                     i;
 460         dlist_iter      iter;
 461
 462         /*
 463          * If this process is a lock group member, check the leader instead. (Note
 464          * that we might be the leader, in which case this is a no-op.)
 465          */
 466         if (checkProc->lockGroupLeader != NULL)
 467                 checkProc = checkProc->lockGroupLeader;
 468
 469         /*
 470          * Have we already seen this proc?
 471          */
 472         for (i = 0; i < nVisitedProcs; i++)
 473         {
 474                 if (visitedProcs[i] == checkProc)
 475                 {
 476                         /* If we return to starting point, we have a deadlock cycle */
 477                         if (i == 0)
 478                         {
 479                                 /*
 480                                  * record total length of cycle --- outer levels will now fill
 481                                  * deadlockDetails[]
 482                                  */
 483                                 Assert(depth <= MaxBackends);
 484                                 nDeadlockDetails = depth;
 485
 486                                 return true;
 487                         }
 488
 489                         /*
 490                          * Otherwise, we have a cycle but it does not include the start
 491                          * point, so say "no deadlock".
 492                          */
 493                         return false;
 494                 }
 495         }
 496         /* Mark proc as seen */
 497         Assert(nVisitedProcs < MaxBackends);
 498         visitedProcs[nVisitedProcs++] = checkProc;
 499
 500         /*
 501          * If the process is waiting, there is an outgoing waits-for edge to each
 502          * process that blocks it.
 503          */
 504         if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
 505                 FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
 506                                                                    nSoftEdges))
 507                 return true;
 508
 509         /*
 510          * If the process is not waiting, there could still be outgoing waits-for
 511          * edges if it is part of a lock group, because other members of the lock
 512          * group might be waiting even though this process is not.  (Given lock
 513          * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
 514          * that is a deadlock even neither of B1 and A2 are waiting for anything.)
 515          */
 516         dlist_foreach(iter, &checkProc->lockGroupMembers)
 517         {
 518                 PGPROC     *memberProc;
 519
 520                 memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);
 521
 522                 if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
 523                         memberProc != checkProc &&
 524                         FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
 525                                                                            nSoftEdges))
 526                         return true;
 527         }
 528
 529         return false;
 530 }
 531
 532 static bool
 533 FindLockCycleRecurseMember(PGPROC *checkProc,
 534                                                    PGPROC *checkProcLeader,
 535                                                    int depth,
 536                                                    EDGE *softEdges, /* output argument */
 537                                                    int *nSoftEdges) /* output argument */
 538 {
 539         PGPROC     *proc;
 540         LOCK       *lock = checkProc->waitLock;
 541         dlist_iter      proclock_iter;
 542         LockMethod      lockMethodTable;
 543         int                     conflictMask;
 544         int                     i;
 545         int                     numLockModes,
 546                                 lm;
 547
 548         /*
 549          * The relation extension lock can never participate in actual deadlock
 550          * cycle.  See Assert in LockAcquireExtended.  So, there is no advantage
 551          * in checking wait edges from it.
 552          */
 553         if (LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND)
 554                 return false;
 555
 556         lockMethodTable = GetLocksMethodTable(lock);
 557         numLockModes = lockMethodTable->numLockModes;
 558         conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];
 559
 560         /*
 561          * Scan for procs that already hold conflicting locks.  These are "hard"
 562          * edges in the waits-for graph.
 563          */
 564         dlist_foreach(proclock_iter, &lock->procLocks)
 565         {
 566                 PROCLOCK   *proclock = dlist_container(PROCLOCK, lockLink, proclock_iter.cur);
 567                 PGPROC     *leader;
 568
 569                 proc = proclock->tag.myProc;
 570                 leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;
 571
 572                 /* A proc never blocks itself or any other lock group member */
 573                 if (leader != checkProcLeader)
 574                 {
 575                         for (lm = 1; lm <= numLockModes; lm++)
 576                         {
 577                                 if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
 578                                         (conflictMask & LOCKBIT_ON(lm)))
 579                                 {
 580                                         /* This proc hard-blocks checkProc */
 581                                         if (FindLockCycleRecurse(proc, depth + 1,
 582                                                                                          softEdges, nSoftEdges))
 583                                         {
 584                                                 /* fill deadlockDetails[] */
 585                                                 DEADLOCK_INFO *info = &deadlockDetails[depth];
 586
 587                                                 info->locktag = lock->tag;
 588                                                 info->lockmode = checkProc->waitLockMode;
 589                                                 info->pid = checkProc->pid;
 590
 591                                                 return true;
 592                                         }
 593
 594                                         /*
 595                                          * No deadlock here, but see if this proc is an autovacuum
 596                                          * that is directly hard-blocking our own proc.  If so,
 597                                          * report it so that the caller can send a cancel signal
 598                                          * to it, if appropriate.  If there's more than one such
 599                                          * proc, it's indeterminate which one will be reported.
 600                                          *
 601                                          * We don't touch autovacuums that are indirectly blocking
 602                                          * us; it's up to the direct blockee to take action.  This
 603                                          * rule simplifies understanding the behavior and ensures
 604                                          * that an autovacuum won't be canceled with less than
 605                                          * deadlock_timeout grace period.
 606                                          *
 607                                          * Note we read statusFlags without any locking.  This is
 608                                          * OK only for checking the PROC_IS_AUTOVACUUM flag,
 609                                          * because that flag is set at process start and never
 610                                          * reset.  There is logic elsewhere to avoid canceling an
 611                                          * autovacuum that is working to prevent XID wraparound
 612                                          * problems (which needs to read a different statusFlags
 613                                          * bit), but we don't do that here to avoid grabbing
 614                                          * ProcArrayLock.
 615                                          */
 616                                         if (checkProc == MyProc &&
 617                                                 proc->statusFlags & PROC_IS_AUTOVACUUM)
 618                                                 blocking_autovacuum_proc = proc;
 619
 620                                         /* We're done looking at this proclock */
 621                                         break;
 622                                 }
 623                         }
 624                 }
 625         }
 626
 627         /*
 628          * Scan for procs that are ahead of this one in the lock's wait queue.
 629          * Those that have conflicting requests soft-block this one.  This must be
 630          * done after the hard-block search, since if another proc both hard- and
 631          * soft-blocks this one, we want to call it a hard edge.
 632          *
 633          * If there is a proposed re-ordering of the lock's wait order, use that
 634          * rather than the current wait order.
 635          */
 636         for (i = 0; i < nWaitOrders; i++)
 637         {
 638                 if (waitOrders[i].lock == lock)
 639                         break;
 640         }
 641
 642         if (i < nWaitOrders)
 643         {
 644                 /* Use the given hypothetical wait queue order */
 645                 PGPROC    **procs = waitOrders[i].procs;
 646                 int                     queue_size = waitOrders[i].nProcs;
 647
 648                 for (i = 0; i < queue_size; i++)
 649                 {
 650                         PGPROC     *leader;
 651
 652                         proc = procs[i];
 653                         leader = proc->lockGroupLeader == NULL ? proc :
 654                                 proc->lockGroupLeader;
 655
 656                         /*
 657                          * TopoSort will always return an ordering with group members
 658                          * adjacent to each other in the wait queue (see comments
 659                          * therein). So, as soon as we reach a process in the same lock
 660                          * group as checkProc, we know we've found all the conflicts that
 661                          * precede any member of the lock group lead by checkProcLeader.
 662                          */
 663                         if (leader == checkProcLeader)
 664                                 break;
 665
 666                         /* Is there a conflict with this guy's request? */
 667                         if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
 668                         {
 669                                 /* This proc soft-blocks checkProc */
 670                                 if (FindLockCycleRecurse(proc, depth + 1,
 671                                                                                  softEdges, nSoftEdges))
 672                                 {
 673                                         /* fill deadlockDetails[] */
 674                                         DEADLOCK_INFO *info = &deadlockDetails[depth];
 675
 676                                         info->locktag = lock->tag;
 677                                         info->lockmode = checkProc->waitLockMode;
 678                                         info->pid = checkProc->pid;
 679
 680                                         /*
 681                                          * Add this edge to the list of soft edges in the cycle
 682                                          */
 683                                         Assert(*nSoftEdges < MaxBackends);
 684                                         softEdges[*nSoftEdges].waiter = checkProcLeader;
 685                                         softEdges[*nSoftEdges].blocker = leader;
 686                                         softEdges[*nSoftEdges].lock = lock;
 687                                         (*nSoftEdges)++;
 688                                         return true;
 689                                 }
 690                         }
 691                 }
 692         }
 693         else
 694         {
 695                 PGPROC     *lastGroupMember = NULL;
 696                 dlist_iter      proc_iter;
 697                 dclist_head *waitQueue;
 698
 699                 /* Use the true lock wait queue order */
 700                 waitQueue = &lock->waitProcs;
 701
 702                 /*
 703                  * Find the last member of the lock group that is present in the wait
 704                  * queue.  Anything after this is not a soft lock conflict. If group
 705                  * locking is not in use, then we know immediately which process we're
 706                  * looking for, but otherwise we've got to search the wait queue to
 707                  * find the last process actually present.
 708                  */
 709                 if (checkProc->lockGroupLeader == NULL)
 710                         lastGroupMember = checkProc;
 711                 else
 712                 {
 713                         dclist_foreach(proc_iter, waitQueue)
 714                         {
 715                                 proc = dlist_container(PGPROC, links, proc_iter.cur);
 716
 717                                 if (proc->lockGroupLeader == checkProcLeader)
 718                                         lastGroupMember = proc;
 719                         }
 720                         Assert(lastGroupMember != NULL);
 721                 }
 722
 723                 /*
 724                  * OK, now rescan (or scan) the queue to identify the soft conflicts.
 725                  */
 726                 dclist_foreach(proc_iter, waitQueue)
 727                 {
 728                         PGPROC     *leader;
 729
 730                         proc = dlist_container(PGPROC, links, proc_iter.cur);
 731
 732                         leader = proc->lockGroupLeader == NULL ? proc :
 733                                 proc->lockGroupLeader;
 734
 735                         /* Done when we reach the target proc */
 736                         if (proc == lastGroupMember)
 737                                 break;
 738
 739                         /* Is there a conflict with this guy's request? */
 740                         if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
 741                                 leader != checkProcLeader)
 742                         {
 743                                 /* This proc soft-blocks checkProc */
 744                                 if (FindLockCycleRecurse(proc, depth + 1,
 745                                                                                  softEdges, nSoftEdges))
 746                                 {
 747                                         /* fill deadlockDetails[] */
 748                                         DEADLOCK_INFO *info = &deadlockDetails[depth];
 749
 750                                         info->locktag = lock->tag;
 751                                         info->lockmode = checkProc->waitLockMode;
 752                                         info->pid = checkProc->pid;
 753
 754                                         /*
 755                                          * Add this edge to the list of soft edges in the cycle
 756                                          */
 757                                         Assert(*nSoftEdges < MaxBackends);
 758                                         softEdges[*nSoftEdges].waiter = checkProcLeader;
 759                                         softEdges[*nSoftEdges].blocker = leader;
 760                                         softEdges[*nSoftEdges].lock = lock;
 761                                         (*nSoftEdges)++;
 762                                         return true;
 763                                 }
 764                         }
 765                 }
 766         }
 767
 768         /*
 769          * No conflict detected here.
 770          */
 771         return false;
 772 }
 773
 774
 775 /*
 776  * ExpandConstraints -- expand a list of constraints into a set of
 777  *              specific new orderings for affected wait queues
 778  *
 779  * Input is a list of soft edges to be reversed.  The output is a list
 780  * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array
 781  * workspace in waitOrderProcs[].
 782  *
 783  * Returns true if able to build an ordering that satisfies all the
 784  * constraints, false if not (there are contradictory constraints).
 785  */
 786 static bool
 787 ExpandConstraints(EDGE *constraints,
 788                                   int nConstraints)
 789 {
 790         int                     nWaitOrderProcs = 0;
 791         int                     i,
 792                                 j;
 793
 794         nWaitOrders = 0;
 795
 796         /*
 797          * Scan constraint list backwards.  This is because the last-added
 798          * constraint is the only one that could fail, and so we want to test it
 799          * for inconsistency first.
 800          */
 801         for (i = nConstraints; --i >= 0;)
 802         {
 803                 LOCK       *lock = constraints[i].lock;
 804
 805                 /* Did we already make a list for this lock? */
 806                 for (j = nWaitOrders; --j >= 0;)
 807                 {
 808                         if (waitOrders[j].lock == lock)
 809                                 break;
 810                 }
 811                 if (j >= 0)
 812                         continue;
 813                 /* No, so allocate a new list */
 814                 waitOrders[nWaitOrders].lock = lock;
 815                 waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs;
 816                 waitOrders[nWaitOrders].nProcs = dclist_count(&lock->waitProcs);
 817                 nWaitOrderProcs += dclist_count(&lock->waitProcs);
 818                 Assert(nWaitOrderProcs <= MaxBackends);
 819
 820                 /*
 821                  * Do the topo sort.  TopoSort need not examine constraints after this
 822                  * one, since they must be for different locks.
 823                  */
 824                 if (!TopoSort(lock, constraints, i + 1,
 825                                           waitOrders[nWaitOrders].procs))
 826                         return false;
 827                 nWaitOrders++;
 828         }
 829         return true;
 830 }
 831
 832
 833 /*
 834  * TopoSort -- topological sort of a wait queue
 835  *
 836  * Generate a re-ordering of a lock's wait queue that satisfies given
 837  * constraints about certain procs preceding others.  (Each such constraint
 838  * is a fact of a partial ordering.)  Minimize rearrangement of the queue
 839  * not needed to achieve the partial ordering.
 840  *
 841  * This is a lot simpler and slower than, for example, the topological sort
 842  * algorithm shown in Knuth's Volume 1.  However, Knuth's method doesn't
 843  * try to minimize the damage to the existing order.  In practice we are
 844  * not likely to be working with more than a few constraints, so the apparent
 845  * slowness of the algorithm won't really matter.
 846  *
 847  * The initial queue ordering is taken directly from the lock's wait queue.
 848  * The output is an array of PGPROC pointers, of length equal to the lock's
 849  * wait queue length (the caller is responsible for providing this space).
 850  * The partial order is specified by an array of EDGE structs.  Each EDGE
 851  * is one that we need to reverse, therefore the "waiter" must appear before
 852  * the "blocker" in the output array.  The EDGE array may well contain
 853  * edges associated with other locks; these should be ignored.
 854  *
 855  * Returns true if able to build an ordering that satisfies all the
 856  * constraints, false if not (there are contradictory constraints).
 857  */
 858 static bool
 859 TopoSort(LOCK *lock,
 860                  EDGE *constraints,
 861                  int nConstraints,
 862                  PGPROC **ordering)             /* output argument */
 863 {
 864         dclist_head *waitQueue = &lock->waitProcs;
 865         int                     queue_size = dclist_count(waitQueue);
 866         PGPROC     *proc;
 867         int                     i,
 868                                 j,
 869                                 jj,
 870                                 k,
 871                                 kk,
 872                                 last;
 873         dlist_iter      proc_iter;
 874
 875         /* First, fill topoProcs[] array with the procs in their current order */
 876         i = 0;
 877         dclist_foreach(proc_iter, waitQueue)
 878         {
 879                 proc = dlist_container(PGPROC, links, proc_iter.cur);
 880                 topoProcs[i++] = proc;
 881         }
 882         Assert(i == queue_size);
 883
 884         /*
 885          * Scan the constraints, and for each proc in the array, generate a count
 886          * of the number of constraints that say it must be before something else,
 887          * plus a list of the constraints that say it must be after something
 888          * else. The count for the j'th proc is stored in beforeConstraints[j],
 889          * and the head of its list in afterConstraints[j].  Each constraint
 890          * stores its list link in constraints[i].link (note any constraint will
 891          * be in just one list). The array index for the before-proc of the i'th
 892          * constraint is remembered in constraints[i].pred.
 893          *
 894          * Note that it's not necessarily the case that every constraint affects
 895          * this particular wait queue.  Prior to group locking, a process could be
 896          * waiting for at most one lock.  But a lock group can be waiting for
 897          * zero, one, or multiple locks.  Since topoProcs[] is an array of the
 898          * processes actually waiting, while constraints[] is an array of group
 899          * leaders, we've got to scan through topoProcs[] for each constraint,
 900          * checking whether both a waiter and a blocker for that group are
 901          * present.  If so, the constraint is relevant to this wait queue; if not,
 902          * it isn't.
 903          */
 904         MemSet(beforeConstraints, 0, queue_size * sizeof(int));
 905         MemSet(afterConstraints, 0, queue_size * sizeof(int));
 906         for (i = 0; i < nConstraints; i++)
 907         {
 908                 /*
 909                  * Find a representative process that is on the lock queue and part of
 910                  * the waiting lock group.  This may or may not be the leader, which
 911                  * may or may not be waiting at all.  If there are any other processes
 912                  * in the same lock group on the queue, set their number of
 913                  * beforeConstraints to -1 to indicate that they should be emitted
 914                  * with their groupmates rather than considered separately.
 915                  *
 916                  * In this loop and the similar one just below, it's critical that we
 917                  * consistently select the same representative member of any one lock
 918                  * group, so that all the constraints are associated with the same
 919                  * proc, and the -1's are only associated with not-representative
 920                  * members.  We select the last one in the topoProcs array.
 921                  */
 922                 proc = constraints[i].waiter;
 923                 Assert(proc != NULL);
 924                 jj = -1;
 925                 for (j = queue_size; --j >= 0;)
 926                 {
 927                         PGPROC     *waiter = topoProcs[j];
 928
 929                         if (waiter == proc || waiter->lockGroupLeader == proc)
 930                         {
 931                                 Assert(waiter->waitLock == lock);
 932                                 if (jj == -1)
 933                                         jj = j;
 934                                 else
 935                                 {
 936                                         Assert(beforeConstraints[j] <= 0);
 937                                         beforeConstraints[j] = -1;
 938                                 }
 939                         }
 940                 }
 941
 942                 /* If no matching waiter, constraint is not relevant to this lock. */
 943                 if (jj < 0)
 944                         continue;
 945
 946                 /*
 947                  * Similarly, find a representative process that is on the lock queue
 948                  * and waiting for the blocking lock group.  Again, this could be the
 949                  * leader but does not need to be.
 950                  */
 951                 proc = constraints[i].blocker;
 952                 Assert(proc != NULL);
 953                 kk = -1;
 954                 for (k = queue_size; --k >= 0;)
 955                 {
 956                         PGPROC     *blocker = topoProcs[k];
 957
 958                         if (blocker == proc || blocker->lockGroupLeader == proc)
 959                         {
 960                                 Assert(blocker->waitLock == lock);
 961                                 if (kk == -1)
 962                                         kk = k;
 963                                 else
 964                                 {
 965                                         Assert(beforeConstraints[k] <= 0);
 966                                         beforeConstraints[k] = -1;
 967                                 }
 968                         }
 969                 }
 970
 971                 /* If no matching blocker, constraint is not relevant to this lock. */
 972                 if (kk < 0)
 973                         continue;
 974
 975                 Assert(beforeConstraints[jj] >= 0);
 976                 beforeConstraints[jj]++;        /* waiter must come before */
 977                 /* add this constraint to list of after-constraints for blocker */
 978                 constraints[i].pred = jj;
 979                 constraints[i].link = afterConstraints[kk];
 980                 afterConstraints[kk] = i + 1;
 981         }
 982
 983         /*--------------------
 984          * Now scan the topoProcs array backwards.  At each step, output the
 985          * last proc that has no remaining before-constraints plus any other
 986          * members of the same lock group; then decrease the beforeConstraints
 987          * count of each of the procs it was constrained against.
 988          * i = index of ordering[] entry we want to output this time
 989          * j = search index for topoProcs[]
 990          * k = temp for scanning constraint list for proc j
 991          * last = last non-null index in topoProcs (avoid redundant searches)
 992          *--------------------
 993          */
 994         last = queue_size - 1;
 995         for (i = queue_size - 1; i >= 0;)
 996         {
 997                 int                     c;
 998                 int                     nmatches = 0;
 999
1000                 /* Find next candidate to output */
1001                 while (topoProcs[last] == NULL)
1002                         last--;
1003                 for (j = last; j >= 0; j--)
1004                 {
1005                         if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
1006                                 break;
1007                 }
1008
1009                 /* If no available candidate, topological sort fails */
1010                 if (j < 0)
1011                         return false;
1012
1013                 /*
1014                  * Output everything in the lock group.  There's no point in
1015                  * outputting an ordering where members of the same lock group are not
1016                  * consecutive on the wait queue: if some other waiter is between two
1017                  * requests that belong to the same group, then either it conflicts
1018                  * with both of them and is certainly not a solution; or it conflicts
1019                  * with at most one of them and is thus isomorphic to an ordering
1020                  * where the group members are consecutive.
1021                  */
1022                 proc = topoProcs[j];
1023                 if (proc->lockGroupLeader != NULL)
1024                         proc = proc->lockGroupLeader;
1025                 Assert(proc != NULL);
1026                 for (c = 0; c <= last; ++c)
1027                 {
1028                         if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
1029                                                                                  topoProcs[c]->lockGroupLeader == proc))
1030                         {
1031                                 ordering[i - nmatches] = topoProcs[c];
1032                                 topoProcs[c] = NULL;
1033                                 ++nmatches;
1034                         }
1035                 }
1036                 Assert(nmatches > 0);
1037                 i -= nmatches;
1038
1039                 /* Update beforeConstraints counts of its predecessors */
1040                 for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
1041                         beforeConstraints[constraints[k - 1].pred]--;
1042         }
1043
1044         /* Done */
1045         return true;
1046 }
1047
1048 #ifdef DEBUG_DEADLOCK
1049 static void
1050 PrintLockQueue(LOCK *lock, const char *info)
1051 {
1052         dclist_head *waitQueue = &lock->waitProcs;
1053         dlist_iter      proc_iter;
1054
1055         printf("%s lock %p queue ", info, lock);
1056
1057         dclist_foreach(proc_iter, waitQueue)
1058         {
1059                 PGPROC     *proc = dlist_container(PGPROC, links, proc_iter.cur);
1060
1061                 printf(" %d", proc->pid);
1062         }
1063         printf("\n");
1064         fflush(stdout);
1065 }
1066 #endif
1067
1068 /*
1069  * Report a detected deadlock, with available details.
1070  */
1071 void
1072 DeadLockReport(void)
1073 {
1074         StringInfoData clientbuf;       /* errdetail for client */
1075         StringInfoData logbuf;          /* errdetail for server log */
1076         StringInfoData locktagbuf;
1077         int                     i;
1078
1079         initStringInfo(&clientbuf);
1080         initStringInfo(&logbuf);
1081         initStringInfo(&locktagbuf);
1082
1083         /* Generate the "waits for" lines sent to the client */
1084         for (i = 0; i < nDeadlockDetails; i++)
1085         {
1086                 DEADLOCK_INFO *info = &deadlockDetails[i];
1087                 int                     nextpid;
1088
1089                 /* The last proc waits for the first one... */
1090                 if (i < nDeadlockDetails - 1)
1091                         nextpid = info[1].pid;
1092                 else
1093                         nextpid = deadlockDetails[0].pid;
1094
1095                 /* reset locktagbuf to hold next object description */
1096                 resetStringInfo(&locktagbuf);
1097
1098                 DescribeLockTag(&locktagbuf, &info->locktag);
1099
1100                 if (i > 0)
1101                         appendStringInfoChar(&clientbuf, '\n');
1102
1103                 appendStringInfo(&clientbuf,
1104                                                  _("Process %d waits for %s on %s; blocked by process %d."),
1105                                                  info->pid,
1106                                                  GetLockmodeName(info->locktag.locktag_lockmethodid,
1107                                                                                  info->lockmode),
1108                                                  locktagbuf.data,
1109                                                  nextpid);
1110         }
1111
1112         /* Duplicate all the above for the server ... */
1113         appendBinaryStringInfo(&logbuf, clientbuf.data, clientbuf.len);
1114
1115         /* ... and add info about query strings */
1116         for (i = 0; i < nDeadlockDetails; i++)
1117         {
1118                 DEADLOCK_INFO *info = &deadlockDetails[i];
1119
1120                 appendStringInfoChar(&logbuf, '\n');
1121
1122                 appendStringInfo(&logbuf,
1123                                                  _("Process %d: %s"),
1124                                                  info->pid,
1125                                                  pgstat_get_backend_current_activity(info->pid, false));
1126         }
1127
1128         pgstat_report_deadlock();
1129
1130         ereport(ERROR,
1131                         (errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
1132                          errmsg("deadlock detected"),
1133                          errdetail_internal("%s", clientbuf.data),
1134                          errdetail_log("%s", logbuf.data),
1135                          errhint("See server log for query details.")));
1136 }
1137
1138 /*
1139  * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
1140  * detects a trivial (two-way) deadlock.  proc1 wants to block for lockmode
1141  * on lock, but proc2 is already waiting and would be blocked by proc1.
1142  */
1143 void
1144 RememberSimpleDeadLock(PGPROC *proc1,
1145                                            LOCKMODE lockmode,
1146                                            LOCK *lock,
1147                                            PGPROC *proc2)
1148 {
1149         DEADLOCK_INFO *info = &deadlockDetails[0];
1150
1151         info->locktag = lock->tag;
1152         info->lockmode = lockmode;
1153         info->pid = proc1->pid;
1154         info++;
1155         info->locktag = proc2->waitLock->tag;
1156         info->lockmode = proc2->waitLockMode;
1157         info->pid = proc2->pid;
1158         nDeadlockDetails = 2;
1159 }