/*-------------------------------------------------------------------------
 *
 * deadlock.c
 *	  POSTGRES deadlock detection code
 *
 * See src/backend/storage/lmgr/README for a description of the deadlock
 * detection and resolution algorithms.
 *
 *
 * Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/storage/lmgr/deadlock.c
 *
 *	Interface:
 *
 *	DeadLockCheck()
 *	DeadLockReport()
 *	RememberSimpleDeadLock()
 *	InitDeadLockChecking()
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
#include "utils/memutils.h"
/*
 * One edge in the waits-for graph.
 *
 * waiter and blocker may or may not be members of a lock group, but if either
 * is, it will be the leader rather than any other member of the lock group.
 * The group leaders act as representatives of the whole group even though
 * those particular processes need not be waiting at all.  There will be at
 * least one member of the waiter's lock group on the wait queue for the given
 * lock, maybe more.
 */
typedef struct
{
	PGPROC	   *waiter;			/* the leader of the waiting lock group */
	PGPROC	   *blocker;		/* the leader of the group it is waiting for */
	LOCK	   *lock;			/* the lock being waited for */
	int			pred;			/* workspace for TopoSort */
	int			link;			/* workspace for TopoSort */
} EDGE;
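
/*
 * Illustrative example (added commentary, not part of the algorithm):
 * suppose backend 100 waits for a lock held by backend 200, while backend
 * 200 waits for a lock held by backend 100.  The waits-for graph then
 * contains the edges (waiter=100, blocker=200) and (waiter=200,
 * blocker=100), which form a cycle.  An edge is "hard" if the blocker
 * already holds a conflicting lock, and "soft" if the blocker is merely
 * ahead of the waiter in the same wait queue; only soft edges can be
 * eliminated by rearranging wait queues (see FindLockCycle below).
 */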
/* One potential reordering of a lock's wait queue */
typedef struct
{
	LOCK	   *lock;			/* the lock whose wait queue is described */
	PGPROC	  **procs;			/* array of PGPROC *'s in new wait order */
	int			nProcs;
} WAIT_ORDER;
/*
 * Information saved about each edge in a detected deadlock cycle.  This
 * is used to print a diagnostic message upon failure.
 *
 * Note: because we want to examine this info after releasing the lock
 * manager's partition locks, we can't just store LOCK and PGPROC pointers;
 * we must extract out all the info we want to be able to print.
 */
typedef struct
{
	LOCKTAG		locktag;		/* ID of awaited lock object */
	LOCKMODE	lockmode;		/* type of lock we're waiting for */
	int			pid;			/* PID of blocked backend */
} DEADLOCK_INFO;
static bool DeadLockCheckRecurse(PGPROC *proc);
static int	TestConfiguration(PGPROC *startProc);
static bool FindLockCycle(PGPROC *checkProc,
						  EDGE *softEdges, int *nSoftEdges);
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth,
								 EDGE *softEdges, int *nSoftEdges);
static bool FindLockCycleRecurseMember(PGPROC *checkProc,
									   PGPROC *checkProcLeader,
									   int depth, EDGE *softEdges, int *nSoftEdges);
static bool ExpandConstraints(EDGE *constraints, int nConstraints);
static bool TopoSort(LOCK *lock, EDGE *constraints, int nConstraints,
					 PGPROC **ordering);

#ifdef DEBUG_DEADLOCK
static void PrintLockQueue(LOCK *lock, const char *info);
#endif
/*
 * Working space for the deadlock detector
 */

/* Workspace for FindLockCycle */
static PGPROC **visitedProcs;	/* Array of visited procs */
static int	nVisitedProcs;

/* Workspace for TopoSort */
static PGPROC **topoProcs;		/* Array of not-yet-output procs */
static int *beforeConstraints;	/* Counts of remaining before-constraints */
static int *afterConstraints;	/* List head for after-constraints */

/* Output area for ExpandConstraints */
static WAIT_ORDER *waitOrders;	/* Array of proposed queue rearrangements */
static int	nWaitOrders;
static PGPROC **waitOrderProcs; /* Space for waitOrders queue contents */

/* Current list of constraints being considered */
static EDGE *curConstraints;
static int	nCurConstraints;
static int	maxCurConstraints;

/* Storage space for results from FindLockCycle */
static EDGE *possibleConstraints;
static int	nPossibleConstraints;
static int	maxPossibleConstraints;
static DEADLOCK_INFO *deadlockDetails;
static int	nDeadlockDetails;

/* PGPROC pointer of any blocking autovacuum worker found */
static PGPROC *blocking_autovacuum_proc = NULL;
/*
 * InitDeadLockChecking -- initialize deadlock checker during backend startup
 *
 * This does per-backend initialization of the deadlock checker; primarily,
 * allocation of working memory for DeadLockCheck.  We do this per-backend
 * since there's no percentage in making the kernel do copy-on-write
 * inheritance of workspace from the postmaster.  We want to allocate the
 * space at startup because (a) the deadlock checker might be invoked when
 * there's no free memory left, and (b) the checker is normally run inside a
 * signal handler, which is a very dangerous place to invoke palloc from.
 */
void
InitDeadLockChecking(void)
{
	MemoryContext oldcxt;

	/* Make sure allocations are permanent */
	oldcxt = MemoryContextSwitchTo(TopMemoryContext);

	/*
	 * FindLockCycle needs at most MaxBackends entries in visitedProcs[] and
	 * deadlockDetails[].
	 */
	visitedProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));
	deadlockDetails = (DEADLOCK_INFO *) palloc(MaxBackends * sizeof(DEADLOCK_INFO));

	/*
	 * TopoSort needs to consider at most MaxBackends wait-queue entries, and
	 * it needn't run concurrently with FindLockCycle.
	 */
	topoProcs = visitedProcs;	/* re-use this space */
	beforeConstraints = (int *) palloc(MaxBackends * sizeof(int));
	afterConstraints = (int *) palloc(MaxBackends * sizeof(int));

	/*
	 * We need to consider rearranging at most MaxBackends/2 wait queues
	 * (since it takes at least two waiters in a queue to create a soft edge),
	 * and the expanded form of the wait queues can't involve more than
	 * MaxBackends total waiters.
	 */
	waitOrders = (WAIT_ORDER *)
		palloc((MaxBackends / 2) * sizeof(WAIT_ORDER));
	waitOrderProcs = (PGPROC **) palloc(MaxBackends * sizeof(PGPROC *));

	/*
	 * Allow at most MaxBackends distinct constraints in a configuration. (Is
	 * this enough?  In practice it seems it should be, but I don't quite see
	 * how to prove it.  If we run out, we might fail to find a workable wait
	 * queue rearrangement even though one exists.)  NOTE that this number
	 * limits the maximum recursion depth of DeadLockCheckRecurse.  Making it
	 * really big might potentially allow a stack-overflow problem.
	 */
	maxCurConstraints = MaxBackends;
	curConstraints = (EDGE *) palloc(maxCurConstraints * sizeof(EDGE));

	/*
	 * Allow up to 3*MaxBackends constraints to be saved without having to
	 * re-run TestConfiguration.  (This is probably more than enough, but we
	 * can survive if we run low on space by doing excess runs of
	 * TestConfiguration to re-compute constraint lists each time needed.)  The
	 * last MaxBackends entries in possibleConstraints[] are reserved as
	 * output workspace for FindLockCycle.
	 */
	maxPossibleConstraints = MaxBackends * 4;
	possibleConstraints =
		(EDGE *) palloc(maxPossibleConstraints * sizeof(EDGE));

	MemoryContextSwitchTo(oldcxt);
}
/*
 * DeadLockCheck -- Checks for deadlocks for a given process
 *
 * This code looks for deadlocks involving the given process.  If any
 * are found, it tries to rearrange lock wait queues to resolve the
 * deadlock.  If resolution is impossible, return DS_HARD_DEADLOCK ---
 * the caller is then expected to abort the given proc's transaction.
 *
 * Caller must already have locked all partitions of the lock tables.
 *
 * On failure, deadlock details are recorded in deadlockDetails[] for
 * subsequent printing by DeadLockReport().  That activity is separate
 * because (a) we don't want to do it while holding all those LWLocks,
 * and (b) we are typically invoked inside a signal handler.
 */
DeadLockState
DeadLockCheck(PGPROC *proc)
{
	/* Initialize to "no constraints" */
	nCurConstraints = 0;
	nPossibleConstraints = 0;
	nWaitOrders = 0;

	/* Initialize to not blocked by an autovacuum worker */
	blocking_autovacuum_proc = NULL;

	/* Search for deadlocks and possible fixes */
	if (DeadLockCheckRecurse(proc))
	{
		/*
		 * Call FindLockCycle one more time, to record the correct
		 * deadlockDetails[] for the basic state with no rearrangements.
		 */
		int			nSoftEdges;

		TRACE_POSTGRESQL_DEADLOCK_FOUND();

		nWaitOrders = 0;
		if (!FindLockCycle(proc, possibleConstraints, &nSoftEdges))
			elog(FATAL, "deadlock seems to have disappeared");

		return DS_HARD_DEADLOCK;	/* cannot find a non-deadlocked state */
	}

	/* Apply any needed rearrangements of wait queues */
	for (int i = 0; i < nWaitOrders; i++)
	{
		LOCK	   *lock = waitOrders[i].lock;
		PGPROC	  **procs = waitOrders[i].procs;
		int			nProcs = waitOrders[i].nProcs;
		dclist_head *waitQueue = &lock->waitProcs;

		Assert(nProcs == dclist_count(waitQueue));

#ifdef DEBUG_DEADLOCK
		PrintLockQueue(lock, "DeadLockCheck:");
#endif

		/* Reset the queue and re-add procs in the desired order */
		dclist_init(waitQueue);
		for (int j = 0; j < nProcs; j++)
			dclist_push_tail(waitQueue, &procs[j]->links);

#ifdef DEBUG_DEADLOCK
		PrintLockQueue(lock, "rearranged to:");
#endif

		/* See if any waiters for the lock can be woken up now */
		ProcLockWakeup(GetLocksMethodTable(lock), lock);
	}

	/* Return code tells caller if we had to escape a deadlock or not */
	if (nWaitOrders > 0)
		return DS_SOFT_DEADLOCK;
	else if (blocking_autovacuum_proc != NULL)
		return DS_BLOCKED_BY_AUTOVACUUM;
	else
		return DS_NO_DEADLOCK;
}
/*
 * Return the PGPROC of the autovacuum that's blocking a process.
 *
 * We reset the saved pointer as soon as we pass it back.
 */
PGPROC *
GetBlockingAutoVacuumPgproc(void)
{
	PGPROC	   *ptr;

	ptr = blocking_autovacuum_proc;
	blocking_autovacuum_proc = NULL;

	return ptr;
}
/*
 * DeadLockCheckRecurse -- recursively search for valid orderings
 *
 * curConstraints[] holds the current set of constraints being considered
 * by an outer level of recursion.  Add to this each possible solution
 * constraint for any cycle detected at this level.
 *
 * Returns true if no solution exists.  Returns false if a deadlock-free
 * state is attainable, in which case waitOrders[] shows the required
 * rearrangements of lock wait queues (if any).
 */
static bool
DeadLockCheckRecurse(PGPROC *proc)
{
	int			nEdges;
	int			oldPossibleConstraints;
	bool		savedList;
	int			i;

	nEdges = TestConfiguration(proc);
	if (nEdges < 0)
		return true;			/* hard deadlock --- no solution */
	if (nEdges == 0)
		return false;			/* good configuration found */
	if (nCurConstraints >= maxCurConstraints)
		return true;			/* out of room for active constraints? */
	oldPossibleConstraints = nPossibleConstraints;
	if (nPossibleConstraints + nEdges + MaxBackends <= maxPossibleConstraints)
	{
		/* We can save the edge list in possibleConstraints[] */
		nPossibleConstraints += nEdges;
		savedList = true;
	}
	else
	{
		/* Not room; will need to regenerate the edges on-the-fly */
		savedList = false;
	}

	/*
	 * Try each available soft edge as an addition to the configuration.
	 */
	for (i = 0; i < nEdges; i++)
	{
		if (!savedList && i > 0)
		{
			/* Regenerate the list of possible added constraints */
			if (nEdges != TestConfiguration(proc))
				elog(FATAL, "inconsistent results during deadlock check");
		}
		curConstraints[nCurConstraints] =
			possibleConstraints[oldPossibleConstraints + i];
		nCurConstraints++;
		if (!DeadLockCheckRecurse(proc))
			return false;		/* found a valid solution! */
		/* give up on that added constraint, try again */
		nCurConstraints--;
	}
	nPossibleConstraints = oldPossibleConstraints;
	return true;				/* no solution found */
}
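
/*
 * Example of the search this performs (illustrative commentary): if
 * TestConfiguration reports a soft cycle with two soft edges E1 and E2,
 * the recursion first tries the constraint set {E1}; if that still yields
 * a soft cycle with edges E3 and E4, it tries {E1, E3}, then {E1, E4},
 * backtracking to {E2} if no extension of {E1} works out.  The search
 * succeeds as soon as some constraint set produces a configuration with
 * no cycles at all, and fails hard once every combination is exhausted.
 */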
/*--------------------
 * Test a configuration (current set of constraints) for validity.
 *
 * Returns:
 *		0: the configuration is good (no deadlocks)
 *	   -1: the configuration has a hard deadlock or is not self-consistent
 *	   >0: the configuration has one or more soft deadlocks
 *
 * In the soft-deadlock case, one of the soft cycles is chosen arbitrarily
 * and a list of its soft edges is returned beginning at
 * possibleConstraints+nPossibleConstraints.  The return value is the
 * number of soft edges.
 *--------------------
 */
static int
TestConfiguration(PGPROC *startProc)
{
	int			softFound = 0;
	EDGE	   *softEdges = possibleConstraints + nPossibleConstraints;
	int			nSoftEdges;
	int			i;

	/*
	 * Make sure we have room for FindLockCycle's output.
	 */
	if (nPossibleConstraints + MaxBackends > maxPossibleConstraints)
		return -1;

	/*
	 * Expand current constraint set into wait orderings.  Fail if the
	 * constraint set is not self-consistent.
	 */
	if (!ExpandConstraints(curConstraints, nCurConstraints))
		return -1;

	/*
	 * Check for cycles involving startProc or any of the procs mentioned in
	 * constraints.  We check startProc last because if it has a soft cycle
	 * still to be dealt with, we want to deal with that first.
	 */
	for (i = 0; i < nCurConstraints; i++)
	{
		if (FindLockCycle(curConstraints[i].waiter, softEdges, &nSoftEdges))
		{
			if (nSoftEdges == 0)
				return -1;		/* hard deadlock detected */
			softFound = nSoftEdges;
		}
		if (FindLockCycle(curConstraints[i].blocker, softEdges, &nSoftEdges))
		{
			if (nSoftEdges == 0)
				return -1;		/* hard deadlock detected */
			softFound = nSoftEdges;
		}
	}
	if (FindLockCycle(startProc, softEdges, &nSoftEdges))
	{
		if (nSoftEdges == 0)
			return -1;			/* hard deadlock detected */
		softFound = nSoftEdges;
	}
	return softFound;
}
/*
 * FindLockCycle -- basic check for deadlock cycles
 *
 * Scan outward from the given proc to see if there is a cycle in the
 * waits-for graph that includes this proc.  Return true if a cycle
 * is found, else false.  If a cycle is found, we return a list of
 * the "soft edges", if any, included in the cycle.  These edges could
 * potentially be eliminated by rearranging wait queues.  We also fill
 * deadlockDetails[] with information about the detected cycle; this info
 * is not used by the deadlock algorithm itself, only to print a useful
 * message after failing.
 *
 * Since we need to be able to check hypothetical configurations that would
 * exist after wait queue rearrangement, the routine pays attention to the
 * table of hypothetical queue orders in waitOrders[].  These orders will
 * be believed in preference to the actual ordering seen in the locktable.
 */
static bool
FindLockCycle(PGPROC *checkProc,
			  EDGE *softEdges,	/* output argument */
			  int *nSoftEdges)	/* output argument */
{
	nVisitedProcs = 0;
	nDeadlockDetails = 0;
	*nSoftEdges = 0;
	return FindLockCycleRecurse(checkProc, 0, softEdges, nSoftEdges);
}
static bool
FindLockCycleRecurse(PGPROC *checkProc,
					 int depth,
					 EDGE *softEdges,	/* output argument */
					 int *nSoftEdges)	/* output argument */
{
	int			i;
	dlist_iter	iter;

	/*
	 * If this process is a lock group member, check the leader instead. (Note
	 * that we might be the leader, in which case this is a no-op.)
	 */
	if (checkProc->lockGroupLeader != NULL)
		checkProc = checkProc->lockGroupLeader;

	/*
	 * Have we already seen this proc?
	 */
	for (i = 0; i < nVisitedProcs; i++)
	{
		if (visitedProcs[i] == checkProc)
		{
			/* If we return to starting point, we have a deadlock cycle */
			if (i == 0)
			{
				/*
				 * record total length of cycle --- outer levels will now fill
				 * deadlockDetails[]
				 */
				Assert(depth <= MaxBackends);
				nDeadlockDetails = depth;

				return true;
			}

			/*
			 * Otherwise, we have a cycle but it does not include the start
			 * point, so say "no deadlock".
			 */
			return false;
		}
	}
	/* Mark proc as seen */
	Assert(nVisitedProcs < MaxBackends);
	visitedProcs[nVisitedProcs++] = checkProc;

	/*
	 * If the process is waiting, there is an outgoing waits-for edge to each
	 * process that blocks it.
	 */
	if (checkProc->links.next != NULL && checkProc->waitLock != NULL &&
		FindLockCycleRecurseMember(checkProc, checkProc, depth, softEdges,
								   nSoftEdges))
		return true;

	/*
	 * If the process is not waiting, there could still be outgoing waits-for
	 * edges if it is part of a lock group, because other members of the lock
	 * group might be waiting even though this process is not.  (Given lock
	 * groups {A1, A2} and {B1, B2}, if A1 waits for B1 and B2 waits for A2,
	 * that is a deadlock even if neither B1 nor A2 is waiting for anything.)
	 */
	dlist_foreach(iter, &checkProc->lockGroupMembers)
	{
		PGPROC	   *memberProc;

		memberProc = dlist_container(PGPROC, lockGroupLink, iter.cur);

		if (memberProc->links.next != NULL && memberProc->waitLock != NULL &&
			memberProc != checkProc &&
			FindLockCycleRecurseMember(memberProc, checkProc, depth, softEdges,
									   nSoftEdges))
			return true;
	}

	return false;
}
static bool
FindLockCycleRecurseMember(PGPROC *checkProc,
						   PGPROC *checkProcLeader,
						   int depth,
						   EDGE *softEdges, /* output argument */
						   int *nSoftEdges) /* output argument */
{
	PGPROC	   *proc;
	LOCK	   *lock = checkProc->waitLock;
	dlist_iter	proclock_iter;
	LockMethod	lockMethodTable;
	PGPROC	   *leader;
	LOCKMASK	conflictMask;
	int			i;
	int			numLockModes,
				lm;

	/*
	 * The relation extension lock can never participate in an actual deadlock
	 * cycle.  See Assert in LockAcquireExtended.  So, there is no advantage
	 * in checking wait edges from it.
	 */
	if (LOCK_LOCKTAG(*lock) == LOCKTAG_RELATION_EXTEND)
		return false;

	lockMethodTable = GetLocksMethodTable(lock);
	numLockModes = lockMethodTable->numLockModes;
	conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode];

	/*
	 * Scan for procs that already hold conflicting locks.  These are "hard"
	 * edges in the waits-for graph.
	 */
	dlist_foreach(proclock_iter, &lock->procLocks)
	{
		PROCLOCK   *proclock = dlist_container(PROCLOCK, lockLink, proclock_iter.cur);

		proc = proclock->tag.myProc;
		leader = proc->lockGroupLeader == NULL ? proc : proc->lockGroupLeader;

		/* A proc never blocks itself or any other lock group member */
		if (leader != checkProcLeader)
		{
			for (lm = 1; lm <= numLockModes; lm++)
			{
				if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
					(conflictMask & LOCKBIT_ON(lm)))
				{
					/* This proc hard-blocks checkProc */
					if (FindLockCycleRecurse(proc, depth + 1,
											 softEdges, nSoftEdges))
					{
						/* fill deadlockDetails[] */
						DEADLOCK_INFO *info = &deadlockDetails[depth];

						info->locktag = lock->tag;
						info->lockmode = checkProc->waitLockMode;
						info->pid = checkProc->pid;

						return true;
					}

					/*
					 * No deadlock here, but see if this proc is an autovacuum
					 * that is directly hard-blocking our own proc.  If so,
					 * report it so that the caller can send a cancel signal
					 * to it, if appropriate.  If there's more than one such
					 * proc, it's indeterminate which one will be reported.
					 *
					 * We don't touch autovacuums that are indirectly blocking
					 * us; it's up to the direct blockee to take action.  This
					 * rule simplifies understanding the behavior and ensures
					 * that an autovacuum won't be canceled with less than
					 * deadlock_timeout grace period.
					 *
					 * Note we read statusFlags without any locking.  This is
					 * OK only for checking the PROC_IS_AUTOVACUUM flag,
					 * because that flag is set at process start and never
					 * reset.  There is logic elsewhere to avoid canceling an
					 * autovacuum that is working to prevent XID wraparound
					 * problems (which needs to read a different statusFlags
					 * bit), but we don't do that here to avoid grabbing
					 * ProcArrayLock.
					 */
					if (checkProc == MyProc &&
						proc->statusFlags & PROC_IS_AUTOVACUUM)
						blocking_autovacuum_proc = proc;

					/* We're done looking at this proclock */
					break;
				}
			}
		}
	}
	/*
	 * Scan for procs that are ahead of this one in the lock's wait queue.
	 * Those that have conflicting requests soft-block this one.  This must be
	 * done after the hard-block search, since if another proc both hard- and
	 * soft-blocks this one, we want to call it a hard edge.
	 *
	 * If there is a proposed re-ordering of the lock's wait order, use that
	 * rather than the current wait order.
	 */
	for (i = 0; i < nWaitOrders; i++)
	{
		if (waitOrders[i].lock == lock)
			break;
	}

	if (i < nWaitOrders)
	{
		/* Use the given hypothetical wait queue order */
		PGPROC	  **procs = waitOrders[i].procs;
		int			queue_size = waitOrders[i].nProcs;

		for (i = 0; i < queue_size; i++)
		{
			proc = procs[i];
			leader = proc->lockGroupLeader == NULL ? proc :
				proc->lockGroupLeader;

			/*
			 * TopoSort will always return an ordering with group members
			 * adjacent to each other in the wait queue (see comments
			 * therein).  So, as soon as we reach a process in the same lock
			 * group as checkProc, we know we've found all the conflicts that
			 * precede any member of the lock group led by checkProcLeader.
			 */
			if (leader == checkProcLeader)
				break;

			/* Is there a conflict with this guy's request? */
			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0)
			{
				/* This proc soft-blocks checkProc */
				if (FindLockCycleRecurse(proc, depth + 1,
										 softEdges, nSoftEdges))
				{
					/* fill deadlockDetails[] */
					DEADLOCK_INFO *info = &deadlockDetails[depth];

					info->locktag = lock->tag;
					info->lockmode = checkProc->waitLockMode;
					info->pid = checkProc->pid;

					/*
					 * Add this edge to the list of soft edges in the cycle
					 */
					Assert(*nSoftEdges < MaxBackends);
					softEdges[*nSoftEdges].waiter = checkProcLeader;
					softEdges[*nSoftEdges].blocker = leader;
					softEdges[*nSoftEdges].lock = lock;
					(*nSoftEdges)++;
					return true;
				}
			}
		}
	}
	else
	{
		PGPROC	   *lastGroupMember = NULL;
		dlist_iter	proc_iter;
		dclist_head *waitQueue;

		/* Use the true lock wait queue order */
		waitQueue = &lock->waitProcs;

		/*
		 * Find the last member of the lock group that is present in the wait
		 * queue.  Anything after this is not a soft lock conflict.  If group
		 * locking is not in use, then we know immediately which process we're
		 * looking for, but otherwise we've got to search the wait queue to
		 * find the last process actually present.
		 */
		if (checkProc->lockGroupLeader == NULL)
			lastGroupMember = checkProc;
		else
		{
			dclist_foreach(proc_iter, waitQueue)
			{
				proc = dlist_container(PGPROC, links, proc_iter.cur);

				if (proc->lockGroupLeader == checkProcLeader)
					lastGroupMember = proc;
			}
			Assert(lastGroupMember != NULL);
		}

		/*
		 * OK, now rescan (or scan) the queue to identify the soft conflicts.
		 */
		dclist_foreach(proc_iter, waitQueue)
		{
			proc = dlist_container(PGPROC, links, proc_iter.cur);

			leader = proc->lockGroupLeader == NULL ? proc :
				proc->lockGroupLeader;

			/* Done when we reach the target proc */
			if (proc == lastGroupMember)
				break;

			/* Is there a conflict with this guy's request? */
			if ((LOCKBIT_ON(proc->waitLockMode) & conflictMask) != 0 &&
				leader != checkProcLeader)
			{
				/* This proc soft-blocks checkProc */
				if (FindLockCycleRecurse(proc, depth + 1,
										 softEdges, nSoftEdges))
				{
					/* fill deadlockDetails[] */
					DEADLOCK_INFO *info = &deadlockDetails[depth];

					info->locktag = lock->tag;
					info->lockmode = checkProc->waitLockMode;
					info->pid = checkProc->pid;

					/*
					 * Add this edge to the list of soft edges in the cycle
					 */
					Assert(*nSoftEdges < MaxBackends);
					softEdges[*nSoftEdges].waiter = checkProcLeader;
					softEdges[*nSoftEdges].blocker = leader;
					softEdges[*nSoftEdges].lock = lock;
					(*nSoftEdges)++;
					return true;
				}
			}
		}
	}

	/*
	 * No conflict detected here.
	 */
	return false;
}
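
/*
 * Worked example of a soft edge (illustrative commentary): suppose the
 * wait queue for lock L is [P2, P1], P1's requested mode conflicts with
 * P2's request, and P2 does not already hold a conflicting lock.  Then P2
 * soft-blocks P1, giving the soft edge (waiter=P1, blocker=P2, lock=L).
 * If that edge lies on a deadlock cycle, reversing it --- reordering the
 * queue to [P1, P2] --- may break the cycle without aborting anyone.
 */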
/*
 * ExpandConstraints -- expand a list of constraints into a set of
 *		specific new orderings for affected wait queues
 *
 * Input is a list of soft edges to be reversed.  The output is a list
 * of nWaitOrders WAIT_ORDER structs in waitOrders[], with PGPROC array
 * workspace in waitOrderProcs[].
 *
 * Returns true if able to build an ordering that satisfies all the
 * constraints, false if not (there are contradictory constraints).
 */
static bool
ExpandConstraints(EDGE *constraints,
				  int nConstraints)
{
	int			nWaitOrderProcs = 0;
	int			i,
				j;

	nWaitOrders = 0;

	/*
	 * Scan constraint list backwards.  This is because the last-added
	 * constraint is the only one that could fail, and so we want to test it
	 * for inconsistency first.
	 */
	for (i = nConstraints; --i >= 0;)
	{
		LOCK	   *lock = constraints[i].lock;

		/* Did we already make a list for this lock? */
		for (j = nWaitOrders; --j >= 0;)
		{
			if (waitOrders[j].lock == lock)
				break;
		}
		if (j >= 0)
			continue;
		/* No, so allocate a new list */
		waitOrders[nWaitOrders].lock = lock;
		waitOrders[nWaitOrders].procs = waitOrderProcs + nWaitOrderProcs;
		waitOrders[nWaitOrders].nProcs = dclist_count(&lock->waitProcs);
		nWaitOrderProcs += dclist_count(&lock->waitProcs);
		Assert(nWaitOrderProcs <= MaxBackends);

		/*
		 * Do the topo sort.  TopoSort need not examine constraints after this
		 * one, since they must be for different locks.
		 */
		if (!TopoSort(lock, constraints, i + 1,
					  waitOrders[nWaitOrders].procs))
			return false;
		nWaitOrders++;
	}
	return true;
}
/*
 * TopoSort -- topological sort of a wait queue
 *
 * Generate a re-ordering of a lock's wait queue that satisfies given
 * constraints about certain procs preceding others.  (Each such constraint
 * is a fact of a partial ordering.)  Minimize rearrangement of the queue
 * not needed to achieve the partial ordering.
 *
 * This is a lot simpler and slower than, for example, the topological sort
 * algorithm shown in Knuth's Volume 1.  However, Knuth's method doesn't
 * try to minimize the damage to the existing order.  In practice we are
 * not likely to be working with more than a few constraints, so the apparent
 * slowness of the algorithm won't really matter.
 *
 * The initial queue ordering is taken directly from the lock's wait queue.
 * The output is an array of PGPROC pointers, of length equal to the lock's
 * wait queue length (the caller is responsible for providing this space).
 * The partial order is specified by an array of EDGE structs.  Each EDGE
 * is one that we need to reverse, therefore the "waiter" must appear before
 * the "blocker" in the output array.  The EDGE array may well contain
 * edges associated with other locks; these should be ignored.
 *
 * Returns true if able to build an ordering that satisfies all the
 * constraints, false if not (there are contradictory constraints).
 */
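/*
 * Worked example (illustrative commentary): take the wait queue [A, B, C]
 * and one constraint edge (waiter=C, blocker=A), i.e. the soft edge "C
 * waits for A" is to be reversed, so C must precede A in the output.  The
 * before-constraint counts come out as A:0, B:0, C:1.  The backward scan
 * fills ordering[] from the end: B goes in the last slot, then A (which
 * releases C's remaining constraint), then C, yielding [C, A, B].
 */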
static bool
TopoSort(LOCK *lock,
		 EDGE *constraints,
		 int nConstraints,
		 PGPROC **ordering)		/* output argument */
{
	dclist_head *waitQueue = &lock->waitProcs;
	int			queue_size = dclist_count(waitQueue);
	PGPROC	   *proc;
	int			i,
				j,
				jj,
				k,
				kk,
				last;
	dlist_iter	proc_iter;

	/* First, fill topoProcs[] array with the procs in their current order */
	i = 0;
	dclist_foreach(proc_iter, waitQueue)
	{
		proc = dlist_container(PGPROC, links, proc_iter.cur);
		topoProcs[i++] = proc;
	}
	Assert(i == queue_size);
	/*
	 * Scan the constraints, and for each proc in the array, generate a count
	 * of the number of constraints that say it must be before something else,
	 * plus a list of the constraints that say it must be after something
	 * else.  The count for the j'th proc is stored in beforeConstraints[j],
	 * and the head of its list in afterConstraints[j].  Each constraint
	 * stores its list link in constraints[i].link (note any constraint will
	 * be in just one list).  The array index for the before-proc of the i'th
	 * constraint is remembered in constraints[i].pred.
	 *
	 * Note that it's not necessarily the case that every constraint affects
	 * this particular wait queue.  Prior to group locking, a process could be
	 * waiting for at most one lock.  But a lock group can be waiting for
	 * zero, one, or multiple locks.  Since topoProcs[] is an array of the
	 * processes actually waiting, while constraints[] is an array of group
	 * leaders, we've got to scan through topoProcs[] for each constraint,
	 * checking whether both a waiter and a blocker for that group are
	 * present.  If so, the constraint is relevant to this wait queue; if not,
	 * it isn't.
	 */
	MemSet(beforeConstraints, 0, queue_size * sizeof(int));
	MemSet(afterConstraints, 0, queue_size * sizeof(int));
	for (i = 0; i < nConstraints; i++)
	{
		/*
		 * Find a representative process that is on the lock queue and part of
		 * the waiting lock group.  This may or may not be the leader, which
		 * may or may not be waiting at all.  If there are any other processes
		 * in the same lock group on the queue, set their number of
		 * beforeConstraints to -1 to indicate that they should be emitted
		 * with their groupmates rather than considered separately.
		 *
		 * In this loop and the similar one just below, it's critical that we
		 * consistently select the same representative member of any one lock
		 * group, so that all the constraints are associated with the same
		 * proc, and the -1's are only associated with not-representative
		 * members.  We select the last one in the topoProcs array.
		 */
		proc = constraints[i].waiter;
		Assert(proc != NULL);
		jj = -1;
		for (j = queue_size; --j >= 0;)
		{
			PGPROC	   *waiter = topoProcs[j];

			if (waiter == proc || waiter->lockGroupLeader == proc)
			{
				Assert(waiter->waitLock == lock);
				if (jj == -1)
					jj = j;
				else
				{
					Assert(beforeConstraints[j] <= 0);
					beforeConstraints[j] = -1;
				}
			}
		}

		/* If no matching waiter, constraint is not relevant to this lock. */
		if (jj < 0)
			continue;

		/*
		 * Similarly, find a representative process that is on the lock queue
		 * and waiting for the blocking lock group.  Again, this could be the
		 * leader but does not need to be.
		 */
		proc = constraints[i].blocker;
		Assert(proc != NULL);
		kk = -1;
		for (k = queue_size; --k >= 0;)
		{
			PGPROC	   *blocker = topoProcs[k];

			if (blocker == proc || blocker->lockGroupLeader == proc)
			{
				Assert(blocker->waitLock == lock);
				if (kk == -1)
					kk = k;
				else
				{
					Assert(beforeConstraints[k] <= 0);
					beforeConstraints[k] = -1;
				}
			}
		}

		/* If no matching blocker, constraint is not relevant to this lock. */
		if (kk < 0)
			continue;

		Assert(beforeConstraints[jj] >= 0);
		beforeConstraints[jj]++;	/* waiter must come before */
		/* add this constraint to list of after-constraints for blocker */
		constraints[i].pred = jj;
		constraints[i].link = afterConstraints[kk];
		afterConstraints[kk] = i + 1;
	}
	/*--------------------
	 * Now scan the topoProcs array backwards.  At each step, output the
	 * last proc that has no remaining before-constraints plus any other
	 * members of the same lock group; then decrease the beforeConstraints
	 * count of each of the procs it was constrained against.
	 * i = index of ordering[] entry we want to output this time
	 * j = search index for topoProcs[]
	 * k = temp for scanning constraint list for proc j
	 * last = last non-null index in topoProcs (avoid redundant searches)
	 *--------------------
	 */
	last = queue_size - 1;
	for (i = queue_size - 1; i >= 0;)
	{
		int			c;
		int			nmatches = 0;

		/* Find next candidate to output */
		while (topoProcs[last] == NULL)
			last--;
		for (j = last; j >= 0; j--)
		{
			if (topoProcs[j] != NULL && beforeConstraints[j] == 0)
				break;
		}

		/* If no available candidate, topological sort fails */
		if (j < 0)
			return false;

		/*
		 * Output everything in the lock group.  There's no point in
		 * outputting an ordering where members of the same lock group are not
		 * consecutive on the wait queue: if some other waiter is between two
		 * requests that belong to the same group, then either it conflicts
		 * with both of them and is certainly not a solution; or it conflicts
		 * with at most one of them and is thus isomorphic to an ordering
		 * where the group members are consecutive.
		 */
		proc = topoProcs[j];
		if (proc->lockGroupLeader != NULL)
			proc = proc->lockGroupLeader;
		Assert(proc != NULL);
		for (c = 0; c <= last; ++c)
		{
			if (topoProcs[c] == proc || (topoProcs[c] != NULL &&
										 topoProcs[c]->lockGroupLeader == proc))
			{
				ordering[i - nmatches] = topoProcs[c];
				topoProcs[c] = NULL;
				++nmatches;
			}
		}
		Assert(nmatches > 0);
		i -= nmatches;

		/* Update beforeConstraints counts of its predecessors */
		for (k = afterConstraints[j]; k > 0; k = constraints[k - 1].link)
			beforeConstraints[constraints[k - 1].pred]--;
	}

	/* Done */
	return true;
}
#ifdef DEBUG_DEADLOCK
static void
PrintLockQueue(LOCK *lock, const char *info)
{
	dclist_head *waitQueue = &lock->waitProcs;
	dlist_iter	proc_iter;

	printf("%s lock %p queue ", info, lock);

	dclist_foreach(proc_iter, waitQueue)
	{
		PGPROC	   *proc = dlist_container(PGPROC, links, proc_iter.cur);

		printf(" %d", proc->pid);
	}
	printf("\n");
	fflush(stdout);
}
#endif
/*
 * Report a detected deadlock, with available details.
 */
void
DeadLockReport(void)
{
	StringInfoData clientbuf;	/* errdetail for client */
	StringInfoData logbuf;		/* errdetail for server log */
	StringInfoData locktagbuf;
	int			i;

	initStringInfo(&clientbuf);
	initStringInfo(&logbuf);
	initStringInfo(&locktagbuf);

	/* Generate the "waits for" lines sent to the client */
	for (i = 0; i < nDeadlockDetails; i++)
	{
		DEADLOCK_INFO *info = &deadlockDetails[i];
		int			nextpid;

		/* The last proc waits for the first one... */
		if (i < nDeadlockDetails - 1)
			nextpid = info[1].pid;
		else
			nextpid = deadlockDetails[0].pid;

		/* reset locktagbuf to hold next object description */
		resetStringInfo(&locktagbuf);

		DescribeLockTag(&locktagbuf, &info->locktag);

		if (i > 0)
			appendStringInfoChar(&clientbuf, '\n');

		appendStringInfo(&clientbuf,
						 _("Process %d waits for %s on %s; blocked by process %d."),
						 info->pid,
						 GetLockmodeName(info->locktag.locktag_lockmethodid,
										 info->lockmode),
						 locktagbuf.data,
						 nextpid);
	}

	/* Duplicate all the above for the server ... */
	appendBinaryStringInfo(&logbuf, clientbuf.data, clientbuf.len);

	/* ... and add info about query strings */
	for (i = 0; i < nDeadlockDetails; i++)
	{
		DEADLOCK_INFO *info = &deadlockDetails[i];

		appendStringInfoChar(&logbuf, '\n');

		appendStringInfo(&logbuf,
						 _("Process %d: %s"),
						 info->pid,
						 pgstat_get_backend_current_activity(info->pid, false));
	}

	pgstat_report_deadlock();

	ereport(ERROR,
			(errcode(ERRCODE_T_R_DEADLOCK_DETECTED),
			 errmsg("deadlock detected"),
			 errdetail_internal("%s", clientbuf.data),
			 errdetail_log("%s", logbuf.data),
			 errhint("See server log for query details.")));
}
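
/*
 * Sample of the client-visible detail text built above, for a two-process
 * cycle (PIDs and lock identities invented for illustration):
 *
 *	Process 8054 waits for ShareLock on transaction 740; blocked by process 8055.
 *	Process 8055 waits for ShareLock on transaction 739; blocked by process 8054.
 */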
/*
 * RememberSimpleDeadLock: set up info for DeadLockReport when ProcSleep
 * detects a trivial (two-way) deadlock.  proc1 wants to block for lockmode
 * on lock, but proc2 is already waiting and would be blocked by proc1.
 */
void
RememberSimpleDeadLock(PGPROC *proc1,
					   LOCKMODE lockmode,
					   LOCK *lock,
					   PGPROC *proc2)
{
	DEADLOCK_INFO *info = &deadlockDetails[0];

	info->locktag = lock->tag;
	info->lockmode = lockmode;
	info->pid = proc1->pid;
	info++;
	info->locktag = proc2->waitLock->tag;
	info->lockmode = proc2->waitLockMode;
	info->pid = proc2->pid;
	nDeadlockDetails = 2;
}