3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
20 namespace Wikimedia\Rdbms
;
22 use Psr\Log\LoggerAwareInterface
;
23 use Psr\Log\LoggerInterface
;
24 use Psr\Log\NullLogger
;
26 use Wikimedia\ScopedCallback
;
27 use Wikimedia\Stats\StatsFactory
;
30 * Detect high-contention DB queries via profiling calls.
32 * This class is meant to work with an IDatabase object, which manages queries.
34 * @internal For use by Database only
39 class TransactionProfiler
implements LoggerAwareInterface
{
/** @var LoggerInterface Receives the warnings emitted by this profiler */
private $logger;
/** @var StatsFactory */
private $statsFactory;
/** @var array<string,array> Map of (event name => map of FLD_* class constants) */
private $expect;
/** @var array<string,int> Map of (event name => current hits) */
private $hits;
/** @var array<string,int> Map of (event name => violation counter) */
private $violations;
/** @var array<string,int> Map of (event name => silence counter) */
private $silenced;

/**
 * @var array<string,array> Map of (trx ID => (write start time, list of DBs involved))
 * @phan-var array<string,array{start:float,conns:array<string,int>}>
 */
private $dbTrxHoldingLocks;

/**
 * @var array[][] Map of (trx ID => list of (query name, start time, end time))
 * @phan-var array<string,array<int,array{0:string|GeneralizedSql,1:float,2:float}>>
 */
private $dbTrxMethodTimes;

/** @var string|null HTTP request method; null for CLI mode */
private $method;

/** @var float|null Mock UNIX timestamp bound by reference in setMockTime(); null for the real clock */
private $wallClockOverride;

/** Treat locks as long-running if they last longer than this many seconds */
private const DB_LOCK_THRESHOLD_SEC = 3.0;
/** Include events in any violation logs if they last longer than this many seconds */
private const EVENT_THRESHOLD_SEC = 0.25;

/** List of event names */
private const EVENT_NAMES = [
	'writes',
	'queries',
	'conns',
	'masterConns',
	'maxAffected',
	'readQueryRows',
	'readQueryTime',
	'writeQueryTime'
];

/** List of event names with hit counters */
private const COUNTER_EVENT_NAMES = [
	'writes',
	'queries',
	'conns',
	'masterConns'
];

/** Key to max expected value */
private const FLD_LIMIT = 0;
/** Key to the function that set the max expected value */
private const FLD_FNAME = 1;

/** Any type of expectation */
public const EXPECTATION_ANY = 'any';
/** Any expectations about replica usage never occurring */
public const EXPECTATION_REPLICAS_ONLY = 'replicas-only';
/**
 * Start with placeholder (unlimited) expectations, no tracked transactions,
 * nothing silenced, a no-op logger and a no-op stats factory.
 */
public function __construct() {
	$this->initPlaceholderExpectations();

	$this->dbTrxMethodTimes = [];
	$this->dbTrxHoldingLocks = [];

	// Every known event starts with a silence counter of zero (i.e. not silenced)
	$this->silenced = array_fill_keys( self::EVENT_NAMES, 0 );

	$this->setLogger( new NullLogger() );
	$this->statsFactory = StatsFactory::newNull();
}
/**
 * Set the logger that receives the warnings emitted by this profiler
 *
 * @param LoggerInterface $logger
 */
public function setLogger( LoggerInterface $logger ) {
	$this->logger = $logger;
}
/**
 * Set the stats factory used to count expectation violations
 *
 * @param StatsFactory $statsFactory
 * @return void
 */
public function setStatsFactory( StatsFactory $statsFactory ) {
	$this->statsFactory = $statsFactory;
}
/**
 * Set the HTTP request method used to label violation metrics
 *
 * @param ?string $method HTTP method; null for CLI mode
 * @return void
 */
public function setRequestMethod( ?string $method ) {
	$this->method = $method;
}
/**
 * Temporarily ignore expectations until the returned object goes out of scope
 *
 * During this time, violation of expectations will not be logged and counters
 * for expectations (e.g. "conns") will not be incremented.
 *
 * This will suppress warnings about event counters which have a limit of zero.
 * The main use case is to avoid warnings about primary connections/writes and
 * warnings about getting any primary/replica connections at all.
 *
 * With EXPECTATION_REPLICAS_ONLY, only the 'writes' and 'masterConns' events are
 * candidates, and only those whose current limit is exactly the integer 0 are
 * silenced. With any other type, every event is silenced.
 *
 * @param string $type Class EXPECTATION_* constant [default: TransactionProfiler::EXPECTATION_ANY]
 * @return ScopedCallback
 */
public function silenceForScope( string $type = self::EXPECTATION_ANY ) {
	if ( $type === self::EXPECTATION_REPLICAS_ONLY ) {
		$events = array_values( array_filter(
			[ 'writes', 'masterConns' ],
			function ( $candidate ) {
				return $this->expect[$candidate][self::FLD_LIMIT] === 0;
			}
		) );
	} else {
		$events = self::EVENT_NAMES;
	}

	// Silence counters nest; each scope bumps them once...
	foreach ( $events as $silencedEvent ) {
		$this->silenced[$silencedEvent]++;
	}

	// ...and releases exactly the same events when the scope ends
	return new ScopedCallback( function () use ( $events ) {
		foreach ( $events as $silencedEvent ) {
			$this->silenced[$silencedEvent]--;
		}
	} );
}
/**
 * Set performance expectations
 *
 * With conflicting expectations, the most narrow ones will be used. A limit equal
 * to the current one also replaces it, so the recorded caller becomes $fname.
 * Unknown event names are ignored.
 *
 * @param string $event Event name, {@see self::EVENT_NAMES}
 * @param float|int $limit Maximum event count, event value, or total event value
 * @param string $fname Caller
 * @return void
 */
public function setExpectation( string $event, $limit, string $fname ) {
	$current = $this->expect[$event] ?? null;
	if ( $current === null ) {
		return; // obsolete/bogus expectation
	}

	if ( $limit <= $current[self::FLD_LIMIT] ) {
		// New limit is more restrictive
		$this->expect[$event] = [ self::FLD_LIMIT => $limit, self::FLD_FNAME => $fname ];
	}
}
/**
 * Set one or multiple performance expectations
 *
 * With conflicting expectations, the most narrow ones will be used
 *
 * Use this to initialize expectations or make them stricter mid-request
 *
 * @param array $expects Map of (event name => limit), {@see self::EVENT_NAMES}
 * @param string $fname Caller
 * @return void
 */
public function setExpectations( array $expects, string $fname ) {
	foreach ( $expects as $eventName => $limit ) {
		$this->setExpectation( $eventName, $limit, $fname );
	}
}
/**
 * Reset all performance expectations and hit counters
 *
 * Use this for unit testing or before applying a totally different set of expectations
 * for a different part of the request, such as during "post-send" (execution after HTTP
 * response completion)
 *
 * @return void
 */
public function resetExpectations() {
	// Back to unlimited placeholder limits with zeroed hit and violation counters
	$this->initPlaceholderExpectations();
}
/**
 * Clear all expectations and hit counters and set new performance expectations
 *
 * Use this to apply a totally different set of expectations for a different part
 * of the request, such as during "post-send" (execution after HTTP response completion)
 *
 * @param array $expects Map of (event name => limit), {@see self::EVENT_NAMES}
 * @param string $fname Caller
 * @return void
 */
public function redefineExpectations( array $expects, string $fname ) {
	// Wipe the old limits first so that looser new limits can take effect
	$this->initPlaceholderExpectations();
	$this->setExpectations( $expects, $fname );
}
/**
 * Mark a DB as having been connected to with a new handle
 *
 * Note that there can be multiple connections to a single DB.
 *
 * @param string $server DB server
 * @param string|null $db DB name
 * @param bool $isPrimaryWithReplicas If the server is the primary and there are replicas
 * @return void
 */
public function recordConnection( $server, $db, bool $isPrimaryWithReplicas ) {
	$what = "[connect to $server ($db)]";

	// Report when too many connections happen...
	if ( $this->pingAndCheckThreshold( 'conns' ) ) {
		$this->reportExpectationViolated( 'conns', $what, $this->hits['conns'] );
	}

	// Report when too many primary connections happen...
	if ( $isPrimaryWithReplicas && $this->pingAndCheckThreshold( 'masterConns' ) ) {
		$this->reportExpectationViolated( 'masterConns', $what, $this->hits['masterConns'] );
	}
}
/**
 * Mark a DB as in a transaction with one or more writes pending
 *
 * Note that there can be multiple connections to a single DB.
 *
 * If the transaction is already being tracked, a warning is logged and its
 * tracking state is restarted.
 *
 * @param string $server DB server
 * @param string|null $db DB name
 * @param string $id ID string of transaction
 * @param float $startTime UNIX timestamp
 * @return void
 */
public function transactionWritingIn( $server, $db, string $id, float $startTime ) {
	// Must match the key built in transactionWritingOut()
	$name = "{$db} {$server} TRX#$id";
	if ( isset( $this->dbTrxHoldingLocks[$name] ) ) {
		$this->logger->warning( "Nested transaction for '$name' - out of sync." );
	}
	$this->dbTrxHoldingLocks[$name] = [
		'start' => $startTime,
		'conns' => [], // all connections involved
	];
	$this->dbTrxMethodTimes[$name] = [];

	// Track all DBs in transactions for this transaction.
	// Iterate over keys instead of by-reference values so that no dangling
	// reference is left behind and the outer $name is not overwritten.
	// NOTE(review): as before, each tracked transaction only registers its own
	// key in its 'conns' map; confirm whether registering the new transaction
	// in every open transaction was the intent.
	foreach ( array_keys( $this->dbTrxHoldingLocks ) as $trxName ) {
		$this->dbTrxHoldingLocks[$trxName]['conns'][$trxName] = 1;
	}
}
/**
 * Register the name and time of a method for slow DB trx detection
 *
 * This assumes that all queries are synchronous (non-overlapping)
 *
 * Besides checking the row-count, query-count and query-time expectations, this
 * appends the query to the timeline of every transaction currently holding locks,
 * provided it is a write or lasted at least EVENT_THRESHOLD_SEC.
 *
 * @param string|GeneralizedSql $query Function name or generalized SQL
 * @param float $sTime Starting UNIX wall time
 * @param bool $isWrite Whether this is a write query
 * @param int|null $rowCount Number of affected/read rows
 * @param string $trxId Transaction id
 * @param string|null $serverName db host name like db1234
 * @return void
 */
public function recordQueryCompletion(
	$query,
	float $sTime,
	bool $isWrite,
	?int $rowCount,
	string $trxId,
	?string $serverName = null
) {
	$eTime = $this->getCurrentTime();
	$elapsed = $eTime - $sTime;

	// Report queries that touch too many rows...
	$rowEvent = $isWrite ? 'maxAffected' : 'readQueryRows';
	if ( $this->isAboveThreshold( $rowCount, $rowEvent ) ) {
		$this->reportExpectationViolated( $rowEvent, $query, $rowCount, $trxId, $serverName );
	}

	// Report when too many writes/queries happen...
	if ( $this->pingAndCheckThreshold( 'queries' ) ) {
		$this->reportExpectationViolated( 'queries', $query, $this->hits['queries'], $trxId, $serverName );
	}
	if ( $isWrite && $this->pingAndCheckThreshold( 'writes' ) ) {
		$this->reportExpectationViolated( 'writes', $query, $this->hits['writes'], $trxId, $serverName );
	}

	// Report slow queries...
	$timeEvent = $isWrite ? 'writeQueryTime' : 'readQueryTime';
	if ( $this->isAboveThreshold( $elapsed, $timeEvent ) ) {
		$this->reportExpectationViolated( $timeEvent, $query, $elapsed, $trxId, $serverName );
	}

	if ( !$this->dbTrxHoldingLocks ) {
		// Short-circuit
		return;
	}
	if ( !$isWrite && $elapsed < self::EVENT_THRESHOLD_SEC ) {
		// Not an important query nor slow enough
		return;
	}

	foreach ( $this->dbTrxHoldingLocks as $name => $info ) {
		$lastQuery = end( $this->dbTrxMethodTimes[$name] );
		// Additional queries must start after the previous entry ended;
		// the first query must start after the transaction's write start time
		$notBefore = $lastQuery ? $lastQuery[2] : $info['start'];
		if ( $sTime < $notBefore ) {
			continue;
		}
		if ( $lastQuery && ( $sTime - $notBefore ) > self::EVENT_THRESHOLD_SEC ) {
			// Add an entry representing the time spent doing non-queries
			$this->dbTrxMethodTimes[$name][] = [ '...delay...', $notBefore, $sTime ];
		}
		$this->dbTrxMethodTimes[$name][] = [ $query, $sTime, $eTime ];
	}
}
/**
 * Mark a DB as no longer in a transaction
 *
 * This will check if locks are possibly held for longer than
 * needed and log any affected transactions to a special DB log.
 * Note that there can be multiple connections to a single DB.
 *
 * @param string $server DB server
 * @param string|null $db DB name
 * @param string $id ID string of transaction
 * @param float $writeTime Time spent in write queries
 * @param int $affected Number of rows affected by writes
 * @return void
 */
public function transactionWritingOut(
	$server,
	$db,
	string $id,
	float $writeTime,
	int $affected
) {
	// Must match $name in transactionWritingIn()
	$trxKey = "{$db} {$server} TRX#$id";
	if ( !isset( $this->dbTrxMethodTimes[$trxKey] ) ) {
		$this->logger->warning( "Detected no transaction for '$trxKey' - out of sync." );
		return;
	}

	$what = "[transaction writes to {$db} at {$server}]";
	$slow = false;

	// Warn if too much time was spent writing...
	if ( $this->isAboveThreshold( $writeTime, 'writeQueryTime' ) ) {
		$this->reportExpectationViolated( 'writeQueryTime', $what, $writeTime, $id );
		$slow = true;
	}
	// Warn if too many rows were changed...
	if ( $this->isAboveThreshold( $affected, 'maxAffected' ) ) {
		$this->reportExpectationViolated( 'maxAffected', $what, $affected, $id );
	}

	// Fill in the last non-query period...
	$lastQuery = end( $this->dbTrxMethodTimes[$trxKey] );
	if ( $lastQuery ) {
		$now = $this->getCurrentTime();
		$lastEnd = $lastQuery[2];
		if ( ( $now - $lastEnd ) > self::EVENT_THRESHOLD_SEC ) {
			$this->dbTrxMethodTimes[$trxKey][] = [ '...delay...', $lastEnd, $now ];
		}
	}

	// Check for any slow queries or non-query periods...
	foreach ( $this->dbTrxMethodTimes[$trxKey] as [ , $entryStart, $entryEnd ] ) {
		if ( ( $entryEnd - $entryStart ) >= self::DB_LOCK_THRESHOLD_SEC ) {
			$slow = true;
			break;
		}
	}

	if ( $slow ) {
		// One line per timeline entry: index, duration, generalized query
		$trace = '';
		foreach ( $this->dbTrxMethodTimes[$trxKey] as $i => [ $query, $sTime, $end ] ) {
			$trace .= sprintf(
				"%-2d %.3fs %s\n", $i, ( $end - $sTime ), $this->getGeneralizedSql( $query ) );
		}
		$this->logger->warning( "Suboptimal transaction [{dbs}]:\n{trace}", [
			'dbs' => implode( ', ', array_keys( $this->dbTrxHoldingLocks[$trxKey]['conns'] ) ),
			'trace' => mb_substr( $trace, 0, 2000 )
		] );
	}

	unset( $this->dbTrxHoldingLocks[$trxKey] );
	unset( $this->dbTrxMethodTimes[$trxKey] );
}
/**
 * Give every event an unlimited (INF) limit with no recorded caller and
 * zero all hit and violation counters
 *
 * @return void
 */
private function initPlaceholderExpectations() {
	$placeholder = [ self::FLD_LIMIT => INF, self::FLD_FNAME => null ];

	$this->expect = array_fill_keys( self::EVENT_NAMES, $placeholder );
	$this->hits = array_fill_keys( self::COUNTER_EVENT_NAMES, 0 );
	$this->violations = array_fill_keys( self::EVENT_NAMES, 0 );
}
/**
 * Check whether a value exceeds the limit of an event that is not silenced
 *
 * @param float|int $value
 * @param string $event
 * @return bool Always false while the event is silenced
 */
private function isAboveThreshold( $value, string $event ) {
	$isSilenced = $this->silenced[$event] > 0;

	return !$isSilenced && $value > $this->expect[$event][self::FLD_LIMIT];
}
/**
 * Increment the hit counter of an event and check whether it now exceeds the limit
 *
 * While the event is silenced, the counter is left untouched.
 *
 * @param string $event
 * @return bool Always false while the event is silenced
 */
private function pingAndCheckThreshold( string $event ) {
	if ( $this->silenced[$event] > 0 ) {
		return false;
	}

	$this->hits[$event]++;

	return $this->hits[$event] > $this->expect[$event][self::FLD_LIMIT];
}
/**
 * Count and log a violated expectation
 *
 * The stats counter is only incremented on the first violation of an event and
 * only when a request method is set (i.e. not in CLI mode); the warning is
 * logged on every call.
 *
 * @param string $event
 * @param string|GeneralizedSql $query
 * @param float|int $actual
 * @param string|null $trxId Transaction id
 * @param string|null $serverName db host name like db1234
 * @return void
 */
private function reportExpectationViolated(
	string $event,
	$query,
	$actual,
	?string $trxId = null,
	?string $serverName = null
) {
	$violations = ++$this->violations[$event];
	// First violation; check if this is a web request
	if ( $violations === 1 && $this->method !== null ) {
		$this->statsFactory->getCounter( 'rdbms_trxprofiler_warnings_total' )
			->setLabel( 'event', $event )
			->setLabel( 'method', $this->method )
			->copyToStatsdAt( "rdbms_trxprofiler_warnings.$event.{$this->method}" )
			->increment();
	}

	$max = $this->expect[$event][self::FLD_LIMIT];
	$by = $this->expect[$event][self::FLD_FNAME];

	$message = "Expectation ($event <= $max) by $by not met (actual: {actualSeconds})";
	if ( $trxId ) {
		$message .= ' in trx #{trxId}';
	}
	$message .= ":\n{query}\n";

	$this->logger->warning(
		$message,
		[
			'db_log_category' => 'performance',
			'measure' => $event,
			'maxSeconds' => $max,
			'by' => $by,
			'actualSeconds' => $actual,
			'query' => $this->getGeneralizedSql( $query ),
			// Fully qualified so that it cannot resolve to a class in this namespace
			'exception' => new \RuntimeException(),
			'trxId' => $trxId,
			// Avoid truncated JSON in Logstash (T349140)
			'fullQuery' => mb_substr( $this->getRawSql( $query ), 0, 2000 ),
			'dbHost' => $serverName
		]
	);
}
/**
 * Get the generalized form of a query for logging
 *
 * @param GeneralizedSql|string $query
 * @return string The result of stringify() for GeneralizedSql; plain strings are returned as-is
 */
private function getGeneralizedSql( $query ) {
	if ( $query instanceof GeneralizedSql ) {
		return $query->stringify();
	}

	return $query;
}
/**
 * Get the raw form of a query for logging
 *
 * @param GeneralizedSql|string $query
 * @return string The result of getRawSql() for GeneralizedSql; plain strings are returned as-is
 */
private function getRawSql( $query ) {
	if ( $query instanceof GeneralizedSql ) {
		return $query->getRawSql();
	}

	return $query;
}
/**
 * Get the current time, honoring any mock time set via setMockTime()
 *
 * @return float UNIX timestamp
 * @codeCoverageIgnore
 */
private function getCurrentTime() {
	// Use ?? rather than ?: so that a mock time of 0.0 is not mistaken for "no mock"
	return $this->wallClockOverride ?? microtime( true );
}
/**
 * Bind the profiler's clock to a caller-owned variable
 *
 * The variable is held by reference, so later changes made to it by the
 * caller are seen by getCurrentTime().
 *
 * @param float|null &$time Mock UNIX timestamp for testing
 * @return void
 * @codeCoverageIgnore
 */
public function setMockTime( &$time ) {
	// Reference assignment: keeps tracking the caller's variable rather than copying its value
	$this->wallClockOverride =& $time;
}