<?php
/**
 * Test revision text compression and decompression.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance ExternalStorage
 */
24 use MediaWiki\Revision\RevisionRecord
;
25 use MediaWiki\Revision\SlotRecord
;
26 use MediaWiki\Title\Title
;
28 // @codeCoverageIgnoreStart
29 require_once __DIR__
. '/../Maintenance.php';
30 // @codeCoverageIgnoreEnd
/**
 * Maintenance script that tests revision text compression and decompression.
 *
 * Loads revisions of one page, packs their main-slot text into a HistoryBlob
 * of the requested class, reports the compression ratio and timing, then
 * unpacks every item again and verifies it against an MD5 of the original.
 *
 * @ingroup Maintenance ExternalStorage
 */
class TestCompression extends Maintenance {
	/**
	 * Declare the positional argument and the options this script accepts.
	 */
	public function __construct() {
		parent::__construct();

		$this->addArg( 'title', 'The page to test' );

		$this->addOption( 'type', 'The HistoryBlob subclass to use', false, true );
		$this->addOption( 'start', 'The start date', false, true );
		$this->addOption( 'limit', 'Maximum number of revisions to process', false, true );
	}

	/**
	 * Run the compression / decompression round trip and print the results.
	 */
	public function execute() {
		$lang = $this->getServiceContainer()->getLanguageFactory()->getLanguage( 'en' );

		$titleText = $this->getArg( 0 );
		$title = Title::newFromText( $titleText );
		if ( $title === null ) {
			// newFromText() yields null for unparsable input; stop here rather
			// than fataling on $title->getNamespace() further down.
			$this->fatalError( "Invalid title: $titleText" );
		}

		if ( $this->hasOption( 'start' ) ) {
			$startOption = $this->getOption( 'start' );
			$startUnix = strtotime( $startOption );
			if ( $startUnix === false ) {
				// strtotime() returns false for input it cannot parse.
				$this->fatalError( "Invalid start date: $startOption" );
			}
			$start = wfTimestamp( TS_MW, $startUnix );
			echo "Starting from " . $lang->timeanddate( $start ) . "\n";
		} else {
			$start = '19700101000000';
		}

		if ( $this->hasOption( 'limit' ) ) {
			// An explicit limit means: pack exactly that many revisions.
			$limit = $this->getOption( 'limit' );
			$untilHappy = false;
		} else {
			// Without a limit, keep adding revisions until the blob reports
			// that it is no longer "happy".
			// NOTE(review): the default cap of 1000 is reconstructed — confirm.
			$limit = 1000;
			$untilHappy = true;
		}

		$type = $this->getOption( 'type', ConcatenatedGzipHistoryBlob::class );

		$dbr = $this->getReplicaDB();

		$revStore = $this->getServiceContainer()->getRevisionStore();
		// NOTE(review): joinComment() is reconstructed; joinPage() is required
		// by the page_* conditions below — confirm against RevisionStore.
		$res = $revStore->newSelectQueryBuilder( $dbr )
			->joinComment()
			->joinPage()
			->where( [
				'page_namespace' => $title->getNamespace(),
				'page_title' => $title->getDBkey(),
				$dbr->expr( 'rev_timestamp', '>', $dbr->timestamp( $start ) ),
			] )
			->limit( $limit )
			->caller( __FILE__ )->fetchResultSet();

		$blob = new $type;
		// rev_id => MD5 of the uncompressed text, used for verification later.
		$hashes = [];
		// rev_id => key returned by the blob when the text was added.
		$keys = [];
		$uncompressedSize = 0;

		// Timing idiom: start at -now, add now afterwards to get the elapsed time.
		$t = -microtime( true );
		foreach ( $res as $row ) {
			$revRecord = $revStore->newRevisionFromRow( $row );
			$text = $revRecord->getSlot( SlotRecord::MAIN, RevisionRecord::RAW )
				->getContent()
				->serialize();
			$uncompressedSize += strlen( $text );
			$hashes[$row->rev_id] = md5( $text );
			$keys[$row->rev_id] = $blob->addItem( $text );
			if ( $untilHappy && !$blob->isHappy() ) {
				break;
			}
		}

		$serialized = serialize( $blob );
		$t += microtime( true );
		# print_r( $blob->mDiffMap );

		printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
			$type,
			count( $hashes ),
			$uncompressedSize / strlen( $serialized ),
			$lang->formatSize( $uncompressedSize ),
			strlen( $serialized )
		);
		printf( "Compression time: %5.2f ms\n", $t * 1000 );

		$t = -microtime( true );
		// Only ever fed the string produced by serialize() above.
		$blob = unserialize( $serialized );
		foreach ( $keys as $id => $key ) {
			$text = $blob->getItem( $key );
			// Strict comparison: with a loose one, two different all-digit
			// "0e…" hex digests compare equal as numeric strings.
			if ( md5( $text ) !== $hashes[$id] ) {
				echo "Content hash mismatch for rev_id $id\n";
			}
		}
		$t += microtime( true );
		printf( "Decompression time: %5.2f ms\n", $t * 1000 );
	}
}
// @codeCoverageIgnoreStart
// Name this script's class in $maintClass, then include the file referenced by
// RUN_MAINTENANCE_IF_MAIN (a constant defined outside this file).
$maintClass = TestCompression::class;
require_once RUN_MAINTENANCE_IF_MAIN;
// @codeCoverageIgnoreEnd