4 * @subpackage Maintenance
# Hard stop: the script is unfinished, so abort with a message before any of the code below can run.
7 die("This file is not complete; it's checked in so I don't forget it.");
10 * UTF-8 conversion of DOOOOOOOM
13 * 2. Make a convertlist of all pages
14 * 3. Enable CONVERTLOCK mode and switch to UTF-8
15 * 4. As quickly as possible, convert the cur, images, *links, user, etc. tables.
17 * 5. Unlock the wiki. Attempts to access pages on the convertlist will be
18 * trapped to read-only.
19 * 6. Go through the list, fixing up old revisions. Remove pages from the
25 * @subpackage Maintenance
28 /** Constructor, set the database */
function UtfUpdater() {
	# Fetch the handle returned by wfGetDB( DB_MASTER ) and keep it on the
	# instance as $this->db, bound by reference as before.
	$masterDb =& wfGetDB( DB_MASTER );
	$this->db =& $masterDb;
}
34 * @param string $string A string to be converted to UTF-8
/**
 * Convert a legacy single-byte string to UTF-8.
 *
 * When iconv is available the input is treated as Windows code page 1252.
 * If iconv is unavailable, or it fails on the input, the string is
 * converted with utf8_encode() instead, which reads it as ISO 8859-1.
 *
 * @param string $string A string to be converted to UTF-8
 * @return string The converted string
 */
function toUtf8( $string ) {
	if( function_exists( 'iconv' ) ) {
		# There are likely to be Windows code page 1252 chars in there.
		# Convert them to the proper UTF-8 chars if possible.
		$converted = iconv( 'CP1252', 'UTF-8', $string );
		if( $converted !== false ) {
			return $converted;
		}
		# iconv() returns false when it meets a byte it cannot convert.
		# Handing that false back would make the caller store it in place
		# of the text, so fall through to the byte-for-byte fallback.
	}
	# Will work from plain iso 8859-1 and may corrupt these chars
	return utf8_encode( $string );
}
49 * @param string $table The table name to be truncated
/**
 * Empty a table with a TRUNCATE statement, printing a notice first.
 *
 * @param string $table The table name to be truncated
 */
function clearTable( $table ) {
	print "Clearing $table...\n";
	$tableName = $this->db->tableName( $table );
	# Concatenate the resolved name: inside single quotes PHP does not
	# interpolate variables, so 'TRUNCATE $tableName' sent the literal
	# text "$tableName" to the database.
	$this->db->query( 'TRUNCATE ' . $tableName );
}
58 * @param string $table Table to be converted
59 * @param string $key Primary key, to identify fields in the UPDATE. If NULL, all fields will be used to match.
60 * @param array $fields List of all fields to grab and convert. If null, will assume you want the $key, and will ask for DISTINCT.
61 * @param string $timestamp A field which should be updated to the current timestamp on changed records.
# Re-encode text columns of $table to UTF-8: select rows whose fields contain
# bytes in the 0x80-0xFF range, convert each field with toUtf8(), and write
# the converted values back with updateArray().
# NOTE(review): several statements of this method (condition heads, argument
# lists, initialisers, closing braces) are not visible in this copy of the
# file; the notes below describe only the statements that are shown.
64 function convertTable( $table, $key, $fields = null, $timestamp = null ) {
# Caller name string, in Class::method form.
65 $fname = 'UtfUpdater::convertTable';
69 # If working on one key only, there will be multiple rows.
70 # Use DISTINCT to return only one and save us some trouble.
# Presumably the branch taken when no $fields list was passed: the key
# column becomes the only field to convert.
71 $fields = array( $key );
72 $distinct = 'DISTINCT';
# Build one OR-ed condition that matches a row if any of the fields contains
# at least one byte in the range 0x80-0xFF.
75 foreach( $fields as $field ) {
76 if( $condition ) $condition .= ' OR ';
77 $condition .= "$field RLIKE '[\x80-\xff]'";
# Select the fields to convert, with $key appended to the column list.
79 $res = $this->db
->selectArray(
81 array_merge( $fields, array( $key ) ),
85 print "Converting " . $this->db
->numResults( $res ) . " rows from $table:\n";
# Walk the result set one row object at a time.
87 while( $s = $this->db
->fetchObject( $res ) ) {
# NOTE(review): $set[] appends under a numeric index, whereas the timestamp
# entry below is keyed by column name; confirm whether this was meant to be
# $set[$field].
89 foreach( $fields as $field ) {
90 $set[] = $this->toUtf8( $s->$field );
# Stamp the row's timestamp column with the database's current timestamp.
93 $set[$timestamp] = $this->db
->timestamp();
# NOTE(review): this builds a two-element list ( $key, value ) rather than a
# $key => value map like the per-field variant below; confirm which form
# updateArray() expects.
96 $keyCond = array( $key, $s->$key );
# Variant that matches the row on the original value of every field
# (presumably the no-key case described in the @param $key documentation).
99 foreach( $fields as $field ) {
100 $keyCond[$field] = $s->$field;
103 $this->db
->updateArray(
# Progress output: print the running count after every 100th row.
108 if( ++
$n %
100 == 0 ) echo "$n\n";
# Release the result set.
111 $this->db
->freeResult( $res );
116 * @param array $tables An array of tables to be locked.
/**
 * Take WRITE locks on all the given tables with one LOCK TABLES statement.
 *
 * @param array $tables An array of tables to be locked.
 */
function lockTables( $tables ) {
	$locks = array();
	foreach( $tables as $table ) {
		$tableName = $this->db->tableName( $table );
		# Concatenate rather than writing '$tableName WRITE': single quotes
		# do not interpolate, so the literal text "$tableName" ended up in
		# the statement instead of the resolved table name.
		$locks[] = $tableName . ' WRITE';
	}
	if( !$locks ) {
		# Nothing to lock; a bare "LOCK TABLES " would be a syntax error.
		return;
	}
	$this->db->query( 'LOCK TABLES ' . implode( ', ', $locks ) );
}
# Run the conversion in order: lock the tables, clear the cache tables,
# convert the text columns of the remaining tables, then unlock.
131 function updateAll() {
# Lock every table that is cleared or converted below.
132 $this->lockTables( array(
133 'linkscc', 'objectcache', 'searchindex', 'querycache',
134 'ipblocks', 'user', 'page', 'revision', 'recentchanges',
135 'brokenlinks', 'categorylinks', 'imagelinks', 'watchlist',
136 'image', 'oldimage', 'archive' ) );
138 # These are safe to clear out:
139 $this->clearTable( 'linkscc' );
140 $this->clearTable( 'objectcache' );
142 # These need to be rebuilt if used:
143 $this->clearTable( 'searchindex' );
144 $this->clearTable( 'querycache' );
146 # And convert the rest...
# Arguments are: table, key column, fields to convert, and optionally a
# timestamp column to refresh on changed rows.
147 $this->convertTable( 'ipblocks', 'ipb_id', array( 'ipb_reason' ) );
# NOTE(review): the end of this call (anything after the field list) is not
# visible in this copy of the file.
148 $this->convertTable( 'user', 'user_id',
149 array( 'user_name', 'user_real_name', 'user_options' ),
# page_touched is passed as the timestamp column for converted page rows.
151 $this->convertTable( 'page', 'page_id',
152 array( 'page_title' ), 'page_touched' );
153 $this->convertTable( 'revision', 'rev_id',
154 array( 'rev_user_text', 'rev_comment' ) );
156 $this->convertTable( 'recentchanges', 'rc_id',
157 array( 'rc_user_text', 'rc_title', 'rc_comment' ) );
# No field list is given for these four, so convertTable() converts the key
# column itself and selects with DISTINCT.
159 $this->convertTable( 'brokenlinks', 'bl_to' );
160 $this->convertTable( 'categorylinks', 'cl_to' );
161 $this->convertTable( 'imagelinks', 'il_to' );
162 $this->convertTable( 'watchlist', 'wl_title' );
164 # FIXME: We'll also need to change the files.
165 $this->convertTable( 'image', 'img_name',
166 array( 'img_name', 'img_description', 'img_user_text' ) );
# NOTE(review): the key 'archive_name' lacks the oi_ prefix that every field
# in the list carries (compare 'oi_archive_name'); verify the column name.
167 $this->convertTable( 'oldimage', 'archive_name',
168 array( 'oi_name', 'oi_archive_name', 'oi_description', 'oi_user_text' ) );
170 # Don't change the ar_text entries; use $wgLegacyEncoding to read them at runtime
# A null key is passed here; per convertTable()'s parameter documentation,
# all fields are then used to match the row.
171 $this->convertTable( 'archive', null,
172 array( 'ar_title', 'ar_comment', 'ar_user_text' ) );
173 echo "Not converting text table: be sure to set \$wgLegacyEncoding!\n";
# Release the table locks.
175 $this->db
->query( 'UNLOCK TABLES' );