ParsoidParser: Record ParserOptions watcher on ParserOutput object
[mediawiki.git] / includes / Revision / SlotRecord.php
blob0435680713ec2d0679df365682e27aeb38dba301
1 <?php
2 /**
3 * Value object representing a content slot associated with a page revision.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
23 namespace MediaWiki\Revision;
25 use Content;
26 use InvalidArgumentException;
27 use LogicException;
28 use OutOfBoundsException;
29 use Wikimedia\Assert\Assert;
30 use Wikimedia\NonSerializable\NonSerializableTrait;
32 /**
33 * Value object representing a content slot associated with a page revision.
34 * SlotRecord provides direct access to a Content object.
35 * That access may be implemented through a callback.
37 * @since 1.31
38 * @since 1.32 Renamed from MediaWiki\Storage\SlotRecord
40 class SlotRecord {
41 use NonSerializableTrait;
/**
 * Role name constant with the value 'main'.
 * NOTE(review): presumably identifies a revision's primary content slot; confirm against callers.
 */
public const MAIN = 'main';

/**
 * @var \stdClass database result row, as a raw object. Callbacks are supported for field values,
 *  to enable on-demand emulation of these values. This is primarily intended for use
 *  during schema migration. A callback value is invoked on first access through getField()
 *  and its result is written back into the row.
 */
private $row;

/**
 * @var Content|callable The slot's content, or a callback that produces it. A callback is
 *  replaced by the Content object it returned the first time getContent() succeeds.
 */
private $content;

/**
 * @var bool Whether this record describes a derived slot; exposed through isDerived().
 */
private $derived;
/**
 * Returns a new SlotRecord just like the given $slot, except that calling getContent()
 * will fail with a SuppressedDataException.
 *
 * The new record uses the same row object as $slot (no clone is made) and carries over
 * the derived flag of $slot, so isDerived() reports the same value on both records.
 *
 * @param SlotRecord $slot The slot whose content should be hidden
 *
 * @return SlotRecord
 */
public static function newWithSuppressedContent( SlotRecord $slot ) {
	return new SlotRecord(
		$slot->row,
		/**
		 * @return never
		 */
		static function () {
			throw new SuppressedDataException( 'Content suppressed!' );
		},
		// Without this argument the constructor default (false) would silently
		// turn a derived slot into a non-derived one.
		$slot->isDerived()
	);
}
/**
 * Creates an unsaved SlotRecord that is flagged as derived.
 *
 * Shorthand for newUnsaved() with the $derived argument set to true.
 *
 * @param string $role Role name of the slot
 * @param Content $content Initial content
 *
 * @return SlotRecord
 * @since 1.36
 */
public static function newDerived( string $role, Content $content ) {
	$derivedSlot = self::newUnsaved( $role, $content, true );

	return $derivedSlot;
}
/**
 * Builds a new SlotRecord from an existing one, replacing selected row fields.
 * The content (or content callback) and the derived flag are taken over unchanged;
 * the slot's content cannot be overwritten through $overrides.
 *
 * @param SlotRecord $slot Template record; its row is cloned, not modified
 * @param array $overrides Map of row field name to new value
 *
 * @return SlotRecord
 */
private static function newFromSlotRecord( SlotRecord $slot, array $overrides = [] ) {
	$copy = clone $slot->row;

	// The row ID must never be carried over. It is reset before the overrides are
	// applied, so an explicit 'slot_id' override still takes effect.
	$copy->slot_id = null;

	foreach ( $overrides as $field => $newValue ) {
		$copy->$field = $newValue;
	}

	$derived = $slot->isDerived();

	return new SlotRecord( $copy, $slot->content, $derived );
}
/**
 * Constructs a new SlotRecord for a new revision, inheriting the content of the given
 * SlotRecord of a previous revision.
 *
 * Note that SlotRecords constructed this way are intended as prototypes, to be used
 * with newSaved(). They are incomplete, so some getters such as getRevision() will fail.
 *
 * @param SlotRecord $slot Slot to inherit from; its revision ID, origin and address
 *  are read up front, so a getter that throws aborts the call.
 *
 * @return SlotRecord
 */
public static function newInherited( SlotRecord $slot ) {
	// We can't inherit from a slot that is not attached to a revision: these calls are
	// made only so that they throw if the corresponding field is missing.
	$slot->getRevision();
	$slot->getOrigin();
	$slot->getAddress();

	// NOTE: slot_origin and content_address are copied from $slot;
	// only the revision association is cleared.
	$overrides = [ 'slot_revision_id' => null ];

	return self::newFromSlotRecord( $slot, $overrides );
}
/**
 * Constructs a new slot from a Content object for a new revision.
 * This is the preferred way to construct a slot for storing Content that
 * resulted from a user edit. The slot is assumed to be not inherited.
 *
 * Note that SlotRecords constructed this way are intended as prototypes, to be used
 * with newSaved(). They are incomplete, so some getters such as getAddress() will fail.
 *
 * @param string $role Role name of the slot
 * @param Content $content Content of the slot; also supplies the model name
 * @param bool $derived Whether the slot is a derived slot
 * @return SlotRecord An incomplete proto-slot object, to be used with newSaved() later.
 */
public static function newUnsaved( string $role, Content $content, bool $derived = false ) {
	$proto = new \stdClass();

	// Not yet known:
	$proto->slot_id = null;
	$proto->slot_revision_id = null;
	// Not yet known, will be set in newSaved():
	$proto->slot_origin = null;
	// Computed later, on demand:
	$proto->content_size = null;
	$proto->content_sha1 = null;
	// Not yet known, will be set in newSaved():
	$proto->slot_content_id = null;
	$proto->content_address = null;
	// Known right away:
	$proto->role_name = $role;
	$proto->model_name = $content->getModel();

	return new SlotRecord( $proto, $content, $derived );
}
/**
 * Constructs a complete SlotRecord for a newly saved revision, based on the incomplete
 * proto-slot. This adds information that has only become available during saving,
 * particularly the revision ID, content ID and content address.
 *
 * @param int $revisionId the revision the slot is to be associated with (field slot_revision_id).
 *  If $protoSlot already has a revision, it must be the same.
 * @param int|null $contentId the ID of the row in the content table describing the content
 *  referenced by $contentAddress (field slot_content_id).
 *  If $protoSlot already has a content ID, it must be the same.
 * @param string $contentAddress the slot's content address (field content_address).
 *  If $protoSlot already has an address, it must be the same.
 * @param SlotRecord $protoSlot The proto-slot that was provided as input for creating a new
 *  revision. $protoSlot must have a content address if inherited.
 *
 * @throws LogicException If $protoSlot already has a revision ID, content address or
 *  content ID that differs from the one given.
 * @throws InvalidArgumentException If $protoSlot is inherited but lacks a content address
 *  or an origin.
 * @return SlotRecord The completed record; its origin is the proto-slot's origin if the
 *  proto-slot is inherited, and $revisionId otherwise.
 */
public static function newSaved(
	int $revisionId,
	?int $contentId,
	string $contentAddress,
	SlotRecord $protoSlot
) {
	$revisionMismatch = $protoSlot->hasRevision()
		&& $protoSlot->getRevision() !== $revisionId;
	if ( $revisionMismatch ) {
		throw new LogicException(
			"Mismatching revision ID $revisionId: "
			. "The slot already belongs to revision {$protoSlot->getRevision()}. "
			. "Use SlotRecord::newInherited() to re-use content between revisions."
		);
	}

	$addressMismatch = $protoSlot->hasAddress()
		&& $protoSlot->getAddress() !== $contentAddress;
	if ( $addressMismatch ) {
		throw new LogicException(
			"Mismatching blob address $contentAddress: "
			. "The slot already has content at {$protoSlot->getAddress()}."
		);
	}

	$contentIdMismatch = $protoSlot->hasContentId()
		&& $protoSlot->getContentId() !== $contentId;
	if ( $contentIdMismatch ) {
		throw new LogicException(
			"Mismatching content ID $contentId: "
			. "The slot already has content row {$protoSlot->getContentId()} associated."
		);
	}

	// A slot that is not inherited originates in the revision being saved.
	$origin = $revisionId;

	if ( $protoSlot->isInherited() ) {
		if ( !$protoSlot->hasAddress() ) {
			throw new InvalidArgumentException(
				"An inherited blob should have a content address!"
			);
		}
		if ( !$protoSlot->hasField( 'slot_origin' ) ) {
			throw new InvalidArgumentException(
				"A saved inherited slot should have an origin set!"
			);
		}
		$origin = $protoSlot->getOrigin();
	}

	$savedFields = [
		'slot_revision_id' => $revisionId,
		'slot_content_id' => $contentId,
		'slot_origin' => $origin,
		'content_address' => $contentAddress,
	];

	return self::newFromSlotRecord( $protoSlot, $savedFields );
}
/**
 * The $row object must have the properties slot_revision_id, slot_content_id,
 * content_address, model_name and slot_origin (their values may be null), and must
 * not have the properties slot_inherited or slot_revision.
 *
 * NOTE(review): earlier documentation listed $row->blob_data and $row->blob_address as
 * supported fields; nothing in this class reads them.
 *
 * @param \stdClass $row A database row composed of fields of the slot and content tables,
 *  as a raw object. Any field value can be a callback that produces the field value
 *  given this SlotRecord as a parameter. However, plain strings cannot be used as
 *  callbacks here, for security reasons.
 * @param Content|callable $content The content object associated with the slot, or a
 *  callback that will return that Content object, given this SlotRecord as a parameter.
 * @param bool $derived Is this a derived slot? Derived slots allow information that
 *  is derived from the content of a page to be stored even if it is generated
 *  asynchronously or updated later. Their size is not included in the revision size,
 *  their hash does not contribute to the revision hash, and updates are not included
 *  in revision history.
 */
public function __construct( \stdClass $row, $content, bool $derived = false ) {
	Assert::parameterType( [ 'Content', 'callable' ], $content, '$content' );

	// Checked in this exact order, so the first violated requirement is the one reported.
	$requiredFields = [
		'slot_revision_id',
		'slot_content_id',
		'content_address',
		'model_name',
		'slot_origin',
	];
	foreach ( $requiredFields as $field ) {
		Assert::parameter(
			property_exists( $row, $field ),
			'$row->' . $field,
			'must exist'
		);
	}

	$forbiddenFields = [ 'slot_inherited', 'slot_revision' ];
	foreach ( $forbiddenFields as $field ) {
		Assert::parameter(
			!property_exists( $row, $field ),
			'$row->' . $field,
			'must not exist'
		);
	}

	$this->row = $row;
	$this->content = $content;
	$this->derived = $derived;
}
/**
 * Returns the Content of this slot.
 *
 * If the record was constructed with a content callback, the callback is invoked with
 * this SlotRecord on the first call, and its result replaces the callback so that
 * later calls return the same object without invoking it again.
 *
 * @note This is free to load Content from whatever subsystem is necessary,
 * performing potentially expensive operations and triggering I/O-related
 * failure modes.
 *
 * @note This method does not apply audience filtering.
 *
 * @throws SuppressedDataException if access to the content is not allowed according
 * to the audience check performed by RevisionRecord::getSlot().
 * @throws BadRevisionException if the revision is permanently missing
 * @throws RevisionAccessException for other storage access errors
 *
 * @return Content The slot's content. This is a direct reference to the internal instance,
 * copy before exposing to application logic!
 */
public function getContent() {
	if ( !( $this->content instanceof Content ) ) {
		$resolved = call_user_func( $this->content, $this );

		// Only store the result once it is known to be a Content object.
		Assert::postcondition(
			$resolved instanceof Content,
			'Slot content callback should return a Content object'
		);

		$this->content = $resolved;
	}

	return $this->content;
}
/**
 * Returns the value of a data field from the database row supplied to the constructor.
 * If the field holds a callback (any callable that is not a plain string), the callback
 * is invoked with this SlotRecord, its result is stored in the row in place of the
 * callback, and that result is returned.
 *
 * @note A field that is stored as null is reported as uninitialized. If a callback
 * yields null, this call returns null and stores it, so later calls for the same
 * field throw IncompleteRevisionException.
 *
 * @param string $name
 *
 * @throws OutOfBoundsException if the row has no such field
 * @throws IncompleteRevisionException if the field exists but is null
 * @return mixed The field's value, after resolving a callback if there was one.
 */
private function getField( $name ) {
	if ( isset( $this->row->$name ) ) {
		$value = $this->row->$name;

		// NOTE: allow callbacks, but don't trust plain string callables from the database!
		// The string check comes first so is_callable() is never consulted for strings.
		if ( !is_string( $value ) && is_callable( $value ) ) {
			$value = call_user_func( $value, $this );
			$this->setField( $name, $value );
		}

		return $value;
	}

	// Distinguish between unknown and uninitialized fields.
	if ( !property_exists( $this->row, $name ) ) {
		throw new OutOfBoundsException( 'No such field: ' . $name );
	}

	throw new IncompleteRevisionException(
		'Uninitialized field: {name}',
		[ 'name' => $name ]
	);
}
/**
 * Returns a data field of the row, as obtained from getField(), converted to a string.
 *
 * @param string $name
 *
 * @throws OutOfBoundsException
 * @throws IncompleteRevisionException
 * @return string
 */
private function getStringField( $name ) {
	$raw = $this->getField( $name );

	return (string)$raw;
}
/**
 * Returns a data field of the row, as obtained from getField(), converted to an integer.
 *
 * @param string $name
 *
 * @throws OutOfBoundsException
 * @throws IncompleteRevisionException
 * @return int
 */
private function getIntField( $name ) {
	$raw = $this->getField( $name );

	return (int)$raw;
}
/**
 * Checks whether the row holds a non-null value for the given field.
 * A field holding a callback is resolved first, so the answer reflects the value the
 * callback produced, not the mere presence of the callback.
 *
 * @param string $name
 * @return bool whether this record contains the given field
 */
private function hasField( $name ) {
	$current = $this->row->$name ?? null;

	// If the field is a callback, resolve it (getField() stores the result), then re-check.
	if ( $current !== null && !is_string( $current ) && is_callable( $current ) ) {
		$this->getField( $name );
	}

	return isset( $this->row->$name );
}
/**
 * Returns the ID of the revision this slot is associated with
 * (row field slot_revision_id).
 *
 * @return int
 */
public function getRevision() {
	$revisionId = $this->getIntField( 'slot_revision_id' );

	return $revisionId;
}
/**
 * Returns the ID of the revision that originated the slot's content
 * (row field slot_origin).
 *
 * @return int
 */
public function getOrigin() {
	$originId = $this->getIntField( 'slot_origin' );

	return $originId;
}
/**
 * Whether this slot was inherited from an older revision.
 *
 * If this SlotRecord is already attached to a revision, this returns true
 * if the slot's revision of origin differs from the revision it belongs to.
 *
 * If this SlotRecord is not yet attached to a revision, this returns true
 * if the slot already has an address.
 *
 * @return bool
 */
public function isInherited() {
	if ( !$this->hasRevision() ) {
		// Unattached slot: having stored content already is taken as the sign of inheritance.
		return $this->hasAddress();
	}

	return $this->getRevision() !== $this->getOrigin();
}
/**
 * Whether this slot has an address (row field content_address). Slots will have an
 * address if their content has been stored. While building a new revision,
 * SlotRecords will not have an address associated.
 *
 * @return bool
 */
public function hasAddress() {
	$present = $this->hasField( 'content_address' );

	return $present;
}
/**
 * Whether this slot has an origin, that is, the ID of the revision that originated
 * the slot's content (row field slot_origin).
 *
 * @since 1.32
 *
 * @return bool
 */
public function hasOrigin() {
	$present = $this->hasField( 'slot_origin' );

	return $present;
}
/**
 * Whether this slot has a content ID (row field slot_content_id). Slots will have a
 * content ID if their content has been stored in the content table. While building a
 * new revision, SlotRecords will not have an ID associated.
 *
 * Also, during schema migration, hasContentId() may return false when encountering an
 * un-migrated database entry in SCHEMA_COMPAT_WRITE_BOTH mode.
 * It will however always return true for saved revisions on SCHEMA_COMPAT_READ_NEW mode,
 * or without SCHEMA_COMPAT_WRITE_NEW mode. In the latter case, an emulated content ID
 * is used, derived from the revision's text ID.
 *
 * Note that hasContentId() returning false while hasRevision() returns true always
 * indicates an unmigrated row in SCHEMA_COMPAT_WRITE_BOTH mode, as described above.
 * For an unsaved slot, both these methods would return false.
 *
 * @since 1.32
 *
 * @return bool
 */
public function hasContentId() {
	$present = $this->hasField( 'slot_content_id' );

	return $present;
}
/**
 * Whether this slot has a revision ID associated (row field slot_revision_id). Slots
 * will have a revision ID associated only if they were loaded as part of an existing
 * revision. While building a new revision, SlotRecords will not have a revision ID
 * associated.
 *
 * @return bool
 */
public function hasRevision() {
	$present = $this->hasField( 'slot_revision_id' );

	return $present;
}
/**
 * Returns the role of the slot (row field role_name).
 *
 * @return string
 */
public function getRole() {
	$role = $this->getStringField( 'role_name' );

	return $role;
}
/**
 * Returns the address of this slot's content (row field content_address).
 * This address can be used with BlobStore to load the Content object.
 *
 * @return string
 */
public function getAddress() {
	$address = $this->getStringField( 'content_address' );

	return $address;
}
/**
 * Returns the ID of the content meta data row associated with the slot
 * (row field slot_content_id).
 * This information should be irrelevant to application logic, it is here to allow
 * the construction of a full row for the revision table.
 *
 * Note that this method may return an emulated value during schema migration in
 * SCHEMA_COMPAT_WRITE_OLD mode. See RevisionStore::emulateContentId for more information.
 *
 * @return int
 */
public function getContentId() {
	$contentId = $this->getIntField( 'slot_content_id' );

	return $contentId;
}
/**
 * Returns the content size.
 *
 * The size recorded in the row (field content_size) is used when present. Otherwise it
 * is obtained from the Content object and stored in the row for later calls.
 *
 * @return int size of the content, in bogo-bytes, as reported by Content::getSize.
 */
public function getSize() {
	try {
		return $this->getIntField( 'content_size' );
	} catch ( IncompleteRevisionException $ex ) {
		// Size not recorded yet; fall through and derive it from the content.
	}

	$computed = $this->getContent()->getSize();
	$this->setField( 'content_size', $computed );

	return $computed;
}
/**
 * Returns the hash of the content.
 *
 * The hash recorded in the row (field content_sha1) is used when it is a non-empty
 * string. Otherwise the content is serialized (using the row's format_name, if any),
 * hashed with base36Sha1(), and the result is stored in the row for later calls.
 *
 * @return string hash of the content.
 */
public function getSha1() {
	try {
		$stored = $this->getStringField( 'content_sha1' );
	} catch ( IncompleteRevisionException $ex ) {
		// An uninitialized field is treated like an empty one.
		$stored = '';
	}

	if ( $stored !== '' ) {
		return $stored;
	}

	// Compute if missing.
	if ( $this->hasField( 'format_name' ) ) {
		$format = $this->getStringField( 'format_name' );
	} else {
		$format = null;
	}

	$serialized = $this->getContent()->serialize( $format );
	$computed = self::base36Sha1( $serialized );
	$this->setField( 'content_sha1', $computed );

	return $computed;
}
/**
 * Returns the content model. This is the model name that decides
 * which ContentHandler is appropriate for interpreting the
 * data of the blob referenced by the address returned by getAddress().
 *
 * The model recorded in the row (field model_name) is used when present. Otherwise it
 * is obtained from the Content object and stored in the row for later calls.
 *
 * @return string the content model of the content
 */
public function getModel() {
	try {
		return $this->getStringField( 'model_name' );
	} catch ( IncompleteRevisionException $ex ) {
		// Model not recorded yet; fall through and ask the content.
	}

	$fromContent = $this->getContent()->getModel();
	$this->setField( 'model_name', $fromContent );

	return $fromContent;
}
/**
 * Returns the blob serialization format as a MIME type (row field format_name).
 *
 * @note When this method returns null, the caller is expected
 * to auto-detect the serialization format, or to rely on
 * the default format associated with the content model.
 *
 * @return string|null The format, or null if the row has no format_name value.
 */
public function getFormat() {
	// XXX: we currently do not plan to store the format for each slot!
	return $this->hasField( 'format_name' )
		? $this->getStringField( 'format_name' )
		: null;
}
/**
 * Stores a value in the row under the given field name, replacing whatever was
 * there before (including a callback).
 *
 * @param string $name
 * @param string|int|null $value
 */
private function setField( $name, $value ) {
	$this->row->{$name} = $value;
}
/**
 * Get the base 36 SHA-1 value for a string of text.
 *
 * The hexadecimal SHA-1 digest of $blob is converted from base 16 to base 36 by
 * \Wikimedia\base_convert(), called with 31 as its fourth argument.
 *
 * MCR migration note: this replaced Revision::base36Sha1
 *
 * @param string $blob
 * @return string
 */
public static function base36Sha1( $blob ) {
	$hexDigest = sha1( $blob );

	return \Wikimedia\base_convert( $hexDigest, 16, 36, 31 );
}
/**
 * Returns true if $other has the same content as this slot.
 * The check is performed based on the model, address, size, and hash.
 * Two slots can have the same content if they use different content addresses,
 * but if they have the same address and the same model, they have the same content.
 * Two slots can have the same content if they belong to different
 * revisions or pages.
 *
 * Note that hasSameContent() may return false even if Content::equals returns true for
 * the content of two slots. This may happen if the two slots have different serializations
 * representing equivalent Content. Such false negatives are considered acceptable. Code
 * that has to be absolutely sure the Content is really not the same if hasSameContent()
 * returns false should call getContent() and compare the Content objects directly.
 *
 * @since 1.32
 *
 * @param SlotRecord $other
 * @return bool
 */
public function hasSameContent( SlotRecord $other ) {
	if ( $other === $this ) {
		return true;
	}

	if ( $this->getModel() !== $other->getModel() ) {
		return false;
	}

	// Addresses are compared strictly: with a loose comparison, two different
	// numeric-looking address strings could be treated as equal.
	if ( $this->hasAddress()
		&& $other->hasAddress()
		&& $this->getAddress() === $other->getAddress()
	) {
		return true;
	}

	// Size is compared first, so the hash is only computed when the sizes agree.
	if ( $this->getSize() !== $other->getSize() ) {
		return false;
	}

	return $this->getSha1() === $other->getSha1();
}
/**
 * Reports the derived flag this record was constructed with.
 *
 * @return bool Is this a derived slot?
 * @since 1.36
 */
public function isDerived(): bool {
	$flag = $this->derived;

	return $flag;
}