Backed out changeset b71c8c052463 (bug 1943846) for causing mass failures. CLOSED...
[gecko.git] / parser / html / nsHtml5Highlighter.h
blobd79634d33949471ecaca7399230c849165b3cf4a
1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #ifndef nsHtml5Highlighter_h
5 #define nsHtml5Highlighter_h
7 #include "nsCOMPtr.h"
8 #include "nsHtml5TreeOperation.h"
9 #include "nsHtml5UTF16Buffer.h"
10 #include "nsHtml5TreeOperation.h"
11 #include "nsAHtml5TreeOpSink.h"
13 #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
15 /**
16 * A state machine for generating HTML for display in View Source based on
17 * the transitions the tokenizer makes on the source being viewed.
19 class nsHtml5Highlighter {
20 public:
21 /**
22 * The constructor.
24 * @param aOpSink the sink for the tree ops generated by this highlighter
26 explicit nsHtml5Highlighter(nsAHtml5TreeOpSink* aOpSink);
28 /**
29 * The destructor.
31 ~nsHtml5Highlighter();
33 /**
34 * Set the op sink (for speculation).
36 void SetOpSink(nsAHtml5TreeOpSink* aOpSink);
38 /**
39 * Reset state to after generated head but before processing any of the input
40 * stream.
42 void Rewind();
44 /**
45 * Starts the generated document.
47 void Start(const nsAutoString& aTitle);
49 /**
50 * Updates the charset source via the op queue.
52 void UpdateCharsetSource(nsCharsetSource aCharsetSource);
54 /**
55 * Report a tokenizer state transition.
57 * @param aState the state being transitioned to
58 * @param aReconsume whether this is a reconsuming transition
59 * @param aPos the tokenizer's current position into the buffer
61 int32_t Transition(int32_t aState, bool aReconsume, int32_t aPos);
63 /**
64 * Report end of file.
66 * Returns `true` normally and `false` on OOM.
68 [[nodiscard]] bool End();
70 /**
71 * Set the current buffer being tokenized
73 void SetBuffer(nsHtml5UTF16Buffer* aBuffer);
75 /**
76 * Let go of the buffer being tokenized but first, flush text from it.
78 * @param aPos the first UTF-16 code unit not to flush
80 void DropBuffer(int32_t aPos);
82 /**
83 * Query whether there are so many ops in the queue
84 * that they should be flushed now.
86 * @return true if FlushOps() should be called now
88 bool ShouldFlushOps();
90 /**
91 * Flush the tree ops into the sink.
93 * @return Ok(true) if there were ops to flush, Ok(false)
94 * if there were no ops to flush and Err() on OOM.
96 mozilla::Result<bool, nsresult> FlushOps();
98 /**
99 * Linkify the current attribute value if the attribute name is one of
100 * known URL attributes. (When executing tree ops, javascript: URLs will
101 * not be linkified, though.)
103 * @param aName the name of the attribute
104 * @param aValue the value of the attribute
106 void MaybeLinkifyAttributeValue(nsHtml5AttributeName* aName,
107 nsHtml5String aValue);
110 * Inform the highlighter that the tokenizer successfully completed a
111 * named character reference.
113 void CompletedNamedCharacterReference();
116 * Adds an error annotation to the node that's currently on top of
117 * mStack.
119 * @param aMsgId the id of the message in the property file
121 void AddErrorToCurrentNode(const char* aMsgId);
124 * Adds an error annotation to the node that corresponds to the most
125 * recently opened markup declaration/tag span, character reference or
126 * run of text.
128 * @param aMsgId the id of the message in the property file
130 void AddErrorToCurrentRun(const char* aMsgId);
133 * Adds an error annotation to the node that corresponds to the most
134 * recently opened markup declaration/tag span, character reference or
135 * run of text with one atom to use when formatting the message.
137 * @param aMsgId the id of the message in the property file
138 * @param aName the atom
140 void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName);
143 * Adds an error annotation to the node that corresponds to the most
144 * recently opened markup declaration/tag span, character reference or
145 * run of text with two atoms to use when formatting the message.
147 * @param aMsgId the id of the message in the property file
148 * @param aName the first atom
149 * @param aOther the second atom
151 void AddErrorToCurrentRun(const char* aMsgId, nsAtom* aName, nsAtom* aOther);
154 * Adds an error annotation to the node that corresponds to the most
155 * recent potentially character reference-starting ampersand.
157 * @param aMsgId the id of the message in the property file
159 void AddErrorToCurrentAmpersand(const char* aMsgId);
162 * Adds an error annotation to the node that corresponds to the most
163 * recent potentially self-closing slash.
165 * @param aMsgId the id of the message in the property file
167 void AddErrorToCurrentSlash(const char* aMsgId);
170 * Enqueues a tree op for adding a base to the URLs with the view-source: URL scheme.
172 * @param aValue the base URL to add
174 void AddBase(nsHtml5String aValue);
176 /** Starts the body */
177 void StartBodyContents();
179 private:
181 * Starts a wrapper around a run of characters.
183 void StartCharacters();
186 * Starts a span with no class.
188 void StartSpan();
191 * Starts a <span> and sets the class attribute on it.
193 * @param aClass the class to set (MUST be a static string that does not
194 * need to be released!)
196 void StartSpan(const char16_t* aClass);
199 * End the current <span> or <a> in the highlighter output.
201 void EndSpanOrA();
203 /** Ends a wrapper around a run of characters. */
204 void EndCharactersAndStartMarkupRun();
207 * Starts an <a>.
209 void StartA();
212 * Flushes characters up to but not including the current one.
214 void FlushChars();
217 * Flushes characters up to and including the current one.
219 void FlushCurrent();
222 * Finishes highlighting a tag in the input data by closing the open
223 * <span> and <a> elements in the highlighter output and then starts
224 * another <span> for potentially highlighting characters potentially
225 * appearing next.
227 void FinishTag();
230 * Adds a class attribute to the current node.
232 * @param aClass the class to set (MUST be a static string that does not
233 * need to be released!)
235 void AddClass(const char16_t* aClass);
238 * Allocates a handle for an element.
240 * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
241 * in nsHtml5TreeBuilderHSupplement.h.
243 * @return the handle
245 nsIContent** AllocateContentHandle();
248 * Enqueues an element creation tree operation.
250 * @param aName the name of the element
251 * @param aAttributes the attribute holder (ownership will be taken) or
252 * nullptr for no attributes
253 * @param aIntendedParent the intended parent node for the created element
254 * @param aCreator the content creator function
255 * @return the handle for the element that will be created
257 nsIContent** CreateElement(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
258 nsIContent** aIntendedParent,
259 mozilla::dom::HTMLContentCreatorFunction aCreator);
262 * Gets the handle for the current node. May be called only after the
263 * root element has been set.
265 * @return the handle for the current node
267 nsIContent** CurrentNode();
270 * Create an element and push it (its handle) on the stack.
272 * @param aName the name of the element
273 * @param aAttributes the attribute holder (ownership will be taken) or
274 * nullptr for no attributes
275 * @param aCreator the content creator function
277 void Push(nsAtom* aName, nsHtml5HtmlAttributes* aAttributes,
278 mozilla::dom::HTMLContentCreatorFunction aCreator);
280 /** Pushes a <span id="line<lineno>"> */
281 void PushCurrentLineContainer();
284 * Pops all inlines from the stack, pushes a pre, and pushes all inlines back
285 * with the same attributes.
287 void NewLine();
290 * Pops the current node off the stack.
292 void Pop();
295 * Appends text content to the current node.
297 * @param aBuffer the buffer to copy from
298 * @param aStart the index of the first code unit to copy
299 * @param aLength the number of code units to copy
301 void AppendCharacters(const char16_t* aBuffer, int32_t aStart,
302 int32_t aLength);
305 * Enqueues a tree op for adding an href attribute with the view-source:
306 * URL scheme to the current node.
308 * @param aValue the (potentially relative) URL to link to
310 void AddViewSourceHref(nsHtml5String aValue);
313 * The state we are transitioning away from.
315 int32_t mState;
318 * The index of the first UTF-16 code unit in mBuffer that hasn't been
319 * flushed yet.
321 int32_t mCStart;
324 * The position of the code unit in mBuffer that caused the current
325 * transition.
327 int32_t mPos;
330 * The current line number.
332 int32_t mLineNumber;
335 * The number of inline elements open inside the <pre> excluding the
336 * span potentially wrapping a run of characters.
338 int32_t mInlinesOpen;
341 * Whether there's a span wrapping a run of characters (excluding CDATA
342 * section) open.
344 bool mInCharacters;
347 * The current buffer being tokenized.
349 nsHtml5UTF16Buffer* mBuffer;
352 * The outgoing tree op queue.
354 nsTArray<nsHtml5TreeOperation> mOpQueue;
357 * The tree op stage for the tree op executor or a speculation when looking
358 * for meta charset.
360 * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
361 * object, because this object is owned by the nsHtml5Tokenizer instance that
362 * is owned by the nsHtml5StreamParser, which keeps the executor alive via
363 * nsHtml5StreamParser::mExecutorFlusher.
365 nsAHtml5TreeOpSink* mOpSink;
368 * The most recently opened markup declaration/tag or run of characters.
370 nsIContent** mCurrentRun;
373 * The most recent ampersand in a place where character references were
374 * allowed.
376 nsIContent** mAmpersand;
379 * The most recent slash that might become a self-closing slash.
381 nsIContent** mSlash;
384 * Memory for element handles.
386 mozilla::UniquePtr<nsIContent*[]> mHandles;
389 * Number of handles used in mHandles
391 int32_t mHandlesUsed;
394 * A holder for old contents of mHandles
396 nsTArray<mozilla::UniquePtr<nsIContent*[]>> mOldHandles;
399 * The element stack.
401 nsTArray<nsIContent**> mStack;
404 * Whether base is already visited once.
406 bool mSeenBase;
409 #endif // nsHtml5Highlighter_h