1 /* This Source Code Form is subject to the terms of the Mozilla Public
2 * License, v. 2.0. If a copy of the MPL was not distributed with this
3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
4 #ifndef nsHtml5Highlighter_h
5 #define nsHtml5Highlighter_h
8 #include "nsHtml5TreeOperation.h"
9 #include "nsHtml5UTF16Buffer.h"
10 #include "nsHtml5TreeOperation.h"
11 #include "nsAHtml5TreeOpSink.h"
13 #define NS_HTML5_HIGHLIGHTER_HANDLE_ARRAY_LENGTH 512
16 * A state machine for generating HTML for display in View Source based on
17 * the transitions the tokenizer makes on the source being viewed.
19 class nsHtml5Highlighter
{
24 * @param aOpSink the sink for the tree ops generated by this highlighter
26 explicit nsHtml5Highlighter(nsAHtml5TreeOpSink
* aOpSink
);
31 ~nsHtml5Highlighter();
34 * Set the op sink (for speculation).
36 void SetOpSink(nsAHtml5TreeOpSink
* aOpSink
);
39 * Reset state to after generated head but before processing any of the input
45 * Starts the generated document.
47 void Start(const nsAutoString
& aTitle
);
50 * Updates the charset source via the op queue.
52 void UpdateCharsetSource(nsCharsetSource aCharsetSource
);
55 * Report a tokenizer state transition.
57 * @param aState the state being transitioned to
58 * @param aReconsume whether this is a reconsuming transition
59 * @param aPos the tokenizer's current position into the buffer
61 int32_t Transition(int32_t aState
, bool aReconsume
, int32_t aPos
);
66 * Returns `true` normally and `false` on OOM.
68 [[nodiscard
]] bool End();
71 * Set the current buffer being tokenized
73 void SetBuffer(nsHtml5UTF16Buffer
* aBuffer
);
76 * Let go of the buffer being tokenized but first, flush text from it.
78 * @param aPos the first UTF-16 code unit not to flush
80 void DropBuffer(int32_t aPos
);
83 * Query whether there are so many ops in the queue
84 * that they should be flushed now.
86 * @return true if FlushOps() should be called now
88 bool ShouldFlushOps();
91 * Flush the tree ops into the sink.
93 * @return Ok(true) if there were ops to flush, Ok(false)
94 * if there were no ops to flush and Err() on OOM.
96 mozilla::Result
<bool, nsresult
> FlushOps();
99 * Linkify the current attribute value if the attribute name is one of
100 * the known URL attributes. (When executing tree ops, javascript: URLs will
101 * not be linkified, though.)
103 * @param aName the name of the attribute
104 * @param aValue the value of the attribute
106 void MaybeLinkifyAttributeValue(nsHtml5AttributeName
* aName
,
107 nsHtml5String aValue
);
110 * Inform the highlighter that the tokenizer successfully completed a
111 * named character reference.
113 void CompletedNamedCharacterReference();
116 * Adds an error annotation to the node that's currently on top of
119 * @param aMsgId the id of the message in the property file
121 void AddErrorToCurrentNode(const char* aMsgId
);
124 * Adds an error annotation to the node that corresponds to the most
125 * recently opened markup declaration/tag span, character reference or
128 * @param aMsgId the id of the message in the property file
130 void AddErrorToCurrentRun(const char* aMsgId
);
133 * Adds an error annotation to the node that corresponds to the most
134 * recently opened markup declaration/tag span, character reference or
135 * run of text with one atom to use when formatting the message.
137 * @param aMsgId the id of the message in the property file
138 * @param aName the atom
140 void AddErrorToCurrentRun(const char* aMsgId
, nsAtom
* aName
);
143 * Adds an error annotation to the node that corresponds to the most
144 * recently opened markup declaration/tag span, character reference or
145 * run of text with two atoms to use when formatting the message.
147 * @param aMsgId the id of the message in the property file
148 * @param aName the first atom
149 * @param aOther the second atom
151 void AddErrorToCurrentRun(const char* aMsgId
, nsAtom
* aName
, nsAtom
* aOther
);
154 * Adds an error annotation to the node that corresponds to the most
155 * recent potentially character reference-starting ampersand.
157 * @param aMsgId the id of the message in the property file
159 void AddErrorToCurrentAmpersand(const char* aMsgId
);
162 * Adds an error annotation to the node that corresponds to the most
163 * recent potentially self-closing slash.
165 * @param aMsgId the id of the message in the property file
167 void AddErrorToCurrentSlash(const char* aMsgId
);
170 * Enqueues a tree op for adding base to the urls with the view-source:
172 * @param aValue the base URL to add
174 void AddBase(nsHtml5String aValue
);
176 /** Starts the body */
177 void StartBodyContents();
181 * Starts a wrapper around a run of characters.
183 void StartCharacters();
186 * Starts a span with no class.
191 * Starts a <span> and sets the class attribute on it.
193 * @param aClass the class to set (MUST be a static string that does not
194 * need to be released!)
196 void StartSpan(const char16_t
* aClass
);
199 * End the current <span> or <a> in the highlighter output.
203 /** Ends a wrapper around a run of characters. */
204 void EndCharactersAndStartMarkupRun();
212 * Flushes characters up to but not including the current one.
217 * Flushes characters up to and including the current one.
222 * Finishes highlighting a tag in the input data by closing the open
223 * <span> and <a> elements in the highlighter output and then starts
224 * another <span> for potentially highlighting characters potentially
230 * Adds a class attribute to the current node.
232 * @param aClass the class to set (MUST be a static string that does not
233 * need to be released!)
235 void AddClass(const char16_t
* aClass
);
238 * Allocates a handle for an element.
240 * See the documentation for nsHtml5TreeBuilder::AllocateContentHandle()
241 * in nsHtml5TreeBuilderHSupplement.h.
245 nsIContent
** AllocateContentHandle();
248 * Enqueues an element creation tree operation.
250 * @param aName the name of the element
251 * @param aAttributes the attribute holder (ownership will be taken) or
252 * nullptr for no attributes
253 * @param aIntendedParent the intended parent node for the created element
254 * @param aCreator the content creator function
255 * @return the handle for the element that will be created
257 nsIContent
** CreateElement(nsAtom
* aName
, nsHtml5HtmlAttributes
* aAttributes
,
258 nsIContent
** aIntendedParent
,
259 mozilla::dom::HTMLContentCreatorFunction aCreator
);
262 * Gets the handle for the current node. May be called only after the
263 * root element has been set.
265 * @return the handle for the current node
267 nsIContent
** CurrentNode();
270 * Create an element and push it (its handle) on the stack.
272 * @param aName the name of the element
273 * @param aAttributes the attribute holder (ownership will be taken) or
274 * nullptr for no attributes
275 * @param aCreator the content creator function
277 void Push(nsAtom
* aName
, nsHtml5HtmlAttributes
* aAttributes
,
278 mozilla::dom::HTMLContentCreatorFunction aCreator
);
280 /** Pushes a <span id="line<lineno>"> */
281 void PushCurrentLineContainer();
284 * Pops all inlines from the stack, pushes a pre, and pushes all inlines back
285 * with the same attributes.
290 * Pops the current node off the stack.
295 * Appends text content to the current node.
297 * @param aBuffer the buffer to copy from
298 * @param aStart the index of the first code unit to copy
299 * @param aLength the number of code units to copy
301 void AppendCharacters(const char16_t
* aBuffer
, int32_t aStart
,
305 * Enqueues a tree op for adding an href attribute with the view-source:
306 * URL scheme to the current node.
308 * @param aValue the (potentially relative) URL to link to
310 void AddViewSourceHref(nsHtml5String aValue
);
313 * The state we are transitioning away from.
318 * The index of the first UTF-16 code unit in mBuffer that hasn't been
324 * The position of the code unit in mBuffer that caused the current
330 * The current line number.
335 * The number of inline elements open inside the <pre> excluding the
336 * span potentially wrapping a run of characters.
338 int32_t mInlinesOpen
;
341 * Whether there's a span wrapping a run of characters (excluding CDATA
347 * The current buffer being tokenized.
349 nsHtml5UTF16Buffer
* mBuffer
;
352 * The outgoing tree op queue.
354 nsTArray
<nsHtml5TreeOperation
> mOpQueue
;
357 * The tree op stage for the tree op executor or a speculation when looking
360 * The op sink is owned by the nsHtml5TreeOpExecutor, which outlives this
361 * object, because this object is owned by the nsHtml5Tokenizer instance that
362 * is owned by the nsHtml5StreamParser, which keeps the executor alive via
363 * nsHtml5StreamParser::mExecutorFlusher.
365 nsAHtml5TreeOpSink
* mOpSink
;
368 * The most recently opened markup declaration/tag or run of characters.
370 nsIContent
** mCurrentRun
;
373 * The most recent ampersand in a place where character references were
376 nsIContent
** mAmpersand
;
379 * The most recent slash that might become a self-closing slash.
384 * Memory for element handles.
386 mozilla::UniquePtr
<nsIContent
*[]> mHandles
;
389 * Number of handles used in mHandles
391 int32_t mHandlesUsed
;
394 * A holder for old contents of mHandles
396 nsTArray
<mozilla::UniquePtr
<nsIContent
*[]>> mOldHandles
;
401 nsTArray
<nsIContent
**> mStack
;
404 * Whether base is already visited once.
409 #endif // nsHtml5Highlighter_h