Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / include / svtools / parhtml.hxx
blobb4fee63e311ab1ee2bdcd3cfb3b64280b0d9c402
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #pragma once
22 #include <svtools/svtdllapi.h>
23 #include <svtools/svparser.hxx>
24 #include <svtools/htmltokn.h>
26 #include <string_view>
27 #include <vector>
// Forward declarations: this header only names these types in declarations,
// so their full definitions are not required here.
namespace com::sun::star::uno { template <class interface_type> class Reference; }

namespace com::sun::star {
    namespace document {
        class XDocumentProperties;
    }
}

class Color;
enum class HtmlOptionId;
// Default values for the seven HTML font size steps (1..7).
// NOTE(review): presumably point sizes - the unit is not stated in this header.
#define HTMLFONTSZ1_DFLT 7
#define HTMLFONTSZ2_DFLT 10
#define HTMLFONTSZ3_DFLT 12
#define HTMLFONTSZ4_DFLT 14
#define HTMLFONTSZ5_DFLT 18
#define HTMLFONTSZ6_DFLT 24
#define HTMLFONTSZ7_DFLT 36
/// Possible values of the FRAME option of a TABLE tag.
/// Enumerators are numbered implicitly, starting with Void == 0.
enum class HTMLTableFrame { Void, Above, Below, HSides, LHS, RHS, VSides, Box };
/// Possible values of the RULES option of a TABLE tag.
/// Enumerators are numbered implicitly, starting with NONE == 0.
enum class HTMLTableRules { NONE, Groups, Rows, Cols, All };
/// Possible values of the TYPE option of an INPUT tag.
/// Numbering starts at 1, so the value 0 does not name any enumerator.
enum class HTMLInputType
{
    Text = 1,
    Password,
    Checkbox,
    Radio,
    Range,
    Scribble,
    File,
    Hidden,
    Submit,
    Image,
    Reset,
    Button
};
/// Script language of a SCRIPT element; Unknown covers everything that is
/// neither StarBasic nor JavaScript.
enum class HTMLScriptLanguage
{
    StarBasic,
    JavaScript,
    Unknown
};
/// One row of a lookup table that maps the textual value of an HTML option
/// to an enum value. Code walking such a table stops at the first row whose
/// pName is a null pointer, so every table has to end with such a row.
template<typename EnumT>
struct HTMLOptionEnum
{
    const char *pName;   // value of an HTML option
    EnumT       nValue;  // and corresponding value of an enum
};
82 /** Representation of an HTML option (=attribute in a start tag).
83 * The values of the options are always stored as strings.
84 * The methods GetNumber,... may only be called if the option
85 * is actually numerical,...
87 class SVT_DLLPUBLIC HTMLOption
89 OUString aValue; // value of the option (always as string)
90 OUString aToken; // name of the option as string
91 HtmlOptionId nToken; // and respective token
93 public:
95 HTMLOption( HtmlOptionId nTyp, OUString aToken, OUString aValue );
97 // name of the option...
98 HtmlOptionId GetToken() const { return nToken; } // ... as enum
99 const OUString& GetTokenString() const { return aToken; } // ... as string
101 // value of the option ...
102 const OUString& GetString() const { return aValue; } // ... as string
104 sal_uInt32 GetNumber() const; // ... as number
105 sal_Int32 GetSNumber() const; // ... as number
106 void GetNumbers( std::vector<sal_uInt32> &rNumbers ) const; // ... as numbers
107 void GetColor( Color& ) const; // ... as color
109 template<typename EnumT>
110 EnumT GetEnum( const HTMLOptionEnum<EnumT> *pOptEnums,
111 EnumT nDflt = static_cast<EnumT>(0) ) const
113 while( pOptEnums->pName )
115 if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
116 return pOptEnums->nValue;
117 pOptEnums++;
119 return nDflt;
122 template<typename EnumT>
123 bool GetEnum( EnumT &rEnum, const HTMLOptionEnum<EnumT> *pOptEnums ) const
125 while( pOptEnums->pName )
127 if( aValue.equalsIgnoreAsciiCaseAscii( pOptEnums->pName ) )
129 rEnum = pOptEnums->nValue;
130 return true;
132 pOptEnums++;
134 return false;
137 // ... and as a few special enums
138 HTMLInputType GetInputType() const; // <INPUT TYPE=...>
139 HTMLTableFrame GetTableFrame() const; // <TABLE FRAME=...>
140 HTMLTableRules GetTableRules() const; // <TABLE RULES=...>
141 //SvxAdjust GetAdjust() const; // <P,TH,TD ALIGN=>
144 typedef ::std::vector<HTMLOption> HTMLOptions;
146 class SVT_DLLPUBLIC HTMLParser : public SvParser<HtmlTokenId>
148 private:
149 mutable HTMLOptions maOptions; // options of the start tag
151 bool bNewDoc : 1; // read new Doc?
152 bool bIsInHeader : 1; // scan header section
153 bool bReadListing : 1; // read listings
154 bool bReadXMP : 1; // read XMP
155 bool bReadPRE : 1; // read preformatted text
156 bool bReadTextArea : 1; // read TEXTAREA
157 bool bReadScript : 1; // read <SCRIPT>
158 bool bReadStyle : 1; // read <STYLE>
159 bool bEndTokenFound : 1; // found </SCRIPT> or </STYLE>
161 bool bPre_IgnoreNewPara : 1; // flags for reading of PRE paragraphs
162 bool bReadNextChar : 1; // true: read NextChar again(JavaScript!)
163 bool bReadComment : 1; // true: read NextChar again (JavaScript!)
165 sal_uInt32 nPre_LinePos; // Pos in the line in the PRE-Tag
167 HtmlTokenId mnPendingOffToken; ///< OFF token pending for a <XX.../> ON/OFF ON token
169 OUString aEndToken;
171 /// XML namespace, in case of XHTML.
172 OUString maNamespace;
174 protected:
175 OUString sSaveToken; // the read tag as string
177 HtmlTokenId ScanText( const sal_Unicode cBreak = 0U );
179 HtmlTokenId GetNextRawToken();
181 // scan next token
182 virtual HtmlTokenId GetNextToken_() override;
184 virtual ~HTMLParser() override;
186 void FinishHeader() { bIsInHeader = false; }
188 void SetNamespace(std::u16string_view rNamespace);
190 public:
191 HTMLParser( SvStream& rIn, bool bReadNewDoc = true );
193 virtual SvParserState CallParser() override;
195 bool IsNewDoc() const { return bNewDoc; }
196 bool IsInHeader() const { return bIsInHeader; }
197 bool IsReadListing() const { return bReadListing; }
198 bool IsReadXMP() const { return bReadXMP; }
199 bool IsReadPRE() const { return bReadPRE; }
200 bool IsReadScript() const { return bReadScript; }
201 bool IsReadStyle() const { return bReadStyle; }
203 // start PRE-/LISTING or XMP mode or filter tags respectively
204 inline void StartPRE();
205 void FinishPRE() { bReadPRE = false; }
206 HtmlTokenId FilterPRE( HtmlTokenId nToken );
208 inline void StartListing();
209 void FinishListing() { bReadListing = false; }
210 HtmlTokenId FilterListing( HtmlTokenId nToken );
212 inline void StartXMP();
213 void FinishXMP() { bReadXMP = false; }
214 HtmlTokenId FilterXMP( HtmlTokenId nToken );
216 void FinishTextArea() { bReadTextArea = false; }
218 // finish PRE-/LISTING- and XMP mode
219 void FinishPREListingXMP() { bReadPRE = bReadListing = bReadXMP = false; }
221 // Filter the current token according to the current mode
222 // (PRE, XMP, ...) and set the flags. Is called by Continue before
223 // NextToken is called. If you implement own loops or call
224 // NextToken yourself, you should call this method beforehand.
225 HtmlTokenId FilterToken( HtmlTokenId nToken );
227 void ReadRawData( const OUString &rEndToken ) { aEndToken = rEndToken; }
229 // Token without \-sequences
230 void UnescapeToken();
232 // Determine the options. pNoConvertToken is the optional token
233 // of an option, for which the CR/LFs are not deleted from the value
234 // of the option.
235 const HTMLOptions& GetOptions( HtmlOptionId const *pNoConvertToken=nullptr );
237 // for asynchronous reading from the SvStream
238 virtual void Continue( HtmlTokenId nToken ) override;
241 protected:
243 static rtl_TextEncoding GetEncodingByMIME( const OUString& rMime );
245 /// template method: called when ParseMetaOptions adds a user-defined meta
246 virtual void AddMetaUserDefined( OUString const & i_rMetaName );
248 private:
249 /// parse meta options into XDocumentProperties and encoding
250 bool ParseMetaOptionsImpl( const css::uno::Reference< css::document::XDocumentProperties>&,
251 SvKeyValueIterator*,
252 const HTMLOptions&,
253 rtl_TextEncoding& rEnc );
255 public:
256 /// overriding method must call this implementation!
257 virtual bool ParseMetaOptions( const css::uno::Reference< css::document::XDocumentProperties>&,
258 SvKeyValueIterator* );
260 void ParseScriptOptions( OUString& rLangString, std::u16string_view rBaseURL, HTMLScriptLanguage& rLang,
261 OUString& rSrc, OUString& rLibrary, OUString& rModule );
263 // Remove a comment around the content of <SCRIPT> or <STYLE>.
264 // The whole line behind a "<!--" might be deleted (for JavaScript).
265 static void RemoveSGMLComment( OUString &rString );
267 static bool InternalImgToPrivateURL( OUString& rURL );
268 static rtl_TextEncoding GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader );
269 bool SetEncodingByHTTPHeader( SvKeyValueIterator *pHTTPHeader );
272 inline void HTMLParser::StartPRE()
274 bReadPRE = true;
275 bPre_IgnoreNewPara = true;
276 nPre_LinePos = 0;
279 inline void HTMLParser::StartListing()
281 bReadListing = true;
282 bPre_IgnoreNewPara = true;
283 nPre_LinePos = 0;
286 inline void HTMLParser::StartXMP()
288 bReadXMP = true;
289 bPre_IgnoreNewPara = true;
290 nPre_LinePos = 0;
293 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */