// Origin: [LibreOffice.git] / include / svtools / parhtml.hxx
// Branch: libreoffice-5-0-4
// Blob:   8195bea192ac508fe13713492134f4fa2dbcdc4a
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#ifndef INCLUDED_SVTOOLS_PARHTML_HXX
#define INCLUDED_SVTOOLS_PARHTML_HXX

#include <svtools/svtdllapi.h>
#include <svtools/svparser.hxx>

#include <boost/ptr_container/ptr_vector.hpp>

#include <vector>

// Forward declaration of the UNO interface that ParseMetaOptions() and
// ParseMetaOptionsImpl() receive by reference; only the name is needed here.
namespace com { namespace sun { namespace star {
    namespace document {
        class XDocumentProperties;
    }
} } }

// Forward declarations: these types are only used by reference or pointer
// in the declarations visible in this header.
class Color;
class SvNumberFormatter;
class SvKeyValueIterator;

// Default values for the seven HTML font size steps (1..7).
// NOTE(review): the unit (presumably points) is not stated here - confirm
// against the users of these macros.
#define HTMLFONTSZ1_DFLT 7
#define HTMLFONTSZ2_DFLT 10
#define HTMLFONTSZ3_DFLT 12
#define HTMLFONTSZ4_DFLT 14
#define HTMLFONTSZ5_DFLT 18
#define HTMLFONTSZ6_DFLT 24
#define HTMLFONTSZ7_DFLT 36

// Result type of HTMLOption::GetTableFrame(), i.e. <TABLE FRAME=...>.
enum HTMLTableFrame { HTML_TF_VOID, HTML_TF_ABOVE, HTML_TF_BELOW,
    HTML_TF_HSIDES, HTML_TF_LHS, HTML_TF_RHS, HTML_TF_VSIDES, HTML_TF_BOX };

// Result type of HTMLOption::GetTableRules(), i.e. <TABLE RULES=...>.
enum HTMLTableRules { HTML_TR_NONE, HTML_TR_GROUPS, HTML_TR_ROWS,
    HTML_TR_COLS, HTML_TR_ALL };

// Result type of HTMLOption::GetInputType(), i.e. <INPUT TYPE=...>.
// The enumerators carry explicit values starting at 0x01; 0 is not used.
enum HTMLInputType
{
    HTML_IT_TEXT =      0x01,
    HTML_IT_PASSWORD =  0x02,
    HTML_IT_CHECKBOX =  0x03,
    HTML_IT_RADIO =     0x04,
    HTML_IT_RANGE =     0x05,
    HTML_IT_SCRIBBLE =  0x06,
    HTML_IT_FILE =      0x07,
    HTML_IT_HIDDEN =    0x08,
    HTML_IT_SUBMIT =    0x09,
    HTML_IT_IMAGE =     0x0a,
    HTML_IT_RESET =     0x0b,
    HTML_IT_BUTTON =    0x0c
};

// Script language reported by HTMLParser::ParseScriptOptions() through its
// rLang out-parameter.
enum HTMLScriptLanguage
{
    HTML_SL_STARBASIC,
    HTML_SL_JAVASCRIPT,
    HTML_SL_UNKNOWN
};

75 struct HTMLOptionEnum
77 const sal_Char *pName; // value of an HTML option
78 sal_uInt16 nValue; // and corresponding value of an enum
81 /** Representation of an HTML option (=attribute in a start tag).
82 * The values of the options are always stored as strings.
83 * The methods GetNumber,... may only be called if the option
84 * is actually numerical,...
86 class SVT_DLLPUBLIC HTMLOption
88 OUString aValue; // value of the option (always as string)
89 OUString aToken; // name of the option as string
90 sal_uInt16 nToken; // and respective token
92 public:
94 HTMLOption( sal_uInt16 nTyp, const OUString& rToken, const OUString& rValue );
96 // name of the option...
97 sal_uInt16 GetToken() const { return nToken; } // ... as enum
98 const OUString& GetTokenString() const { return aToken; } // ... as string
100 // value of the option ...
101 const OUString& GetString() const { return aValue; } // ... as string
103 sal_uInt32 GetNumber() const; // ... as number
104 sal_Int32 GetSNumber() const; // ... as number
105 void GetNumbers( std::vector<sal_uInt32> &rNumbers, // ... as numbers
106 bool bSpaceDelim=false ) const;
107 void GetColor( Color& ) const; // ... as color
109 // ... as enum; pOptEnums is an HTMLOptionEnum array
110 sal_uInt16 GetEnum( const HTMLOptionEnum *pOptEnums,
111 sal_uInt16 nDflt=0 ) const;
112 bool GetEnum( sal_uInt16 &rEnum, const HTMLOptionEnum *pOptEnums ) const;
114 // ... and as a few special enums
115 HTMLInputType GetInputType() const; // <INPUT TYPE=...>
116 HTMLTableFrame GetTableFrame() const; // <TABLE FRAME=...>
117 HTMLTableRules GetTableRules() const; // <TABLE RULES=...>
118 //SvxAdjust GetAdjust() const; // <P,TH,TD ALIGN=>
121 typedef ::boost::ptr_vector<HTMLOption> HTMLOptions;
123 class SVT_DLLPUBLIC HTMLParser : public SvParser
125 private:
126 mutable HTMLOptions maOptions; // options of the start tag
128 bool bNewDoc : 1; // read new Doc?
129 bool bIsInHeader : 1; // scan header section
130 bool bIsInBody : 1; // scan body section
131 bool bReadListing : 1; // read listings
132 bool bReadXMP : 1; // read XMP
133 bool bReadPRE : 1; // read preformatted text
134 bool bReadTextArea : 1; // read TEXTAREA
135 bool bReadScript : 1; // read <SCRIPT>
136 bool bReadStyle : 1; // read <STYLE>
137 bool bEndTokenFound : 1; // found </SCRIPT> or </STYLE>
139 bool bPre_IgnoreNewPara : 1; // flags for reading of PRE paragraphs
140 bool bReadNextChar : 1; // true: read NextChar again(JavaScript!)
141 bool bReadComment : 1; // true: read NextChar again (JavaScript!)
143 sal_uInt32 nPre_LinePos; // Pos in the line in the PRE-Tag
145 int mnPendingOffToken; ///< OFF token pending for a <XX.../> ON/OFF ON token
147 OUString aEndToken;
149 protected:
150 OUString sSaveToken; // the read tag as string
152 int ScanText( const sal_Unicode cBreak = 0U );
154 int _GetNextRawToken();
156 // scan next token
157 virtual int _GetNextToken() SAL_OVERRIDE;
159 virtual ~HTMLParser();
161 void FinishHeader( bool bBody ) { bIsInHeader = false; bIsInBody = bBody; }
163 public:
164 HTMLParser( SvStream& rIn, bool bReadNewDoc = true );
166 virtual SvParserState CallParser() SAL_OVERRIDE;
168 bool IsNewDoc() const { return bNewDoc; }
169 bool IsInHeader() const { return bIsInHeader; }
170 bool IsInBody() const { return bIsInBody; }
171 bool IsReadListing() const { return bReadListing; }
172 bool IsReadXMP() const { return bReadXMP; }
173 bool IsReadPRE() const { return bReadPRE; }
174 bool IsReadScript() const { return bReadScript; }
175 bool IsReadStyle() const { return bReadStyle; }
177 void SetReadNextChar() { bReadNextChar = true; }
179 // start PRE-/LISTING or XMP mode or filter tags respectively
180 inline void StartPRE( bool bRestart=false );
181 void FinishPRE() { bReadPRE = false; }
182 int FilterPRE( int nToken );
184 inline void StartListing( bool bRestart=false );
185 void FinishListing() { bReadListing = false; }
186 int FilterListing( int nToken );
188 inline void StartXMP( bool bRestart=false );
189 void FinishXMP() { bReadXMP = false; }
190 int FilterXMP( int nToken );
192 void FinishTextArea() { bReadTextArea = false; }
194 // finish PRE-/LISTING- and XMP mode
195 void FinishPREListingXMP() { bReadPRE = bReadListing = bReadXMP = false; }
197 // Filter the current token according to the current mode
198 // (PRE, XMP, ...) and set the flags. Is called by Continue before
199 // NextToken is called. If you implement own loops or call
200 // NextToken yourself, you should call this method beforehand.
201 int FilterToken( int nToken );
203 // end scanning of a script (should only be called right after
204 // reading of a <SCRIPT>)
205 void EndScanScript() { bReadScript = false; }
207 void ReadRawData( const OUString &rEndToken ) { aEndToken = rEndToken; }
209 // Token without \-sequences
210 void UnescapeToken();
212 // Determine the options. pNoConvertToken is the optional token
213 // of an option, for which the CR/LFs are not deleted from the value
214 // of the option.
215 const HTMLOptions& GetOptions( sal_uInt16 *pNoConvertToken=0 );
217 // for asynchronous reading from the SvStream
218 virtual void Continue( int nToken ) SAL_OVERRIDE;
221 protected:
223 static rtl_TextEncoding GetEncodingByMIME( const OUString& rMime );
225 /// template method: called when ParseMetaOptions adds a user-defined meta
226 virtual void AddMetaUserDefined( OUString const & i_rMetaName );
228 private:
229 /// parse meta options into XDocumentProperties and encoding
230 bool ParseMetaOptionsImpl( const ::com::sun::star::uno::Reference<
231 ::com::sun::star::document::XDocumentProperties>&,
232 SvKeyValueIterator*,
233 const HTMLOptions&,
234 rtl_TextEncoding& rEnc );
236 public:
237 /// overriding method must call this implementation!
238 virtual bool ParseMetaOptions( const ::com::sun::star::uno::Reference<
239 ::com::sun::star::document::XDocumentProperties>&,
240 SvKeyValueIterator* );
242 bool ParseScriptOptions( OUString& rLangString, const OUString&, HTMLScriptLanguage& rLang,
243 OUString& rSrc, OUString& rLibrary, OUString& rModule );
245 // remove a comment around the content of <SCRIPT> or <STYLE>
246 // In case of 'bFull', the whole line behind a "<!--" might
247 // be deleted (for JavaSript)
248 static void RemoveSGMLComment( OUString &rString, bool bFull );
250 static bool InternalImgToPrivateURL( OUString& rURL );
251 static rtl_TextEncoding GetEncodingByHttpHeader( SvKeyValueIterator *pHTTPHeader );
252 bool SetEncodingByHTTPHeader( SvKeyValueIterator *pHTTPHeader );
255 inline void HTMLParser::StartPRE( bool bRestart )
257 bReadPRE = true;
258 bPre_IgnoreNewPara = !bRestart;
259 nPre_LinePos = 0UL;
262 inline void HTMLParser::StartListing( bool bRestart )
264 bReadListing = true;
265 bPre_IgnoreNewPara = !bRestart;
266 nPre_LinePos = 0UL;
269 inline void HTMLParser::StartXMP( bool bRestart )
271 bReadXMP = true;
272 bPre_IgnoreNewPara = !bRestart;
273 nPre_LinePos = 0UL;
#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */