Avoid potential negative array index access to cached text.
[LibreOffice.git] / helpcompiler / source / BasCodeTagger.cxx
blob3511617f4dfb97dd14c326f578fe5c33a4c6be32
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <BasCodeTagger.hxx>
11 #include <rtl/ustring.hxx>
12 #include <iostream>
14 LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc )
16 if ( doc == nullptr )
17 throw BasicCodeTagger::NULL_DOCUMENT;
18 m_pCurrentNode = xmlDocGetRootElement( doc );
19 if ( m_pCurrentNode == nullptr )
20 throw BasicCodeTagger::EMPTY_DOCUMENT;
21 else if ( m_pCurrentNode->xmlChildrenNode != nullptr )
22 m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
23 nextNode();
26 void LibXmlTreeWalker::nextNode()
29 //next node
30 if ( m_pCurrentNode->next == nullptr )
32 m_pCurrentNode = m_Queue.front();
33 m_Queue.pop_front();
35 else
36 m_pCurrentNode = m_pCurrentNode->next;
37 //queue children if they exist
38 if ( m_pCurrentNode->xmlChildrenNode != nullptr )
39 m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
42 void LibXmlTreeWalker::ignoreCurrNodesChildren()
44 if ( m_pCurrentNode->xmlChildrenNode != nullptr )
45 m_Queue.pop_back();
48 bool LibXmlTreeWalker::end() const
50 return m_pCurrentNode->next == nullptr && m_Queue.empty();
54 BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc ):
55 m_Highlighter(HighlighterLanguage::Basic)
57 if ( rootDoc == nullptr )
58 throw NULL_DOCUMENT;
59 m_pDocument = rootDoc;
60 m_pXmlTreeWalker = nullptr;
61 m_bTaggingCompleted = false;
64 BasicCodeTagger::~BasicCodeTagger()
67 //!Gathers all the <bascode> tag nodes from xml tree.
68 /*!
69 * Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
71 void BasicCodeTagger::getBasicCodeContainerNodes()
73 xmlNodePtr currentNode;
75 m_BasicCodeContainerTags.clear();
77 m_pXmlTreeWalker.reset(new LibXmlTreeWalker( m_pDocument ));
79 currentNode = m_pXmlTreeWalker->currentNode();
80 if ( !( xmlStrcmp( currentNode->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
81 { //Found <bascode>
82 m_BasicCodeContainerTags.push_back( currentNode ); //it goes to the end of the list
84 while ( !m_pXmlTreeWalker->end() )
86 m_pXmlTreeWalker->nextNode();
87 if ( !( xmlStrcmp( m_pXmlTreeWalker->currentNode()->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
88 { //Found <bascode>
89 m_BasicCodeContainerTags.push_back( m_pXmlTreeWalker->currentNode() ); //it goes to the end of the list
90 m_pXmlTreeWalker->ignoreCurrNodesChildren();
95 //! Extracts Basic Codes contained in <bascode> tags.
96 /*!
97 * For each <bascode> this method iterates through it's <paragraph> tags and "inserts" <item> tags according
98 * to the Basic code syntax found in that paragraph.
100 void BasicCodeTagger::tagBasCodeParagraphs()
102 //helper variables
103 xmlNodePtr currParagraph;
104 for (auto const& currBascodeNode : m_BasicCodeContainerTags)
106 currParagraph = currBascodeNode->xmlChildrenNode; //first <paragraph>
107 while ( currParagraph != nullptr )
109 tagParagraph( currParagraph );
110 currParagraph=currParagraph->next;
113 m_BasicCodeContainerTags.clear();
116 //! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
117 void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph )
119 //1. get paragraph text
120 xmlChar* codeSnippet;
121 codeSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 );
122 if ( codeSnippet == nullptr )
124 return; //no text, nothing more to do here
126 //2. delete every child from paragraph (except attributes)
127 xmlNodePtr curNode = paragraph->xmlChildrenNode;
128 xmlNodePtr sibling;
129 while ( curNode != nullptr )
131 sibling = curNode->next;
132 xmlUnlinkNode( curNode );
133 xmlFreeNode( curNode );
134 curNode = sibling;
137 //3. create new paragraph content
138 OUString strLine( reinterpret_cast<const char*>(codeSnippet),
139 strlen(reinterpret_cast<const char*>(codeSnippet)),
140 RTL_TEXTENCODING_UTF8 );
141 std::vector<HighlightPortion> portions;
142 m_Highlighter.getHighlightPortions( strLine, portions );
143 for (auto const& portion : portions)
145 OString sToken(OUStringToOString(strLine.subView(portion.nBegin, portion.nEnd-portion.nBegin), RTL_TEXTENCODING_UTF8));
146 xmlNodePtr text = xmlNewText(reinterpret_cast<const xmlChar*>(sToken.getStr()));
147 if ( portion.tokenType != TokenType::Whitespace )
149 xmlChar* typeStr = getTypeString( portion.tokenType );
150 curNode = xmlNewTextChild( paragraph, nullptr, reinterpret_cast<xmlChar const *>("item"), nullptr );
151 xmlNewProp( curNode, reinterpret_cast<xmlChar const *>("type"), typeStr );
152 xmlAddChild( curNode, text );
153 xmlFree( typeStr );
155 else
156 xmlAddChild( paragraph, text );
158 xmlFree( codeSnippet );
161 //! Manages tagging process.
163 * This is the "main" function of BasicCodeTagger.
165 void BasicCodeTagger::tagBasicCodes()
167 if ( m_bTaggingCompleted )
168 return;
169 //gather <bascode> nodes
172 getBasicCodeContainerNodes();
174 catch (TaggerException &ex)
176 std::cout << "BasCodeTagger error occurred. Error code:" << ex << std::endl;
179 //tag basic code paragraphs in <bascode> tag
180 tagBasCodeParagraphs();
181 m_bTaggingCompleted = true;
184 //! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
185 xmlChar* BasicCodeTagger::getTypeString( TokenType tokenType )
187 const char* str;
188 switch ( tokenType )
190 case TokenType::Unknown :
191 str = "unknown";
192 break;
193 case TokenType::Identifier :
194 str = "identifier";
195 break;
196 case TokenType::Whitespace :
197 str = "whitespace";
198 break;
199 case TokenType::Number :
200 str = "number";
201 break;
202 case TokenType::String :
203 str = "string";
204 break;
205 case TokenType::EOL :
206 str = "eol";
207 break;
208 case TokenType::Comment :
209 str = "comment";
210 break;
211 case TokenType::Error :
212 str = "error";
213 break;
214 case TokenType::Operator :
215 str = "operator";
216 break;
217 case TokenType::Keywords :
218 str = "keyword";
219 break;
220 case TokenType::Parameter :
221 str = "parameter";
222 break;
223 default :
224 str = "unknown";
225 break;
227 return xmlCharStrdup( str );
230 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */