bump product version to 7.2.5.1
[LibreOffice.git] / helpcompiler / source / BasCodeTagger.cxx
blob415dec04c470e05075373a3dd883d2243ff6acb8
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <BasCodeTagger.hxx>
11 #include <iostream>
13 LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc )
15 if ( doc == nullptr )
16 throw BasicCodeTagger::NULL_DOCUMENT;
17 m_pCurrentNode = xmlDocGetRootElement( doc );
18 if ( m_pCurrentNode == nullptr )
19 throw BasicCodeTagger::EMPTY_DOCUMENT;
20 else if ( m_pCurrentNode->xmlChildrenNode != nullptr )
21 m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
22 nextNode();
25 void LibXmlTreeWalker::nextNode()
28 //next node
29 if ( m_pCurrentNode->next == nullptr )
31 m_pCurrentNode = m_Queue.front();
32 m_Queue.pop_front();
34 else
35 m_pCurrentNode = m_pCurrentNode->next;
36 //queue children if they exist
37 if ( m_pCurrentNode->xmlChildrenNode != nullptr )
38 m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
41 void LibXmlTreeWalker::ignoreCurrNodesChildren()
43 if ( m_pCurrentNode->xmlChildrenNode != nullptr )
44 m_Queue.pop_back();
47 bool LibXmlTreeWalker::end() const
49 return m_pCurrentNode->next == nullptr && m_Queue.empty();
53 BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc ):
54 m_Highlighter(HighlighterLanguage::Basic)
56 if ( rootDoc == nullptr )
57 throw NULL_DOCUMENT;
58 m_pDocument = rootDoc;
59 m_pXmlTreeWalker = nullptr;
60 m_bTaggingCompleted = false;
63 BasicCodeTagger::~BasicCodeTagger()
66 //!Gathers all the <bascode> tag nodes from xml tree.
67 /*!
68 * Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
70 void BasicCodeTagger::getBasicCodeContainerNodes()
72 xmlNodePtr currentNode;
74 m_BasicCodeContainerTags.clear();
76 m_pXmlTreeWalker.reset(new LibXmlTreeWalker( m_pDocument ));
78 currentNode = m_pXmlTreeWalker->currentNode();
79 if ( !( xmlStrcmp( currentNode->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
80 { //Found <bascode>
81 m_BasicCodeContainerTags.push_back( currentNode ); //it goes to the end of the list
83 while ( !m_pXmlTreeWalker->end() )
85 m_pXmlTreeWalker->nextNode();
86 if ( !( xmlStrcmp( m_pXmlTreeWalker->currentNode()->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
87 { //Found <bascode>
88 m_BasicCodeContainerTags.push_back( m_pXmlTreeWalker->currentNode() ); //it goes to the end of the list
89 m_pXmlTreeWalker->ignoreCurrNodesChildren();
94 //! Extracts Basic Codes contained in <bascode> tags.
95 /*!
96 * For each <bascode> this method iterates through it's <paragraph> tags and "inserts" <item> tags according
97 * to the Basic code syntax found in that paragraph.
99 void BasicCodeTagger::tagBasCodeParagraphs()
101 //helper variables
102 xmlNodePtr currParagraph;
103 for (auto const& currBascodeNode : m_BasicCodeContainerTags)
105 currParagraph = currBascodeNode->xmlChildrenNode; //first <paragraph>
106 while ( currParagraph != nullptr )
108 tagParagraph( currParagraph );
109 currParagraph=currParagraph->next;
112 m_BasicCodeContainerTags.clear();
115 //! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
116 void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph )
118 //1. get paragraph text
119 xmlChar* codeSnippet;
120 codeSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 );
121 if ( codeSnippet == nullptr )
123 return; //no text, nothing more to do here
125 //2. delete every child from paragraph (except attributes)
126 xmlNodePtr curNode = paragraph->xmlChildrenNode;
127 xmlNodePtr sibling;
128 while ( curNode != nullptr )
130 sibling = curNode->next;
131 xmlUnlinkNode( curNode );
132 xmlFreeNode( curNode );
133 curNode = sibling;
136 //3. create new paragraph content
137 OUString strLine( reinterpret_cast<const char*>(codeSnippet),
138 strlen(reinterpret_cast<const char*>(codeSnippet)),
139 RTL_TEXTENCODING_UTF8 );
140 std::vector<HighlightPortion> portions;
141 m_Highlighter.getHighlightPortions( strLine, portions );
142 for (auto const& portion : portions)
144 OString sToken(OUStringToOString(strLine.subView(portion.nBegin, portion.nEnd-portion.nBegin), RTL_TEXTENCODING_UTF8));
145 xmlNodePtr text = xmlNewText(reinterpret_cast<const xmlChar*>(sToken.getStr()));
146 if ( portion.tokenType != TokenType::Whitespace )
148 xmlChar* typeStr = getTypeString( portion.tokenType );
149 curNode = xmlNewTextChild( paragraph, nullptr, reinterpret_cast<xmlChar const *>("item"), nullptr );
150 xmlNewProp( curNode, reinterpret_cast<xmlChar const *>("type"), typeStr );
151 xmlAddChild( curNode, text );
152 xmlFree( typeStr );
154 else
155 xmlAddChild( paragraph, text );
157 xmlFree( codeSnippet );
160 //! Manages tagging process.
162 * This is the "main" function of BasicCodeTagger.
164 void BasicCodeTagger::tagBasicCodes()
166 if ( m_bTaggingCompleted )
167 return;
168 //gather <bascode> nodes
171 getBasicCodeContainerNodes();
173 catch (TaggerException &ex)
175 std::cout << "BasCodeTagger error occurred. Error code:" << ex << std::endl;
178 //tag basic code paragraphs in <bascode> tag
179 tagBasCodeParagraphs();
180 m_bTaggingCompleted = true;
183 //! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
184 xmlChar* BasicCodeTagger::getTypeString( TokenType tokenType )
186 const char* str;
187 switch ( tokenType )
189 case TokenType::Unknown :
190 str = "unknown";
191 break;
192 case TokenType::Identifier :
193 str = "identifier";
194 break;
195 case TokenType::Whitespace :
196 str = "whitespace";
197 break;
198 case TokenType::Number :
199 str = "number";
200 break;
201 case TokenType::String :
202 str = "string";
203 break;
204 case TokenType::EOL :
205 str = "eol";
206 break;
207 case TokenType::Comment :
208 str = "comment";
209 break;
210 case TokenType::Error :
211 str = "error";
212 break;
213 case TokenType::Operator :
214 str = "operator";
215 break;
216 case TokenType::Keywords :
217 str = "keyword";
218 break;
219 case TokenType::Parameter :
220 str = "parameter";
221 break;
222 default :
223 str = "unknown";
224 break;
226 return xmlCharStrdup( str );
229 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */