1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <BasCodeTagger.hxx>
11 #include <rtl/ustring.hxx>
// Constructs a walker over `doc`: m_pCurrentNode is set to the document's root element
// and, when that element has a child list (xmlChildrenNode), that list is appended to m_Queue.
// Throws BasicCodeTagger::EMPTY_DOCUMENT when xmlDocGetRootElement() yields nullptr.
// NOTE(review): this listing has lost lines (the embedded line numbers jump and braces are
// absent). The NULL_DOCUMENT throw below is presumably guarded by a `doc == nullptr` test,
// and a further statement may follow the push_back -- confirm against the real file.
14 LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc
)
17 throw BasicCodeTagger::NULL_DOCUMENT
;
// start from the root element of the document
18 m_pCurrentNode
= xmlDocGetRootElement( doc
);
19 if ( m_pCurrentNode
== nullptr )
20 throw BasicCodeTagger::EMPTY_DOCUMENT
;
// remember the root's children so that they can be visited later
21 else if ( m_pCurrentNode
->xmlChildrenNode
!= nullptr )
22 m_Queue
.push_back( m_pCurrentNode
->xmlChildrenNode
);
// Moves the walker to another node.
// Visible logic: when the current node has no `next` sibling, m_pCurrentNode is taken from
// the front of m_Queue; m_pCurrentNode is assigned its `next` sibling; finally the child
// list of the resulting node, if it has one, is appended to m_Queue.
// NOTE(review): the embedded line numbers jump (30 -> 32 -> 36), so lines are missing from
// this listing -- presumably braces, an `else` in front of the sibling assignment, and the
// removal of the queue entry that was just consumed. Confirm against the real file.
26 void LibXmlTreeWalker::nextNode()
30 if ( m_pCurrentNode
->next
== nullptr )
// no sibling left on this level: continue with the oldest queued child list
32 m_pCurrentNode
= m_Queue
.front();
// step to the following sibling
36 m_pCurrentNode
= m_pCurrentNode
->next
;
37 //queue children if they exist
38 if ( m_pCurrentNode
->xmlChildrenNode
!= nullptr )
39 m_Queue
.push_back( m_pCurrentNode
->xmlChildrenNode
);
// Only the guard of this method is visible in this listing: it tests whether the current
// node has children.
// NOTE(review): the guarded statement is missing here. Judging by the method name it
// presumably discards the child list that nextNode() queued for the current node --
// confirm against the real file.
42 void LibXmlTreeWalker::ignoreCurrNodesChildren()
44 if ( m_pCurrentNode
->xmlChildrenNode
!= nullptr )
48 bool LibXmlTreeWalker::end() const
50 return m_pCurrentNode
->next
== nullptr && m_Queue
.empty();
// Constructs a tagger for `rootDoc`: m_Highlighter is initialised for
// HighlighterLanguage::Basic, the document pointer is stored in m_pDocument,
// m_pXmlTreeWalker starts out as nullptr and m_bTaggingCompleted as false.
// NOTE(review): the statement controlled by the `rootDoc == nullptr` test is missing from
// this listing (embedded line 58); presumably it rejects a null document, e.g. by throwing
// NULL_DOCUMENT as LibXmlTreeWalker's constructor does -- confirm against the real file.
54 BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc
):
55 m_Highlighter(HighlighterLanguage::Basic
)
57 if ( rootDoc
== nullptr )
59 m_pDocument
= rootDoc
;
// the walker is only created later, by getBasicCodeContainerNodes()
60 m_pXmlTreeWalker
= nullptr;
61 m_bTaggingCompleted
= false;
// Destructor.
// NOTE(review): the body is not visible in this listing; the embedded line numbers leave
// room for two lines only, so it is presumably empty -- confirm against the real file.
64 BasicCodeTagger::~BasicCodeTagger()
67 //!Gathers all the <bascode> tag nodes from xml tree.
69 * Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
71 void BasicCodeTagger::getBasicCodeContainerNodes()
73 xmlNodePtr currentNode
;
75 m_BasicCodeContainerTags
.clear();
77 m_pXmlTreeWalker
.reset(new LibXmlTreeWalker( m_pDocument
));
79 currentNode
= m_pXmlTreeWalker
->currentNode();
80 if ( !( xmlStrcmp( currentNode
->name
, reinterpret_cast<const xmlChar
*>("bascode") ) ) )
82 m_BasicCodeContainerTags
.push_back( currentNode
); //it goes to the end of the list
84 while ( !m_pXmlTreeWalker
->end() )
86 m_pXmlTreeWalker
->nextNode();
87 if ( !( xmlStrcmp( m_pXmlTreeWalker
->currentNode()->name
, reinterpret_cast<const xmlChar
*>("bascode") ) ) )
89 m_BasicCodeContainerTags
.push_back( m_pXmlTreeWalker
->currentNode() ); //it goes to the end of the list
90 m_pXmlTreeWalker
->ignoreCurrNodesChildren();
95 //! Extracts Basic Codes contained in <bascode> tags.
97 * For each <bascode> this method iterates through it's <paragraph> tags and "inserts" <item> tags according
98 * to the Basic code syntax found in that paragraph.
100 void BasicCodeTagger::tagBasCodeParagraphs()
103 xmlNodePtr currParagraph
;
104 for (auto const& currBascodeNode
: m_BasicCodeContainerTags
)
106 currParagraph
= currBascodeNode
->xmlChildrenNode
; //first <paragraph>
107 while ( currParagraph
!= nullptr )
109 tagParagraph( currParagraph
);
110 currParagraph
=currParagraph
->next
;
113 m_BasicCodeContainerTags
.clear();
116 //! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
117 void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph
)
119 //1. get paragraph text
120 xmlChar
* codeSnippet
;
121 codeSnippet
= xmlNodeListGetString( m_pDocument
, paragraph
->xmlChildrenNode
, 1 );
122 if ( codeSnippet
== nullptr )
124 return; //no text, nothing more to do here
126 //2. delete every child from paragraph (except attributes)
127 xmlNodePtr curNode
= paragraph
->xmlChildrenNode
;
129 while ( curNode
!= nullptr )
131 sibling
= curNode
->next
;
132 xmlUnlinkNode( curNode
);
133 xmlFreeNode( curNode
);
137 //3. create new paragraph content
138 OUString
strLine( reinterpret_cast<const char*>(codeSnippet
),
139 strlen(reinterpret_cast<const char*>(codeSnippet
)),
140 RTL_TEXTENCODING_UTF8
);
141 std::vector
<HighlightPortion
> portions
;
142 m_Highlighter
.getHighlightPortions( strLine
, portions
);
143 for (auto const& portion
: portions
)
145 OString
sToken(OUStringToOString(strLine
.subView(portion
.nBegin
, portion
.nEnd
-portion
.nBegin
), RTL_TEXTENCODING_UTF8
));
146 xmlNodePtr text
= xmlNewText(reinterpret_cast<const xmlChar
*>(sToken
.getStr()));
147 if ( portion
.tokenType
!= TokenType::Whitespace
)
149 xmlChar
* typeStr
= getTypeString( portion
.tokenType
);
150 curNode
= xmlNewTextChild( paragraph
, nullptr, reinterpret_cast<xmlChar
const *>("item"), nullptr );
151 xmlNewProp( curNode
, reinterpret_cast<xmlChar
const *>("type"), typeStr
);
152 xmlAddChild( curNode
, text
);
156 xmlAddChild( paragraph
, text
);
158 xmlFree( codeSnippet
);
161 //! Manages tagging process.
163 * This is the "main" function of BasicCodeTagger.
165 void BasicCodeTagger::tagBasicCodes()
167 if ( m_bTaggingCompleted
)
169 //gather <bascode> nodes
172 getBasicCodeContainerNodes();
174 catch (TaggerException
&ex
)
176 std::cout
<< "BasCodeTagger error occurred. Error code:" << ex
<< std::endl
;
179 //tag basic code paragraphs in <bascode> tag
180 tagBasCodeParagraphs();
181 m_bTaggingCompleted
= true;
184 //! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
// The result is produced by xmlCharStrdup( str ); tagParagraph() passes it to xmlNewProp()
// as the value of the "type" attribute.
// NOTE(review): xmlCharStrdup() hands back a newly allocated string, so the caller
// presumably has to release it with xmlFree() -- confirm against the libxml2 documentation.
// NOTE(review): this listing has lost most of the body: the declaration of `str`, the
// `switch` header and the statement(s) under every case label are missing, so the string
// chosen for each TokenType cannot be read off here -- consult the real file.
185 xmlChar
* BasicCodeTagger::getTypeString( TokenType tokenType
)
190 case TokenType::Unknown
:
193 case TokenType::Identifier
:
196 case TokenType::Whitespace
:
199 case TokenType::Number
:
202 case TokenType::String
:
205 case TokenType::EOL
:
208 case TokenType::Comment
:
211 case TokenType::Error
:
214 case TokenType::Operator
:
217 case TokenType::Keywords
:
220 case TokenType::Parameter
:
// hand out a copy of the selected string
227 return xmlCharStrdup( str
);
230 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */