1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <BasCodeTagger.hxx>
//! Builds a walker over the given libxml2 document, starting at its root element.
//! Throws BasicCodeTagger::NULL_DOCUMENT or BasicCodeTagger::EMPTY_DOCUMENT; the latter is
//! thrown when xmlDocGetRootElement() yields no root element.
// NOTE(review): this listing omits the brace-only lines and the line that guards the first
// throw (presumably a null check on doc) -- confirm against the complete file.
13 LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc
)
16 throw BasicCodeTagger::NULL_DOCUMENT
;
// Position the walker on the root element of the document.
17 m_pCurrentNode
= xmlDocGetRootElement( doc
);
// A document without a root element cannot be walked.
18 if ( m_pCurrentNode
== nullptr )
19 throw BasicCodeTagger::EMPTY_DOCUMENT
;
// Otherwise remember the root's child list in m_Queue so that it can be visited later.
20 else if ( m_pCurrentNode
->xmlChildrenNode
!= nullptr )
21 m_Queue
.push_back( m_pCurrentNode
->xmlChildrenNode
);
//! Moves the walker to another node and queues that node's child list, if it has one.
//! When the current node has no following sibling the new node is read from the front of
//! m_Queue; the other visible assignment steps to the following sibling.
// NOTE(review): lines are missing from this listing between the two assignments below
// (presumably the removal of the queue's front entry and an `else`) -- confirm against the
// complete file. No emptiness check on m_Queue is visible before front() is called.
25 void LibXmlTreeWalker::nextNode()
29 if ( m_pCurrentNode
->next
== nullptr )
31 m_pCurrentNode
= m_Queue
.front();
35 m_pCurrentNode
= m_pCurrentNode
->next
;
36 //queue children if they exist
37 if ( m_pCurrentNode
->xmlChildrenNode
!= nullptr )
38 m_Queue
.push_back( m_pCurrentNode
->xmlChildrenNode
);
//! Acts only when the current node has children.
// NOTE(review): the statement controlled by this condition is not visible in this listing.
// Judging by the method name it presumably removes the child list that nextNode() queued
// for the current node -- confirm against the complete file.
41 void LibXmlTreeWalker::ignoreCurrNodesChildren()
43 if ( m_pCurrentNode
->xmlChildrenNode
!= nullptr )
47 bool LibXmlTreeWalker::end() const
49 return m_pCurrentNode
->next
== nullptr && m_Queue
.empty();
//! Creates a tagger for the given document.
//! The syntax highlighter member is initialised for HighlighterLanguage::Basic.
//! \param rootDoc document to work on; the pointer itself is stored in m_pDocument.
// NOTE(review): the statement guarded by the null check below is not visible in this listing
// (presumably it throws for a null document) -- confirm against the complete file.
53 BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc
):
54 m_Highlighter(HighlighterLanguage::Basic
)
56 if ( rootDoc
== nullptr )
58 m_pDocument
= rootDoc
;
// No tree walker exists yet and no tagging has been performed.
59 m_pXmlTreeWalker
= nullptr;
60 m_bTaggingCompleted
= false;
//! Destructor.
// NOTE(review): the body is not visible in this listing; what it releases, if anything,
// has to be confirmed against the complete file.
63 BasicCodeTagger::~BasicCodeTagger()
66 //!Gathers all the <bascode> tag nodes from xml tree.
68 * Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
//! Collects the nodes named "bascode" into m_BasicCodeContainerTags.
//! The list is emptied first and a new LibXmlTreeWalker is created over m_pDocument, so any
//! exception thrown by the walker's constructor leaves this function.
// NOTE(review): brace-only lines are absent from this listing, so the exact extent of the
// if/while bodies below is inferred -- confirm against the complete file.
70 void BasicCodeTagger::getBasicCodeContainerNodes()
72 xmlNodePtr currentNode
;
// Start from an empty result list.
74 m_BasicCodeContainerTags
.clear();
// Replace whatever walker was held before with a fresh one over m_pDocument.
76 m_pXmlTreeWalker
.reset(new LibXmlTreeWalker( m_pDocument
));
// The walker's starting node is examined here; the loop below only sees nodes reached
// through nextNode().
78 currentNode
= m_pXmlTreeWalker
->currentNode();
// xmlStrcmp() returns 0 for equal strings, hence the negation.
79 if ( !( xmlStrcmp( currentNode
->name
, reinterpret_cast<const xmlChar
*>("bascode") ) ) )
81 m_BasicCodeContainerTags
.push_back( currentNode
); //it goes to the end of the list
// Visit the remaining nodes until the walker reports the end of the traversal.
83 while ( !m_pXmlTreeWalker
->end() )
85 m_pXmlTreeWalker
->nextNode();
86 if ( !( xmlStrcmp( m_pXmlTreeWalker
->currentNode()->name
, reinterpret_cast<const xmlChar
*>("bascode") ) ) )
88 m_BasicCodeContainerTags
.push_back( m_pXmlTreeWalker
->currentNode() ); //it goes to the end of the list
// NOTE(review): presumably still inside the "bascode" branch, so that the contents of a
// <bascode> node are not walked -- confirm.
89 m_pXmlTreeWalker
->ignoreCurrNodesChildren();
94 //! Extracts Basic Codes contained in <bascode> tags.
96 * For each <bascode> this method iterates through its <paragraph> tags and "inserts" <item> tags according
97 * to the Basic code syntax found in that paragraph.
//! Calls tagParagraph() on every child node of every entry in m_BasicCodeContainerTags
//! and empties that list.
// NOTE(review): brace-only lines are absent from this listing; the nesting described in the
// comments below is inferred -- confirm against the complete file.
99 void BasicCodeTagger::tagBasCodeParagraphs()
102 xmlNodePtr currParagraph
;
103 for (auto const& currBascodeNode
: m_BasicCodeContainerTags
)
105 currParagraph
= currBascodeNode
->xmlChildrenNode
; //first <paragraph>
// Follow the sibling chain until it ends; every node on it is handed to tagParagraph().
106 while ( currParagraph
!= nullptr )
108 tagParagraph( currParagraph
);
109 currParagraph
=currParagraph
->next
;
// The collected nodes are dropped (presumably once all of them have been processed).
112 m_BasicCodeContainerTags
.clear();
115 //! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
//! Replaces the content of one paragraph node by highlighted pieces of its text.
//! The text is read from the paragraph's children, the children are removed, and the text
//! is re-added portion by portion as reported by m_Highlighter.
//! \param paragraph node whose children are read, removed and rebuilt.
// NOTE(review): brace-only lines and some statement lines are absent from this listing; the
// individual notes below mark what could not be seen -- confirm against the complete file.
116 void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph
)
118 //1. get paragraph text
119 xmlChar
* codeSnippet
;
// The last argument (1) asks libxml2 to substitute entity references in the returned text.
120 codeSnippet
= xmlNodeListGetString( m_pDocument
, paragraph
->xmlChildrenNode
, 1 );
121 if ( codeSnippet
== nullptr )
123 return; //no text, nothing more to do here
125 //2. delete every child from paragraph (except attributes)
126 xmlNodePtr curNode
= paragraph
->xmlChildrenNode
;
128 while ( curNode
!= nullptr )
// The successor is read before curNode is unlinked and freed.
// NOTE(review): the declaration of sibling and the step that advances curNode are not
// visible in this listing -- confirm.
130 sibling
= curNode
->next
;
131 xmlUnlinkNode( curNode
);
132 xmlFreeNode( curNode
);
136 //3. create new paragraph content
// codeSnippet is read as UTF-8 and turned into an OUString for the highlighter.
137 OUString
strLine( reinterpret_cast<const sal_Char
*>(codeSnippet
),
138 strlen(reinterpret_cast<const char*>(codeSnippet
)),
139 RTL_TEXTENCODING_UTF8
);
140 std::vector
<HighlightPortion
> portions
;
141 m_Highlighter
.getHighlightPortions( strLine
, portions
);
142 for (auto const& portion
: portions
)
// Cut the characters from nBegin up to (not including) nEnd out of the line and convert
// them back to UTF-8 for libxml2.
144 OString
sToken(OUStringToOString(strLine
.copy(portion
.nBegin
, portion
.nEnd
-portion
.nBegin
), RTL_TEXTENCODING_UTF8
));
145 xmlNodePtr text
= xmlNewText(reinterpret_cast<const xmlChar
*>(sToken
.getStr()));
// Portions that are not whitespace are wrapped in a new <item type="..."> child.
146 if ( portion
.tokenType
!= TokenType::Whitespace
)
// NOTE(review): getTypeString() hands back a string made by xmlCharStrdup(), and
// xmlNewProp() stores its own copy of the value; no release of typeStr is visible in this
// listing -- confirm that it is freed.
148 xmlChar
* typeStr
= getTypeString( portion
.tokenType
);
149 curNode
= xmlNewTextChild( paragraph
, nullptr, reinterpret_cast<xmlChar
const *>("item"), nullptr );
150 xmlNewProp( curNode
, reinterpret_cast<xmlChar
const *>("type"), typeStr
);
151 xmlAddChild( curNode
, text
);
// NOTE(review): presumably the alternative branch (its `else` line is not visible here):
// the text node is attached to the paragraph directly, without an <item> wrapper.
155 xmlAddChild( paragraph
, text
);
// The string returned by xmlNodeListGetString() has to be released by the caller.
157 xmlFree( codeSnippet
);
160 //! Manages tagging process.
162 * This is the "main" function of BasicCodeTagger.
//! Drives the tagging: gathers the <bascode> nodes, tags their paragraphs and then sets
//! m_bTaggingCompleted.
// NOTE(review): the statement guarded by the m_bTaggingCompleted check and the `try` line
// belonging to the catch clause are not visible in this listing (presumably an early return
// so that tagging runs only once) -- confirm against the complete file.
164 void BasicCodeTagger::tagBasicCodes()
166 if ( m_bTaggingCompleted
)
168 //gather <bascode> nodes
171 getBasicCodeContainerNodes();
// A TaggerException is reported on standard output together with its value; no rethrow is
// visible in this listing.
173 catch (TaggerException
&ex
)
175 std::cout
<< "BasCodeTagger error occurred. Error code:" << ex
<< std::endl
;
178 //tag basic code paragraphs in <bascode> tag
179 tagBasCodeParagraphs();
180 m_bTaggingCompleted
= true;
183 //! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
//! Produces the type string for a token type as a fresh copy made by xmlCharStrdup().
//! \param tokenType token type to translate; the cases listed below are handled explicitly.
//! \return string duplicated by xmlCharStrdup(); by libxml2 convention the caller is
//!         expected to release it with xmlFree().
// NOTE(review): the declaration of str, the switch header, the per-case assignments and any
// default branch are not visible in this listing -- the actual strings have to be taken from
// the complete file.
184 xmlChar
* BasicCodeTagger::getTypeString( TokenType tokenType
)
189 case TokenType::Unknown
:
192 case TokenType::Identifier
:
195 case TokenType::Whitespace
:
198 case TokenType::Number
:
201 case TokenType::String
:
204 case TokenType::EOL
:
207 case TokenType::Comment
:
210 case TokenType::Error
:
213 case TokenType::Operator
:
216 case TokenType::Keywords
:
219 case TokenType::Parameter
:
// Hand out a heap copy rather than the pointer held in str.
226 return xmlCharStrdup( str
);
229 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */