helpcompiler/source/BasCodeTagger.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  */
   9
  10 #include <BasCodeTagger.hxx>
  11
  12 LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc )
  13 {
  14     if ( doc == nullptr )
  15         throw BasicCodeTagger::NULL_DOCUMENT;
  16     m_pCurrentNode = xmlDocGetRootElement( doc );
  17     if ( m_pCurrentNode == nullptr )
  18         throw BasicCodeTagger::EMPTY_DOCUMENT;
  19     else if ( m_pCurrentNode->xmlChildrenNode != nullptr )
  20         m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
  21     nextNode();
  22 }
  23
  24 void LibXmlTreeWalker::nextNode()
  25 {
  26
  27       //next node
  28     if ( m_pCurrentNode->next == nullptr )
  29     {
  30         m_pCurrentNode = m_Queue.front();
  31         m_Queue.pop_front();
  32     }
  33     else
  34         m_pCurrentNode = m_pCurrentNode->next;
  35     //queue children if they exist
  36     if ( m_pCurrentNode->xmlChildrenNode != nullptr )
  37         m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
  38 }
  39
  40 void LibXmlTreeWalker::ignoreCurrNodesChildren()
  41 {
  42     if ( m_pCurrentNode->xmlChildrenNode != nullptr )
  43           m_Queue.pop_back();
  44 }
  45
  46 bool LibXmlTreeWalker::end()
  47 {
  48     return m_pCurrentNode->next == nullptr && m_Queue.empty();
  49 }
  50
  51
  52 BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc ):
  53     m_Highlighter(HighlighterLanguage::Basic)
  54 {
  55       if ( rootDoc == nullptr )
  56           throw NULL_DOCUMENT;
  57       m_pDocument = rootDoc;
  58     m_pXmlTreeWalker = nullptr;
  59     m_bTaggingCompleted = false;
  60
  61 }
  62
  63 BasicCodeTagger::~BasicCodeTagger()
  64 {
  65     if ( m_pXmlTreeWalker != nullptr )
  66       delete m_pXmlTreeWalker;
  67 }
  68 //!Gathers all the <bascode> tag nodes from xml tree.
  69 /*!
  70  *    Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
  71  */
  72 void BasicCodeTagger::getBasicCodeContainerNodes()
  73 {
  74       xmlNodePtr currentNode;
  75
  76     m_BasicCodeContainerTags.clear();
  77
  78     if ( m_pXmlTreeWalker != nullptr )
  79       delete m_pXmlTreeWalker;
  80     m_pXmlTreeWalker = new LibXmlTreeWalker( m_pDocument );
  81
  82     currentNode = m_pXmlTreeWalker->currentNode();
  83     if ( !( xmlStrcmp( currentNode->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
  84     { //Found <bascode>
  85         m_BasicCodeContainerTags.push_back( currentNode ); //it goes to the end of the list
  86     }
  87     while ( !m_pXmlTreeWalker->end() )
  88     {
  89           m_pXmlTreeWalker->nextNode();
  90         if ( !( xmlStrcmp( m_pXmlTreeWalker->currentNode()->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
  91         { //Found <bascode>
  92             m_BasicCodeContainerTags.push_back( m_pXmlTreeWalker->currentNode() ); //it goes to the end of the list
  93             m_pXmlTreeWalker->ignoreCurrNodesChildren();
  94         }
  95     }
  96 }
  97
  98 //! Extracts Basic Codes contained in <bascode> tags.
  99 /*!
 100  *  For each <bascode> this method iterates through it's <paragraph> tags and "inserts" <item> tags according
 101  *  to the Basic code syntax found in that paragraph.
 102  */
 103 void BasicCodeTagger::tagBasCodeParagraphs()
 104 {
 105     //helper variables
 106     xmlNodePtr currBascodeNode;
 107     xmlNodePtr currParagraph;
 108     while ( !m_BasicCodeContainerTags.empty() )
 109     {
 110         currBascodeNode = m_BasicCodeContainerTags.front();
 111         currParagraph = currBascodeNode->xmlChildrenNode; //first <paragraph>
 112         while ( currParagraph != nullptr )
 113         {
 114             tagParagraph( currParagraph );
 115             currParagraph=currParagraph->next;
 116         }
 117         m_BasicCodeContainerTags.pop_front(); //next element
 118     }
 119 }
 120
 121 //! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
 122 void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph )
 123 {
 124     //1. get paragraph text
 125     xmlChar* codeSnippet;
 126     codeSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 );
 127     if ( codeSnippet == nullptr )
 128     {
 129         return; //no text, nothing more to do here
 130     }
 131     //2. delete every child from paragraph (except attributes)
 132     xmlNodePtr curNode = paragraph->xmlChildrenNode;
 133     xmlNodePtr sibling;
 134     while ( curNode != nullptr )
 135     {
 136         sibling = curNode->next;
 137         xmlUnlinkNode( curNode );
 138         xmlFreeNode( curNode );
 139         curNode = sibling;
 140     }
 141
 142     //3. create new paragraph content
 143     OUString strLine( reinterpret_cast<const sal_Char*>(codeSnippet),
 144                                 strlen(reinterpret_cast<const char*>(codeSnippet)),
 145                                 RTL_TEXTENCODING_UTF8 );
 146     std::vector<HighlightPortion> portions;
 147     m_Highlighter.getHighlightPortions( strLine, portions );
 148     for (std::vector<HighlightPortion>::iterator i(portions.begin());
 149          i != portions.end(); ++i)
 150     {
 151         OString sToken(OUStringToOString(strLine.copy(i->nBegin, i->nEnd-i->nBegin), RTL_TEXTENCODING_UTF8));
 152         xmlNodePtr text = xmlNewText(reinterpret_cast<const xmlChar*>(sToken.getStr()));
 153         if ( i->tokenType != TokenType::Whitespace )
 154         {
 155             xmlChar* typeStr = getTypeString( i->tokenType );
 156             curNode = xmlNewTextChild( paragraph, nullptr, reinterpret_cast<xmlChar const *>("item"), nullptr );
 157             xmlNewProp( curNode, reinterpret_cast<xmlChar const *>("type"), typeStr );
 158             xmlAddChild( curNode, text );
 159             xmlFree( typeStr );
 160         }
 161         else
 162             xmlAddChild( paragraph, text );
 163     }
 164     xmlFree( codeSnippet );
 165 }
 166
 167 //! Manages tagging process.
 168 /*!
 169  *    This is the "main" function of BasicCodeTagger.
 170  */
 171 void BasicCodeTagger::tagBasicCodes()
 172 {
 173       if ( m_bTaggingCompleted )
 174         return;
 175     //gather <bascode> nodes
 176     try
 177     {
 178         getBasicCodeContainerNodes();
 179     }
 180     catch (TaggerException &ex)
 181     {
 182           std::cout << "BasCodeTagger error occurred. Error code:" << ex << std::endl;
 183     }
 184
 185     //tag basic code paragraphs in <bascode> tag
 186     tagBasCodeParagraphs();
 187     m_bTaggingCompleted = true;
 188 }
 189
 190 //! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
 191 xmlChar* BasicCodeTagger::getTypeString( TokenType tokenType )
 192 {
 193     const char* str;
 194     switch ( tokenType )
 195     {
 196         case TokenType::Unknown :
 197             str = "unknown";
 198             break;
 199         case TokenType::Identifier :
 200             str = "identifier";
 201             break;
 202         case TokenType::Whitespace :
 203             str = "whitespace";
 204             break;
 205         case TokenType::Number :
 206             str = "number";
 207             break;
 208         case TokenType::String :
 209             str = "string";
 210             break;
 211         case TokenType::EOL :
 212             str = "eol";
 213             break;
 214         case TokenType::Comment :
 215             str = "comment";
 216             break;
 217         case TokenType::Error :
 218             str = "error";
 219             break;
 220         case TokenType::Operator :
 221             str = "operator";
 222             break;
 223         case TokenType::Keywords :
 224             str = "keyword";
 225             break;
 226         case TokenType::Parameter :
 227             str = "parameter";
 228             break;
 229         default :
 230             str = "unknown";
 231             break;
 232     }
 233     return xmlCharStrdup( str );
 234 }
 235
 236 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */