helpcompiler/source/BasCodeTagger.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  */
   9
  10 #include <BasCodeTagger.hxx>
  11 #include <rtl/ustring.hxx>
  12 #include <iostream>
  13
  14 LibXmlTreeWalker::LibXmlTreeWalker( xmlDocPtr doc )
  15 {
  16     if ( doc == nullptr )
  17         throw BasicCodeTagger::NULL_DOCUMENT;
  18     m_pCurrentNode = xmlDocGetRootElement( doc );
  19     if ( m_pCurrentNode == nullptr )
  20         throw BasicCodeTagger::EMPTY_DOCUMENT;
  21     else if ( m_pCurrentNode->xmlChildrenNode != nullptr )
  22         m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
  23     nextNode();
  24 }
  25
  26 void LibXmlTreeWalker::nextNode()
  27 {
  28
  29       //next node
  30     if ( m_pCurrentNode->next == nullptr )
  31     {
  32         m_pCurrentNode = m_Queue.front();
  33         m_Queue.pop_front();
  34     }
  35     else
  36         m_pCurrentNode = m_pCurrentNode->next;
  37     //queue children if they exist
  38     if ( m_pCurrentNode->xmlChildrenNode != nullptr )
  39         m_Queue.push_back( m_pCurrentNode->xmlChildrenNode );
  40 }
  41
  42 void LibXmlTreeWalker::ignoreCurrNodesChildren()
  43 {
  44     if ( m_pCurrentNode->xmlChildrenNode != nullptr )
  45           m_Queue.pop_back();
  46 }
  47
  48 bool LibXmlTreeWalker::end() const
  49 {
  50     return m_pCurrentNode->next == nullptr && m_Queue.empty();
  51 }
  52
  53
  54 BasicCodeTagger::BasicCodeTagger( xmlDocPtr rootDoc ):
  55     m_Highlighter(HighlighterLanguage::Basic)
  56 {
  57     if ( rootDoc == nullptr )
  58         throw NULL_DOCUMENT;
  59     m_pDocument = rootDoc;
  60     m_pXmlTreeWalker = nullptr;
  61     m_bTaggingCompleted = false;
  62 }
  63
  64 BasicCodeTagger::~BasicCodeTagger()
  65 {
  66 }
  67 //!Gathers all the <bascode> tag nodes from xml tree.
  68 /*!
  69  *    Assumes m_pDocument is valid. Handles m_pXmlTreeWalker and m_BasicCodeContainerTags members.
  70  */
  71 void BasicCodeTagger::getBasicCodeContainerNodes()
  72 {
  73     xmlNodePtr currentNode;
  74
  75     m_BasicCodeContainerTags.clear();
  76
  77     m_pXmlTreeWalker.reset(new LibXmlTreeWalker( m_pDocument ));
  78
  79     currentNode = m_pXmlTreeWalker->currentNode();
  80     if ( !( xmlStrcmp( currentNode->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
  81     { //Found <bascode>
  82         m_BasicCodeContainerTags.push_back( currentNode ); //it goes to the end of the list
  83     }
  84     while ( !m_pXmlTreeWalker->end() )
  85     {
  86         m_pXmlTreeWalker->nextNode();
  87         if ( !( xmlStrcmp( m_pXmlTreeWalker->currentNode()->name, reinterpret_cast<const xmlChar*>("bascode") ) ) )
  88         { //Found <bascode>
  89             m_BasicCodeContainerTags.push_back( m_pXmlTreeWalker->currentNode() ); //it goes to the end of the list
  90             m_pXmlTreeWalker->ignoreCurrNodesChildren();
  91         }
  92     }
  93 }
  94
  95 //! Extracts Basic Codes contained in <bascode> tags.
  96 /*!
  97  *  For each <bascode> this method iterates through it's <paragraph> tags and "inserts" <item> tags according
  98  *  to the Basic code syntax found in that paragraph.
  99  */
 100 void BasicCodeTagger::tagBasCodeParagraphs()
 101 {
 102     //helper variables
 103     xmlNodePtr currParagraph;
 104     for (auto const& currBascodeNode : m_BasicCodeContainerTags)
 105     {
 106         currParagraph = currBascodeNode->xmlChildrenNode; //first <paragraph>
 107         while ( currParagraph != nullptr )
 108         {
 109             tagParagraph( currParagraph );
 110             currParagraph=currParagraph->next;
 111         }
 112     }
 113     m_BasicCodeContainerTags.clear();
 114 }
 115
 116 //! Used by tagBasCodeParagraphs(). It does the work on the current paragraph containing Basic code.
 117 void BasicCodeTagger::tagParagraph( xmlNodePtr paragraph )
 118 {
 119     //1. get paragraph text
 120     xmlChar* codeSnippet;
 121     codeSnippet = xmlNodeListGetString( m_pDocument, paragraph->xmlChildrenNode, 1 );
 122     if ( codeSnippet == nullptr )
 123     {
 124         return; //no text, nothing more to do here
 125     }
 126     //2. delete every child from paragraph (except attributes)
 127     xmlNodePtr curNode = paragraph->xmlChildrenNode;
 128     xmlNodePtr sibling;
 129     while ( curNode != nullptr )
 130     {
 131         sibling = curNode->next;
 132         xmlUnlinkNode( curNode );
 133         xmlFreeNode( curNode );
 134         curNode = sibling;
 135     }
 136
 137     //3. create new paragraph content
 138     OUString strLine( reinterpret_cast<const char*>(codeSnippet),
 139                                 strlen(reinterpret_cast<const char*>(codeSnippet)),
 140                                 RTL_TEXTENCODING_UTF8 );
 141     std::vector<HighlightPortion> portions;
 142     m_Highlighter.getHighlightPortions( strLine, portions );
 143     for (auto const& portion : portions)
 144     {
 145         OString sToken(OUStringToOString(strLine.subView(portion.nBegin, portion.nEnd-portion.nBegin), RTL_TEXTENCODING_UTF8));
 146         xmlNodePtr text = xmlNewText(reinterpret_cast<const xmlChar*>(sToken.getStr()));
 147         if ( portion.tokenType != TokenType::Whitespace )
 148         {
 149             xmlChar* typeStr = getTypeString( portion.tokenType );
 150             curNode = xmlNewTextChild( paragraph, nullptr, reinterpret_cast<xmlChar const *>("item"), nullptr );
 151             xmlNewProp( curNode, reinterpret_cast<xmlChar const *>("type"), typeStr );
 152             xmlAddChild( curNode, text );
 153             xmlFree( typeStr );
 154         }
 155         else
 156             xmlAddChild( paragraph, text );
 157     }
 158     xmlFree( codeSnippet );
 159 }
 160
 161 //! Manages tagging process.
 162 /*!
 163  *    This is the "main" function of BasicCodeTagger.
 164  */
 165 void BasicCodeTagger::tagBasicCodes()
 166 {
 167     if ( m_bTaggingCompleted )
 168         return;
 169     //gather <bascode> nodes
 170     try
 171     {
 172         getBasicCodeContainerNodes();
 173     }
 174     catch (TaggerException &ex)
 175     {
 176           std::cout << "BasCodeTagger error occurred. Error code:" << ex << std::endl;
 177     }
 178
 179     //tag basic code paragraphs in <bascode> tag
 180     tagBasCodeParagraphs();
 181     m_bTaggingCompleted = true;
 182 }
 183
 184 //! Converts SyntaxHighlighter's TokenTypes enum to a type string for <item type=... >
 185 xmlChar* BasicCodeTagger::getTypeString( TokenType tokenType )
 186 {
 187     const char* str;
 188     switch ( tokenType )
 189     {
 190         case TokenType::Unknown :
 191             str = "unknown";
 192             break;
 193         case TokenType::Identifier :
 194             str = "identifier";
 195             break;
 196         case TokenType::Whitespace :
 197             str = "whitespace";
 198             break;
 199         case TokenType::Number :
 200             str = "number";
 201             break;
 202         case TokenType::String :
 203             str = "string";
 204             break;
 205         case TokenType::EOL :
 206             str = "eol";
 207             break;
 208         case TokenType::Comment :
 209             str = "comment";
 210             break;
 211         case TokenType::Error :
 212             str = "error";
 213             break;
 214         case TokenType::Operator :
 215             str = "operator";
 216             break;
 217         case TokenType::Keywords :
 218             str = "keyword";
 219             break;
 220         case TokenType::Parameter :
 221             str = "parameter";
 222             break;
 223         default :
 224             str = "unknown";
 225             break;
 226     }
 227     return xmlCharStrdup( str );
 228 }
 229
 230 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */