src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestDifferentRoutes.java

   1 /*
   2 * Licensed to the Apache Software Foundation (ASF) under one or more
   3 * contributor license agreements.  See the NOTICE file distributed with
   4 * this work for additional information regarding copyright ownership.
   5 * The ASF licenses this file to You under the Apache License, Version 2.0
   6 * (the "License"); you may not use this file except in compliance with
   7 * the License.  You may obtain a copy of the License at
   8 *
   9 *     http://www.apache.org/licenses/LICENSE-2.0
  10 *
  11 * Unless required by applicable law or agreed to in writing, software
  12 * distributed under the License is distributed on an "AS IS" BASIS,
  13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14 * See the License for the specific language governing permissions and
  15 * limitations under the License.
  16 */
  17 package org.apache.poi.hwpf.extractor;
  18
  19 import java.io.FileInputStream;
  20 import java.util.Iterator;
  21
  22 import org.apache.poi.hwpf.HWPFDocument;
  23 import org.apache.poi.hwpf.model.TextPiece;
  24 import org.apache.poi.hwpf.usermodel.Paragraph;
  25 import org.apache.poi.hwpf.usermodel.Range;
  26
  27 import junit.framework.TestCase;
  28
  29 /**
  30  * Test the different routes to extracting text
  31  *
  32  * @author Nick Burch (nick at torchbox dot com)
  33  */
  34 public class TestDifferentRoutes extends TestCase {
  35         private String[] p_text = new String[] {
  36                         "This is a simple word document\r",
  37                         "\r",
  38                         "It has a number of paragraphs in it\r",
  39                         "\r",
  40                         "Some of them even feature bold, italic and underlined text\r",
  41                         "\r",
  42                         "\r",
  43                         "This bit is in a different font and size\r",
  44                         "\r",
  45                         "\r",
  46                         "This bit features some red text.\r",
  47                         "\r",
  48                         "\r",
  49                         "It is otherwise very very boring.\r"
  50         };
  51
  52         private HWPFDocument doc;
  53
  54     protected void setUp() throws Exception {
  55                 String dirname = System.getProperty("HWPF.testdata.path");
  56
  57                 String filename = dirname + "/test2.doc";
  58                 doc = new HWPFDocument(new FileInputStream(filename));
  59     }
  60
  61     /**
  62      * Test model based extraction
  63      */
  64     public void testExtractFromModel() {
  65         Range r = doc.getRange();
  66
  67         String[] text = new String[r.numParagraphs()];
  68         for(int i=0; i < r.numParagraphs(); i++) {
  69                 Paragraph p = r.getParagraph(i);
  70                 text[i] = p.text();
  71         }
  72
  73         assertEquals(p_text.length, text.length);
  74         for(int i=0; i<p_text.length; i++) {
  75                 assertEquals(p_text[i], text[i]);
  76         }
  77     }
  78
  79     /**
  80      * Test textPieces based extraction
  81      */
  82     public void testExtractFromTextPieces() throws Exception {
  83         StringBuffer textBuf = new StringBuffer();
  84
  85         Iterator textPieces = doc.getTextTable().getTextPieces().iterator();
  86         while (textPieces.hasNext()) {
  87                 TextPiece piece = (TextPiece) textPieces.next();
  88
  89                 String encoding = "Cp1252";
  90                 if (piece.usesUnicode()) {
  91                         encoding = "UTF-16LE";
  92                 }
  93                 String text = new String(piece.getRawBytes(), encoding);
  94                 textBuf.append(text);
  95         }
  96
  97         StringBuffer exp = new StringBuffer();
  98         for(int i=0; i<p_text.length; i++) {
  99                 exp.append(p_text[i]);
 100         }
 101         assertEquals(exp.toString(), textBuf.toString());
 102     }
 103 }