1 /* ====================================================================
2 Licensed to the Apache Software Foundation (ASF) under one or more
3 contributor license agreements. See the NOTICE file distributed with
4 this work for additional information regarding copyright ownership.
5 The ASF licenses this file to You under the Apache License, Version 2.0
6 (the "License"); you may not use this file except in compliance with
7 the License. You may obtain a copy of the License at
9 http://www.apache.org/licenses/LICENSE-2.0
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16 ==================================================================== */
18 package org
.apache
.poi
.hssf
.extractor
;
20 import java
.io
.FileInputStream
;
21 import java
.io
.IOException
;
22 import java
.io
.InputStream
;
24 import junit
.framework
.TestCase
;
26 import org
.apache
.poi
.hssf
.HSSFTestDataSamples
;
27 import org
.apache
.poi
.hssf
.usermodel
.HSSFWorkbook
;
28 import org
.apache
.poi
.poifs
.filesystem
.DirectoryNode
;
29 import org
.apache
.poi
.poifs
.filesystem
.POIFSFileSystem
;
33 public final class TestExcelExtractor
extends TestCase
{
35 private static final ExcelExtractor
createExtractor(String sampleFileName
) {
37 InputStream is
= HSSFTestDataSamples
.openSampleFileStream(sampleFileName
);
40 return new ExcelExtractor(new POIFSFileSystem(is
));
41 } catch (IOException e
) {
42 throw new RuntimeException(e
);
47 public void testSimple() {
49 ExcelExtractor extractor
= createExtractor("Simple.xls");
51 assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor
.getText());
53 // Now turn off sheet names
54 extractor
.setIncludeSheetNames(false);
55 assertEquals("replaceMe\n", extractor
.getText());
58 public void testNumericFormula() {
60 ExcelExtractor extractor
= createExtractor("sumifformula.xls");
64 "1000.0\t1.0\t5.0\n" +
73 extractor
.setFormulasNotResults(true);
77 "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
87 public void testwithContinueRecords() {
89 ExcelExtractor extractor
= createExtractor("StringContinueRecords.xls");
94 // Until we fixed bug #41064, this would've
96 assertTrue(extractor
.getText().length() > 40960);
99 public void testStringConcat() {
101 ExcelExtractor extractor
= createExtractor("SimpleWithFormula.xls");
103 // Comes out as NaN if treated as a number
104 // And as XYZ if treated as a string
105 assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", extractor
.getText());
107 extractor
.setFormulasNotResults(true);
109 assertEquals("Sheet1\nreplaceme\nreplaceme\nCONCATENATE(A1,A2)\nSheet2\nSheet3\n", extractor
.getText());
112 public void testStringFormula() {
114 ExcelExtractor extractor
= createExtractor("StringFormulas.xls");
116 // Comes out as NaN if treated as a number
117 // And as XYZ if treated as a string
118 assertEquals("Sheet1\nXYZ\nSheet2\nSheet3\n", extractor
.getText());
120 extractor
.setFormulasNotResults(true);
122 assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor
.getText());
126 public void testEventExtractor() throws Exception
{
127 EventBasedExcelExtractor extractor
;
129 // First up, a simple file with string
130 // based formulas in it
131 extractor
= new EventBasedExcelExtractor(
133 HSSFTestDataSamples
.openSampleFileStream("SimpleWithFormula.xls")
136 extractor
.setIncludeSheetNames(true);
138 String text
= extractor
.getText();
139 assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text
);
141 extractor
.setIncludeSheetNames(false);
142 extractor
.setFormulasNotResults(true);
144 text
= extractor
.getText();
145 assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text
);
148 // Now, a slightly longer file with numeric formulas
149 extractor
= new EventBasedExcelExtractor(
151 HSSFTestDataSamples
.openSampleFileStream("sumifformula.xls")
154 extractor
.setIncludeSheetNames(false);
155 extractor
.setFormulasNotResults(true);
157 text
= extractor
.getText();
159 "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
168 public void testWithComments() throws Exception
{
169 ExcelExtractor extractor
= createExtractor("SimpleWithComments.xls");
170 extractor
.setIncludeSheetNames(false);
172 // Check without comments
181 extractor
.setIncludeCellComments(true);
183 "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
184 "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
185 "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
192 * Embded in a non-excel file
194 public void testWithEmbeded() throws Exception
{
195 String pdirname
= System
.getProperty("POIFS.testdata.path");
196 String filename
= pdirname
+ "/word_with_embeded.doc";
197 POIFSFileSystem fs
= new POIFSFileSystem(
198 new FileInputStream(filename
)
201 DirectoryNode objPool
= (DirectoryNode
)
202 fs
.getRoot().getEntry("ObjectPool");
203 DirectoryNode dirA
= (DirectoryNode
)
204 objPool
.getEntry("_1269427460");
205 DirectoryNode dirB
= (DirectoryNode
)
206 objPool
.getEntry("_1269427461");
208 HSSFWorkbook wbA
= new HSSFWorkbook(dirA
, fs
, true);
209 HSSFWorkbook wbB
= new HSSFWorkbook(dirB
, fs
, true);
211 ExcelExtractor exA
= new ExcelExtractor(wbA
);
212 ExcelExtractor exB
= new ExcelExtractor(wbB
);
214 assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
216 assertEquals("Sample Excel", exA
.getSummaryInformation().getTitle());
218 assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
220 assertEquals("Sample Excel 2", exB
.getSummaryInformation().getTitle());
224 * Excel embeded in excel
226 public void testWithEmbededInOwn() throws Exception
{
227 String pdirname
= System
.getProperty("POIFS.testdata.path");
228 String filename
= pdirname
+ "/excel_with_embeded.xls";
229 POIFSFileSystem fs
= new POIFSFileSystem(
230 new FileInputStream(filename
)
233 DirectoryNode dirA
= (DirectoryNode
)
234 fs
.getRoot().getEntry("MBD0000A3B5");
235 DirectoryNode dirB
= (DirectoryNode
)
236 fs
.getRoot().getEntry("MBD0000A3B4");
238 HSSFWorkbook wbA
= new HSSFWorkbook(dirA
, fs
, true);
239 HSSFWorkbook wbB
= new HSSFWorkbook(dirB
, fs
, true);
241 ExcelExtractor exA
= new ExcelExtractor(wbA
);
242 ExcelExtractor exB
= new ExcelExtractor(wbB
);
244 assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
246 assertEquals("Sample Excel", exA
.getSummaryInformation().getTitle());
248 assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
250 assertEquals("Sample Excel 2", exB
.getSummaryInformation().getTitle());
252 // And the base file too
253 ExcelExtractor ex
= new ExcelExtractor(fs
);
254 assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
256 assertEquals("Excel With Embeded", ex
.getSummaryInformation().getTitle());