Inspired by bug #44958 - Record level support for Data Tables. (No formula parser...
[poi.git] / src / testcases / org / apache / poi / hssf / extractor / TestExcelExtractor.java
blob9bb137ff69cd1b9959344381a546a4508c6e18aa
1 /* ====================================================================
2 Licensed to the Apache Software Foundation (ASF) under one or more
3 contributor license agreements. See the NOTICE file distributed with
4 this work for additional information regarding copyright ownership.
5 The ASF licenses this file to You under the Apache License, Version 2.0
6 (the "License"); you may not use this file except in compliance with
7 the License. You may obtain a copy of the License at
9 http://www.apache.org/licenses/LICENSE-2.0
11 Unless required by applicable law or agreed to in writing, software
12 distributed under the License is distributed on an "AS IS" BASIS,
13 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 See the License for the specific language governing permissions and
15 limitations under the License.
16 ==================================================================== */
18 package org.apache.poi.hssf.extractor;
20 import java.io.FileInputStream;
21 import java.io.IOException;
22 import java.io.InputStream;
24 import junit.framework.TestCase;
26 import org.apache.poi.hssf.HSSFTestDataSamples;
27 import org.apache.poi.hssf.usermodel.HSSFWorkbook;
28 import org.apache.poi.poifs.filesystem.DirectoryNode;
29 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
30 /**
33 public final class TestExcelExtractor extends TestCase {
35 private static final ExcelExtractor createExtractor(String sampleFileName) {
37 InputStream is = HSSFTestDataSamples.openSampleFileStream(sampleFileName);
39 try {
40 return new ExcelExtractor(new POIFSFileSystem(is));
41 } catch (IOException e) {
42 throw new RuntimeException(e);
47 public void testSimple() {
49 ExcelExtractor extractor = createExtractor("Simple.xls");
51 assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
53 // Now turn off sheet names
54 extractor.setIncludeSheetNames(false);
55 assertEquals("replaceMe\n", extractor.getText());
58 public void testNumericFormula() {
60 ExcelExtractor extractor = createExtractor("sumifformula.xls");
62 assertEquals(
63 "Sheet1\n" +
64 "1000.0\t1.0\t5.0\n" +
65 "2000.0\t2.0\t\n" +
66 "3000.0\t3.0\t\n" +
67 "4000.0\t4.0\t\n" +
68 "5000.0\t5.0\t\n" +
69 "Sheet2\nSheet3\n",
70 extractor.getText()
73 extractor.setFormulasNotResults(true);
75 assertEquals(
76 "Sheet1\n" +
77 "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
78 "2000.0\t2.0\t\n" +
79 "3000.0\t3.0\t\n" +
80 "4000.0\t4.0\t\n" +
81 "5000.0\t5.0\t\n" +
82 "Sheet2\nSheet3\n",
83 extractor.getText()
87 public void testwithContinueRecords() {
89 ExcelExtractor extractor = createExtractor("StringContinueRecords.xls");
91 extractor.getText();
93 // Has masses of text
94 // Until we fixed bug #41064, this would've
95 // failed by now
96 assertTrue(extractor.getText().length() > 40960);
99 public void testStringConcat() {
101 ExcelExtractor extractor = createExtractor("SimpleWithFormula.xls");
103 // Comes out as NaN if treated as a number
104 // And as XYZ if treated as a string
105 assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", extractor.getText());
107 extractor.setFormulasNotResults(true);
109 assertEquals("Sheet1\nreplaceme\nreplaceme\nCONCATENATE(A1,A2)\nSheet2\nSheet3\n", extractor.getText());
112 public void testStringFormula() {
114 ExcelExtractor extractor = createExtractor("StringFormulas.xls");
116 // Comes out as NaN if treated as a number
117 // And as XYZ if treated as a string
118 assertEquals("Sheet1\nXYZ\nSheet2\nSheet3\n", extractor.getText());
120 extractor.setFormulasNotResults(true);
122 assertEquals("Sheet1\nUPPER(\"xyz\")\nSheet2\nSheet3\n", extractor.getText());
126 public void testEventExtractor() throws Exception {
127 EventBasedExcelExtractor extractor;
129 // First up, a simple file with string
130 // based formulas in it
131 extractor = new EventBasedExcelExtractor(
132 new POIFSFileSystem(
133 HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
136 extractor.setIncludeSheetNames(true);
138 String text = extractor.getText();
139 assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
141 extractor.setIncludeSheetNames(false);
142 extractor.setFormulasNotResults(true);
144 text = extractor.getText();
145 assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
148 // Now, a slightly longer file with numeric formulas
149 extractor = new EventBasedExcelExtractor(
150 new POIFSFileSystem(
151 HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
154 extractor.setIncludeSheetNames(false);
155 extractor.setFormulasNotResults(true);
157 text = extractor.getText();
158 assertEquals(
159 "1000.0\t1.0\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
160 "2000.0\t2.0\n" +
161 "3000.0\t3.0\n" +
162 "4000.0\t4.0\n" +
163 "5000.0\t5.0\n",
164 text
168 public void testWithComments() throws Exception {
169 ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
170 extractor.setIncludeSheetNames(false);
172 // Check without comments
173 assertEquals(
174 "1.0\tone\n" +
175 "2.0\ttwo\n" +
176 "3.0\tthree\n",
177 extractor.getText()
180 // Now with
181 extractor.setIncludeCellComments(true);
182 assertEquals(
183 "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
184 "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
185 "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
186 extractor.getText()
192 * Embded in a non-excel file
194 public void testWithEmbeded() throws Exception {
195 String pdirname = System.getProperty("POIFS.testdata.path");
196 String filename = pdirname + "/word_with_embeded.doc";
197 POIFSFileSystem fs = new POIFSFileSystem(
198 new FileInputStream(filename)
201 DirectoryNode objPool = (DirectoryNode)
202 fs.getRoot().getEntry("ObjectPool");
203 DirectoryNode dirA = (DirectoryNode)
204 objPool.getEntry("_1269427460");
205 DirectoryNode dirB = (DirectoryNode)
206 objPool.getEntry("_1269427461");
208 HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
209 HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
211 ExcelExtractor exA = new ExcelExtractor(wbA);
212 ExcelExtractor exB = new ExcelExtractor(wbB);
214 assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
215 exA.getText());
216 assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
218 assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
219 exB.getText());
220 assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
224 * Excel embeded in excel
226 public void testWithEmbededInOwn() throws Exception {
227 String pdirname = System.getProperty("POIFS.testdata.path");
228 String filename = pdirname + "/excel_with_embeded.xls";
229 POIFSFileSystem fs = new POIFSFileSystem(
230 new FileInputStream(filename)
233 DirectoryNode dirA = (DirectoryNode)
234 fs.getRoot().getEntry("MBD0000A3B5");
235 DirectoryNode dirB = (DirectoryNode)
236 fs.getRoot().getEntry("MBD0000A3B4");
238 HSSFWorkbook wbA = new HSSFWorkbook(dirA, fs, true);
239 HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
241 ExcelExtractor exA = new ExcelExtractor(wbA);
242 ExcelExtractor exB = new ExcelExtractor(wbB);
244 assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
245 exA.getText());
246 assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
248 assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
249 exB.getText());
250 assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
252 // And the base file too
253 ExcelExtractor ex = new ExcelExtractor(fs);
254 assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
255 ex.getText());
256 assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());