Skip to content

Commit

Permalink
Merge pull request #162 from mbrocchieri/parsing-error
Browse files Browse the repository at this point in the history
Add option to not throw exception when parsing error and support defa…
  • Loading branch information
Olivier Chédru authored May 3, 2021
2 parents ba61c91 + e7f1e67 commit ee2b8cd
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,16 @@ private static String relsNameFor(String entryName) {
}

private Map<String, String> readWorkbookPartsIds(String workbookRelsEntryName) throws IOException, XMLStreamException {
String xlFolder = workbookRelsEntryName.substring(0, workbookRelsEntryName.indexOf("_rel"));
Map<String, String> partsIdById = new HashMap<>();
SimpleXmlReader rels = new SimpleXmlReader(factory, getRequiredEntryContent(workbookRelsEntryName));
while (rels.goTo("Relationship")) {
String id = rels.getAttribute("Id");
String target = rels.getAttribute("Target");
// if name does not start with /, it is a relative path
if (!target.startsWith("/")) {
target = xlFolder + target;
} // else it is an absolute path
partsIdById.put(id, target);
}
return partsIdById;
Expand All @@ -89,6 +94,11 @@ private PartEntryNames extractPartEntriesFromContentTypes() throws XMLStreamExce
break;
}
}
if (entries.workbook == null) {
// in case of a default workbook path, we got this
// <Default Extension="xml" ContentType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml" />
entries.workbook = "/xl/workbook.xml";
}
}
return entries;
}
Expand Down Expand Up @@ -173,7 +183,7 @@ public InputStream getSheetContent(Sheet sheet) throws IOException {
sheet.getIndex(), sheet.getName(), sheet.getId());
throw new ExcelReaderException(msg);
}
return getRequiredEntryContent("xl/" + name);
return getRequiredEntryContent(name);
}

public List<String> getFormatList() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,32 +30,33 @@ public class ReadableWorkbook implements Closeable {

private final OPCPackage pkg;
private final SST sst;
private final ReadingOptions readingOptions;

private boolean date1904;
private final List<Sheet> sheets = new ArrayList<>();
private Integer activeTab;

public ReadableWorkbook(File inputFile) throws IOException {
this(OPCPackage.open(inputFile));
this(OPCPackage.open(inputFile), ReadingOptions.DEFAULT_READING_OPTIONS);
}

/**
* Note: will load the whole xlsx file into memory,
* (but will not uncompress it in memory)
*/
public ReadableWorkbook(InputStream inputStream) throws IOException {
this(inputStream, false);
this(inputStream, ReadingOptions.DEFAULT_READING_OPTIONS);
}

/**
* Note: will load the whole xlsx file into memory,
* (but will not uncompress it in memory)
*/
public ReadableWorkbook(InputStream inputStream, boolean withStyle) throws IOException {
this(OPCPackage.open(inputStream, withStyle));
public ReadableWorkbook(InputStream inputStream, ReadingOptions readingOptions) throws IOException {
this(OPCPackage.open(inputStream, readingOptions.isWithCellFormat()), readingOptions);
}

private ReadableWorkbook(OPCPackage pkg) throws IOException {
private ReadableWorkbook(OPCPackage pkg, ReadingOptions readingOptions) throws IOException {

try {
this.pkg = pkg;
Expand All @@ -68,6 +69,7 @@ private ReadableWorkbook(OPCPackage pkg) throws IOException {
} catch (XMLStreamException e) {
throw new ExcelReaderException(e);
}
this.readingOptions = readingOptions;
}

@Override
Expand Down Expand Up @@ -166,6 +168,10 @@ public static boolean isOLE2Header(byte[] bytes) {
return HeaderSignatures.isHeader(bytes, HeaderSignatures.OLE_2_SIGNATURE);
}

/**
 * Exposes the reading options this workbook holds to other classes of the same package.
 *
 * @return the value of the {@code readingOptions} field
 */
ReadingOptions getReadingOptions() {
    return this.readingOptions;
}

private static Runnable asUncheckedRunnable(Closeable c) {
return () -> {
try {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package org.dhatim.fastexcel.reader;

/**
 * Immutable pair of switches that tune how a workbook is read: whether cell
 * formatting is extracted, and how an unparseable cell value is reported.
 */
public class ReadingOptions {

    /** When {@code true}, cell formatting is extracted. */
    private final boolean withCellFormat;

    /** When {@code true}, an unparseable cell gets type ERROR instead of raising an exception. */
    private final boolean cellInErrorIfParseError;

    /** Default options: both switches are {@code false}. */
    public static final ReadingOptions DEFAULT_READING_OPTIONS = new ReadingOptions(false, false);

    /**
     * Creates a new set of reading options.
     *
     * @param withCellFormat          if {@code true}, extract cell formatting
     * @param cellInErrorIfParseError if {@code true}, the cell type is ERROR when the cell value
     *                                cannot be parsed; if {@code false}, an exception is thrown
     *                                when there is a parsing error
     */
    public ReadingOptions(boolean withCellFormat, boolean cellInErrorIfParseError) {
        this.cellInErrorIfParseError = cellInErrorIfParseError;
        this.withCellFormat = withCellFormat;
    }

    /**
     * @return {@code true} if cell formatting is to be extracted
     */
    public boolean isWithCellFormat() {
        return this.withCellFormat;
    }

    /**
     * @return {@code true} if a cell whose value cannot be parsed gets type ERROR,
     *         {@code false} if an exception is thrown when there is a parsing error
     */
    public boolean isCellInErrorIfParseError() {
        return this.cellInErrorIfParseError;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,15 @@ private Cell parseOther(CellAddress addr, String type, String dataFormatId, Stri
while (r.goTo(() -> r.isStartElement("v") || r.isEndElement("c") || r.isStartElement("f"))) {
if ("v".equals(r.getLocalName())) {
rawValue = r.getValueUntilEndElement("v");
value = "".equals(rawValue) ? null : parser.apply(rawValue);
try {
value = "".equals(rawValue) ? null : parser.apply(rawValue);
} catch (ExcelReaderException e) {
if (workbook.getReadingOptions().isCellInErrorIfParseError()) {
definedType = CellType.ERROR;
} else {
throw e;
}
}
} else if ("f".equals(r.getLocalName())) {
String ref = r.getAttribute("ref");
String t = r.getAttribute("t");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ void expectErrors() throws IOException {
expectError("/invalid/only-content-types.xlsx", "/xl/_rels/custom1-workbook.xml.rels not found");
expectError("/invalid/no-workbook-rels.xlsx", "/xl/_rels/custom1-workbook.xml.rels not found");
expectError("/invalid/no-workbook-xml.xlsx", "/xl/custom1-workbook.xml not found");
expectError("/invalid/no-sheet.xlsx", "xl/worksheets/custom3-sheet1.xml not found");
expectError("/invalid/no-sheet.xlsx", "/xl/worksheets/custom3-sheet1.xml not found");
expectError("/invalid/missing-sheet-entry.xlsx", "Sheet#0 'Feuil1' is missing an entry in workbook rels (for id: 'rId42')");
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@

import org.junit.jupiter.api.Test;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.Iterator;
import java.util.stream.Stream;

import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.junit.jupiter.api.Assertions.fail;

class SimpleReaderTest {

Expand Down Expand Up @@ -65,5 +70,68 @@ void test() throws IOException {
}
}

/**
 * Reads the {@code /xlsx/parseError.xlsx} fixture twice: once with the default options,
 * where advancing the row iterator must raise an {@link ExcelReaderException}, and once
 * with {@code new ReadingOptions(false, true)}, where the first cell of the first row
 * must be reported with type {@link CellType#ERROR}.
 */
@Test
void testWithParseErrorOnNumber() throws IOException {
    // Scenario 1: default options, so the parse error must surface as an exception.
    try (InputStream in = Resources.open("/xlsx/parseError.xlsx");
         ReadableWorkbook workbook = new ReadableWorkbook(in, ReadingOptions.DEFAULT_READING_OPTIONS);
         Stream<Row> rowStream = workbook.getFirstSheet().openStream()) {
        Iterator<Row> rowIterator = rowStream.iterator();
        try {
            rowIterator.hasNext();
            fail("Must throw an exception");
        } catch (ExcelReaderException expected) {
            // OK
        }
    }

    // Scenario 2: parse errors are turned into ERROR cells rather than exceptions.
    try (InputStream in = Resources.open("/xlsx/parseError.xlsx");
         ReadableWorkbook workbook = new ReadableWorkbook(in, new ReadingOptions(false, true));
         Stream<Row> rowStream = workbook.getFirstSheet().openStream()) {
        Iterator<Row> rowIterator = rowStream.iterator();
        assertTrue(rowIterator.hasNext());
        Iterator<Cell> cellIterator = rowIterator.next().iterator();
        assertTrue(cellIterator.hasNext());
        Cell firstCell = cellIterator.next();
        assertEquals(CellType.ERROR, firstCell.getType());
    }
}

/**
 * Opens the {@code /xlsx/DefaultContentType.xlsx} fixture and checks that the first cell
 * of the first row of the first sheet is a NUMBER equal to {@link BigDecimal#ONE}.
 * NOTE(review): going by the fixture and test names, this file presumably relies on the
 * default workbook path; confirm against the fixture.
 */
@Test
public void testDefaultWorkbookPath() throws IOException {
    try (InputStream in = Resources.open("/xlsx/DefaultContentType.xlsx");
         ReadableWorkbook workbook = new ReadableWorkbook(in, new ReadingOptions(false, true));
         Stream<Row> rowStream = workbook.getFirstSheet().openStream()) {
        Iterator<Row> rowIterator = rowStream.iterator();
        assertTrue(rowIterator.hasNext());
        Iterator<Cell> cellIterator = rowIterator.next().iterator();
        assertTrue(cellIterator.hasNext());
        Cell firstCell = cellIterator.next();
        assertEquals(CellType.NUMBER, firstCell.getType());
        assertEquals(BigDecimal.ONE, firstCell.getValue());
    }
}

/**
 * Opens the {@code /xlsx/absolutePath.xlsx} fixture and checks that the first cell
 * of the first row of the first sheet is a NUMBER equal to {@link BigDecimal#ONE}.
 * NOTE(review): going by the fixture name, this file presumably uses absolute
 * relationship targets; confirm against the fixture.
 */
@Test
public void testDefaultWorkbookPath2() throws IOException {
    try (InputStream in = Resources.open("/xlsx/absolutePath.xlsx");
         ReadableWorkbook workbook = new ReadableWorkbook(in, new ReadingOptions(false, true));
         Stream<Row> rowStream = workbook.getFirstSheet().openStream()) {
        Iterator<Row> rowIterator = rowStream.iterator();
        assertTrue(rowIterator.hasNext());
        Iterator<Cell> cellIterator = rowIterator.next().iterator();
        assertTrue(cellIterator.hasNext());
        Cell firstCell = cellIterator.next();
        assertEquals(CellType.NUMBER, firstCell.getType());
        assertEquals(BigDecimal.ONE, firstCell.getValue());
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public class WithFormatTest {
@Test
void testFile() throws IOException {
try (InputStream inputStream = open("/xlsx/withStyle.xlsx");
ReadableWorkbook excel = new ReadableWorkbook(inputStream, true)) {
ReadableWorkbook excel = new ReadableWorkbook(inputStream, new ReadingOptions(true, false))) {
Optional<Sheet> sheet = excel.getActiveSheet();
assertTrue(sheet.isPresent());
Iterator<Row> it = sheet.get().openStream().iterator();
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit ee2b8cd

Please sign in to comment.