Skip to content

Commit

Permalink
Support TempFileCommentsTable (monitorjbl#65)
Browse files Browse the repository at this point in the history
  • Loading branch information
pjfanning authored Oct 11, 2021
1 parent 96be7da commit 0914fa7
Show file tree
Hide file tree
Showing 4 changed files with 244 additions and 156 deletions.
60 changes: 57 additions & 3 deletions src/main/java/com/github/pjfanning/xlsx/StreamingReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ public static class Builder {
// When true, the reader tries to avoid creating temp files (see setAvoidTempFiles).
private boolean avoidTempFiles = false;
// When true, a temp file is used for the Shared Strings Table instead of loading it entirely into memory.
private boolean useSstTempFile = false;
// NOTE(review): presumably controls encryption of the Shared Strings Table temp file - confirm against setEncryptSstTempFile.
private boolean encryptSstTempFile = false;
// When true, a temp file is used for the Comments data; when false the entire table is loaded into memory.
private boolean useCommentsTempFile = false;
// Whether to encrypt the Comments temp file; only applies when useCommentsTempFile is true.
private boolean encryptCommentsTempFile = false;
private boolean adjustLegacyComments = false;
// Whether comments are read at all.
private boolean readComments = false;
// Whether to read the core document properties.
private boolean readCoreProperties = false;
Expand Down Expand Up @@ -101,6 +103,22 @@ public boolean readComments() {
return readComments;
}

/**
 * Reports whether the Comments data is backed by a temp file.
 *
 * @return {@code true} if a temp file is used for the Comments data; {@code false} if no
 *         temp file is used and the entire table is loaded into memory
 */
public boolean useCommentsTempFile() {
  return this.useCommentsTempFile;
}

/**
 * Reports whether the temp file for the Comments data is encrypted.
 *
 * @return {@code true} if the Comments temp file is to be encrypted; this only applies
 *         if {@code useCommentsTempFile()} is true
 */
public boolean encryptCommentsTempFile() {
  return this.encryptCommentsTempFile;
}

/**
* @return Whether to read the core document properties.
*/
Expand Down Expand Up @@ -159,7 +177,7 @@ public Builder bufferSize(int bufferSize) {
}

/**
* For password protected files specify password to open file.
* For password protected files, specify password to open file.
* If the password is incorrect a {@code ReadException} is thrown on
* {@code read}.
* <p>NULL indicates that no password should be used, this is the
Expand Down Expand Up @@ -189,7 +207,7 @@ public Builder convertFromOoXmlStrict(boolean convertFromOoXmlStrict) {

/**
* Enables a mode where the code tries to avoid creating temp files. This is independent of
* {@code #setUseSstTempFile}.
* {@code #setUseSstTempFile} and {@code #setUseCommentsTempFile}.
* <p>
* By default, temp files are used to avoid holding onto too much data in memory.
*
Expand All @@ -203,7 +221,7 @@ public Builder setAvoidTempFiles(boolean avoidTempFiles) {

/**
* Enables use of Shared Strings Table temp file. This option exists to accommodate
* extremely large workbooks with millions of unique strings. Normally the SST is entirely
* extremely large workbooks with millions of unique strings. Normally, the SST is entirely
* loaded into memory, but with large workbooks with high cardinality (i.e., very few
* duplicate values) the SST may not fit entirely into memory.
* <p>
Expand Down Expand Up @@ -234,6 +252,42 @@ public Builder setEncryptSstTempFile(boolean encryptSstTempFile) {
return this;
}

/**
 * Enables use of a temp file for the Comments data. This option exists to accommodate
 * workbooks with lots of comments.
 * <p>
 * By default, this option is off and all the Comments data is loaded into memory.
 * <strong>However</strong>, enabling this option will cause some noticeable performance
 * degradation, as you are trading memory for disk space.
 *
 * @param useCommentsTempFile whether to use a temp file to store the Comments data
 * @return reference to current {@code Builder}
 * @see #setReadComments(boolean)
 * @see #setEncryptCommentsTempFile(boolean)
 */
public Builder setUseCommentsTempFile(boolean useCommentsTempFile) {
this.useCommentsTempFile = useCommentsTempFile;
return this;
}

/**
 * Enables encryption of the Comments temp file. This only applies if
 * {@link #setUseCommentsTempFile(boolean)} is set to true.
 * <p>
 * By default, the temp file is not encrypted. <strong>However</strong>,
 * enabling this option could slow down the processing of Comments data.
 *
 * @param encryptCommentsTempFile whether to encrypt the temp file used to store the Comments data
 * @return reference to current {@code Builder}
 * @see #setReadComments(boolean)
 * @see #setUseCommentsTempFile(boolean)
 */
public Builder setEncryptCommentsTempFile(boolean encryptCommentsTempFile) {
this.encryptCommentsTempFile = encryptCommentsTempFile;
return this;
}

/**
* Enables the reading of the comments.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ void loadSheets(OoxmlReader reader, SharedStringsTable sst, StylesTable stylesTa
PackagePart part = iter.getSheetPart();
sheetStreams.put(part, is);
if (builder.readComments()) {
sheetComments.put(part, iter.getSheetComments());
sheetComments.put(part, iter.getSheetComments(builder));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,10 @@ Licensed to the Apache Software Foundation (ASF) under one or more

import javax.xml.parsers.ParserConfigurationException;

import com.github.pjfanning.poi.xssf.streaming.TempFileCommentsTable;
import com.github.pjfanning.xlsx.StreamingReader;
import org.apache.poi.ooxml.POIXMLException;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
import org.apache.poi.openxml4j.opc.PackagePartName;
Expand Down Expand Up @@ -84,7 +85,7 @@ public class OoxmlReader {
/**
* Creates a new OoxmlReader for the given package.
*/
public OoxmlReader(OPCPackage pkg) throws IOException, OpenXML4JException {
public OoxmlReader(OPCPackage pkg) {
this.pkg = pkg;

PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
Expand Down Expand Up @@ -133,7 +134,7 @@ public StylesTable getStylesTable() throws IOException, InvalidFormatException {
}
return styles;
}

/**
* Returns an InputStream to read the contents of the
* main Workbook, which contains key overall data for
Expand Down Expand Up @@ -286,7 +287,7 @@ public String getSheetName() {
* Returns the comments associated with this sheet,
* or null if there aren't any
*/
public Comments getSheetComments() {
public Comments getSheetComments(StreamingReader.Builder builder) {
PackagePart sheetPkg = getSheetPart();

// Do we have a comments relationship? (Only ever one if so)
Expand All @@ -297,7 +298,7 @@ public Comments getSheetComments() {
PackageRelationship comments = commentsList.getRelationship(0);
PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
return parseComments(commentsPart);
return parseComments(builder, commentsPart);
}
} catch (InvalidFormatException|IOException e) {
LOGGER.log(POILogger.WARN, e);
Expand All @@ -306,9 +307,16 @@ public Comments getSheetComments() {
return null;
}

//to allow subclassing
protected Comments parseComments(PackagePart commentsPart) throws IOException {
return new CommentsTable(commentsPart);
/**
 * Builds the {@link Comments} implementation for a sheet's comments part.
 * <p>
 * When {@code builder.useCommentsTempFile()} is true, the part's stream is read into a
 * {@link TempFileCommentsTable} (created with {@code builder.encryptCommentsTempFile()});
 * otherwise a {@link CommentsTable} is created directly from the part.
 *
 * @param builder the reader configuration that selects the comments implementation
 * @param commentsPart the package part holding the sheet's comments
 * @return the comments read from {@code commentsPart}
 * @throws IOException if the comments part cannot be read
 */
private Comments parseComments(StreamingReader.Builder builder, PackagePart commentsPart) throws IOException {
  if (!builder.useCommentsTempFile()) {
    return new CommentsTable(commentsPart);
  }
  try (InputStream is = commentsPart.getInputStream()) {
    TempFileCommentsTable ct = new TempFileCommentsTable(builder.encryptCommentsTempFile());
    boolean loaded = false;
    try {
      ct.readFrom(is);
      loaded = true;
      return ct;
    } finally {
      // The table is only handed to the caller on success; if reading failed,
      // nobody else holds a reference, so close it here rather than leak it.
      if (!loaded) {
        ct.close();
      }
    }
  }
}

/**
Expand Down
Loading

0 comments on commit 0914fa7

Please sign in to comment.