From 498a1f735e61709d25c7d1378176f31971a4d801 Mon Sep 17 00:00:00 2001 From: wgzhao Date: Sun, 22 Sep 2024 18:44:04 +0800 Subject: [PATCH] [update][plugin][excelreader] Minor code change 1. Utilize `WorkbookFactory` to read all supported Excel files. 2. Fail fast during job `init()` when no files are found under the configured path(s), instead of deferring the check to `split()`. --- .../reader/excelreader/ExcelHelper.java | 11 +++------- .../reader/excelreader/ExcelReader.java | 22 ++++++------------- 2 files changed, 10 insertions(+), 23 deletions(-) diff --git a/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelHelper.java b/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelHelper.java index 4d78bef56..43032153c 100644 --- a/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelHelper.java +++ b/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelHelper.java @@ -28,14 +28,13 @@ import com.wgzhao.addax.common.element.Record; import com.wgzhao.addax.common.element.StringColumn; import com.wgzhao.addax.common.exception.AddaxException; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.DateUtil; import org.apache.poi.ss.usermodel.FormulaEvaluator; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; -import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -61,12 +60,8 @@ public void open(String filePath) { try { this.file = new FileInputStream(filePath); - if (filePath.endsWith(".xlsx")) { - this.workbook = new XSSFWorkbook(file); - } else { - this.workbook = new HSSFWorkbook(file); - } - // ONLY reader the first sheet + workbook = WorkbookFactory.create(file); + // ONLY read the first sheet Sheet sheet = 
workbook.getSheetAt(0); this.evaluator = workbook.getCreationHelper().createFormulaEvaluator(); this.rowIterator = sheet.iterator(); diff --git a/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelReader.java b/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelReader.java index b938827d7..91c43ecb5 100644 --- a/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelReader.java +++ b/plugin/reader/excelreader/src/main/java/com/wgzhao/addax/plugin/reader/excelreader/ExcelReader.java @@ -28,15 +28,14 @@ import com.wgzhao.addax.common.spi.Reader; import com.wgzhao.addax.common.util.Configuration; import com.wgzhao.addax.storage.util.FileHelper; -import org.apache.commons.lang3.StringUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.List; +import static com.wgzhao.addax.common.spi.ErrorCode.CONFIG_ERROR; import static com.wgzhao.addax.common.spi.ErrorCode.REQUIRED_VALUE; -import static com.wgzhao.addax.common.spi.ErrorCode.RUNTIME_ERROR; public class ExcelReader extends Reader @@ -48,7 +47,6 @@ public static class Job private static final Logger LOG = LoggerFactory.getLogger(Job.class); private Configuration originConfig = null; - private List path = null; private List sourceFiles; @Override @@ -57,9 +55,7 @@ public void init() this.originConfig = this.getPluginJobConf(); // Compatible with the old version, path is a string before String pathInString = this.originConfig.getNecessaryValue(Key.PATH, REQUIRED_VALUE); - if (StringUtils.isBlank(pathInString)) { - throw AddaxException.asAddaxException(REQUIRED_VALUE, "the path is required"); - } + List path; if (!pathInString.startsWith("[") && !pathInString.endsWith("]")) { path = new ArrayList<>(); path.add(pathInString); @@ -71,8 +67,11 @@ public void init() } } -// this.sourceFiles = this.buildSourceTargets(); this.sourceFiles = 
FileHelper.buildSourceTargets(path); + if (sourceFiles.isEmpty()) { + throw AddaxException.asAddaxException(CONFIG_ERROR, + "Cannot find any file in path: " + path + ", assuring the path(s) exists and has right permission"); + } LOG.info("The number of files to read is: [{}]", this.sourceFiles.size()); } @@ -88,14 +87,7 @@ public List split(int adviceNumber) LOG.debug("Begin to split..."); List readerSplitConfigs = new ArrayList<>(); - // warn:每个slice拖且仅拖一个文件, - // int splitNumber = adviceNumber - int splitNumber = this.sourceFiles.size(); - if (0 == splitNumber) { - throw AddaxException.asAddaxException( - RUNTIME_ERROR, - "Nothing found in the directory " + this.originConfig.getString(Key.PATH) + ". Please check it"); - } + int splitNumber = Math.min(this.sourceFiles.size(), adviceNumber); List> splitSourceFiles = FileHelper.splitSourceFiles(this.sourceFiles, splitNumber); for (List files : splitSourceFiles) {