diff --git a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java index f6bc05404d1..02f2fa0bd40 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java @@ -753,7 +753,8 @@ public boolean ingestAsTabular(Long datafile_id, boolean forceTypeCheck) { } BufferedInputStream inputStream = null; - File additionalData = null; + File additionalData = null; + File localFile = null; /* on-disk copy of the data file; handed to FileUtil.retestIngestableFileType() when forceTypeCheck is set */ StorageIO storageIO = null; try { @@ -761,15 +762,16 @@ public boolean ingestAsTabular(Long datafile_id, boolean forceTypeCheck) { storageIO.open(); if (storageIO.isLocalFile()) { + localFile = storageIO.getFileSystemPath().toFile(); inputStream = new BufferedInputStream(storageIO.getInputStream()); } else { ReadableByteChannel dataFileChannel = storageIO.getReadChannel(); - File tempFile = File.createTempFile("tempIngestSourceFile", ".tmp"); - FileChannel tempIngestSourceChannel = new FileOutputStream(tempFile).getChannel(); + localFile = File.createTempFile("tempIngestSourceFile", ".tmp"); + FileChannel tempIngestSourceChannel = new FileOutputStream(localFile).getChannel(); /* NOTE(review): this channel is not closed anywhere in this branch - confirm and close it */ tempIngestSourceChannel.transferFrom(dataFileChannel, 0, storageIO.getSize()); - inputStream = new BufferedInputStream(new FileInputStream(tempFile)); + inputStream = new BufferedInputStream(new FileInputStream(localFile)); logger.fine("Saved "+storageIO.getSize()+" bytes in a local temp file."); } } catch (IOException ioEx) { @@ -792,7 +794,25 @@ public boolean ingestAsTabular(Long datafile_id, boolean forceTypeCheck) { if (ingestRequest.getLabelsFile() != null) { additionalData = new File(ingestRequest.getLabelsFile()); } - } + } + + if (forceTypeCheck) { + String newType = FileUtil.retestIngestableFileType(localFile, dataFile.getContentType()); + + ingestPlugin = getTabDataReaderByMimeType(newType); + + // Check again, and do it before logging: the log message below dereferences ingestPlugin. + if (ingestPlugin == null) { + // If it's still null - give up! + + dataFile.SetIngestProblem(); + FileUtil.createIngestFailureReport(dataFile, "No ingest plugin found for file type "+dataFile.getContentType()); + dataFile = fileService.save(dataFile); + logger.warning("Ingest failure: failed to detect ingest plugin (file type check forced)"); + return false; + } + logger.fine("Re-tested file type: " + newType + "; Using ingest plugin " + ingestPlugin.getClass()); + } TabularDataIngest tabDataIngest = null; try {
diff --git a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java index cc6424c6aeb..d30c9adc8fa 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java @@ -86,6 +86,16 @@ public class FileUtil implements java.io.Serializable { private static final Logger logger = Logger.getLogger(FileUtil.class.getCanonicalName()); private static final String[] TABULAR_DATA_FORMAT_SET = {"POR", "SAV", "DTA", "RDA"}; + // The list of formats for which we need to re-test, if we want to attempt + // to ingest the file again: + // Example: We have added support for Stata-13+; the new ingest plugin is called + // when the file type is detected as "application/x-stata-1[345]". But the + // previously uploaded, but not ingested stata files are in the database + // typed as "application/x-stata". So we want to run the new-and-improved + // DTA check that will re-identify the specific DTA flavor. + // If similar cases are introduced in the future, the affected formats will + // need to be added to the list.
+ private static final String[] TABULAR_DATA_FORMATS_RETEST = {"DTA"}; private static Map STATISTICAL_FILE_EXTENSION = new HashMap(); @@ -280,10 +290,17 @@ private static String determineContentType(File fileObject) { } + /** Re-runs tabular format detection on {@code file} using an IngestableDataChecker constructed with TABULAR_DATA_FORMATS_RETEST. Returns the type reported by detectTabularDataFormat(file), or the caller-supplied {@code fileType} unchanged when that call returns null. */ public static String retestIngestableFileType(File file, String fileType) { + IngestableDataChecker tabChecker = new IngestableDataChecker(TABULAR_DATA_FORMATS_RETEST); + String newType = tabChecker.detectTabularDataFormat(file); + + return newType != null ? newType : fileType; /* nothing detected: keep the type the caller already had */ + } + public static String determineFileType(File f, String fileName) throws IOException{ String fileType = null; String fileExtension = getFileExtension(fileName); - + // step 1: