Skip to content

Commit

Permalink
Added more code for re-testing ingestable file type.
Browse files Browse the repository at this point in the history
  • Loading branch information
landreev committed Aug 1, 2018
1 parent eaa5d84 commit 769dbb7
Show file tree
Hide file tree
Showing 2 changed files with 43 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -753,23 +753,25 @@ public boolean ingestAsTabular(Long datafile_id, boolean forceTypeCheck) {
}

BufferedInputStream inputStream = null;
File additionalData = null;
File additionalData = null;
File localFile = null;
StorageIO<DataFile> storageIO = null;

try {
storageIO = dataFile.getStorageIO();
storageIO.open();

if (storageIO.isLocalFile()) {
localFile = storageIO.getFileSystemPath().toFile();
inputStream = new BufferedInputStream(storageIO.getInputStream());
} else {
ReadableByteChannel dataFileChannel = storageIO.getReadChannel();
File tempFile = File.createTempFile("tempIngestSourceFile", ".tmp");
FileChannel tempIngestSourceChannel = new FileOutputStream(tempFile).getChannel();
localFile = File.createTempFile("tempIngestSourceFile", ".tmp");
FileChannel tempIngestSourceChannel = new FileOutputStream(localFile).getChannel();

tempIngestSourceChannel.transferFrom(dataFileChannel, 0, storageIO.getSize());

inputStream = new BufferedInputStream(new FileInputStream(tempFile));
inputStream = new BufferedInputStream(new FileInputStream(localFile));
logger.fine("Saved "+storageIO.getSize()+" bytes in a local temp file.");
}
} catch (IOException ioEx) {
Expand All @@ -792,7 +794,25 @@ public boolean ingestAsTabular(Long datafile_id, boolean forceTypeCheck) {
if (ingestRequest.getLabelsFile() != null) {
additionalData = new File(ingestRequest.getLabelsFile());
}
}
}

if (forceTypeCheck) {
String newType = FileUtil.retestIngestableFileType(localFile, dataFile.getContentType());

ingestPlugin = getTabDataReaderByMimeType(newType);
logger.fine("Re-tested file type: " + newType + "; Using ingest plugin " + ingestPlugin.getClass());

// check again:
if (ingestPlugin == null) {
// If it's still null - give up!

dataFile.SetIngestProblem();
FileUtil.createIngestFailureReport(dataFile, "No ingest plugin found for file type "+dataFile.getContentType());
dataFile = fileService.save(dataFile);
logger.warning("Ingest failure: failed to detect ingest plugin (file type check forced)");
return false;
}
}

TabularDataIngest tabDataIngest = null;
try {
Expand Down
19 changes: 18 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/util/FileUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,16 @@ public class FileUtil implements java.io.Serializable {
private static final Logger logger = Logger.getLogger(FileUtil.class.getCanonicalName());

private static final String[] TABULAR_DATA_FORMAT_SET = {"POR", "SAV", "DTA", "RDA"};
// The list of formats that need to be re-tested when we attempt to ingest
// a file again.
// Example: We have added support for Stata 13+; the new ingest plugin is called
// when the file type is detected as "application/x-stata-1[345]". However,
// Stata files that were uploaded earlier but never ingested are stored in the
// database typed as "application/x-stata". So we want to run the new-and-improved
// DTA check, which will re-identify the specific DTA flavor.
// If similar cases are introduced in the future, the affected formats will
// need to be added to this list.
private static final String[] TABULAR_DATA_FORMATS_RETEST = {"DTA"};

private static Map<String, String> STATISTICAL_FILE_EXTENSION = new HashMap<String, String>();

Expand Down Expand Up @@ -280,10 +290,17 @@ private static String determineContentType(File fileObject) {

}

/**
 * Re-runs tabular data format detection on a file whose type is already
 * recorded, using a checker constructed with the
 * {@code TABULAR_DATA_FORMATS_RETEST} list.
 *
 * @param file     the file to examine
 * @param fileType the currently recorded type, used as the fallback
 * @return the type reported by the checker, or {@code fileType} unchanged
 *         when the checker returns {@code null}
 */
public static String retestIngestableFileType(File file, String fileType) {
    String detectedType = new IngestableDataChecker(TABULAR_DATA_FORMATS_RETEST).detectTabularDataFormat(file);

    // No format recognized: keep whatever type the caller already had.
    if (detectedType == null) {
        return fileType;
    }

    return detectedType;
}

public static String determineFileType(File f, String fileName) throws IOException{
String fileType = null;
String fileExtension = getFileExtension(fileName);



// step 1:
Expand Down

0 comments on commit 769dbb7

Please sign in to comment.