Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add UNF recalculation endpoint #3589 #3605

Merged
merged 10 commits into from
Feb 8, 2017
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.ingest.IngestUtil;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
Expand All @@ -14,12 +16,15 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.Json;
import javax.json.JsonObjectBuilder;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
Expand Down Expand Up @@ -49,6 +54,9 @@ public class DatasetVersionServiceBean implements java.io.Serializable {
@EJB
SystemConfig systemConfig;

@EJB
IndexServiceBean indexService;

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

Expand Down Expand Up @@ -973,5 +981,25 @@ public HashMap getFileMetadataHistory(DataFile df){
return hashList;
*/
}

/**
 * Recalculates and persists the UNF of a single dataset version, then
 * kicks off a reindex of the owning dataset.
 *
 * Every outcome (including bad input) is reported through the "message"
 * key of the returned builder rather than through an exception.
 *
 * @param datasetVersionId database id of the dataset version, as a string
 * @return a JSON object builder containing a "message" entry that
 * describes what happened
 */
public JsonObjectBuilder fixUnf(String datasetVersionId) {
    JsonObjectBuilder info = Json.createObjectBuilder();
    if (datasetVersionId == null || datasetVersionId.isEmpty()) {
        info.add("message", "datasetVersionId was null or empty!");
        return info;
    }
    long dsvId;
    try {
        dsvId = Long.parseLong(datasetVersionId);
    } catch (NumberFormatException ex) {
        // The id comes straight from the caller; report a non-numeric value
        // the same way as the other input problems instead of letting the
        // exception escape.
        info.add("message", "datasetVersionId was not a valid number: " + datasetVersionId + ".");
        return info;
    }
    DatasetVersion datasetVersion = find(dsvId);
    if (datasetVersion == null) {
        info.add("message", "Could not find a dataset version based on datasetVersionId " + datasetVersionId + ".");
        return info;
    }
    IngestUtil.recalculateDatasetVersionUNF(datasetVersion);
    DatasetVersion saved = em.merge(datasetVersion);
    info.add("message", "New UNF value saved (" + saved.getUNF() + "). Reindexing dataset.");
    boolean doNormalSolrDocCleanUp = true;
    // The Future returned by indexDataset is intentionally not inspected here.
    indexService.indexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp);
    return info;
}

} // end class
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseRoleServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
Expand Down Expand Up @@ -186,6 +187,9 @@ String getWrappedMessageWhenJson() {
@EJB
protected UserNotificationServiceBean userNotificationSvc;

@EJB
protected DatasetVersionServiceBean datasetVersionSvc;

@PersistenceContext(unitName = "VDCNet-ejbPU")
protected EntityManager em;

Expand Down
19 changes: 19 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Admin.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,8 @@
import edu.harvard.iq.dataverse.authorization.RoleAssignee;
import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.ingest.IngestUtil;
import javax.json.JsonArray;
/**
* Where the secure, setup API calls live.
* @author michael
Expand Down Expand Up @@ -863,4 +865,21 @@ public Response findRoleAssignee(@PathParam("idtf") String idtf) {
: ok(json(ra.getDisplayInfo()));
}

/**
 * Reports dataset versions whose UNF state is inconsistent, as determined
 * by {@code IngestUtil.getUnfData} over all datasets.
 *
 * @return an OK response whose JSON carries "numProblems" (the count) and
 * "problems" (the array of problem descriptions)
 */
@Path("datasets/integrity")
@GET
public Response checkDatasetIntegrity() {
    JsonArray problems = IngestUtil.getUnfData(datasetSvc.findAll()).build();
    JsonObjectBuilder report = Json.createObjectBuilder()
            .add("numProblems", problems.size())
            .add("problems", problems);
    return ok(report);
}

/**
 * Asks the dataset version service to recalculate the UNF of the given
 * dataset version and returns whatever it reports.
 *
 * @param datasetVersionId id of the dataset version, taken from the URL path
 * @return an OK response wrapping the JSON produced by the service
 */
@Path("datasets/integrity/unf/{datasetVersionId}")
@POST
public Response fixUnf(@PathParam("datasetVersionId") String datasetVersionId) {
    return ok(datasetVersionSvc.fixUnf(datasetVersionId));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -619,50 +619,11 @@ public void recalculateDataFileUNF(DataFile dataFile) {
dataFile.getDataTable().setUnf(fileUnfValue);
}
}

/**
 * Recalculates the UNF of the given dataset version.
 *
 * The work is delegated entirely to
 * {@link IngestUtil#recalculateDatasetVersionUNF(DatasetVersion)} so that
 * the calculation is performed in one place only.
 *
 * @param version the dataset version whose UNF is to be set or unset
 */
public void recalculateDatasetVersionUNF(DatasetVersion version) {
    IngestUtil.recalculateDatasetVersionUNF(version);
}

public void sendFailNotification(Long dataset_id) {
FacesMessage facesMessage = new FacesMessage("ingest failed");
/* commented out push channel message:
Expand Down
117 changes: 113 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,25 @@
package edu.harvard.iq.dataverse.ingest;

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.util.FileUtil;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import org.dataverse.unf.UNFUtil;
import org.dataverse.unf.UnfException;

/**
* Various helper methods used by IngestServiceBean.
Expand Down Expand Up @@ -64,10 +72,10 @@ public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List<
}

/**
* Checks if the unique file path of the supplied fileMetadata is already on
* the list of the existing files; and if so, keeps generating a new name
* until it is unique. Returns the final file name. (i.e., it only modifies the
* filename, and not the folder name, in order to achieve uniqueness)
* Checks if the unique file path of the supplied fileMetadata is already on
* the list of the existing files; and if so, keeps generating a new name
* until it is unique. Returns the final file name. (i.e., it only modifies
* the filename, and not the folder name, in order to achieve uniqueness)
*
* @param fileMetadata supplied FileMetadata
* @param existingFileNames a set of the already existing pathnames
Expand Down Expand Up @@ -195,4 +203,105 @@ private static Set<String> existingPathNamesAsSet(DatasetVersion version, FileMe
return pathNamesExisting;
}

/**
 * Recalculates the version-level UNF from the UNFs of the files returned
 * by {@link #getUnfValuesOfFiles(DatasetVersion)}.
 *
 * If no file contributes a UNF, the version's UNF is set to null. If the
 * calculation fails, a warning (including the exception) is logged and the
 * version's existing UNF is left untouched.
 *
 * @param version The DatasetVersion to mutate, setting or unsetting the
 * UNF. A null version is ignored.
 */
public static void recalculateDatasetVersionUNF(DatasetVersion version) {
    logger.fine("recalculating UNF for dataset version.");
    if (version == null) {
        return;
    }
    List<String> unfValueList = getUnfValuesOfFiles(version);
    if (unfValueList.isEmpty()) {
        // Of course if no files in the version have UNFs, we need to make sure
        // that the version has the NULL UNF too.
        // Otherwise, the version will still have a UNF if the user deletes
        // all the tabular files from the version!
        version.setUNF(null);
        return;
    }

    String[] unfValues = unfValueList.toArray(new String[0]);
    logger.fine("Attempting to calculate new UNF from total of " + unfValueList.size() + " file-level signatures.");
    String datasetUnfValue = null;
    try {
        datasetUnfValue = UNFUtil.calculateUNF(unfValues);
    } catch (IOException ex) {
        // It's unclear how to exercise this IOException.
        // Pass the exception along so the cause is not lost from the log.
        logger.log(Level.WARNING, "IO Exception: Failed to recalculate the UNF for the dataset version id=" + version.getId(), ex);
    } catch (UnfException uex) {
        // It's unclear how to exercise this UnfException.
        logger.log(Level.WARNING, "UNF Exception: Failed to recalculate the UNF for the dataset version id=" + version.getId(), uex);
    }

    if (datasetUnfValue != null) {
        version.setUNF(datasetUnfValue);
        logger.fine("Recalculated the UNF for the dataset version id=" + version.getId() + ", new signature: " + datasetUnfValue);
    }
}

/**
 * Collects the UNF of every file in the version that has a data file,
 * is tabular, and has a non-null UNF.
 *
 * @param version the dataset version to inspect; may be null
 * @return the collected UNF values in file-metadata iteration order; an
 * empty list when the version is null or nothing qualifies
 */
public static List<String> getUnfValuesOfFiles(DatasetVersion version) {
    List<String> collected = new ArrayList<>();
    if (version == null) {
        return collected;
    }
    for (FileMetadata fileMetadata : version.getFileMetadatas()) {
        if (fileMetadata == null) {
            continue;
        }
        DataFile dataFile = fileMetadata.getDataFile();
        if (dataFile == null || !dataFile.isTabularData()) {
            continue;
        }
        String fileUnf = dataFile.getUnf();
        if (fileUnf != null) {
            collected.add(fileUnf);
        }
    }
    return collected;
}

/**
 * Tells whether a dataset version is expected to carry a UNF, i.e. whether
 * {@link #getUnfValuesOfFiles(DatasetVersion)} yields at least one value.
 *
 * @param version the dataset version to check; may be null
 * @return true if at least one file contributes a UNF, false otherwise
 * (including when the version is null)
 */
public static boolean shouldHaveUnf(DatasetVersion version) {
    if (version == null) {
        return false;
    }
    List<String> fileUnfs = getUnfValuesOfFiles(version);
    logger.fine("UNF values for files from Dataset version " + version.getSemanticVersion() + " from " + version.getDataset().getGlobalId() + ": " + fileUnfs);
    return !fileUnfs.isEmpty();
}

/**
 * Scans every version of the supplied datasets and lists those whose UNF
 * state is inconsistent: a version that should have a UNF (per
 * {@link #shouldHaveUnf(DatasetVersion)}) but has none, or one that has a
 * UNF but should not.
 *
 * @param datasets the datasets to scan; may be null or empty
 * @return an array builder with one object per problem, each holding
 * "datasetVersionId" and "message"; empty when there is nothing to report
 */
public static JsonArrayBuilder getUnfData(List<Dataset> datasets) {
    JsonArrayBuilder problems = Json.createArrayBuilder();
    if (datasets == null || datasets.isEmpty()) {
        return problems;
    }
    for (Dataset dataset : datasets) {
        for (DatasetVersion dsv : dataset.getVersions()) {
            boolean unfExpected = IngestUtil.shouldHaveUnf(dsv);
            String currentUnf = dsv.getUNF();
            String msg;
            if (unfExpected && currentUnf == null) {
                msg = "Dataset version " + dsv.getSemanticVersion() + " (datasetVersionId " + dsv.getId() + ") from " + dsv.getDataset().getGlobalId() + " doesn't have a UNF but should!";
            } else if (!unfExpected && currentUnf != null) {
                msg = "Dataset version " + dsv.getSemanticVersion() + " (datasetVersionId " + dsv.getId() + ") from " + dsv.getDataset().getGlobalId() + " has a UNF (" + currentUnf + ") but shouldn't!";
            } else {
                // UNF presence matches expectation; nothing to report.
                continue;
            }
            JsonObjectBuilder problem = Json.createObjectBuilder()
                    .add("datasetVersionId", dsv.getId())
                    .add("message", msg);
            problems.add(problem);
        }
    }
    return problems;
}

}
Loading