Skip to content

Commit

Permalink
Merge pull request #3605 from IQSS/3589-unf-recalc
Browse files Browse the repository at this point in the history
add UNF recalculation endpoint #3589
  • Loading branch information
kcondon authored Feb 8, 2017
2 parents 50019da + 019bbb3 commit cfc1c78
Show file tree
Hide file tree
Showing 7 changed files with 265 additions and 110 deletions.
6 changes: 6 additions & 0 deletions doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,12 @@ Execute all saved searches and make links to dataverses and datasets that are fo

PUT http://$SERVER/api/admin/savedsearches/makelinks/all?debug=true

Dataset Integrity
^^^^^^^^^^^^^^^^^

Add a UNF value for a dataset version, if it's missing, by supplying the dataset version database id::

POST http://$SERVER/api/admin/datasets/integrity/{datasetVersionId}/fixunf

.. |CORS| raw:: html

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.ingest.IngestUtil;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
Expand All @@ -14,17 +16,21 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.Json;
import javax.json.JsonObjectBuilder;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import javax.persistence.TypedQuery;
import org.apache.commons.lang.StringUtils;
import org.jsoup.helper.StringUtil;


/**
Expand All @@ -49,6 +55,9 @@ public class DatasetVersionServiceBean implements java.io.Serializable {
@EJB
SystemConfig systemConfig;

@EJB
IndexServiceBean indexService;

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

Expand Down Expand Up @@ -973,5 +982,29 @@ public HashMap getFileMetadataHistory(DataFile df){
return hashList;
*/
}

/**
 * Calculates and saves the UNF of a dataset version whose UNF is missing,
 * then reindexes the owning dataset.
 *
 * Nothing is thrown for bad input; every outcome is reported through the
 * "message" field of the returned builder.
 *
 * @param datasetVersionId database id of the dataset version, as a decimal
 * string (it arrives as a raw path parameter from the admin API).
 * @return a JSON object builder with a single "message" entry describing
 * what was done, or why nothing was done.
 */
public JsonObjectBuilder fixUnf(String datasetVersionId) {
    JsonObjectBuilder info = Json.createObjectBuilder();
    if (datasetVersionId == null || datasetVersionId.isEmpty()) {
        info.add("message", "datasetVersionId was null or empty!");
        return info;
    }
    long dsvId;
    try {
        dsvId = Long.parseLong(datasetVersionId);
    } catch (NumberFormatException ex) {
        // The id is caller-supplied text; report a non-numeric value as a
        // message instead of letting the exception escape the EJB.
        info.add("message", "datasetVersionId must be a number but was \"" + datasetVersionId + "\".");
        return info;
    }
    DatasetVersion datasetVersion = find(dsvId);
    if (datasetVersion == null) {
        info.add("message", "Could not find a dataset version based on datasetVersionId " + datasetVersionId + ".");
        return info;
    }
    if (!StringUtil.isBlank(datasetVersion.getUNF())) {
        // Never overwrite an existing UNF; an admin has to blank it first.
        info.add("message", "Dataset version (id=" + datasetVersionId + ") already has a UNF. Blank the UNF value in the database if you must change it.");
        return info;
    }
    IngestUtil.recalculateDatasetVersionUNF(datasetVersion);
    DatasetVersion saved = em.merge(datasetVersion);
    if (StringUtil.isBlank(saved.getUNF())) {
        // The recalculation leaves the UNF unset when no file in the version
        // contributes a UNF or when the calculation fails. Do not claim that
        // a new value was saved, and skip the pointless reindex.
        info.add("message", "No UNF could be calculated for dataset version (id=" + datasetVersionId + "). Nothing was changed.");
        return info;
    }
    info.add("message", "New UNF value saved (" + saved.getUNF() + "). Reindexing dataset.");
    boolean doNormalSolrDocCleanUp = true;
    // indexDataset returns a Future; the result is not awaited here.
    indexService.indexDataset(saved.getDataset(), doNormalSolrDocCleanUp);
    return info;
}

} // end class
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseRoleServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
Expand Down Expand Up @@ -186,6 +187,9 @@ String getWrappedMessageWhenJson() {
@EJB
protected UserNotificationServiceBean userNotificationSvc;

@EJB
protected DatasetVersionServiceBean datasetVersionSvc;

@PersistenceContext(unitName = "VDCNet-ejbPU")
protected EntityManager em;

Expand Down
11 changes: 11 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Admin.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.api;


import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.EMailValidator;
Expand Down Expand Up @@ -53,6 +54,9 @@
import edu.harvard.iq.dataverse.authorization.RoleAssignee;
import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier;
import edu.harvard.iq.dataverse.authorization.users.User;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.concurrent.Future;
/**
* Where the secure, setup API calls live.
* @author michael
Expand Down Expand Up @@ -863,4 +867,11 @@ public Response findRoleAssignee(@PathParam("idtf") String idtf) {
: ok(json(ra.getDisplayInfo()));
}

/**
 * Admin endpoint that asks the dataset version service to fill in a missing
 * UNF for the dataset version with the given database id.
 *
 * @param datasetVersionId database id of the dataset version, taken from
 * the request path.
 * @return an OK response wrapping the service's JSON message.
 */
@POST
@Path("datasets/integrity/{datasetVersionId}/fixunf")
public Response fixUnf(@PathParam("datasetVersionId") String datasetVersionId) {
    return ok(datasetVersionSvc.fixUnf(datasetVersionId));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -619,50 +619,11 @@ public void recalculateDataFileUNF(DataFile dataFile) {
dataFile.getDataTable().setUnf(fileUnfValue);
}
}

/**
 * Recalculates the dataset-level UNF of the given version.
 *
 * The work is done entirely by
 * {@link IngestUtil#recalculateDatasetVersionUNF(DatasetVersion)}; this
 * instance method only remains so existing callers of the service bean keep
 * working. Keeping a second inline copy of the calculation here would run
 * the same computation twice.
 *
 * @param version the dataset version whose UNF is set or cleared.
 */
public void recalculateDatasetVersionUNF(DatasetVersion version) {
    IngestUtil.recalculateDatasetVersionUNF(version);
}

public void sendFailNotification(Long dataset_id) {
FacesMessage facesMessage = new FacesMessage("ingest failed");
/* commented out push channel message:
Expand Down
90 changes: 85 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,26 @@
package edu.harvard.iq.dataverse.ingest;

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.util.FileUtil;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import org.dataverse.unf.UNFUtil;
import org.dataverse.unf.UnfException;

/**
* Various helper methods used by IngestServiceBean.
Expand Down Expand Up @@ -64,10 +73,10 @@ public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List<
}

/**
* Checks if the unique file path of the supplied fileMetadata is already on
* the list of the existing files; and if so, keeps generating a new name
* until it is unique. Returns the final file name. (i.e., it only modifies the
* filename, and not the folder name, in order to achieve uniqueness)
* Checks if the unique file path of the supplied fileMetadata is already on
* the list of the existing files; and if so, keeps generating a new name
* until it is unique. Returns the final file name. (i.e., it only modifies
* the filename, and not the folder name, in order to achieve uniqueness)
*
* @param fileMetadata supplied FileMetadata
* @param existingFileNames a set of the already existing pathnames
Expand Down Expand Up @@ -195,4 +204,75 @@ private static Set<String> existingPathNamesAsSet(DatasetVersion version, FileMe
return pathNamesExisting;
}

/**
 * Recomputes the dataset-level UNF from the UNFs of the version's files and
 * stores it on the version.
 *
 * If no file contributes a UNF, the version's UNF is cleared. If the
 * calculation fails or yields nothing, the version is left unchanged.
 *
 * @param version The DatasetVersion to mutate, setting or unsetting the
 * UNF. A null version is ignored.
 */
public static void recalculateDatasetVersionUNF(DatasetVersion version) {
    logger.fine("recalculating UNF for dataset version.");
    if (version == null) {
        return;
    }

    List<String> fileUnfs = getUnfValuesOfFiles(version);
    if (fileUnfs.isEmpty()) {
        // Of course if no files in the version have UNFs, we need to make sure
        // that the version has the NULL UNF too.
        // Otherwise, the version will still have a UNF if the user deletes
        // all the tabular files from the version!
        version.setUNF(null);
        return;
    }

    String[] fileUnfArray = fileUnfs.toArray(new String[0]);
    logger.fine("Attempting to calculate new UNF from total of " + fileUnfs.size() + " file-level signatures.");

    String recalculatedUnf;
    try {
        recalculatedUnf = UNFUtil.calculateUNF(fileUnfArray);
    } catch (IOException ex) {
        // It's unclear how to exercise this IOException.
        logger.warning("IO Exception: Failed to recalculate the UNF for the dataset version id=" + version.getId());
        return;
    } catch (UnfException uex) {
        // It's unclear how to exercise this UnfException.
        logger.warning("UNF Exception: Failed to recalculate the UNF for the dataset version id=" + version.getId());
        return;
    }

    if (recalculatedUnf == null) {
        // Nothing was produced; keep whatever value the version already has.
        return;
    }
    version.setUNF(recalculatedUnf);
    logger.fine("Recalculated the UNF for the dataset version id=" + version.getId() + ", new signature: " + recalculatedUnf);
}

/**
 * Collects the UNFs of the tabular data files in a dataset version.
 *
 * @param version the dataset version to inspect; may be null.
 * @return the non-null UNFs of the version's tabular files, in the order of
 * the version's file metadata; an empty list when the version is null or
 * no file qualifies.
 */
public static List<String> getUnfValuesOfFiles(DatasetVersion version) {
    List<String> fileUnfs = new ArrayList<>();
    if (version == null) {
        return fileUnfs;
    }
    for (FileMetadata metadata : version.getFileMetadatas()) {
        if (metadata == null) {
            continue;
        }
        DataFile file = metadata.getDataFile();
        if (file == null || !file.isTabularData()) {
            continue;
        }
        String fileUnf = file.getUnf();
        if (fileUnf != null) {
            fileUnfs.add(fileUnf);
        }
    }
    return fileUnfs;
}

/**
 * Tells whether a dataset version is expected to carry a UNF, i.e. whether
 * at least one of its files contributes a UNF according to
 * {@link #getUnfValuesOfFiles(DatasetVersion)}.
 *
 * @param version the dataset version to inspect; may be null.
 * @return true if at least one file-level UNF was found, false otherwise
 * (including for a null version).
 */
public static boolean shouldHaveUnf(DatasetVersion version) {
    if (version == null) {
        return false;
    }
    List<String> fileUnfs = getUnfValuesOfFiles(version);
    logger.fine("UNF values for files from Dataset version " + version.getSemanticVersion() + " from " + version.getDataset().getGlobalId() + ": " + fileUnfs);
    return !fileUnfs.isEmpty();
}
}
Loading

0 comments on commit cfc1c78

Please sign in to comment.