add UNF recalculation endpoint #3589 #3605

Merged · 10 commits · Feb 8, 2017
6 changes: 6 additions & 0 deletions doc/sphinx-guides/source/api/native-api.rst
@@ -655,6 +655,12 @@ Execute all saved searches and make links to dataverses and datasets that are found

PUT http://$SERVER/api/admin/savedsearches/makelinks/all?debug=true

Dataset Integrity
^^^^^^^^^^^^^^^^^

Add a UNF value to a dataset version, if it is missing, by supplying the dataset version's database id::

POST http://$SERVER/api/admin/datasets/integrity/{datasetVersionId}/fixunf
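
For example, assuming a local installation and a dataset version whose database id is 42 (hypothetical values), the call could look like::

    curl -X POST http://localhost:8080/api/admin/datasets/integrity/42/fixunf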

.. |CORS| raw:: html

src/main/java/edu/harvard/iq/dataverse/DatasetVersionServiceBean.java
@@ -6,6 +6,8 @@
package edu.harvard.iq.dataverse;

import edu.harvard.iq.dataverse.DatasetVersion.VersionState;
import edu.harvard.iq.dataverse.ingest.IngestUtil;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
@@ -14,17 +16,21 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.Json;
import javax.json.JsonObjectBuilder;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import javax.persistence.TypedQuery;
import org.apache.commons.lang.StringUtils;


/**
@@ -49,6 +55,9 @@ public class DatasetVersionServiceBean implements java.io.Serializable {
@EJB
SystemConfig systemConfig;

@EJB
IndexServiceBean indexService;

@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;

@@ -973,5 +982,29 @@ public HashMap getFileMetadataHistory(DataFile df){
return hashList;
*/
}

public JsonObjectBuilder fixUnf(String datasetVersionId) {
JsonObjectBuilder info = Json.createObjectBuilder();
if (datasetVersionId == null || datasetVersionId.isEmpty()) {
info.add("message", "datasetVersionId was null or empty!");
return info;
}
long dsvId;
try {
    dsvId = Long.parseLong(datasetVersionId);
} catch (NumberFormatException ex) {
    info.add("message", "Could not parse datasetVersionId as a number: " + datasetVersionId);
    return info;
}
DatasetVersion datasetVersion = find(dsvId);
if (datasetVersion == null) {
info.add("message", "Could not find a dataset version based on datasetVersionId " + datasetVersionId + ".");
return info;
}
if (!StringUtils.isBlank(datasetVersion.getUNF())) {
info.add("message", "Dataset version (id=" + datasetVersionId + ") already has a UNF. Blank the UNF value in the database if you must change it.");
return info;
}
IngestUtil.recalculateDatasetVersionUNF(datasetVersion);
DatasetVersion saved = em.merge(datasetVersion);
info.add("message", "New UNF value saved (" + saved.getUNF() + "). Reindexing dataset.");
boolean doNormalSolrDocCleanUp = true;
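// Indexing runs asynchronously; the returned Future is not awaited here.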
Future<String> indexingResult = indexService.indexDataset(datasetVersion.getDataset(), doNormalSolrDocCleanUp);
return info;
}

} // end class
src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java
@@ -4,6 +4,7 @@
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseRoleServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
@@ -186,6 +187,9 @@ String getWrappedMessageWhenJson() {
@EJB
protected UserNotificationServiceBean userNotificationSvc;

@EJB
protected DatasetVersionServiceBean datasetVersionSvc;

@PersistenceContext(unitName = "VDCNet-ejbPU")
protected EntityManager em;

11 changes: 11 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/api/Admin.java
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.api;


import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.EMailValidator;
@@ -53,6 +54,9 @@
import edu.harvard.iq.dataverse.authorization.RoleAssignee;
import edu.harvard.iq.dataverse.authorization.UserRecordIdentifier;
import edu.harvard.iq.dataverse.authorization.users.User;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.concurrent.Future;
/**
* Where the secure, setup API calls live.
* @author michael
@@ -863,4 +867,11 @@ public Response findRoleAssignee(@PathParam("idtf") String idtf) {
: ok(json(ra.getDisplayInfo()));
}

@Path("datasets/integrity/{datasetVersionId}/fixunf")
@POST
public Response fixUnf(@PathParam("datasetVersionId") String datasetVersionId) {
JsonObjectBuilder info = datasetVersionSvc.fixUnf(datasetVersionId);
return ok(info);
}
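
// For reference, a successful call is wrapped by the ok() helper used across
// this API; assuming the standard envelope, the response should look roughly
// like this (illustrative values):
//   {"status":"OK","data":{"message":"New UNF value saved (UNF:6:...). Reindexing dataset."}}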

}
src/main/java/edu/harvard/iq/dataverse/ingest/IngestServiceBean.java
@@ -619,50 +619,11 @@ public void recalculateDataFileUNF(DataFile dataFile) {
dataFile.getDataTable().setUnf(fileUnfValue);
}
}

public void recalculateDatasetVersionUNF(DatasetVersion version) {
String[] unfValues = new String[0];
String datasetUnfValue = null;
List<String> unfValueList = new ArrayList<>();

logger.fine("recalculating UNF for dataset version.");
Iterator<FileMetadata> itfm = version.getFileMetadatas().iterator();
while (itfm.hasNext()) {
FileMetadata fileMetadata = itfm.next();
if (fileMetadata != null &&
fileMetadata.getDataFile() != null &&
fileMetadata.getDataFile().isTabularData() &&
fileMetadata.getDataFile().getUnf() != null) {
String varunf = fileMetadata.getDataFile().getUnf();
unfValueList.add(varunf);
}
}

if (unfValueList.size() > 0) {
unfValues = unfValueList.toArray(unfValues);

logger.fine("Attempting to calculate new UNF from total of " + unfValueList.size() + " file-level signatures.");
try {
datasetUnfValue = UNFUtil.calculateUNF(unfValues);
} catch (IOException ex) {
logger.warning("IO Exception: Failed to recalculate the UNF for the dataset version id="+version.getId());
} catch (UnfException uex) {
logger.warning("UNF Exception: Failed to recalculate the UNF for the dataset version id="+version.getId());
}

if (datasetUnfValue != null) {
version.setUNF(datasetUnfValue);
logger.fine("Recalculated the UNF for the dataset version id="+version.getId()+", new signature: "+datasetUnfValue);
}
} else {
// Of course if no files in the version have UNFs, we need to make sure
// that the version has the NULL UNF too.
// Otherwise, the version will still have a UNF if the user deletes
// all the tabular files from the version!
version.setUNF(null);
}
IngestUtil.recalculateDatasetVersionUNF(version);
}

public void sendFailNotification(Long dataset_id) {
FacesMessage facesMessage = new FacesMessage("ingest failed");
/* commented out push channel message:
90 changes: 85 additions & 5 deletions src/main/java/edu/harvard/iq/dataverse/ingest/IngestUtil.java
@@ -20,17 +20,26 @@
package edu.harvard.iq.dataverse.ingest;

import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.FileMetadata;
import edu.harvard.iq.dataverse.util.FileUtil;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import org.dataverse.unf.UNFUtil;
import org.dataverse.unf.UnfException;

/**
* Various helper methods used by IngestServiceBean.
@@ -64,10 +73,10 @@ public static void checkForDuplicateFileNamesFinal(DatasetVersion version, List<
}

/**
* Checks if the unique file path of the supplied fileMetadata is already on
* the list of the existing files; and if so, keeps generating a new name
* until it is unique. Returns the final file name. (i.e., it only modifies the
* filename, and not the folder name, in order to achieve uniqueness)
* Checks if the unique file path of the supplied fileMetadata is already on
* the list of the existing files; and if so, keeps generating a new name
* until it is unique. Returns the final file name. (i.e., it only modifies
* the filename, and not the folder name, in order to achieve uniqueness)
*
* @param fileMetadata supplied FileMetadata
* @param existingFileNames a set of the already existing pathnames
@@ -195,4 +204,75 @@ private static Set<String> existingPathNamesAsSet(DatasetVersion version, FileMe
return pathNamesExisting;
}

/**
* @param version The DatasetVersion to mutate, setting or unsetting the
* UNF.
*/
public static void recalculateDatasetVersionUNF(DatasetVersion version) {
logger.fine("recalculating UNF for dataset version.");
if (version == null) {
return;
}
List<String> unfValueList = getUnfValuesOfFiles(version);
if (!unfValueList.isEmpty()) {
String[] unfValues = unfValueList.toArray(new String[0]);

logger.fine("Attempting to calculate new UNF from total of " + unfValueList.size() + " file-level signatures.");
String datasetUnfValue = null;
try {
datasetUnfValue = UNFUtil.calculateUNF(unfValues);
} catch (IOException ex) {
// It's unclear how to exercise this IOException.
logger.warning("IO Exception: Failed to recalculate the UNF for the dataset version id=" + version.getId());
} catch (UnfException uex) {
// It's unclear how to exercise this UnfException.
logger.warning("UNF Exception: Failed to recalculate the UNF for the dataset version id=" + version.getId());
}

if (datasetUnfValue != null) {
version.setUNF(datasetUnfValue);
logger.fine("Recalculated the UNF for the dataset version id=" + version.getId() + ", new signature: " + datasetUnfValue);
}
} else {
// Of course if no files in the version have UNFs, we need to make sure
// that the version has the NULL UNF too.
// Otherwise, the version will still have a UNF if the user deletes
// all the tabular files from the version!
version.setUNF(null);
}
}
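
// A minimal usage sketch (hypothetical setup, as in a unit test; it assumes
// DatasetVersion exposes a settable file metadata list):
//
//   DatasetVersion version = new DatasetVersion();
//   version.setFileMetadatas(new ArrayList<>());
//   IngestUtil.recalculateDatasetVersionUNF(version);
//   // With no tabular files carrying UNFs, the version-level UNF is cleared:
//   assert version.getUNF() == null;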

public static List<String> getUnfValuesOfFiles(DatasetVersion version) {
List<String> unfValueList = new ArrayList<>();
if (version == null) {
return unfValueList;
}
for (FileMetadata fileMetadata : version.getFileMetadatas()) {
    if (fileMetadata != null
            && fileMetadata.getDataFile() != null
            && fileMetadata.getDataFile().isTabularData()
            && fileMetadata.getDataFile().getUnf() != null) {
        unfValueList.add(fileMetadata.getDataFile().getUnf());
    }
}
return unfValueList;
}

public static boolean shouldHaveUnf(DatasetVersion version) {
if (version == null) {
return false;
}
List<String> values = getUnfValuesOfFiles(version);
logger.fine("UNF values for files from Dataset version " + version.getSemanticVersion() + " from " + version.getDataset().getGlobalId() + ": " + values);
return !values.isEmpty();
}
}