diff --git a/doc/release-notes/9464-json-validation.md b/doc/release-notes/9464-json-validation.md new file mode 100644 index 00000000000..f104263ba35 --- /dev/null +++ b/doc/release-notes/9464-json-validation.md @@ -0,0 +1,3 @@ +Functionality has been added to help validate dataset JSON prior to dataset creation. There are two new API endpoints in this release. The first takes in a collection alias and returns a custom dataset schema based on the required fields of the collection. The second takes in a collection alias and a dataset JSON file and does an automated validation of the JSON file against the custom schema for the collection. In this release functionality is limited to JSON format validation and validating required elements. Future releases will address field types, controlled vocabulary, etc. (Issue #9464 and #9465) + +For documentation see the API changelog: http://preview.guides.gdcc.io/en/develop/api/changelog.html diff --git a/doc/sphinx-guides/source/_static/api/dataset-schema.json b/doc/sphinx-guides/source/_static/api/dataset-schema.json new file mode 100644 index 00000000000..34b8a1eeedb --- /dev/null +++ b/doc/sphinx-guides/source/_static/api/dataset-schema.json @@ -0,0 +1,122 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "$defs": { + "field": { + "type": "object", + "required": ["typeClass", "multiple", "typeName"], + "properties": { + "value": { + "anyOf": [ + { + "type": "array" + }, + { + "type": "string" + }, + { + "$ref": "#/$defs/field" + } + ] + }, + "typeClass": { + "type": "string" + }, + "multiple": { + "type": "boolean" + }, + "typeName": { + "type": "string" + } + } + } +}, +"type": "object", +"properties": { + "datasetVersion": { + "type": "object", + "properties": { + "license": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "uri": { + "type": "string", + "format": "uri" + } + }, + "required": ["name", "uri"] + }, + "metadataBlocks": { + "type": "object", + "properties": { + 
"citation": { + "type": "object", + "properties": { + "fields": { + "type": "array", + "items": { + "$ref": "#/$defs/field" + }, + "minItems": 5, + "allOf": [ + { + "contains": { + "properties": { + "typeName": { + "const": "title" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "author" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "datasetContact" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "dsDescription" + } + } + } + }, + { + "contains": { + "properties": { + "typeName": { + "const": "subject" + } + } + } + } + ] + } + }, + "required": ["fields"] + } + }, + "required": ["citation"] + } + }, + "required": ["metadataBlocks"] + } + }, + "required": ["datasetVersion"] +} diff --git a/doc/sphinx-guides/source/api/changelog.rst b/doc/sphinx-guides/source/api/changelog.rst index 70dbe7bda52..d2908533a14 100644 --- a/doc/sphinx-guides/source/api/changelog.rst +++ b/doc/sphinx-guides/source/api/changelog.rst @@ -5,8 +5,13 @@ API Changelog :local: :depth: 1 -6.1 ---- +v6.1 +---- + +New +~~~ +- **/api/dataverses/{id}/datasetSchema**: See :ref:`get-dataset-json-schema`. +- **/api/dataverses/{id}/validateDatasetJson**: See :ref:`validate-dataset-json`. New ~~~ @@ -17,8 +22,8 @@ Changes ~~~~~~~ - **/api/datasets/{id}/versions/{versionId}/citation**: This endpoint now accepts a new boolean optional query parameter "includeDeaccessioned", which, if enabled, causes the endpoint to consider deaccessioned versions when searching for versions to obtain the citation. See :ref:`get-citation`. -6.0 ---- +v6.0 +---- Changes ~~~~~~~ diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 0bc0b55becc..56190dd342c 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -505,6 +505,56 @@ The fully expanded example above (without environment variables) looks like this .. 
note:: Previous endpoints ``$SERVER/api/dataverses/$id/metadatablocks/:isRoot`` and ``POST https://$SERVER/api/dataverses/$id/metadatablocks/:isRoot?key=$apiKey`` are deprecated, but supported. +.. _get-dataset-json-schema: + +Retrieve a Dataset JSON Schema for a Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Retrieves a JSON schema customized for a given collection in order to validate a dataset JSON file prior to creating the dataset. This +first version of the schema only includes required elements and fields. In the future we plan to improve the schema by adding controlled +vocabulary and more robust dataset field format testing: + +.. code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" "$SERVER_URL/api/dataverses/$ID/datasetSchema" + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" "https://demo.dataverse.org/api/dataverses/root/datasetSchema" + +Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. + +While it is recommended to download a copy of the JSON Schema from the collection (as above) to account for any fields that have been marked as required, you can also download a minimal :download:`dataset-schema.json <../_static/api/dataset-schema.json>` to get a sense of the schema when no customizations have been made. + +.. _validate-dataset-json: + +Validate Dataset JSON File for a Collection +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Validates a dataset JSON file customized for a given collection prior to creating the dataset. The validation only tests for json formatting +and the presence of required elements: + +.. 
code-block:: bash + + export API_TOKEN=xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx + export SERVER_URL=https://demo.dataverse.org + export ID=root + + curl -H "X-Dataverse-key:$API_TOKEN" -X POST "$SERVER_URL/api/dataverses/$ID/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json + +The fully expanded example above (without environment variables) looks like this: + +.. code-block:: bash + + curl -H "X-Dataverse-key:xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" -X POST "https://demo.dataverse.org/api/dataverses/root/validateDatasetJson" -H 'Content-type:application/json' --upload-file dataset.json + +Note: you must have "Add Dataset" permission in the given collection to invoke this endpoint. .. _create-dataset-command: diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java index c4749be0cb3..a3425987bf8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevel.java @@ -30,8 +30,9 @@ @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdDatasetFieldTypeId", query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id = :datasetFieldTypeId"), @NamedQuery(name = "DataverseFieldTypeInputLevel.findByDataverseIdAndDatasetFieldTypeIdList", - query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList") - + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.datasetFieldType.id in :datasetFieldIdList"), + @NamedQuery(name = "DataverseFieldTypeInputLevel.findRequiredByDataverseId", + query = "select f from DataverseFieldTypeInputLevel f where f.dataverse.id = :dataverseId and f.required = 'true' ") }) @Table(name="DataverseFieldTypeInputLevel" , uniqueConstraints={ 
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java index 66c700f59ce..1bd290ecc4d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseFieldTypeInputLevelServiceBean.java @@ -88,6 +88,16 @@ public DataverseFieldTypeInputLevel findByDataverseIdDatasetFieldTypeId(Long dat return null; } } + + public List findRequiredByDataverseId(Long dataverseId) { + Query query = em.createNamedQuery("DataverseFieldTypeInputLevel.findRequiredByDataverseId", DataverseFieldTypeInputLevel.class); + query.setParameter("dataverseId", dataverseId); + try{ + return query.getResultList(); + } catch ( NoResultException nre ) { + return null; + } + } public void delete(DataverseFieldTypeInputLevel dataverseFieldTypeInputLevel) { em.remove(em.merge(dataverseFieldTypeInputLevel)); diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 1698ca19c35..10b5d800c21 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -18,9 +18,11 @@ import edu.harvard.iq.dataverse.search.IndexServiceBean; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SolrSearchResult; +import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.storageuse.StorageQuota; import edu.harvard.iq.dataverse.util.StringUtil; import edu.harvard.iq.dataverse.util.SystemConfig; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.File; import java.io.IOException; import java.sql.Timestamp; @@ -43,7 +45,15 @@ import jakarta.persistence.NonUniqueResultException; import jakarta.persistence.PersistenceContext; import 
jakarta.persistence.TypedQuery; +import java.nio.file.Files; +import java.nio.file.Paths; +import org.apache.commons.lang3.StringUtils; import org.apache.solr.client.solrj.SolrServerException; +import org.everit.json.schema.Schema; +import org.everit.json.schema.ValidationException; +import org.everit.json.schema.loader.SchemaLoader; +import org.json.JSONObject; +import org.json.JSONTokener; /** * @@ -81,6 +91,9 @@ public class DataverseServiceBean implements java.io.Serializable { @EJB PermissionServiceBean permissionService; + @EJB + DataverseFieldTypeInputLevelServiceBean dataverseFieldTypeInputLevelService; + @EJB SystemConfig systemConfig; @@ -875,6 +888,273 @@ public List getDatasetTitlesWithinDataverse(Long dataverseId) { return em.createNativeQuery(cqString).getResultList(); } + + public String getCollectionDatasetSchema(String dataverseAlias) { + + Dataverse testDV = this.findByAlias(dataverseAlias); + + while (!testDV.isMetadataBlockRoot()) { + if (testDV.getOwner() == null) { + break; // we are at the root; which by definition is metadata block root, regardless of the value + } + testDV = testDV.getOwner(); + } + + /* Couldn't get the 'return base if no extra required fields' approach to work with the path provided + leaving it as 'out of scope' for now SEK 11/27/2023 + + List required = new ArrayList<>(); + + required = dataverseFieldTypeInputLevelService.findRequiredByDataverseId(testDV.getId()); + + if (required == null || required.isEmpty()){ + String pathToJsonFile = "src/main/resources/edu/harvard/iq/dataverse/baseDatasetSchema.json"; + String baseSchema = getBaseSchemaStringFromFile(pathToJsonFile); + if (baseSchema != null && !baseSchema.isEmpty()){ + return baseSchema; + } + } + + */ + List selectedBlocks = new ArrayList<>(); + List requiredDSFT = new ArrayList<>(); + + selectedBlocks.addAll(testDV.getMetadataBlocks()); + + for (MetadataBlock mdb : selectedBlocks) { + for (DatasetFieldType dsft : mdb.getDatasetFieldTypes()) { + if (!dsft.isChild()) { + 
DataverseFieldTypeInputLevel dsfIl = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), dsft.getId()); + if (dsfIl != null) { + dsft.setRequiredDV(dsfIl.isRequired()); + dsft.setInclude(dsfIl.isInclude()); + } else { + dsft.setRequiredDV(dsft.isRequired()); + dsft.setInclude(true); + } + if (dsft.isHasChildren()) { + for (DatasetFieldType child : dsft.getChildDatasetFieldTypes()) { + DataverseFieldTypeInputLevel dsfIlChild = dataverseFieldTypeInputLevelService.findByDataverseIdDatasetFieldTypeId(testDV.getId(), child.getId()); + if (dsfIlChild != null) { + child.setRequiredDV(dsfIlChild.isRequired()); + child.setInclude(dsfIlChild.isInclude()); + } else { + // in the case of conditionally required (child = true, parent = false) + // we set this to false; i.e this is the default "don't override" value + child.setRequiredDV(child.isRequired() && dsft.isRequired()); + child.setInclude(true); + } + } + } + if(dsft.isRequiredDV()){ + requiredDSFT.add(dsft); + } + } + } + + } + + String reqMDBNames = ""; + List hasReqFields = new ArrayList<>(); + String retval = datasetSchemaPreface; + for (MetadataBlock mdb : selectedBlocks) { + for (DatasetFieldType dsft : requiredDSFT) { + if (dsft.getMetadataBlock().equals(mdb)) { + hasReqFields.add(mdb); + if (!reqMDBNames.isEmpty()) reqMDBNames += ","; + reqMDBNames += "\"" + mdb.getName() + "\""; + break; + } + } + } + int countMDB = 0; + for (MetadataBlock mdb : hasReqFields) { + if (countMDB>0){ + retval += ","; + } + retval += getCustomMDBSchema(mdb, requiredDSFT); + countMDB++; + } + + retval += "\n }"; + + retval += endOfjson.replace("blockNames", reqMDBNames); + + return retval; + + } + + private String getCustomMDBSchema (MetadataBlock mdb, List requiredDSFT){ + String retval = ""; + boolean mdbHasReqField = false; + int numReq = 0; + List requiredThisMDB = new ArrayList<>(); + + for (DatasetFieldType dsft : requiredDSFT ){ + + if(dsft.getMetadataBlock().equals(mdb)){ + numReq++; + 
mdbHasReqField = true; + requiredThisMDB.add(dsft); + } + } + if (mdbHasReqField){ + retval += startOfMDB.replace("blockName", mdb.getName()); + + retval += minItemsTemplate.replace("numMinItems", Integer.toString(requiredThisMDB.size())); + int count = 0; + for (DatasetFieldType dsft:requiredThisMDB ){ + count++; + String reqValImp = reqValTemplate.replace("reqFieldTypeName", dsft.getName()); + if (count < requiredThisMDB.size()){ + retval += reqValImp + "\n"; + } else { + reqValImp = StringUtils.substring(reqValImp, 0, reqValImp.length() - 1); + retval += reqValImp+ "\n"; + retval += endOfReqVal; + } + } + + } + + return retval; + } + + public String isDatasetJsonValid(String dataverseAlias, String jsonInput) { + JSONObject rawSchema = new JSONObject(new JSONTokener(getCollectionDatasetSchema(dataverseAlias))); + + try { + Schema schema = SchemaLoader.load(rawSchema); + schema.validate(new JSONObject(jsonInput)); // throws a ValidationException if this object is invalid + } catch (ValidationException vx) { + logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage()); + String accumulatedexceptions = ""; + for (ValidationException va : vx.getCausingExceptions()){ + accumulatedexceptions = accumulatedexceptions + va; + accumulatedexceptions = accumulatedexceptions.replace("org.everit.json.schema.ValidationException:", " "); + } + if (!accumulatedexceptions.isEmpty()){ + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + accumulatedexceptions; + } else { + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.failed") + " " + vx.getErrorMessage(); + } + + } catch (Exception ex) { + logger.info(BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage()); + return BundleUtil.getStringFromBundle("dataverses.api.validate.json.exception") + ex.getLocalizedMessage(); + } + + return 
BundleUtil.getStringFromBundle("dataverses.api.validate.json.succeeded"); + } + + static String getBaseSchemaStringFromFile(String pathToJsonFile) { + File datasetSchemaJson = new File(pathToJsonFile); + try { + String datasetSchemaAsJson = new String(Files.readAllBytes(Paths.get(datasetSchemaJson.getAbsolutePath()))); + return datasetSchemaAsJson; + } catch (IOException ex) { + logger.info("IO - failed to get schema file - will build on fly " +ex.getMessage()); + return null; + } catch (Exception e){ + logger.info("Other exception - failed to get schema file - will build on fly. " + e.getMessage()); + return null; + } + } + + private String datasetSchemaPreface = + "{\n" + + " \"$schema\": \"http://json-schema.org/draft-04/schema#\",\n" + + " \"$defs\": {\n" + + " \"field\": {\n" + + " \"type\": \"object\",\n" + + " \"required\": [\"typeClass\", \"multiple\", \"typeName\"],\n" + + " \"properties\": {\n" + + " \"value\": {\n" + + " \"anyOf\": [\n" + + " {\n" + + " \"type\": \"array\"\n" + + " },\n" + + " {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " {\n" + + " \"$ref\": \"#/$defs/field\"\n" + + " }\n" + + " ]\n" + + " },\n" + + " \"typeClass\": {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " \"multiple\": {\n" + + " \"type\": \"boolean\"\n" + + " },\n" + + " \"typeName\": {\n" + + " \"type\": \"string\"\n" + + " }\n" + + " }\n" + + " }\n" + + "},\n" + + "\"type\": \"object\",\n" + + "\"properties\": {\n" + + " \"datasetVersion\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + " \"license\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + " \"name\": {\n" + + " \"type\": \"string\"\n" + + " },\n" + + " \"uri\": {\n" + + " \"type\": \"string\",\n" + + " \"format\": \"uri\"\n" + + " }\n" + + " },\n" + + " \"required\": [\"name\", \"uri\"]\n" + + " },\n" + + " \"metadataBlocks\": {\n" + + " \"type\": \"object\",\n" + + " \"properties\": {\n" + + "" ; + + private String startOfMDB = "" + +" \"blockName\": {\n" + +" 
\"type\": \"object\",\n" + +" \"properties\": {\n" + +" \"fields\": {\n" + +" \"type\": \"array\",\n" + +" \"items\": {\n" + +" \"$ref\": \"#/$defs/field\"\n" + +" },"; + + private String reqValTemplate = " {\n" + +" \"contains\": {\n" + +" \"properties\": {\n" + +" \"typeName\": {\n" + +" \"const\": \"reqFieldTypeName\"\n" + +" }\n" + +" }\n" + +" }\n" + +" },"; + + private String minItemsTemplate = "\n \"minItems\": numMinItems,\n" + +" \"allOf\": [\n"; + private String endOfReqVal = " ]\n" + +" }\n" + +" },\n" + +" \"required\": [\"fields\"]\n" + +" }"; + + private String endOfjson = ",\n" + +" \"required\": [blockNames]\n" + +" }\n" + +" },\n" + +" \"required\": [\"metadataBlocks\"]\n" + +" }\n" + +" },\n" + +" \"required\": [\"datasetVersion\"]\n" + +"}\n"; + public void saveStorageQuota(Dataverse target, Long allocation) { StorageQuota storageQuota = target.getStorageQuota(); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index b1e7559f858..6c1bf42c02a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -45,6 +45,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteDataverseLinkingDataverseCommand; import edu.harvard.iq.dataverse.engine.command.impl.DeleteExplicitGroupCommand; +import edu.harvard.iq.dataverse.engine.command.impl.GetDatasetSchemaCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionQuotaCommand; import edu.harvard.iq.dataverse.engine.command.impl.GetCollectionStorageUseCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetRootCommand; @@ -72,6 +73,7 @@ import edu.harvard.iq.dataverse.engine.command.impl.UpdateDataverseMetadataBlocksCommand; import edu.harvard.iq.dataverse.engine.command.impl.UpdateExplicitGroupCommand; import 
edu.harvard.iq.dataverse.engine.command.impl.UpdateMetadataBlockFacetsCommand; +import edu.harvard.iq.dataverse.engine.command.impl.ValidateDatasetJsonCommand; import edu.harvard.iq.dataverse.settings.JvmSettings; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; @@ -130,7 +132,6 @@ import java.util.Optional; import java.util.stream.Collectors; import jakarta.servlet.http.HttpServletResponse; -import jakarta.validation.constraints.NotNull; import jakarta.ws.rs.WebApplicationException; import jakarta.ws.rs.core.Context; import jakarta.ws.rs.core.StreamingOutput; @@ -236,6 +237,40 @@ public Response addDataverse(@Context ContainerRequestContext crc, String body, } } + + @POST + @AuthRequired + @Path("{identifier}/validateDatasetJson") + @Consumes("application/json") + public Response validateDatasetJson(@Context ContainerRequestContext crc, String body, @PathParam("identifier") String idtf) { + User u = getRequestUser(crc); + try { + String validationMessage = execCommand(new ValidateDatasetJsonCommand(createDataverseRequest(u), findDataverseOrDie(idtf), body)); + return ok(validationMessage); + } catch (WrappedResponse ex) { + Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); + return ex.getResponse(); + } + } + + @GET + @AuthRequired + @Path("{identifier}/datasetSchema") + @Produces(MediaType.APPLICATION_JSON) + public Response getDatasetSchema(@Context ContainerRequestContext crc, @PathParam("identifier") String idtf) { + User u = getRequestUser(crc); + + try { + String datasetSchema = execCommand(new GetDatasetSchemaCommand(createDataverseRequest(u), findDataverseOrDie(idtf))); + JsonObject jsonObject = JsonUtil.getJsonObject(datasetSchema); + return Response.ok(jsonObject).build(); + } catch (WrappedResponse ex) { + Logger.getLogger(Dataverses.class.getName()).log(Level.SEVERE, null, ex); + return ex.getResponse(); + } + } + + @POST @AuthRequired diff --git 
a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java new file mode 100644 index 00000000000..2d5e1251614 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetDatasetSchemaCommand.java @@ -0,0 +1,38 @@ + +package edu.harvard.iq.dataverse.engine.command.impl; + + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + + +import java.util.logging.Logger; + +/** + * + * @author stephenkraffmiller + */ +@RequiredPermissions(Permission.AddDataset) +public class GetDatasetSchemaCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(GetDatasetSchemaCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + + public GetDatasetSchemaCommand(DataverseRequest aRequest, Dataverse target) { + super(aRequest, target); + dataverse = target; + } + + @Override + public String execute(CommandContext ctxt) throws CommandException { + return ctxt.dataverses().getCollectionDatasetSchema(dataverse.getAlias()); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java new file mode 100644 index 00000000000..619740ddd89 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/ValidateDatasetJsonCommand.java @@ -0,0 +1,41 @@ + +package 
edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.DataFile; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + + +import java.util.logging.Logger; + +/** + * + * @author stephenkraffmiller + */ +@RequiredPermissions(Permission.AddDataset) +public class ValidateDatasetJsonCommand extends AbstractCommand { + + private static final Logger logger = Logger.getLogger(ValidateDatasetJsonCommand.class.getCanonicalName()); + + private final Dataverse dataverse; + private final String datasetJson; + + public ValidateDatasetJsonCommand(DataverseRequest aRequest, Dataverse target, String datasetJsonIn) { + super(aRequest, target); + dataverse = target; + datasetJson = datasetJsonIn; + } + + @Override + public String execute(CommandContext ctxt) throws CommandException { + + return ctxt.dataverses().isDatasetJsonValid(dataverse.getAlias(), datasetJson); + + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java index 79369207963..1b5619c53e0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsServiceBean.java @@ -51,7 +51,7 @@ public class MetricsServiceBean implements Serializable { /** Dataverses */ - + public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) { Query query = em.createNativeQuery("" + "select distinct to_char(date_trunc('month', 
dvobject.publicationdate),'YYYY-MM') as month, count(date_trunc('month', dvobject.publicationdate))\n" @@ -64,7 +64,7 @@ public JsonArray getDataversesTimeSeries(UriInfo uriInfo, Dataverse d) { List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -129,9 +129,9 @@ public List dataversesBySubject(Dataverse d) { /** Datasets */ - + public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dataverse d) { - Query query = em.createNativeQuery( + Query query = em.createNativeQuery( "select distinct date, count(dataset_id)\n" + "from (\n" + "select min(to_char(COALESCE(releasetime, createtime), 'YYYY-MM')) as date, dataset_id\n" @@ -149,8 +149,8 @@ public JsonArray getDatasetsTimeSeries(UriInfo uriInfo, String dataLocation, Dat List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - - + + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -180,10 +180,10 @@ public long datasetsToMonth(String yyyymm, String dataLocation, Dataverse d) { // But do not use this notation if you need the values returned to // meaningfully identify the datasets! - + Query query = em.createNativeQuery( - - + + "select count(*)\n" + "from (\n" + "select datasetversion.dataset_id || ':' || max(datasetversion.versionnumber + (.1 * datasetversion.minorversionnumber))\n" @@ -312,7 +312,7 @@ public JsonArray filesTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesToJson(results); } - + /** * @param yyyymm Month in YYYY-MM format. * @param d @@ -389,7 +389,7 @@ public JsonArray filesByType(Dataverse d) { return jab.build(); } - + public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { Query query = em.createNativeQuery("SELECT DISTINCT to_char(" + (published ? 
"ob.publicationdate" : "ob.createdate") + ",'YYYY-MM') as date, df.contenttype, count(df.id), coalesce(sum(df.filesize),0) " + " FROM DataFile df, DvObject ob" @@ -402,13 +402,13 @@ public JsonArray filesByTypeTimeSeries(Dataverse d, boolean published) { logger.log(Level.FINE, "Metric query: {0}", query); List results = query.getResultList(); return MetricsUtil.timeSeriesByTypeToJson(results); - + } - /** Downloads + /** Downloads * @param d * @throws ParseException */ - + public JsonArray downloadsTimeSeries(Dataverse d) { // ToDo - published only? Query earlyDateQuery = em.createNativeQuery("" @@ -432,11 +432,11 @@ public JsonArray downloadsTimeSeries(Dataverse d) { List results = query.getResultList(); return MetricsUtil.timeSeriesToJson(results); } - + /* * This includes getting historic download without a timestamp if query * is earlier than earliest timestamped record - * + * * @param yyyymm Month in YYYY-MM format. */ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { @@ -459,7 +459,7 @@ public long downloadsToMonth(String yyyymm, Dataverse d) throws ParseException { + "where (date_trunc('month', responsetime) <= to_date('" + yyyymm + "','YYYY-MM')" + "or responsetime is NULL)\n" // includes historic guestbook records without date + "and eventtype!='" + GuestbookResponse.ACCESS_REQUEST +"'\n" - + ((d==null) ? ";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") + + ((d==null) ? 
";": "AND dataset_id in (" + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ");") ); logger.log(Level.FINE, "Metric query: {0}", query); return (long) query.getSingleResult(); @@ -487,7 +487,7 @@ public long downloadsPastDays(int days, Dataverse d) { return (long) query.getSingleResult(); } - + public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select distinct to_char(gb.responsetime, 'YYYY-MM') as date, ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" @@ -501,7 +501,7 @@ public JsonArray fileDownloadsTimeSeries(Dataverse d, boolean uniqueCounts) { return MetricsUtil.timeSeriesByIDAndPIDToJson(results); } - + public JsonArray fileDownloads(String yyyymm, Dataverse d, boolean uniqueCounts) { Query query = em.createNativeQuery("select ob.id, ob.protocol || ':' || ob.authority || '/' || ob.identifier as pid, count(" + (uniqueCounts ? "distinct email" : "*") + ") " + " FROM guestbookresponse gb, DvObject ob" @@ -543,7 +543,7 @@ public JsonArray uniqueDownloadsTimeSeries(Dataverse d) { return MetricsUtil.timeSeriesByPIDToJson(results); } - + public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { //select distinct count(distinct email),dataset_id, date_trunc('month', responsetime) from guestbookresponse group by dataset_id, date_trunc('month',responsetime) order by dataset_id,date_trunc('month',responsetime); @@ -571,10 +571,10 @@ public JsonArray uniqueDatasetDownloads(String yyyymm, Dataverse d) { return jab.build(); } - - //MDC - - + + //MDC + + public JsonArray mdcMetricTimeSeries(MetricType metricType, String country, Dataverse d) { Query query = em.createNativeQuery("SELECT distinct substring(monthyear from 1 for 7) as date, coalesce(sum(" + metricType.toString() + "),0) as count FROM DatasetMetrics\n" + ((d == null) ? 
"" : "WHERE dataset_id in ( " + getCommaSeparatedIdStringForSubtree(d, "Dataset") + ")\n") @@ -746,7 +746,7 @@ public Metric getMetric(String name, String dataLocation, String dayString, Data // https://github.com/DANS-KNAW/dataverse/blob/dans-develop/src/main/java/edu/harvard/iq/dataverse/metrics/MetricsDansServiceBean.java /** - * + * * @param dvId - parent dataverse id * @param dtype - type of object to return 'Dataverse' or 'Dataset' * @return - list of objects of specified type included in the subtree (includes parent dataverse if dtype is 'Dataverse') @@ -768,7 +768,7 @@ private String getCommaSeparatedIdStringForSubtree(Dataverse d, String dtype) { } private List getChildrenIdsRecursively(Long dvId, String dtype, DatasetVersion.VersionState versionState) { - + //Intended to be called only with dvId != null String sql = "WITH RECURSIVE querytree AS (\n" + " SELECT id, dtype, owner_id, publicationdate\n" diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 3cf75eb669f..adccecce718 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2676,6 +2676,9 @@ dataverses.api.move.dataverse.error.forceMove=Please use the parameter ?forceMov dataverses.api.create.dataset.error.mustIncludeVersion=Please provide initial version in the dataset json dataverses.api.create.dataset.error.superuserFiles=Only a superuser may add files via this api dataverses.api.create.dataset.error.mustIncludeAuthorName=Please provide author name in the dataset json +dataverses.api.validate.json.succeeded=The Dataset JSON provided is valid for this Dataverse Collection. 
+dataverses.api.validate.json.failed=The Dataset JSON provided failed validation with the following error: +dataverses.api.validate.json.exception=Validation failed with following exception: #Access.java access.api.allowRequests.failure.noDataset=Could not find Dataset with id: {0} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java index d20f1e8a58b..6a746b7c5b5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/DatasetsIT.java @@ -62,6 +62,7 @@ import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.SystemConfig; import edu.harvard.iq.dataverse.util.json.JSONLDUtil; +import edu.harvard.iq.dataverse.util.json.JsonUtil; import java.io.File; import java.io.IOException; @@ -162,6 +163,59 @@ public static void afterClass() { .statusCode(200); */ } + + @Test + public void testCollectionSchema(){ + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response getCollectionSchemaResponse = UtilIT.getCollectionSchema(dataverseAlias, apiToken); + getCollectionSchemaResponse.prettyPrint(); + getCollectionSchemaResponse.then().assertThat() + .statusCode(200); + + JsonObject expectedSchema = null; + try { + expectedSchema = JsonUtil.getJsonObjectFromFile("doc/sphinx-guides/source/_static/api/dataset-schema.json"); + } catch (IOException ex) { + } + + assertEquals(JsonUtil.prettyPrint(expectedSchema), JsonUtil.prettyPrint(getCollectionSchemaResponse.body().asString())); + + String expectedJson = 
UtilIT.getDatasetJson("scripts/search/tests/data/dataset-finch1.json"); + + Response validateDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, expectedJson, apiToken); + validateDatasetJsonResponse.prettyPrint(); + validateDatasetJsonResponse.then().assertThat() + .statusCode(200); + + + String pathToJsonFile = "scripts/search/tests/data/datasetMissingReqFields.json"; + + String jsonIn = UtilIT.getDatasetJson(pathToJsonFile); + + Response validateBadDatasetJsonResponse = UtilIT.validateDatasetJson(dataverseAlias, jsonIn, apiToken); + validateBadDatasetJsonResponse.prettyPrint(); + validateBadDatasetJsonResponse.then().assertThat() + .statusCode(200); + + + validateBadDatasetJsonResponse.then().assertThat() + .statusCode(OK.getStatusCode()) + .body(containsString("failed validation")); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverseResponse.prettyPrint(); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + } @Test public void testCreateDataset() { diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index eb44930e4cf..6edeecd6800 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -419,6 +419,23 @@ static Response getGuestbookResponses(String dataverseAlias, Long guestbookId, S return requestSpec.get("/api/dataverses/" + dataverseAlias + "/guestbookResponses/"); } + static Response getCollectionSchema(String dataverseAlias, String apiToken) { + Response getCollectionSchemaResponse = given() + .header(API_TOKEN_HTTP_HEADER, apiToken) + .contentType("application/json") + .get("/api/dataverses/" + dataverseAlias + "/datasetSchema"); + return getCollectionSchemaResponse; + } + + static Response validateDatasetJson(String dataverseAlias, String datasetJson, String apiToken) { + Response getValidateDatasetJsonResponse = given() + 
.header(API_TOKEN_HTTP_HEADER, apiToken) + .body(datasetJson) + .contentType("application/json") + .post("/api/dataverses/" + dataverseAlias + "/validateDatasetJson"); + return getValidateDatasetJsonResponse; + } + static Response createRandomDatasetViaNativeApi(String dataverseAlias, String apiToken) { return createRandomDatasetViaNativeApi(dataverseAlias, apiToken, false); }