From 4dd10bd8e1da1f7ecb6a960c5759ef315b659ed0 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Mon, 19 Nov 2018 16:32:36 -0500 Subject: [PATCH] stub our docs and API for Make Data Count #4821 --- doc/sphinx-guides/source/admin/index.rst | 1 + .../source/admin/make-data-count.rst | 87 +++++++++++++++++++ doc/sphinx-guides/source/api/metrics.rst | 4 +- .../harvard/iq/dataverse/api/Datasets.java | 23 ++++- .../iq/dataverse/api/MakeDataCountApi.java | 20 +++++ .../makedatacount/MakeDataCountUtil.java | 69 +++++++++++++++ .../iq/dataverse/api/MakeDataCountApiIT.java | 64 ++++++++++++++ .../edu/harvard/iq/dataverse/api/UtilIT.java | 25 ++++++ 8 files changed, 291 insertions(+), 2 deletions(-) create mode 100644 doc/sphinx-guides/source/admin/make-data-count.rst create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountUtil.java create mode 100644 src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java diff --git a/doc/sphinx-guides/source/admin/index.rst b/doc/sphinx-guides/source/admin/index.rst index cec018bef4d..b4d17c4aa1a 100755 --- a/doc/sphinx-guides/source/admin/index.rst +++ b/doc/sphinx-guides/source/admin/index.rst @@ -18,6 +18,7 @@ This guide documents the functionality only available to superusers (such as "da metadatacustomization metadataexport timers + make-data-count integrations geoconnect-worldmap user-administration diff --git a/doc/sphinx-guides/source/admin/make-data-count.rst b/doc/sphinx-guides/source/admin/make-data-count.rst new file mode 100644 index 00000000000..0607d3c846e --- /dev/null +++ b/doc/sphinx-guides/source/admin/make-data-count.rst @@ -0,0 +1,87 @@ +Make Data Count +=============== + +`Make Data Count`_ is a project to collect and standardize metrics on data use. They are part of a broader Research Data Alliance (RDA) `Data Usage Metrics Working Group`_ that they helped launch and they publish a `newsletter`_. 
+ +.. _Make Data Count: https://makedatacount.org +.. _Data Usage Metrics Working Group: https://www.rd-alliance.org/groups/data-usage-metrics-wg +.. _newsletter: https://makedatacount.org/contact/ + +.. contents:: Contents: + :local: + +Introduction +------------ + +All installations of Dataverse that use DOIs as persistent identifiers are encouraged to send data usage metrics to the "open hub" operated by DataCite for Make Data Count. + +Data repositories using Handles and other identifiers are not supported by Make Data Count but in the notes_ following a July 2018 webinar, you can see the project's response on this topic. + +.. _notes: https://docs.google.com/document/d/1b1itytDVDsI_Ni2LoxrG887YGt0zDc96tpyJEgBN9Q8/ + +Make Data Count is built on top of existing standards such as COUNTER and SUSHI that come out of the article publishing world. To meet the needs of the data publishing world, Make Data Count created the `COUNTER Code of Practice for Research Data`_ (`preprint`_), which is the standard that Dataverse implements. The Make Data Count project has emphasized that they would like feedback on the code of practice. + + +.. _COUNTER Code of Practice for Research Data: https://makedatacount.org/counter-code-of-practice-for-research-data/ +.. 
_preprint: https://doi.org/10.7287/peerj.preprints.26505v1 + +Sending Metrics from Dataverse to the DataCite Hub +-------------------------------------------------- + +To configure Dataverse to send the metrics to the DataCite hub, you must set up a cron job to call the following API endpoint: + +``curl -X POST http://localhost:8080/api/admin/makeDataCount/sendToHub`` + +The following metrics will be sent for each published dataset: + +- Views ("investigations" in COUNTER) +- Downloads ("requests" in COUNTER) + +Retrieving Make Data Count Metrics from the DataCite Hub +-------------------------------------------------------- + +The following metrics can be downloaded directly from the DataCite hub (see https://support.datacite.org/docs/eventdata-guide) for datasets hosted by Dataverse installations that have been configured to send these metrics to the hub: + +- Total Views for a Dataset +- Unique Views for a Dataset +- Total Downloads for a Dataset +- Unique Downloads for a Dataset +- Citations for a Dataset (via Crossref) + +Retrieving Make Data Count Metrics from Dataverse +------------------------------------------------- + +Dataverse users might find it more convenient to retrieve Make Data Count metrics from their installation of Dataverse rather than the DataCite hub. + +The Dataverse API endpoints for retrieving Make Data Count metrics are described below. Please note that in the curl examples, Bash environment variables are used with the idea that you can set a few environment variables and copy and paste the examples as is. 
For example, "$DV_BASE_URL" could become "https://demo.dataverse.org" by issuing the following ``export`` command from Bash: + +``export DV_BASE_URL=https://demo.dataverse.org`` + +To confirm that the environment variable was set properly, you can use ``echo`` like this: + +``echo $DV_BASE_URL`` + +Retrieving Total Views for a Dataset ++++++++++++++++++++++++++++++++++++++++++++++++++ + +``curl "$DV_BASE_URL/api/datasets/:persistentId/makeDataCount/viewsTotal?persistentId=$DOI"`` + +Retrieving Unique Views for a Dataset ++++++++++++++++++++++++++++++++++++++++++++++++++ + +``curl "$DV_BASE_URL/api/datasets/:persistentId/makeDataCount/viewsUnique?persistentId=$DOI"`` + +Retrieving Total Downloads for a Dataset ++++++++++++++++++++++++++++++++++++++++++++++++++ + +``curl "$DV_BASE_URL/api/datasets/:persistentId/makeDataCount/downloadsTotal?persistentId=$DOI"`` + +Retrieving Unique Downloads for a Dataset ++++++++++++++++++++++++++++++++++++++++++++++++++ + +``curl "$DV_BASE_URL/api/datasets/:persistentId/makeDataCount/downloadsUnique?persistentId=$DOI"`` + +Retrieving Citations for a Dataset ++++++++++++++++++++++++++++++++++++++++++++++++++ + +``curl "$DV_BASE_URL/api/datasets/:persistentId/makeDataCount/citations?persistentId=$DOI"`` diff --git a/doc/sphinx-guides/source/api/metrics.rst b/doc/sphinx-guides/source/api/metrics.rst index 821b74b0a96..fec02ce5748 100755 --- a/doc/sphinx-guides/source/api/metrics.rst +++ b/doc/sphinx-guides/source/api/metrics.rst @@ -1,6 +1,8 @@ Metrics API =========== +The Metrics API provides counts of downloads, datasets created, files uploaded, and more, as described below. Dataverse also supports Make Data Count, which is described in the :doc:`/admin/make-data-count` section of the Admin Guide. + .. 
contents:: |toctitle| :local: @@ -78,4 +80,4 @@ Returns the number of datasets by each subject:: CORS - \ No newline at end of file + diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 4f868d90ae7..0fddaead84d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -75,6 +75,7 @@ import edu.harvard.iq.dataverse.S3PackageImporter; import edu.harvard.iq.dataverse.engine.command.exception.CommandException; import edu.harvard.iq.dataverse.engine.command.impl.UpdateDvObjectPIDMetadataCommand; +import edu.harvard.iq.dataverse.makedatacount.MakeDataCountUtil; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.BundleUtil; import edu.harvard.iq.dataverse.util.EjbUtil; @@ -1519,6 +1520,26 @@ public Response lockDataset(@PathParam("identifier") String id, @PathParam("type }); } - + + @GET + @Path("{id}/makeDataCount/{metric}") + public Response getMakeDataCountMetric(@PathParam("id") String idSupplied, @PathParam("metric") String metricSupplied) { + try { + Dataset dataset = findDatasetOrDie(idSupplied); + JsonObjectBuilder jsonObjectBuilder = Json.createObjectBuilder(); + MakeDataCountUtil.MetricType metricType = null; + try { + metricType = MakeDataCountUtil.MetricType.fromString(metricSupplied); + } catch (IllegalArgumentException ex) { + return error(Response.Status.BAD_REQUEST, ex.getMessage()); + } + String description = metricType.name() + " metric for dataset " + dataset.getId(); + jsonObjectBuilder.add("description", description); + return allowCors(ok(jsonObjectBuilder)); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java new file mode 100644 index 00000000000..31502643310 --- /dev/null +++ 
b/src/main/java/edu/harvard/iq/dataverse/api/MakeDataCountApi.java @@ -0,0 +1,20 @@ +package edu.harvard.iq.dataverse.api; + +import javax.ws.rs.POST; +import javax.ws.rs.Path; +import javax.ws.rs.core.Response; + +/** + * Note that there are makeDataCount endpoints in Datasets.java as well. + */ +@Path("admin/makeDataCount") +public class MakeDataCountApi extends AbstractApiBean { + + @POST + @Path("sendToHub") + public Response sendDataToHub() { + String msg = "Data has been sent to Make Data Count"; + return ok(msg); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountUtil.java b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountUtil.java new file mode 100644 index 00000000000..23b136ff20c --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/makedatacount/MakeDataCountUtil.java @@ -0,0 +1,69 @@ +package edu.harvard.iq.dataverse.makedatacount; + +import java.util.Arrays; + +/** + * See doc/sphinx-guides/source/admin/make-data-count.rst for user facing docs + * about Make Data Count. Go read that first. + * + * The main issue for initial backend work is + * https://github.com/IQSS/dataverse/issues/4821 + * + * The following is a brain dump of additional details from participating in a + * 2018-10-18 kickoff meeting (notes at + * https://docs.google.com/document/d/1eM4rAuhmR4ZQxJC_PTE0rq2x7N3aNEjMN7QVvpkY1os/edit?usp=sharing + * ) and from watching two webinars at https://makedatacount.org/presentations/ + * (MDC Webinar: COUNTER Code of Practice September 13th, 2017 and MDC Webinar: + * How to Make Your Data Count July 10th, 2018). + * + * The recommended starting point to implement Make Data Count is + * https://github.com/CDLUC3/Make-Data-Count/blob/master/getting-started.md + * which specifically recommends reading the "COUNTER Code of Practice for + * Research Data" mentioned in the user facing docs. + * + * Make Data Count was first implemented in DASH. 
Here's an example dataset: + * https://dash.ucmerced.edu/stash/dataset/doi:10.6071/M3RP49 + * + * For processing logs we could try DASH's + * https://github.com/CDLUC3/counter-processor + * + * Next, DataOne implemented it, and you can see an example dataset here: + * https://search.dataone.org/view/doi:10.5063/F1Z899CZ + * + * Parts of DataOne are written in Java so perhaps there is some code that can + * be reused? + */ +public class MakeDataCountUtil { + + public enum MetricType { + + VIEWS_TOTAL("viewsTotal"), + VIEWS_UNIQUE("viewsUnique"), + DOWNLOADS_TOTAL("downloadsTotal"), + DOWNLOADS_UNIQUE("downloadsUnique"), + CITATIONS("citations"); + + private final String text; + + private MetricType(final String text) { + this.text = text; + } + + public static MetricType fromString(String text) { + if (text != null) { + for (MetricType metricType : MetricType.values()) { + if (text.equals(metricType.text)) { + return metricType; + } + } + } + throw new IllegalArgumentException("MetricType must be one of these values: " + Arrays.asList(MetricType.values()) + "."); + } + + @Override + public String toString() { + return text; + } + } + +} diff --git a/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java new file mode 100644 index 00000000000..675723b28c1 --- /dev/null +++ b/src/test/java/edu/harvard/iq/dataverse/api/MakeDataCountApiIT.java @@ -0,0 +1,64 @@ +package edu.harvard.iq.dataverse.api; + +import com.jayway.restassured.RestAssured; +import com.jayway.restassured.response.Response; +import static javax.ws.rs.core.Response.Status.CREATED; +import static javax.ws.rs.core.Response.Status.OK; +import static javax.ws.rs.core.Response.Status.BAD_REQUEST; +import static org.hamcrest.CoreMatchers.equalTo; +import org.junit.BeforeClass; +import org.junit.Test; + +public class MakeDataCountApiIT { + + @BeforeClass + public static void setUpClass() { + RestAssured.baseURI = 
UtilIT.getRestAssuredBaseUri(); + } + + @Test + public void testMakeDataCountSendDataToHub() { + Response sendDataToHub = UtilIT.makeDataCountSendDataToHub(); + sendDataToHub.prettyPrint(); + sendDataToHub.then().assertThat() + .statusCode(OK.getStatusCode()); + } + + @Test + public void testMakeDataCountGetMetric() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + createUser.then().assertThat() + .statusCode(OK.getStatusCode()); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + createDataverseResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + createDatasetResponse.then().assertThat() + .statusCode(CREATED.getStatusCode()); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + + String invalidMetric = "junk"; + Response invalidMetricAttempt = UtilIT.makeDataCountGetMetricForDataset(datasetId.toString(), invalidMetric, apiToken); + invalidMetricAttempt.prettyPrint(); + invalidMetricAttempt.then().assertThat() + .body("message", equalTo("MetricType must be one of these values: [viewsTotal, viewsUnique, downloadsTotal, downloadsUnique, citations].")) + .statusCode(BAD_REQUEST.getStatusCode()); + + String metric = "viewsTotal"; + Response getCitations = UtilIT.makeDataCountGetMetricForDataset(datasetId.toString(), metric, apiToken); + getCitations.prettyPrint(); + getCitations.then().assertThat() + .body("data.description", equalTo("VIEWS_TOTAL metric for dataset " + datasetId)) + .statusCode(OK.getStatusCode()); + } + +} diff --git 
a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index 1a39a125d01..03c4e6344c4 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -1767,4 +1767,29 @@ static Response getOaiListIdentifiers(String setName, String metadataFormat) { String apiPath = String.format("/oai?verb=ListIdentifiers&set=%s&metadataPrefix=%s", setName, metadataFormat); return given().get(apiPath); } + + static Response makeDataCountSendDataToHub() { + return given().post("/api/admin/makeDataCount/sendToHub"); + } + + static Response makeDataCountDownloadFromHub(String metric) { + return given().post("/api/admin/makeDataCount/downloadFromHub/" + metric); + } + + static Response makeDataCountGetMetricForDataset(String idOrPersistentIdOfDataset, String metric, String apiToken) { + System.out.println("metric: " + metric); + String idInPath = idOrPersistentIdOfDataset; // Assume it's a number. + String optionalQueryParam = ""; // If idOrPersistentId is a number we'll just put it in the path. + if (!NumberUtils.isNumber(idOrPersistentIdOfDataset)) { + idInPath = ":persistentId"; + optionalQueryParam = "?persistentId=" + idOrPersistentIdOfDataset; + } + RequestSpecification requestSpecification = given(); + if (apiToken != null) { + requestSpecification = given() + .header(UtilIT.API_TOKEN_HTTP_HEADER, apiToken); + } + return requestSpecification.get("/api/datasets/" + idInPath + "/makeDataCount/" + metric + optionalQueryParam); + } + }