diff --git a/doc/sphinx-guides/source/api/native-api.rst b/doc/sphinx-guides/source/api/native-api.rst index 3dbf4505de2..774ff0aa88c 100644 --- a/doc/sphinx-guides/source/api/native-api.rst +++ b/doc/sphinx-guides/source/api/native-api.rst @@ -1598,6 +1598,19 @@ Dataset Integrity Recalculate the UNF value of a dataset version, if it's missing, by supplying the dataset version database id:: POST http://$SERVER/api/admin/datasets/integrity/{datasetVersionId}/fixmissingunf + +Datafile Integrity +~~~~~~~~~~~~~~~~~~ + +Recalculate the checksum value of a datafile by supplying the file's database id and an algorithm (valid values for $ALGORITHM include MD5, SHA-1, SHA-256, and SHA-512):: + + curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/admin/computeDataFileHashValue/{fileId}/algorithm/$ALGORITHM + +Validate an existing checksum value against one newly calculated from the saved file:: + + curl -H X-Dataverse-key:$API_TOKEN -X POST $SERVER_URL/api/admin/validateDataFileHashValue/{fileId} + +These are only available to superusers. .. _dataset-validation-api: diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java index f8006ea340e..931eb29eb61 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Admin.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Admin.java @@ -1248,7 +1248,7 @@ public Response reregisterHdlToPID(@PathParam("id") String id) { List args = Arrays.asList(id,e.getMessage()); return badRequest(BundleUtil.getStringFromBundle("admin.api.migrateHDL.failureWithException", args)); } - System.out.print("before the return ok..."); + return ok(BundleUtil.getStringFromBundle("admin.api.migrateHDL.success")); } @@ -1431,6 +1431,133 @@ public Response updateHashValues(@PathParam("alg") String alg, @QueryParam("num" return ok("Datafile rehashing complete." 
+ successes + " of " + rehashed + " files successfully rehashed."); }
+
+    /**
+     * Recalculates the checksum of a single datafile with the requested algorithm
+     * and sets the resulting checksum type and value on that datafile.
+     *
+     * Only superusers may call this endpoint and harvested files are rejected.
+     * For tabular files the checksum is computed over the auxiliary file stored
+     * under {@code FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION} (presumably the
+     * saved original upload) instead of the main stored file.
+     *
+     * @param fileId identifier of the datafile, resolved by {@code findDataFileOrDie}
+     * @param alg    checksum algorithm name, parsed by
+     *               {@code DataFile.ChecksumType.fromString}
+     * @return OK with the new hash value on success; UNAUTHORIZED when the caller
+     *         is not an authenticated superuser; BAD_REQUEST when the file cannot
+     *         be found, is harvested, the algorithm is unknown, or reading or
+     *         hashing the file fails; NOT_FOUND when no input stream is available
+     */
+    @POST
+    @Path("/computeDataFileHashValue/{fileId}/algorithm/{alg}")
+    public Response computeDataFileHashValue(@PathParam("fileId") String fileId, @PathParam("alg") String alg) {
+
+        try {
+            User u = findAuthenticatedUserOrDie();
+            if (!u.isSuperuser()) {
+                return error(Status.UNAUTHORIZED, "must be superuser");
+            }
+        } catch (WrappedResponse e1) {
+            return error(Status.UNAUTHORIZED, "api key required");
+        }
+
+        DataFile fileToUpdate = null;
+        try {
+            fileToUpdate = findDataFileOrDie(fileId);
+        } catch (WrappedResponse r) {
+            logger.info("Could not find file with the id: " + fileId);
+            return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId);
+        }
+
+        if (fileToUpdate.isHarvested()) {
+            return error(Status.BAD_REQUEST, "File with the id: " + fileId + " is harvested.");
+        }
+
+        DataFile.ChecksumType cType = null;
+        try {
+            cType = DataFile.ChecksumType.fromString(alg);
+        } catch (IllegalArgumentException iae) {
+            return error(Status.BAD_REQUEST, "Unknown algorithm: " + alg);
+        }
+
+        String newChecksum = "";
+
+        InputStream in = null;
+        try {
+
+            StorageIO storage = fileToUpdate.getStorageIO();
+            storage.open(DataAccessOption.READ_ACCESS);
+            if (!fileToUpdate.isTabularData()) {
+                in = storage.getInputStream();
+            } else {
+                in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
+            }
+            if (in == null) {
+                return error(Status.NOT_FOUND, "Could not retrieve file with the id: " + fileId);
+            }
+            newChecksum = FileUtil.calculateChecksum(in, cType);
+            // The datafile is only touched once the checksum was computed successfully.
+            fileToUpdate.setChecksumType(cType);
+            fileToUpdate.setChecksumValue(newChecksum);
+
+        } catch (Exception e) {
+            logger.warning("Unexpected Exception: " + e.getMessage());
+            // Report the failure instead of falling through to the success response
+            // with an empty hash; mirrors the handling in validateDataFileHashValue.
+            return error(Status.BAD_REQUEST, "Checksum Calculation Unexpected Exception: " + e.getMessage());
+        } finally {
+            IOUtils.closeQuietly(in);
+        }
+
+        return ok("Datafile rehashing complete. " + fileId + " successfully rehashed. New hash value is: " + newChecksum);
+    }
+
+    /**
+     * Recomputes the checksum of a single datafile with the checksum type already
+     * stored on it and compares the result with the stored checksum value.
+     *
+     * Only superusers may call this endpoint and harvested files are rejected.
+     * For tabular files the checksum is computed over the auxiliary file stored
+     * under {@code FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION} (presumably the
+     * saved original upload) instead of the main stored file.
+     *
+     * @param fileId identifier of the datafile, resolved by {@code findDataFileOrDie}
+     * @return OK when the stored and recalculated values are equal;
+     *         EXPECTATION_FAILED (reporting both values) when they differ;
+     *         UNAUTHORIZED when the caller is not an authenticated superuser;
+     *         BAD_REQUEST when the file cannot be found, is harvested, its
+     *         checksum type cannot be parsed, or reading or hashing the file
+     *         fails; NOT_FOUND when no input stream is available
+     */
+    @POST
+    @Path("/validateDataFileHashValue/{fileId}")
+    public Response validateDataFileHashValue(@PathParam("fileId") String fileId) {
+
+        try {
+            User u = findAuthenticatedUserOrDie();
+            if (!u.isSuperuser()) {
+                return error(Status.UNAUTHORIZED, "must be superuser");
+            }
+        } catch (WrappedResponse e1) {
+            return error(Status.UNAUTHORIZED, "api key required");
+        }
+
+        DataFile fileToValidate = null;
+        try {
+            fileToValidate = findDataFileOrDie(fileId);
+        } catch (WrappedResponse r) {
+            logger.info("Could not find file with the id: " + fileId);
+            return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId);
+        }
+
+        if (fileToValidate.isHarvested()) {
+            return error(Status.BAD_REQUEST, "File with the id: " + fileId + " is harvested.");
+        }
+
+        DataFile.ChecksumType cType = null;
+        try {
+            String checkSumTypeFromDataFile = fileToValidate.getChecksumType().toString();
+            cType = DataFile.ChecksumType.fromString(checkSumTypeFromDataFile);
+        } catch (IllegalArgumentException iae) {
+            return error(Status.BAD_REQUEST, "Unknown algorithm");
+        }
+
+        String currentChecksum = fileToValidate.getChecksumValue();
+        String calculatedChecksum = "";
+        InputStream in = null;
+        try {
+
+            StorageIO storage = fileToValidate.getStorageIO();
+            storage.open(DataAccessOption.READ_ACCESS);
+            if (!fileToValidate.isTabularData()) {
+                in = storage.getInputStream();
+            } else {
+                in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
+            }
+            if (in == null) {
+                return error(Status.NOT_FOUND, "Could not retrieve file with the id: " + fileId);
+            }
+            calculatedChecksum = FileUtil.calculateChecksum(in, cType);
+
+        } catch (Exception e) {
+            logger.warning("Unexpected Exception: " + e.getMessage());
+            return error(Status.BAD_REQUEST, "Checksum Validation Unexpected Exception: " + e.getMessage());
+        } finally {
+            IOUtils.closeQuietly(in);
+
+        }
+
+        if (currentChecksum.equals(calculatedChecksum)) {
+            return ok("Datafile validation complete for " + fileId + ". The hash value is: " + calculatedChecksum);
+        } else {
+            return error(Status.EXPECTATION_FAILED, "Datafile validation failed for " + fileId + ". The saved hash value is: " + currentChecksum + " while the recalculated hash value for the stored file is: " + calculatedChecksum);
+        }
+
+    }
 @GET @Path("/submitDataVersionToArchive/{id}/{version}") diff --git a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java index 5bb08ae33f7..46f0410789c 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java @@ -3,29 +3,26 @@ import com.jayway.restassured.RestAssured; import com.jayway.restassured.path.json.JsonPath; import com.jayway.restassured.response.Response; +import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider; import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.GitHubOAuth2AP; import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.OrcidOAuth2AP; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; -import static java.lang.Thread.sleep; import java.util.ArrayList; import java.util.List; import static javax.ws.rs.core.Response.Status.FORBIDDEN; -import static javax.ws.rs.core.Response.Status.OK; import static javax.ws.rs.core.Response.Status.BAD_REQUEST; import org.junit.Test; import org.junit.BeforeClass; import java.util.UUID; -import javax.validation.constraints.AssertTrue; + import static javax.ws.rs.core.Response.Status.CREATED; -import static javax.ws.rs.core.Response.Status.NOT_FOUND; import static javax.ws.rs.core.Response.Status.OK; import static javax.ws.rs.core.Response.Status.UNAUTHORIZED; import static junit.framework.Assert.assertEquals; import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.notNullValue; -import static 
org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; import org.junit.Ignore; public class AdminIT { @@ -511,7 +508,103 @@ public void testFindPermissonsOn() { Response deleteSuperuser = UtilIT.deleteUser(username); assertEquals(200, deleteSuperuser.getStatusCode()); } -
+
+    /**
+     * Exercises the computeDataFileHashValue and validateDataFileHashValue admin
+     * endpoints against a freshly uploaded tabular file: rejected requests first
+     * (bad file id, unknown algorithm, non-superuser caller), then the successful
+     * rehash and validation, and finally publication of the dataverse and dataset.
+     */
+    @Test
+    public void testRecalculateDataFileHash() {
+
+        // MD5 the endpoints are expected to report for the uploaded test file.
+        String expectedMd5 = "003b8c67fbdfa6df31c0e43e65b93f0e";
+
+        Response createUser = UtilIT.createRandomUser();
+        createUser.prettyPrint();
+        String apiToken = UtilIT.getApiTokenFromResponse(createUser);
+
+        Response createDataverse = UtilIT.createRandomDataverse(apiToken);
+        createDataverse.prettyPrint();
+        createDataverse.then().assertThat()
+                .statusCode(CREATED.getStatusCode());
+
+        String dataverseAlias = JsonPath.from(createDataverse.body().asString()).getString("data.alias");
+
+        Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
+        createDatasetResponse.prettyPrint();
+        Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
+        Response datasetAsJson = UtilIT.nativeGet(datasetId, apiToken);
+        datasetAsJson.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        String pathToFile = "scripts/search/data/tabular/50by1000.dta";
+        Response addResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
+        // Fail here with a clear status mismatch rather than later on a missing file id.
+        addResponse.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+        Long origFileId = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id");
+
+        Response createSuperuser = UtilIT.createRandomUser();
+        String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser);
+        String superuserUsername = UtilIT.getUsernameFromResponse(createSuperuser);
+        UtilIT.makeSuperUser(superuserUsername);
+
+        // Wait for the ingest lock to clear before touching the file.
+        assertTrue("Failed test if Ingest Lock exceeds max duration " + origFileId, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", superuserApiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));
+
+        // Bad file id
+        Response computeDataFileHashResponse = UtilIT.computeDataFileHashValue("BadFileId", DataFile.ChecksumType.MD5.toString(), superuserApiToken);
+
+        computeDataFileHashResponse.then().assertThat()
+                .body("status", equalTo("ERROR"))
+                .body("message", equalTo("Could not find file with the id: BadFileId"))
+                .statusCode(BAD_REQUEST.getStatusCode());
+
+        // Bad algorithm
+        computeDataFileHashResponse = UtilIT.computeDataFileHashValue(origFileId.toString(), "Blank", superuserApiToken);
+
+        computeDataFileHashResponse.then().assertThat()
+                .body("status", equalTo("ERROR"))
+                .body("message", equalTo("Unknown algorithm: Blank"))
+                .statusCode(BAD_REQUEST.getStatusCode());
+
+        // Not a superuser
+        computeDataFileHashResponse = UtilIT.computeDataFileHashValue(origFileId.toString(), DataFile.ChecksumType.MD5.toString(), apiToken);
+
+        computeDataFileHashResponse.then().assertThat()
+                .body("status", equalTo("ERROR"))
+                .body("message", equalTo("must be superuser"))
+                .statusCode(UNAUTHORIZED.getStatusCode());
+
+        // Successful rehash
+        computeDataFileHashResponse = UtilIT.computeDataFileHashValue(origFileId.toString(), DataFile.ChecksumType.MD5.toString(), superuserApiToken);
+        computeDataFileHashResponse.prettyPrint();
+
+        computeDataFileHashResponse.then().assertThat()
+                .body("data.message", equalTo("Datafile rehashing complete. " + origFileId.toString() + " successfully rehashed. New hash value is: " + expectedMd5))
+                .statusCode(OK.getStatusCode());
+
+        // Not a superuser
+        Response validationResponse = UtilIT.validateDataFileHashValue(origFileId.toString(), apiToken);
+
+        validationResponse.then().assertThat()
+                .body("status", equalTo("ERROR"))
+                .body("message", equalTo("must be superuser"))
+                .statusCode(UNAUTHORIZED.getStatusCode());
+
+        // Bad file id
+        validationResponse = UtilIT.validateDataFileHashValue("BadFileId", superuserApiToken);
+
+        validationResponse.then().assertThat()
+                .body("status", equalTo("ERROR"))
+                .body("message", equalTo("Could not find file with the id: BadFileId"))
+                .statusCode(BAD_REQUEST.getStatusCode());
+
+        // Successful validation
+        validationResponse = UtilIT.validateDataFileHashValue(origFileId.toString(), superuserApiToken);
+        validationResponse.prettyPrint();
+        validationResponse.then().assertThat()
+                .body("data.message", equalTo("Datafile validation complete for " + origFileId.toString() + ". The hash value is: " + expectedMd5))
+                .statusCode(OK.getStatusCode());
+
+        // Presumably a smoke check that the dataset still publishes after rehashing.
+        Response publishDataverse = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
+        publishDataverse.then().assertThat()
+                .statusCode(OK.getStatusCode());
+        Response publishDSViaNative = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
+        publishDSViaNative.then().assertThat()
+                .statusCode(OK.getStatusCode());
+
+    }
 @Test @Ignore diff --git a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java index b65cf95af24..338ae2fe3a0 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java @@ -35,8 +35,6 @@ import org.hamcrest.Matcher; import static com.jayway.restassured.path.xml.XmlPath.from; import static com.jayway.restassured.RestAssured.given; -import static edu.harvard.iq.dataverse.api.AccessIT.apiToken; -import static edu.harvard.iq.dataverse.api.AccessIT.datasetId; import static org.junit.Assert.assertEquals; import static 
org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; @@ -151,14 +149,30 @@ public static Response createRandomAuthenticatedUser(String authenticationProvid } public static Response migrateDatasetIdentifierFromHDLToPId(String datasetIdentifier, String apiToken) { - System.out.print(datasetIdentifier); Response response = given() .body(datasetIdentifier) .contentType(ContentType.JSON) .post("/api/admin/" + datasetIdentifier + "/reregisterHDLToPID?key=" + apiToken); return response; }
+
+    /**
+     * POSTs to the admin endpoint that recalculates a datafile's checksum.
+     *
+     * @param fileId   file identifier placed in the request path (and sent as the body)
+     * @param alg      checksum algorithm name placed in the request path
+     * @param apiToken API token passed as the {@code key} query parameter
+     * @return the raw response of the call
+     */
+    public static Response computeDataFileHashValue(String fileId, String alg, String apiToken) {
+        String endpoint = "/api/admin/computeDataFileHashValue/" + fileId + "/algorithm/" + alg + "?key=" + apiToken;
+        return given()
+                .body(fileId)
+                .contentType(ContentType.JSON)
+                .post(endpoint);
+    }
+
+    /**
+     * POSTs to the admin endpoint that validates a datafile's stored checksum.
+     *
+     * @param fileId   file identifier placed in the request path (and sent as the body)
+     * @param apiToken API token passed as the {@code key} query parameter
+     * @return the raw response of the call
+     */
+    public static Response validateDataFileHashValue(String fileId, String apiToken) {
+        String endpoint = "/api/admin/validateDataFileHashValue/" + fileId + "?key=" + apiToken;
+        return given()
+                .body(fileId)
+                .contentType(ContentType.JSON)
+                .post(endpoint);
+    }
+
+
 private static String getAuthenticatedUserAsJsonString(String persistentUserId, String firstName, String lastName, String authenticationProviderId, String identifier) { JsonObjectBuilder builder = Json.createObjectBuilder(); builder.add("authenticationProviderId", authenticationProviderId);