Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

5867 api for hash verification regeneration #6228

Merged
merged 9 commits into from
Oct 2, 2019
13 changes: 13 additions & 0 deletions doc/sphinx-guides/source/api/native-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1598,6 +1598,19 @@ Dataset Integrity
Recalculate the UNF value of a dataset version, if it's missing, by supplying the dataset version database id::

POST http://$SERVER/api/admin/datasets/integrity/{datasetVersionId}/fixmissingunf

Datafile Integrity
~~~~~~~~~~~~~~~~~~

Recalculate the checksum value of a datafile by supplying the file's database id and an algorithm::

POST http://$SERVER/api/admin/computeDataFileHashValue/{fileId}/algorithm/{alg}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be a full curl command and can the {alg} options be enumerated?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

updated per suggestion. thanks!


Validate an existing checksum value against one newly calculated from the saved file::

POST http://$SERVER/api/admin/validateDataFileHashValue/{fileId}

These are only available to superusers.

.. _dataset-validation-api:

Expand Down
129 changes: 128 additions & 1 deletion src/main/java/edu/harvard/iq/dataverse/api/Admin.java
Original file line number Diff line number Diff line change
Expand Up @@ -1248,7 +1248,7 @@ public Response reregisterHdlToPID(@PathParam("id") String id) {
List<String> args = Arrays.asList(id,e.getMessage());
return badRequest(BundleUtil.getStringFromBundle("admin.api.migrateHDL.failureWithException", args));
}
System.out.print("before the return ok...");

return ok(BundleUtil.getStringFromBundle("admin.api.migrateHDL.success"));
}

Expand Down Expand Up @@ -1431,6 +1431,133 @@ public Response updateHashValues(@PathParam("alg") String alg, @QueryParam("num"

return ok("Datafile rehashing complete." + successes + " of " + rehashed + " files successfully rehashed.");
}

/**
 * Recalculates the checksum of a single datafile using the requested
 * algorithm and sets the new type and value on the {@code DataFile}.
 *
 * Responses:
 * - 401 when no valid API key is supplied or the caller is not a superuser;
 * - 400 when the file cannot be found, is harvested, the algorithm name is
 *   not recognised, or the checksum calculation throws;
 * - 404 when no input stream can be obtained for the stored file;
 * - 200 with the new hash value on success.
 *
 * NOTE(review): this method only calls the setters on the entity; no
 * explicit save/merge is visible here - confirm the change is persisted.
 *
 * @param fileId identifier of the datafile, resolved by findDataFileOrDie
 * @param alg    checksum algorithm name, resolved by DataFile.ChecksumType.fromString
 * @return the API response described above
 */
@POST
@Path("/computeDataFileHashValue/{fileId}/algorithm/{alg}")
public Response computeDataFileHashValue(@PathParam("fileId") String fileId, @PathParam("alg") String alg) {

    try {
        User u = findAuthenticatedUserOrDie();
        if (!u.isSuperuser()) {
            return error(Status.UNAUTHORIZED, "must be superuser");
        }
    } catch (WrappedResponse e1) {
        return error(Status.UNAUTHORIZED, "api key required");
    }

    DataFile fileToUpdate = null;
    try {
        fileToUpdate = findDataFileOrDie(fileId);
    } catch (WrappedResponse r) {
        logger.info("Could not find file with the id: " + fileId);
        return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId);
    }

    if (fileToUpdate.isHarvested()) {
        return error(Status.BAD_REQUEST, "File with the id: " + fileId + " is harvested.");
    }

    DataFile.ChecksumType cType = null;
    try {
        cType = DataFile.ChecksumType.fromString(alg);
    } catch (IllegalArgumentException iae) {
        return error(Status.BAD_REQUEST, "Unknown algorithm: " + alg);
    }

    String newChecksum;

    InputStream in = null;
    try {

        StorageIO<DataFile> storage = fileToUpdate.getStorageIO();
        storage.open(DataAccessOption.READ_ACCESS);
        // Tabular files are hashed from the auxiliary file stored under
        // SAVED_ORIGINAL_FILENAME_EXTENSION rather than from the main stream.
        if (!fileToUpdate.isTabularData()) {
            in = storage.getInputStream();
        } else {
            in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
        }
        if (in == null) {
            return error(Status.NOT_FOUND, "Could not retrieve file with the id: " + fileId);
        }
        newChecksum = FileUtil.calculateChecksum(in, cType);
        // Only touch the entity once a checksum has actually been computed.
        fileToUpdate.setChecksumType(cType);
        fileToUpdate.setChecksumValue(newChecksum);

    } catch (Exception e) {
        logger.warning("Unexpected Exception: " + e.getMessage());
        // Report the failure instead of falling through to the success
        // response with an empty hash; status mirrors validateDataFileHashValue.
        return error(Status.BAD_REQUEST, "Checksum Calculation Unexpected Exception: " + e.getMessage());
    } finally {
        IOUtils.closeQuietly(in);
    }

    return ok("Datafile rehashing complete. " + fileId + " successfully rehashed. New hash value is: " + newChecksum);
}

/**
 * Recomputes the checksum of a stored datafile with the algorithm already
 * recorded on the file and compares it with the saved checksum value.
 *
 * Responses:
 * - 401 when no valid API key is supplied or the caller is not a superuser;
 * - 400 when the file cannot be found, is harvested, has no usable checksum
 *   type, or the checksum calculation throws;
 * - 404 when no input stream can be obtained for the stored file;
 * - 417 (EXPECTATION_FAILED) when the saved and recalculated values differ;
 * - 200 when they match.
 *
 * @param fileId identifier of the datafile, resolved by findDataFileOrDie
 * @return the API response described above
 */
@POST
@Path("/validateDataFileHashValue/{fileId}")
public Response validateDataFileHashValue(@PathParam("fileId") String fileId) {

    try {
        User u = findAuthenticatedUserOrDie();
        if (!u.isSuperuser()) {
            return error(Status.UNAUTHORIZED, "must be superuser");
        }
    } catch (WrappedResponse e1) {
        return error(Status.UNAUTHORIZED, "api key required");
    }

    DataFile fileToValidate = null;
    try {
        fileToValidate = findDataFileOrDie(fileId);
    } catch (WrappedResponse r) {
        logger.info("Could not find file with the id: " + fileId);
        return error(Status.BAD_REQUEST, "Could not find file with the id: " + fileId);
    }

    if (fileToValidate.isHarvested()) {
        return error(Status.BAD_REQUEST, "File with the id: " + fileId + " is harvested.");
    }

    // A file without a recorded checksum type cannot be validated; guard
    // explicitly so this is a client-visible error rather than an NPE.
    if (fileToValidate.getChecksumType() == null) {
        return error(Status.BAD_REQUEST, "File with the id: " + fileId + " has no checksum type.");
    }

    DataFile.ChecksumType cType = null;
    try {
        String checkSumTypeFromDataFile = fileToValidate.getChecksumType().toString();
        cType = DataFile.ChecksumType.fromString(checkSumTypeFromDataFile);
    } catch (IllegalArgumentException iae) {
        return error(Status.BAD_REQUEST, "Unknown algorithm");
    }

    String currentChecksum = fileToValidate.getChecksumValue();
    String calculatedChecksum = "";
    InputStream in = null;
    try {

        StorageIO<DataFile> storage = fileToValidate.getStorageIO();
        storage.open(DataAccessOption.READ_ACCESS);
        // Tabular files are hashed from the auxiliary file stored under
        // SAVED_ORIGINAL_FILENAME_EXTENSION rather than from the main stream.
        if (!fileToValidate.isTabularData()) {
            in = storage.getInputStream();
        } else {
            in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
        }
        if (in == null) {
            return error(Status.NOT_FOUND, "Could not retrieve file with the id: " + fileId);
        }
        calculatedChecksum = FileUtil.calculateChecksum(in, cType);

    } catch (Exception e) {
        logger.warning("Unexpected Exception: " + e.getMessage());
        return error(Status.BAD_REQUEST, "Checksum Validation Unexpected Exception: " + e.getMessage());
    } finally {
        IOUtils.closeQuietly(in);

    }

    // Null-safe: a missing saved checksum is a validation failure, not an NPE.
    if (currentChecksum != null && currentChecksum.equals(calculatedChecksum)) {
        return ok("Datafile validation complete for " + fileId + ". The hash value is: " + calculatedChecksum);
    } else {
        return error(Status.EXPECTATION_FAILED, "Datafile validation failed for " + fileId + ". The saved hash value is: " + currentChecksum + " while the recalculated hash value for the stored file is: " + calculatedChecksum);
    }

}

@GET
@Path("/submitDataVersionToArchive/{id}/{version}")
Expand Down
107 changes: 100 additions & 7 deletions src/test/java/edu/harvard/iq/dataverse/api/AdminIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,26 @@
import com.jayway.restassured.RestAssured;
import com.jayway.restassured.path.json.JsonPath;
import com.jayway.restassured.response.Response;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinAuthenticationProvider;
import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.GitHubOAuth2AP;
import edu.harvard.iq.dataverse.authorization.providers.oauth2.impl.OrcidOAuth2AP;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import static java.lang.Thread.sleep;
import java.util.ArrayList;
import java.util.List;
import static javax.ws.rs.core.Response.Status.FORBIDDEN;
import static javax.ws.rs.core.Response.Status.OK;
import static javax.ws.rs.core.Response.Status.BAD_REQUEST;
import org.junit.Test;
import org.junit.BeforeClass;
import java.util.UUID;
import javax.validation.constraints.AssertTrue;

import static javax.ws.rs.core.Response.Status.CREATED;
import static javax.ws.rs.core.Response.Status.NOT_FOUND;
import static javax.ws.rs.core.Response.Status.OK;
import static javax.ws.rs.core.Response.Status.UNAUTHORIZED;
import static junit.framework.Assert.assertEquals;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.notNullValue;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
import org.junit.Ignore;

public class AdminIT {
Expand Down Expand Up @@ -511,7 +508,103 @@ public void testFindPermissonsOn() {
Response deleteSuperuser = UtilIT.deleteUser(username);
assertEquals(200, deleteSuperuser.getStatusCode());
}


/**
 * Exercises the admin computeDataFileHashValue and validateDataFileHashValue
 * endpoints against a freshly uploaded tabular file: error paths (bad file
 * id, unknown algorithm, non-superuser caller) followed by the success path
 * for each endpoint, and finally a publish of the dataverse and dataset.
 */
@Test
public void testRecalculateDataFileHash() {

    Response createUser = UtilIT.createRandomUser();
    createUser.prettyPrint();

    String apiToken = UtilIT.getApiTokenFromResponse(createUser);

    Response createDataverse = UtilIT.createRandomDataverse(apiToken);
    createDataverse.prettyPrint();
    createDataverse.then().assertThat()
            .statusCode(CREATED.getStatusCode());

    String dataverseAlias = JsonPath.from(createDataverse.body().asString()).getString("data.alias");

    Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken);
    createDatasetResponse.prettyPrint();
    Integer datasetId = JsonPath.from(createDatasetResponse.body().asString()).getInt("data.id");
    Response datasetAsJson = UtilIT.nativeGet(datasetId, apiToken);
    datasetAsJson.then().assertThat()
            .statusCode(OK.getStatusCode());

    String pathToFile = "scripts/search/data/tabular/50by1000.dta";
    Response addResponse = UtilIT.uploadFileViaNative(datasetId.toString(), pathToFile, apiToken);
    // Fail here, with a clear status mismatch, rather than on the JsonPath lookup below.
    addResponse.then().assertThat()
            .statusCode(OK.getStatusCode());

    Long origFileId = JsonPath.from(addResponse.body().asString()).getLong("data.files[0].dataFile.id");

    Response createSuperuser = UtilIT.createRandomUser();
    String superuserApiToken = UtilIT.getApiTokenFromResponse(createSuperuser);
    String superuserUsername = UtilIT.getUsernameFromResponse(createSuperuser);
    UtilIT.makeSuperUser(superuserUsername);

    // Wait for the ingest lock to clear before hashing the file.
    assertTrue("Failed test if Ingest Lock exceeds max duration " + origFileId, UtilIT.sleepForLock(datasetId.longValue(), "Ingest", superuserApiToken, UtilIT.MAXIMUM_INGEST_LOCK_DURATION));

    //Bad file id
    Response computeDataFileHashResponse = UtilIT.computeDataFileHashValue("BadFileId", DataFile.ChecksumType.MD5.toString(), superuserApiToken);

    computeDataFileHashResponse.then().assertThat()
            .body("status", equalTo("ERROR"))
            .body("message", equalTo("Could not find file with the id: BadFileId"))
            .statusCode(BAD_REQUEST.getStatusCode());

    //Bad Algorithm
    computeDataFileHashResponse = UtilIT.computeDataFileHashValue(origFileId.toString(), "Blank", superuserApiToken);

    computeDataFileHashResponse.then().assertThat()
            .body("status", equalTo("ERROR"))
            .body("message", equalTo("Unknown algorithm: Blank"))
            .statusCode(BAD_REQUEST.getStatusCode());

    //Not a Super user
    computeDataFileHashResponse = UtilIT.computeDataFileHashValue(origFileId.toString(), DataFile.ChecksumType.MD5.toString(), apiToken);

    computeDataFileHashResponse.then().assertThat()
            .body("status", equalTo("ERROR"))
            .body("message", equalTo("must be superuser"))
            .statusCode(UNAUTHORIZED.getStatusCode());

    //Successful rehash as superuser
    computeDataFileHashResponse = UtilIT.computeDataFileHashValue(origFileId.toString(), DataFile.ChecksumType.MD5.toString(), superuserApiToken);
    computeDataFileHashResponse.prettyPrint();

    computeDataFileHashResponse.then().assertThat()
            .body("data.message", equalTo("Datafile rehashing complete. " + origFileId.toString() + " successfully rehashed. New hash value is: 003b8c67fbdfa6df31c0e43e65b93f0e"))
            .statusCode(OK.getStatusCode());

    //Not a Super user
    Response validationResponse = UtilIT.validateDataFileHashValue(origFileId.toString(), apiToken);

    validationResponse.then().assertThat()
            .body("status", equalTo("ERROR"))
            .body("message", equalTo("must be superuser"))
            .statusCode(UNAUTHORIZED.getStatusCode());

    //Bad File Id
    validationResponse = UtilIT.validateDataFileHashValue("BadFileId", superuserApiToken);

    validationResponse.then().assertThat()
            .body("status", equalTo("ERROR"))
            .body("message", equalTo("Could not find file with the id: BadFileId"))
            .statusCode(BAD_REQUEST.getStatusCode());

    //Successful validation as superuser
    validationResponse = UtilIT.validateDataFileHashValue(origFileId.toString(), superuserApiToken);
    validationResponse.prettyPrint();
    validationResponse.then().assertThat()
            .body("data.message", equalTo("Datafile validation complete for " + origFileId.toString() + ". The hash value is: 003b8c67fbdfa6df31c0e43e65b93f0e"))
            .statusCode(OK.getStatusCode());

    Response pubdv = UtilIT.publishDataverseViaNativeApi(dataverseAlias, apiToken);
    pubdv.then().assertThat()
            .statusCode(OK.getStatusCode());
    Response publishDSViaNative = UtilIT.publishDatasetViaNativeApi(datasetId, "major", apiToken);
    publishDSViaNative.then().assertThat()
            .statusCode(OK.getStatusCode());

}

@Test
@Ignore
Expand Down
20 changes: 17 additions & 3 deletions src/test/java/edu/harvard/iq/dataverse/api/UtilIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
import org.hamcrest.Matcher;
import static com.jayway.restassured.path.xml.XmlPath.from;
import static com.jayway.restassured.RestAssured.given;
import static edu.harvard.iq.dataverse.api.AccessIT.apiToken;
import static edu.harvard.iq.dataverse.api.AccessIT.datasetId;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
Expand Down Expand Up @@ -151,14 +149,30 @@ public static Response createRandomAuthenticatedUser(String authenticationProvid
}

/**
 * POSTs to the admin reregisterHDLToPID endpoint for the given dataset
 * identifier, passing the API token as the "key" query parameter.
 *
 * @param datasetIdentifier identifier placed in the URL path and request body
 * @param apiToken          API token of the calling user
 * @return the raw REST Assured response
 */
public static Response migrateDatasetIdentifierFromHDLToPId(String datasetIdentifier, String apiToken) {
    Response response = given()
            .body(datasetIdentifier)
            .contentType(ContentType.JSON)
            .post("/api/admin/" + datasetIdentifier + "/reregisterHDLToPID?key=" + apiToken);
    return response;
}


/**
 * POSTs to the admin computeDataFileHashValue endpoint for one file and
 * algorithm, passing the API token as the "key" query parameter.
 *
 * @param fileId   file identifier placed in the URL path and request body
 * @param alg      checksum algorithm name placed in the URL path
 * @param apiToken API token of the calling user
 * @return the raw REST Assured response
 */
public static Response computeDataFileHashValue(String fileId, String alg, String apiToken) {
    String endpoint = "/api/admin/computeDataFileHashValue/" + fileId + "/algorithm/" + alg + "?key=" + apiToken;
    return given()
            .body(fileId)
            .contentType(ContentType.JSON)
            .post(endpoint);
}

/**
 * POSTs to the admin validateDataFileHashValue endpoint for one file,
 * passing the API token as the "key" query parameter.
 *
 * @param fileId   file identifier placed in the URL path and request body
 * @param apiToken API token of the calling user
 * @return the raw REST Assured response
 */
public static Response validateDataFileHashValue(String fileId, String apiToken) {
    String endpoint = "/api/admin/validateDataFileHashValue/" + fileId + "?key=" + apiToken;
    return given()
            .body(fileId)
            .contentType(ContentType.JSON)
            .post(endpoint);
}

private static String getAuthenticatedUserAsJsonString(String persistentUserId, String firstName, String lastName, String authenticationProviderId, String identifier) {
JsonObjectBuilder builder = Json.createObjectBuilder();
builder.add("authenticationProviderId", authenticationProviderId);
Expand Down