SHA-256 and SHA-512 support #5035
Changes from all commits
@@ -27,6 +27,8 @@
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailData;
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailException;
import edu.harvard.iq.dataverse.confirmemail.ConfirmEmailInitResponse;
import edu.harvard.iq.dataverse.dataaccess.DataAccessOption;
import edu.harvard.iq.dataverse.dataaccess.StorageIO;
import edu.harvard.iq.dataverse.engine.command.impl.PublishDataverseCommand;
import edu.harvard.iq.dataverse.settings.Setting;
import javax.json.Json;
@@ -41,6 +43,8 @@
import javax.ws.rs.core.Response;
import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*;

import java.io.InputStream;
import java.io.StringReader;
import java.util.Map;
import java.util.logging.Level;
@@ -53,6 +57,9 @@
import javax.validation.ConstraintViolationException;
import javax.ws.rs.Produces;
import javax.ws.rs.core.Response.Status;

import org.apache.commons.io.IOUtils;

import java.util.List;
import edu.harvard.iq.dataverse.authorization.AuthTestDataServiceBean;
import edu.harvard.iq.dataverse.authorization.RoleAssignee;
@@ -65,6 +72,7 @@
import edu.harvard.iq.dataverse.ingest.IngestServiceBean;
import edu.harvard.iq.dataverse.userdata.UserListMaker;
import edu.harvard.iq.dataverse.userdata.UserListResult;
import edu.harvard.iq.dataverse.util.FileUtil;
import java.util.Date;
import java.util.ResourceBundle;
import javax.inject.Inject;
@@ -1101,6 +1109,115 @@ public Response registerDataFileAll() {
                + " unregistered, published files registered successfully.");
    }

    @GET
    @Path("/updateHashValues/{alg}")
    public Response updateHashValues(@PathParam("alg") String alg, @QueryParam("num") int num) {
        Integer count = fileService.findAll().size();
        Integer successes = 0;
        Integer alreadyUpdated = 0;
        Integer rehashed = 0;
        Integer harvested = 0;

        if (num <= 0)
            num = Integer.MAX_VALUE;
        DataFile.ChecksumType cType = null;
        try {
            cType = DataFile.ChecksumType.fromString(alg);
        } catch (IllegalArgumentException iae) {
            return error(Status.BAD_REQUEST, "Unknown algorithm");
        }
        logger.info("Starting to rehash: analyzing " + count + " files. " + new Date());
        logger.info("Hashes not created with " + alg + " will be verified, and, if valid, replaced with a hash using "
                + alg);
        try {
            User u = findAuthenticatedUserOrDie();
            if (!u.isSuperuser())
                return error(Status.UNAUTHORIZED, "must be superuser");
        } catch (WrappedResponse e1) {
            return error(Status.UNAUTHORIZED, "api key required");
        }

        for (DataFile df : fileService.findAll()) {
            if (rehashed.intValue() >= num)
                break;
            InputStream in = null;
            InputStream in2 = null;
            try {
                if (df.isHarvested()) {
                    harvested++;
                } else {
                    if (!df.getChecksumType().equals(cType)) {

                        rehashed++;
                        logger.fine(rehashed + ": Datafile: " + df.getFileMetadata().getLabel() + ", "
                                + df.getIdentifier());
                        // verify hash and calc new one to replace it
                        StorageIO<DataFile> storage = df.getStorageIO();
                        storage.open(DataAccessOption.READ_ACCESS);
                        if (!df.isTabularData()) {
                            in = storage.getInputStream();
                        } else {
                            // if this is a tabular file, read the preserved original "auxiliary file"
                            // instead:
                            in = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
                        }
                        if (in == null)
                            logger.warning("Cannot retrieve file.");
                        String currentChecksum = FileUtil.CalculateChecksum(in, df.getChecksumType());
                        if (currentChecksum.equals(df.getChecksumValue())) {
                            logger.fine("Current checksum for datafile: " + df.getFileMetadata().getLabel() + ", "
                                    + df.getIdentifier() + " is valid");
                            storage.open(DataAccessOption.READ_ACCESS);
                            if (!df.isTabularData()) {
                                in2 = storage.getInputStream();
                            } else {
                                // if this is a tabular file, read the preserved original "auxiliary file"
                                // instead:
                                in2 = storage.getAuxFileAsInputStream(FileUtil.SAVED_ORIGINAL_FILENAME_EXTENSION);
                            }
                            if (in2 == null)
                                logger.warning("Cannot retrieve file to calculate new checksum.");
                            String newChecksum = FileUtil.CalculateChecksum(in2, cType);

                            df.setChecksumType(cType);
                            df.setChecksumValue(newChecksum);
                            successes++;
                            if (successes % 100 == 0) {
                                logger.info(
                                        successes + " of " + count + " files rehashed successfully. " + new Date());
                            }
                        } else {
                            logger.warning("Problem: Current checksum for datafile: " + df.getFileMetadata().getLabel()
                                    + ", " + df.getIdentifier() + " is INVALID");
                        }
                    } else {
                        alreadyUpdated++;
                        if (alreadyUpdated % 100 == 0) {
                            logger.info(alreadyUpdated + " of " + count
                                    + " files already have hashes with the new algorithm. " + new Date());
                        }
                    }
                }
            } catch (Exception e) {
                logger.warning("Unexpected Exception: " + e.getMessage());

            } finally {
                IOUtils.closeQuietly(in);
                IOUtils.closeQuietly(in2);
            }
        }
        logger.info("Final Results:");
        logger.info(harvested + " harvested files skipped.");
        logger.info(
                alreadyUpdated + " of " + count + " files already had hashes with the new algorithm. " + new Date());
        logger.info(rehashed + " of " + count + " files to rehash. " + new Date());
        logger.info(
                successes + " of " + rehashed + " files successfully rehashed with the new algorithm. " + new Date());
Review comment: This is a lot of logging at the "info" level. Should it be reduced to "fine"?

Review comment: Actually, I was going to suggest that we keep the more verbose logging. This is a very special operation; nobody's going to be changing the checksum algorithms on their files every day. And it's something important enough, so could be a good thing to have some "paper trail" of this switch having been performed.

Review comment: Up to you guys. As is now, the summary is info level and there are warnings for any file that doesn't match (or can't be read, etc.). The original hash is deleted in this operation though, so if keeping the original one documented is trail-worthy, a per-file info message could be added with the original algorithm and hash and file id (storageidentifier?).
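
A minimal sketch of the per-file "paper trail" message suggested above, assuming it would go right before the checksum fields are overwritten (the variable names follow the surrounding diff; the exact wording and the use of getStorageIdentifier() are illustrative, not part of this PR):

    // Hypothetical addition before df.setChecksumType(cType): record the original
    // algorithm and value so the old hash is preserved in the server log.
    logger.info("Datafile " + df.getIdentifier() + " (storage: " + df.getStorageIdentifier()
            + "): replacing " + df.getChecksumType() + " checksum " + df.getChecksumValue()
            + " with a " + cType + " checksum.");
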
        return ok("Datafile rehashing complete. " + successes + " of " + rehashed + " files successfully rehashed.");
    }
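
Usage note (an assumption, based on the other endpoints in this class being part of the admin API): once deployed, a superuser could trigger the conversion with a GET request such as /api/admin/updateHashValues/SHA-256?num=100, supplying a superuser API token (for example in the X-Dataverse-key header). The num query parameter caps how many files are rehashed in one pass; omitting it or passing a value of 0 or less rehashes all files.
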
    @DELETE
    @Path("/clearMetricsCache")
    public Response clearMetricsCache() {
Review comment: I'm glad to see this is superuser-only for now.
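
For context on the per-file cost: each conversion streams the file twice, once to verify the existing hash and once to compute the new one. The sketch below is a generic illustration of computing a hex digest over an InputStream with java.security.MessageDigest; it is not the actual FileUtil.CalculateChecksum implementation, just the standard JDK approach such a utility typically wraps ("SHA-256" and "SHA-512" are the standard JDK algorithm names).

    import java.io.InputStream;
    import java.security.MessageDigest;

    // Illustrative only: a streaming hex digest, roughly the work done per file
    // (and per pass) during the rehash operation.
    public class ChecksumSketch {
        public static String hexDigest(InputStream in, String algorithm) throws Exception {
            MessageDigest md = MessageDigest.getInstance(algorithm); // e.g. "SHA-256" or "SHA-512"
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                md.update(buffer, 0, read);
            }
            StringBuilder hex = new StringBuilder();
            for (byte b : md.digest()) {
                hex.append(String.format("%02x", b));
            }
            return hex.toString();
        }
    }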