Merge pull request #4489 from IQSS/4486-redirect-to-s3
redirect to S3 instead of streaming.
kcondon authored Mar 8, 2018
2 parents 8346361 + c6c1896 commit d37efb8
Showing 4 changed files with 122 additions and 25 deletions.
8 changes: 8 additions & 0 deletions doc/sphinx-guides/source/installation/config.rst
@@ -339,6 +339,14 @@ Then, we'll need to identify which S3 bucket we're using. Replace ``your_bucket_

``./asadmin create-jvm-options "-Ddataverse.files.s3-bucket-name=your_bucket_name"``

Optionally, you can have users download files from S3 directly rather than having files pass from S3 through Glassfish to your users. To accomplish this, set ``dataverse.files.s3-download-redirect`` to ``true`` like this:

``./asadmin create-jvm-options "-Ddataverse.files.s3-download-redirect=true"``

If you enable ``dataverse.files.s3-download-redirect`` as described above, note that the S3 URLs expire after an hour by default, but you can configure the expiration time with the ``dataverse.files.s3-url-expiration-minutes`` JVM option. Here's an example of setting the expiration time to 120 minutes:

``./asadmin create-jvm-options "-Ddataverse.files.s3-url-expiration-minutes=120"``
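
At runtime both settings are plain JVM system properties, and the Java changes later in this commit read them with ``System.getProperty``. A minimal, hypothetical sketch of that lookup (the class name is made up; the defaults mirror the documented behavior of redirect off and a 60-minute URL lifetime):

public class S3DownloadOptions {

    static boolean redirectToS3() {
        // only an explicit "true" turns the redirect on
        return "true".equalsIgnoreCase(System.getProperty("dataverse.files.s3-download-redirect"));
    }

    static int urlExpirationMinutes() {
        try {
            // fall back to the one-hour default when the option is absent or malformed
            return Integer.parseInt(System.getProperty("dataverse.files.s3-url-expiration-minutes", "60"));
        } catch (NumberFormatException ex) {
            return 60;
        }
    }
}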

Lastly, go ahead and restart your Glassfish server. With Dataverse deployed and the site online, you should be able to upload datasets and data files and see the corresponding files in your S3 bucket. Within a bucket, the folder structure emulates that found in local file storage.

.. _Branding Your Installation:
28 changes: 7 additions & 21 deletions src/main/java/edu/harvard/iq/dataverse/api/Access.java
@@ -187,7 +187,7 @@ public BundleDownloadInstance datafileBundle(@PathParam("fileId") Long fileId, @
@Path("datafile/{fileId}")
@GET
@Produces({ "application/xml" })
public DownloadInstance datafile(@PathParam("fileId") Long fileId, @QueryParam("gbrecs") Boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ {
public DownloadInstance datafile(@PathParam("fileId") Long fileId, @QueryParam("gbrecs") Boolean gbrecs, @QueryParam("key") String apiToken, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
DataFile df = dataFileService.find(fileId);
GuestbookResponse gbr = null;

@@ -197,6 +197,11 @@ public DownloadInstance datafile(@PathParam("fileId") Long fileId, @QueryParam("
throw new WebApplicationException(Response.Status.NOT_FOUND);
}

if (df.isHarvested()) {
throw new WebApplicationException(Response.Status.NOT_FOUND);
// (nobody should ever be using this API on a harvested DataFile)!
}

if (apiToken == null || apiToken.equals("")) {
apiToken = headers.getHeaderString(API_KEY_HEADER);
}
@@ -445,13 +450,8 @@ public DownloadInstance tabularDatafileMetadataPreprocessed(@PathParam("fileId")
@Path("datafiles/{fileIds}")
@GET
@Produces({"application/zip"})
public /*ZippedDownloadInstance*/ Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") Boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ {
// create a Download Instance without, without a primary Download Info object:
//ZippedDownloadInstance downloadInstance = new ZippedDownloadInstance();
public Response datafiles(@PathParam("fileIds") String fileIds, @QueryParam("gbrecs") Boolean gbrecs, @QueryParam("key") String apiTokenParam, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) throws WebApplicationException /*throws NotFoundException, ServiceUnavailableException, PermissionDeniedException, AuthorizationRequiredException*/ {




long setLimit = systemConfig.getZipDownloadLimit();
if (!(setLimit > 0L)) {
setLimit = DataFileZipper.DEFAULT_ZIPFILE_LIMIT;
@@ -563,20 +563,6 @@ public void write(OutputStream os) throws IOException,
return Response.ok(stream).build();
}


/*
* Geting rid of the tempPreview API - it's always been a big, fat hack.
* the edit files page is now using the Base64 image strings in the preview
* URLs, just like the search and dataset pages.
@Path("tempPreview/{fileSystemId}")
@GET
@Produces({"image/png"})
public InputStream tempPreview(@PathParam("fileSystemId") String fileSystemId, @Context UriInfo uriInfo, @Context HttpHeaders headers, @Context HttpServletResponse response) {
}*/



@Path("fileCardImage/{fileId}")
@GET
@Produces({ "image/png" })
54 changes: 50 additions & 4 deletions src/main/java/edu/harvard/iq/dataverse/api/DownloadInstanceWriter.java
@@ -29,9 +29,12 @@
import edu.harvard.iq.dataverse.engine.command.impl.CreateGuestbookResponseCommand;
import java.io.File;
import java.io.FileInputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
import javax.ws.rs.RedirectionException;

/**
*
@@ -206,6 +209,44 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
if (storageIO == null) {
throw new WebApplicationException(Response.Status.SERVICE_UNAVAILABLE);
}
} else {
if (storageIO instanceof S3AccessIO && !(dataFile.isTabularData()) && isRedirectToS3()) {
// [attempt to] redirect:
String redirect_url_str = ((S3AccessIO)storageIO).generateTemporaryS3Url();
// better exception handling here?
logger.info("Data Access API: direct S3 url: "+redirect_url_str);
URI redirect_uri;

try {
redirect_uri = new URI(redirect_url_str);
} catch (URISyntaxException ex) {
logger.info("Data Access API: failed to create S3 redirect url ("+redirect_url_str+")");
redirect_uri = null;
}
if (redirect_uri != null) {
// definitely close the (still open) S3 input stream,
// since we are not going to use it. The S3 documentation
// emphasizes that it is very important not to leave these
// lying around un-closed, since they are going to fill
// up the S3 connection pool!
storageIO.getInputStream().close();

// increment the download count, if necessary:
if (di.getGbr() != null) {
try {
logger.fine("writing guestbook response, for an S3 download redirect.");
Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
di.getCommand().submit(cmd);
} catch (CommandException e) {
// if the guestbook response cannot be recorded, the redirect download still proceeds
}
}

// finally, issue the redirect:
Response response = Response.seeOther(redirect_uri).build();
logger.info("Issuing redirect to the file location on S3.");
throw new RedirectionException(response);
}
}
}

InputStream instream = storageIO.getInputStream();
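// --- illustration only, not part of this commit -----------------------------
// The stream-closing comment above is about AWS SDK behavior: an open S3
// object stream holds on to an HTTP connection from the client's pool, so a
// stream that will not be fully read must be closed explicitly. In standalone
// code the usual idiom is try-with-resources ("s3", the bucket name, and the
// key below are placeholders):
//
//     try (S3Object object = s3.getObject(new GetObjectRequest("your_bucket_name", "some/key"));
//          InputStream in = object.getObjectContent()) {
//         // read what is needed; closing returns the connection to the pool
//     }
// -----------------------------------------------------------------------------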
Expand Down Expand Up @@ -284,13 +325,10 @@ public void writeTo(DownloadInstance di, Class<?> clazz, Type type, Annotation[]
logger.fine("writing guestbook response.");
Command<?> cmd = new CreateGuestbookResponseCommand(di.getDataverseRequestService().getDataverseRequest(), di.getGbr(), di.getGbr().getDataFile().getOwner());
di.getCommand().submit(cmd);
} catch (CommandException e) {
//if an error occurs here then download won't happen no need for response recs...
}
} catch (CommandException e) {}
} else {
logger.fine("not writing guestbook response");
}


instream.close();
outstream.close();
@@ -376,5 +414,13 @@ private long getFileSize(DownloadInstance di, String extraHeader) {
}
return -1;
}

private boolean isRedirectToS3() {
String optionValue = System.getProperty("dataverse.files.s3-download-redirect");
if ("true".equalsIgnoreCase(optionValue)) {
return true;
}
return false;
}

}
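
For API clients, the practical effect of the redirect branch added above is that a request to /api/access/datafile/{id} may now answer with a 303 See Other pointing at a short-lived S3 URL instead of streaming the bytes through Glassfish, so clients have to follow redirects. A rough, hypothetical client sketch using the Java 11+ java.net.http client (the host name, file id, and output file are placeholders):

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.nio.file.Path;

public class RedirectDownloadExample {
    public static void main(String[] args) throws Exception {
        // follow the 303 redirect from the Access API to the temporary S3 URL
        HttpClient client = HttpClient.newBuilder()
                .followRedirects(HttpClient.Redirect.NORMAL)
                .build();
        HttpRequest request = HttpRequest.newBuilder(
                URI.create("https://dataverse.example.edu/api/access/datafile/42"))
                .GET()
                .build();
        // the body handler writes whatever the final (S3) response returns
        HttpResponse<Path> response = client.send(
                request, HttpResponse.BodyHandlers.ofFile(Path.of("datafile-42.bin")));
        System.out.println("HTTP " + response.statusCode() + " -> " + response.body());
    }
}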
57 changes: 57 additions & 0 deletions src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java
@@ -1,6 +1,7 @@
package edu.harvard.iq.dataverse.dataaccess;

import com.amazonaws.AmazonClientException;
import com.amazonaws.HttpMethod;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
@@ -15,10 +16,12 @@
import com.amazonaws.services.s3.model.DeleteObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest.KeyVersion;
import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ResponseHeaderOverrides;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import edu.harvard.iq.dataverse.DataFile;
@@ -35,6 +38,7 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.nio.channels.Channel;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
@@ -624,4 +628,57 @@ private String getMainFileKey() throws IOException {

return key;
}

public String generateTemporaryS3Url() throws IOException {
//Questions:
// Q. Should this work for private and public?
// A. Yes! Since the URL has a limited, short life span. -- L.A.
// Q. how long should the download url work?
// A. 1 hour by default seems like an OK number. Making it configurable seems like a good idea too. -- L.A.
if (s3 == null) {
throw new IOException("ERROR: s3 not initialised. ");
}
if (dvObject instanceof DataFile) {
key = getMainFileKey();
java.util.Date expiration = new java.util.Date();
long msec = expiration.getTime();
msec += 60 * 1000 * getUrlExpirationMinutes(); // convert minutes to milliseconds
expiration.setTime(msec);

GeneratePresignedUrlRequest generatePresignedUrlRequest =
new GeneratePresignedUrlRequest(bucketName, key);
generatePresignedUrlRequest.setMethod(HttpMethod.GET); // Default.
generatePresignedUrlRequest.setExpiration(expiration);
ResponseHeaderOverrides responseHeaders = new ResponseHeaderOverrides();
responseHeaders.setContentDisposition("attachment; filename="+this.getDataFile().getDisplayName());
responseHeaders.setContentType(this.getDataFile().getContentType());
generatePresignedUrlRequest.setResponseHeaders(responseHeaders);

URL s = s3.generatePresignedUrl(generatePresignedUrlRequest);

return s.toString();
} else if (dvObject instanceof Dataset) {
throw new IOException("Data Access: GenerateTemporaryS3Url: Invalid DvObject type : Dataset");
} else if (dvObject instanceof Dataverse) {
throw new IOException("Data Access: Invalid DvObject type : Dataverse");
} else {
throw new IOException("Data Access: Invalid DvObject type");
}
}

private int getUrlExpirationMinutes() {
String optionValue = System.getProperty("dataverse.files.s3-url-expiration-minutes");
if (optionValue != null) {
Integer num;
try {
num = new Integer(optionValue);
} catch (NumberFormatException ex) {
num = null;
}
if (num != null) {
return num;
}
}
return 60;
}
}
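
Taken in isolation, the presigned-URL mechanism that generateTemporaryS3Url() relies on can be exercised with just the AWS SDK classes imported in this diff. A stand-alone, hypothetical sketch (bucket name, key, and filename are placeholders; credentials come from the SDK's default provider chain):

import com.amazonaws.HttpMethod;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.GeneratePresignedUrlRequest;
import com.amazonaws.services.s3.model.ResponseHeaderOverrides;
import java.net.URL;
import java.util.Date;

public class PresignedUrlExample {
    public static void main(String[] args) {
        AmazonS3 s3 = AmazonS3ClientBuilder.defaultClient();
        // expire the URL 60 minutes from now (minutes -> seconds -> milliseconds)
        Date expiration = new Date(System.currentTimeMillis() + 60L * 60L * 1000L);

        GeneratePresignedUrlRequest request =
                new GeneratePresignedUrlRequest("your_bucket_name", "path/to/object")
                        .withMethod(HttpMethod.GET)
                        .withExpiration(expiration);
        // ask S3 to serve the object as a download with a friendly filename
        ResponseHeaderOverrides headers = new ResponseHeaderOverrides()
                .withContentDisposition("attachment; filename=mydata.tab");
        request.setResponseHeaders(headers);

        URL url = s3.generatePresignedUrl(request);
        System.out.println(url);
    }
}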
