From bb92f189569031eff6988c0fb9713352424c6fcb Mon Sep 17 00:00:00 2001 From: ali Date: Sun, 2 Feb 2025 00:50:24 -0800 Subject: [PATCH 01/16] fix: download operators result --- .../request/ResultExportRequest.scala | 18 +- .../texera/web/resource/ResultResource.scala | 72 +- .../web/service/ResultExportService.scala | 757 +++++++++++------- .../service/user/download/download.service.ts | 74 +- .../result-exportation.component.html | 5 +- .../result-exportation.component.ts | 55 +- .../workflow-result-export.service.ts | 93 ++- 7 files changed, 675 insertions(+), 399 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala index 3d9b50e270f..a34fab3d41c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala @@ -1,13 +1,13 @@ package edu.uci.ics.texera.web.model.websocket.request case class ResultExportRequest( - exportType: String, - workflowId: Int, - workflowName: String, - operatorId: String, - operatorName: String, - datasetIds: List[Int], - rowIndex: Int, - columnIndex: Int, - filename: String + exportType: String, // e.g. "csv", "google_sheet", "arrow", "data" + workflowId: Int, + workflowName: String, + operatorIds: List[String], // changed from single operatorId: String -> List of strings + datasetIds: List[Int], + rowIndex: Int, // used by "data" export + columnIndex: Int, // used by "data" export + filename: String, // optional filename override + destination: String // "dataset" or "local" ) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala index 206546e8d56..f0843cd4c20 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala @@ -5,38 +5,88 @@ import edu.uci.ics.amber.core.virtualidentity.WorkflowIdentity import edu.uci.ics.texera.web.auth.SessionUser import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse -import edu.uci.ics.texera.web.service.{ResultExportService, WorkflowService} +import edu.uci.ics.texera.web.service.ResultExportService import io.dropwizard.auth.Auth import javax.ws.rs._ -import javax.ws.rs.core.Response +import javax.ws.rs.core.{MediaType, Response} +import javax.ws.rs.core.Response.Status import scala.jdk.CollectionConverters._ @Path("/result") +@Produces(Array(MediaType.APPLICATION_JSON, MediaType.APPLICATION_OCTET_STREAM)) class ResultResource extends LazyLogging { @POST @Path("/export") def exportResult( - request: ResultExportRequest, - @Auth user: SessionUser - ): Response = { - + request: ResultExportRequest, + @Auth user: SessionUser + ): Response = { try { - val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId)) + if (request.destination == "local") { + // CASE A: multiple operators => produce ZIP + if (request.operatorIds.size > 1) { + val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId)) + val (zipStream, zipFileNameOpt) = + resultExportService.exportOperatorsAsZip(user.user, request) + + if (zipStream == null) { + return 
Response
+              .status(Response.Status.INTERNAL_SERVER_ERROR)
+              .`type`(MediaType.APPLICATION_JSON)
+              .entity(Map("error" -> "Failed to export multiple operators as zip").asJava)
+              .build()
+          }
+
+          val finalFileName = zipFileNameOpt.getOrElse("operators.zip")
+          return Response
+            .ok(zipStream, "application/zip")
+            .header("Content-Disposition", s"attachment; filename=\"$finalFileName\"")
+            .build()
+        }
+
+        // CASE B: exactly one operator => single file
+        if (request.operatorIds.size != 1) {
+          return Response
+            .status(Response.Status.BAD_REQUEST)
+            .`type`(MediaType.APPLICATION_JSON)
+            .entity(Map("error" -> "Local download requires at least one operator.").asJava)
+            .build()
+        }
+        val singleOpId = request.operatorIds.head
 
-      val exportResponse: ResultExportResponse =
-        resultExportService.exportResult(user.user, request)
+        val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId))
+        val (streamingOutput, fileNameOpt) =
+          resultExportService.exportOperatorResultAsStream(request, singleOpId)
 
-      Response.ok(exportResponse).build()
+        if (streamingOutput == null) {
+          return Response
+            .status(Response.Status.INTERNAL_SERVER_ERROR)
+            .`type`(MediaType.APPLICATION_JSON)
+            .entity(Map("error" -> "Failed to export operator").asJava)
+            .build()
+        }
+        val finalFileName = fileNameOpt.getOrElse("download.dat")
+        Response
+          .ok(streamingOutput, MediaType.APPLICATION_OCTET_STREAM)
+          .header("Content-Disposition", s"attachment; filename=\"$finalFileName\"")
+          .build()
+
+      } else {
+        // destination == "dataset" (or any other value): keep the original dataset-export logic
+        val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId))
+        val exportResponse = resultExportService.exportResult(user.user, request)
+        Response.ok(exportResponse).build()
+      }
     } catch {
       case ex: Exception =>
         Response
           .status(Response.Status.INTERNAL_SERVER_ERROR)
+          .`type`(MediaType.APPLICATION_JSON)
           .entity(Map("error" -> ex.getMessage).asJava)
           .build()
     }
   }
-
 }
 diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
index 8d40867e907..d15c008c4c3 100644
--- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
+++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
@@ -2,7 +2,6 @@ package edu.uci.ics.texera.web.service
 
 import com.github.tototoshi.csv.CSVWriter
 import com.google.api.client.googleapis.json.GoogleJsonResponseException
-import com.google.api.client.util.Lists
 import com.google.api.services.drive.Drive
 import com.google.api.services.drive.model.{File, FileList, Permission}
 import com.google.api.services.sheets.v4.Sheets
@@ -10,16 +9,15 @@ import com.google.api.services.sheets.v4.model.{Spreadsheet, SpreadsheetProperti
 import edu.uci.ics.amber.core.storage.{DocumentFactory, VFSURIFactory}
 import edu.uci.ics.amber.core.storage.model.VirtualDocument
 import edu.uci.ics.amber.core.tuple.Tuple
+import edu.uci.ics.amber.core.virtualidentity.{OperatorIdentity, WorkflowIdentity}
 import edu.uci.ics.amber.engine.common.Utils.retry
 import edu.uci.ics.amber.util.PathUtils
-import edu.uci.ics.amber.core.virtualidentity.{OperatorIdentity, WorkflowIdentity}
 import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User
 import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest
 import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse
 import edu.uci.ics.texera.web.resource.GoogleResource
 import 
edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles import edu.uci.ics.texera.web.resource.dashboard.user.workflow.WorkflowVersionResource -import org.jooq.types.UInteger import edu.uci.ics.amber.util.ArrowUtils import edu.uci.ics.amber.core.workflow.PortIdentity import edu.uci.ics.texera.web.service.WorkflowExecutionService.getLatestExecutionId @@ -33,15 +31,32 @@ import java.util import java.util.concurrent.{Executors, ThreadPoolExecutor} import scala.annotation.tailrec import scala.collection.mutable -import scala.jdk.CollectionConverters.SeqHasAsJava +import scala.jdk.CollectionConverters._ import org.apache.arrow.memory.RootAllocator -import org.apache.arrow.vector._ import org.apache.arrow.vector.ipc.ArrowFileWriter +import org.apache.arrow.vector.VectorSchemaRoot import org.apache.commons.lang3.StringUtils import java.io.OutputStream import java.nio.channels.Channels +import java.util.zip.{ZipEntry, ZipOutputStream} +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.StreamingOutput import scala.util.Using +import java.io.{FilterOutputStream, IOException, OutputStream} + +/** + * A simple wrapper that ignores 'close()' calls on the underlying stream. + * This allows each operator's writer to call close() without ending the entire ZipOutputStream. + */ +private class NonClosingOutputStream(os: OutputStream) extends FilterOutputStream(os) { + @throws[IOException] + override def close(): Unit = { + // do not actually close the underlying stream + super.flush() + // omit super.close() + } +} object ResultExportService { final private val UPLOAD_BATCH_ROW_COUNT = 10000 @@ -57,325 +72,312 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { import ResultExportService._ private val cache = new mutable.HashMap[String, String] - def exportResult( - user: User, - request: ResultExportRequest - ): ResultExportResponse = { - // retrieve the file link saved in the session if exists - if (cache.contains(request.exportType)) { - return ResultExportResponse( - "success", - s"Link retrieved from cache ${cache(request.exportType)}" + + def exportResult(user: User, request: ResultExportRequest): ResultExportResponse = { + val successMessages = new mutable.ListBuffer[String]() + val errorMessages = new mutable.ListBuffer[String]() + + // iterate through all operator IDs + request.operatorIds.foreach { opId => + try { + val (messageOpt, errorOpt) = exportSingleOperator(user, request, opId) + messageOpt.foreach(successMessages += _) + errorOpt.foreach(errorMessages += _) + } catch { + case ex: Exception => + // catch any unforeseen exceptions so that other operators can still be attempted + errorMessages += s"Error exporting operator $opId: ${ex.getMessage}" + } + } + + if (errorMessages.isEmpty) { + ResultExportResponse("success", successMessages.mkString("\n")) + } else if (successMessages.isEmpty) { + ResultExportResponse("error", errorMessages.mkString("\n")) + } else { + // partial success + ResultExportResponse( + "partial", + s"Some operators succeeded:\n${successMessages.mkString("\n")}\n\n" + + s"Some operators failed:\n${errorMessages.mkString("\n")}" ) } + } + + /** + * Export the result for ONE operator. 
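+    * The export format follows request.exportType ("csv", "google_sheet", "data", or "arrow");
+    * the heavy upload work runs asynchronously on the shared thread pool.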
+ * Return (SomeSuccessMessage, SomeErrorMessage) or (None, None) if handled differently + */ + private def exportSingleOperator( + user: User, + request: ResultExportRequest, + operatorId: String + ): (Option[String], Option[String]) = { + + // Possibly use some caching key + val cacheKey = s"${request.exportType}-$operatorId" + if (cache.contains(cacheKey)) { + return ( + Some(s"Link retrieved from cache for operator $operatorId: ${cache(cacheKey)}"), + None + ) + } + + val execIdOpt = getLatestExecutionId(workflowIdentity) + if (execIdOpt.isEmpty) { + return (None, Some(s"Workflow ${request.workflowId} has no execution result")) + } - // By now the workflow should finish running - // Only supports external port 0 for now. TODO: support multiple ports val storageUri = VFSURIFactory.createResultURI( workflowIdentity, - getLatestExecutionId(workflowIdentity).getOrElse( - return ResultExportResponse("error", "The workflow contains no results") - ), - OperatorIdentity(request.operatorId), + execIdOpt.get, + OperatorIdentity(operatorId), PortIdentity() ) - val operatorResult: VirtualDocument[Tuple] = + val operatorResult = DocumentFactory.openDocument(storageUri)._1.asInstanceOf[VirtualDocument[Tuple]] if (operatorResult.getCount == 0) { - return ResultExportResponse("error", "The workflow contains no results") + return (None, Some(s"Operator $operatorId has no results (empty)")) } val results: Iterable[Tuple] = operatorResult.get().to(Iterable) - val attributeNames = results.head.getSchema.getAttributeNames + val attributeNames = results.head.getSchema.getAttributeNames - // handle the request according to export type request.exportType match { case "google_sheet" => - handleGoogleSheetRequest(cache, request, results, attributeNames) + val (msg, err) = handleGoogleSheetRequest(operatorId, results, attributeNames, request) + (msg, err) + case "csv" => - handleCSVRequest(user, request, results, attributeNames) + val (msg, err) = handleCSVRequest(operatorId, user, request, results, attributeNames) + (msg, err) + case "data" => - handleDataRequest(user, request, results) + val (msg, err) = handleDataRequest(operatorId, user, request, results) + (msg, err) + case "arrow" => - handleArrowRequest(user, request, results) - case _ => - ResultExportResponse("error", s"Unknown export type: ${request.exportType}") + val (msg, err) = handleArrowRequest(operatorId, user, request, results) + (msg, err) + + case unknown => + (None, Some(s"Unknown export type: $unknown")) } } private def handleCSVRequest( - user: User, - request: ResultExportRequest, - results: Iterable[Tuple], - headers: List[String] - ): ResultExportResponse = { - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - - pool.submit(() => - { - val writer = CSVWriter.open(pipedOutputStream) - writer.writeRow(headers) - results.foreach { tuple => - writer.writeRow(tuple.getFields.toIndexedSeq) + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple], + headers: List[String] + ): (Option[String], Option[String]) = { + try { + val pipedOutputStream = new PipedOutputStream() + val pipedInputStream = new PipedInputStream(pipedOutputStream) + + pool.submit(new Runnable { + override def run(): Unit = { + val writer = CSVWriter.open(pipedOutputStream) + writer.writeRow(headers) + results.foreach { tuple => + writer.writeRow(tuple.getFields.toIndexedSeq) + } + writer.close() } - writer.close() - }.asInstanceOf[Runnable] - ) - - val fileName = 
generateFileName(request, "csv") - saveToDatasets(request, user, pipedInputStream, fileName) + }) - ResultExportResponse( - "success", - s"File saved to User Dashboard as $fileName to Datasets ${request.datasetIds.mkString(",")}" - ) + val fileName = generateFileName(request, operatorId, "csv") + saveToDatasets(request, user, pipedInputStream, fileName) + (Some(s"CSV export done for operator $operatorId -> file: $fileName"), None) + } catch { + case ex: Exception => + (None, Some(s"CSV export failed for operator $operatorId: ${ex.getMessage}")) + } } private def handleGoogleSheetRequest( - exportCache: mutable.HashMap[String, String], - request: ResultExportRequest, - results: Iterable[Tuple], - header: List[String] - ): ResultExportResponse = { - // create google sheet - val sheetService: Sheets = GoogleResource.getSheetService - val sheetId: String = - createGoogleSheet(sheetService, request.workflowName) - if (sheetId == null) { - return ResultExportResponse("error", "Fail to create google sheet") - } + operatorId: String, + results: Iterable[Tuple], + header: List[String], + request: ResultExportRequest + ): (Option[String], Option[String]) = { + try { + val sheetService: Sheets = GoogleResource.getSheetService + val sheetId: String = createGoogleSheet(sheetService, s"${request.workflowName}-$operatorId") + if (sheetId == null) { + return (None, Some(s"Fail to create google sheet for operator $operatorId")) + } + + val driveService: Drive = GoogleResource.getDriveService + moveToResultFolder(driveService, sheetId) + + // share + val perm = new Permission().setType("anyone").setRole("reader") + driveService.permissions().create(sheetId, perm).execute() - val driveService: Drive = GoogleResource.getDriveService - moveToResultFolder(driveService, sheetId) - - // allow user to access this sheet in the service account - val sharePermission: Permission = new Permission() - .setType("anyone") - .setRole("reader") - driveService - .permissions() - .create(sheetId, sharePermission) - .execute() - - // upload the content asynchronously to avoid long waiting on the user side. - pool - .submit(() => - { + // asynchronously upload data + pool.submit(new Runnable { + override def run(): Unit = { uploadHeader(sheetService, sheetId, header) uploadResult(sheetService, sheetId, results) - }.asInstanceOf[Runnable] - ) + } + }) - // generate success response - val link = s"https://docs.google.com/spreadsheets/d/$sheetId/edit" - val message: String = - s"Google sheet created. The results may be still uploading. 
You can access the sheet $link" - // save the file link in the session cache - exportCache(request.exportType) = link - ResultExportResponse("success", message) - } + val link = s"https://docs.google.com/spreadsheets/d/$sheetId/edit" + // you can store in a small local cache if you want + val cacheKey = s"${request.exportType}-$operatorId" + cache(cacheKey) = link - /** - * create the google sheet and return the sheet Id - */ - private def createGoogleSheet(sheetService: Sheets, workflowName: String): String = { - val createSheetRequest = new Spreadsheet() - .setProperties(new SpreadsheetProperties().setTitle(workflowName)) - val targetSheet: Spreadsheet = sheetService.spreadsheets - .create(createSheetRequest) - .setFields("spreadsheetId") - .execute - targetSheet.getSpreadsheetId - } - - private def handleDataRequest( - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): ResultExportResponse = { - val rowIndex = request.rowIndex - val columnIndex = request.columnIndex - val filename = request.filename - - if (rowIndex >= results.size || columnIndex >= results.head.getFields.length) { - return ResultExportResponse("error", s"Invalid row or column index") + val msg = s"Google sheet created for operator $operatorId: $link (results are uploading)" + (Some(msg), None) + } catch { + case ex: Exception => + (None, Some(s"Google Sheet export failed for operator $operatorId: ${ex.getMessage}")) } + } - val selectedRow = results.toSeq(rowIndex) - val field: Any = selectedRow.getField(columnIndex) - val dataBytes: Array[Byte] = convertFieldToBytes(field) - - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - - pool.submit(() => - { - pipedOutputStream.write(dataBytes) - pipedOutputStream.close() - }.asInstanceOf[Runnable] - ) - - saveToDatasets(request, user, pipedInputStream, filename) - - ResultExportResponse( - "success", - s"Data file $filename saved to Datasets ${request.datasetIds.mkString(",")}" - ) + private def createGoogleSheet(sheetService: Sheets, title: String): String = { + val sheetProps = new SpreadsheetProperties().setTitle(title) + val createReq = new Spreadsheet().setProperties(sheetProps) + val target = sheetService.spreadsheets.create(createReq).setFields("spreadsheetId").execute() + target.getSpreadsheetId } - /** - * move the workflow results to a specific folder - */ @tailrec - private def moveToResultFolder( - driveService: Drive, - sheetId: String, - retry: Boolean = true - ): Unit = { + private def moveToResultFolder(driveService: Drive, sheetId: String, retryOnce: Boolean = true): Unit = { val folderId = retrieveResultFolderId(driveService) try { - driveService - .files() - .update(sheetId, null) - .setAddParents(folderId) - .execute() + driveService.files().update(sheetId, null).setAddParents(folderId).execute() } catch { - case exception: GoogleJsonResponseException => - if (retry) { - // This exception maybe caused by the full deletion of the target folder and - // the cached folder id is obsolete. - // * note: by full deletion, the folder has to be deleted from trash as well. - // In this case, try again. - moveToResultFolder(driveService, sheetId, retry = false) + case ex: GoogleJsonResponseException => + if (retryOnce) { + // maybe folder was deleted/trash, so try again + moveToResultFolder(driveService, sheetId, retryOnce = false) } else { - // if the exception continues to show up then just throw it normally. 
- throw exception + throw ex } } } - private def retrieveResultFolderId(driveService: Drive): String = - synchronized { - val folderResult: FileList = driveService - .files() - .list() - .setQ( - s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'" - ) + private def retrieveResultFolderId(driveService: Drive): String = synchronized { + val folderResult: FileList = + driveService.files().list() + .setQ(s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'") .setSpaces("drive") .execute() - if (folderResult.getFiles.isEmpty) { - val fileMetadata: File = new File() - fileMetadata.setName(WORKFLOW_RESULT_FOLDER_NAME) - fileMetadata.setMimeType("application/vnd.google-apps.folder") - val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute - targetFolder.getId - } else { - folderResult.getFiles.get(0).getId - } + if (folderResult.getFiles.isEmpty) { + val fileMetadata = new File() + fileMetadata.setName(WORKFLOW_RESULT_FOLDER_NAME) + fileMetadata.setMimeType("application/vnd.google-apps.folder") + val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute() + targetFolder.getId + } else { + folderResult.getFiles.get(0).getId } + } - /** - * upload the result header to the google sheet - */ - private def uploadHeader( - sheetService: Sheets, - sheetId: String, - header: List[AnyRef] - ): Unit = { + private def uploadHeader(sheetService: Sheets, sheetId: String, header: List[AnyRef]): Unit = { uploadContent(sheetService, sheetId, List(header.asJava).asJava) } - /** - * upload the result body to the google sheet - */ - private def uploadResult( - sheetService: Sheets, - sheetId: String, - result: Iterable[Tuple] - ): Unit = { - val content: util.List[util.List[AnyRef]] = - Lists.newArrayListWithCapacity(UPLOAD_BATCH_ROW_COUNT) - // use for loop to avoid copying the whole result at the same time - for (tuple: Tuple <- result) { - - val tupleContent: util.List[AnyRef] = - tuple.getFields - .map(convertUnsupported) - .toArray - .toList - .asJava - content.add(tupleContent) - - if (content.size() == UPLOAD_BATCH_ROW_COUNT) { - uploadContent(sheetService, sheetId, content) - content.clear() + private def uploadResult(sheetService: Sheets, sheetId: String, result: Iterable[Tuple]): Unit = { + val batch = new util.ArrayList[util.List[AnyRef]](UPLOAD_BATCH_ROW_COUNT) + + for (tuple <- result) { + val row: util.List[AnyRef] = tuple.getFields.map(convertUnsupported).toList.asJava + batch.add(row) + + if (batch.size() == UPLOAD_BATCH_ROW_COUNT) { + uploadContent(sheetService, sheetId, batch) + batch.clear() } } + if (!batch.isEmpty) { + uploadContent(sheetService, sheetId, batch) + } + } - if (!content.isEmpty) { - uploadContent(sheetService, sheetId, content) + private def uploadContent(sheetService: Sheets, sheetId: String, content: util.List[util.List[AnyRef]]): Unit = { + val body = new ValueRange().setValues(content) + val range = "A1" + val options = "RAW" + retry(attempts = RETRY_ATTEMPTS, baseBackoffTimeInMS = BASE_BACK_OOF_TIME_IN_MS) { + sheetService.spreadsheets.values().append(sheetId, range, body).setValueInputOption(options).execute() } } - /** - * convert the tuple content into the type the Google Sheet API supports - */ - private def convertUnsupported(content: Any): AnyRef = { - content match { + private def convertUnsupported(anyVal: Any): AnyRef = { + anyVal match { + case null => "" + case s: String => s + case n: Number => n + case other => 
other.toString + } + } - // if null, use empty string to represent. - case null => "" + private def handleDataRequest( + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { + try { + val rowIndex = request.rowIndex + val columnIndex = request.columnIndex + val fileName = request.filename - // Google Sheet API supports String and number(long, int, double and so on) - case _: String | _: Number => content.asInstanceOf[AnyRef] + if (rowIndex >= results.size || columnIndex >= results.head.getFields.length) { + return (None, Some(s"Invalid rowIndex or columnIndex for operator $operatorId")) + } - // convert all the other type into String - case _ => content.toString - } + val selectedRow = results.toSeq(rowIndex) + val field: Any = selectedRow.getField(columnIndex) + val dataBytes: Array[Byte] = convertFieldToBytes(field) - } + val pipedOutputStream = new PipedOutputStream() + val pipedInputStream = new PipedInputStream(pipedOutputStream) - /** - * upload the content to the google sheet - * The type of content is java list because the google API is in java - */ - private def uploadContent( - sheetService: Sheets, - sheetId: String, - content: util.List[util.List[AnyRef]] - ): Unit = { - val body: ValueRange = new ValueRange().setValues(content) - val range: String = "A1" - val valueInputOption: String = "RAW" - - // using retry logic here, to handle possible API errors, i.e., rate limit exceeded. - retry(attempts = RETRY_ATTEMPTS, baseBackoffTimeInMS = BASE_BACK_OOF_TIME_IN_MS) { - sheetService.spreadsheets.values - .append(sheetId, range, body) - .setValueInputOption(valueInputOption) - .execute + pool.submit(new Runnable { + override def run(): Unit = { + pipedOutputStream.write(dataBytes) + pipedOutputStream.close() + } + }) + + saveToDatasets(request, user, pipedInputStream, fileName) + (Some(s"Data export done for operator $operatorId -> file: $fileName"), None) + } catch { + case ex: Exception => + (None, Some(s"Data export failed for operator $operatorId: ${ex.getMessage}")) } + } + private def convertFieldToBytes(field: Any): Array[Byte] = { + field match { + case data: Array[Byte] => data + case data: String => data.getBytes(StandardCharsets.UTF_8) + case other => other.toString.getBytes(StandardCharsets.UTF_8) + } } private def handleArrowRequest( - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): ResultExportResponse = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { if (results.isEmpty) { - return ResultExportResponse("error", "No results to export") + return (None, Some(s"No results to export for operator $operatorId")) } - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - val allocator = new RootAllocator() + try { + val pipedOutputStream = new PipedOutputStream() + val pipedInputStream = new PipedInputStream(pipedOutputStream) + val allocator = new RootAllocator() - pool.submit(() => - { + pool.submit(() => { Using.Manager { use => val (writer, root) = createArrowWriter(results, allocator, pipedOutputStream) use(writer) @@ -385,99 +387,246 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { writeArrowData(writer, root, results) } - }.asInstanceOf[Runnable] - ) + }) - val fileName = generateFileName(request, "arrow") - saveToDatasets(request, user, pipedInputStream, fileName) + val fileName = 
generateFileName(request, operatorId, "arrow") + saveToDatasets(request, user, pipedInputStream, fileName) - ResultExportResponse( - "success", - s"Arrow file saved as $fileName to Datasets ${request.datasetIds.mkString(",")}" - ) + (Some(s"Arrow file export done for operator $operatorId -> file: $fileName"), None) + } catch { + case ex: Exception => + (None, Some(s"Arrow export failed for operator $operatorId: ${ex.getMessage}")) + } } private def createArrowWriter( - results: Iterable[Tuple], - allocator: RootAllocator, - outputStream: OutputStream - ): (ArrowFileWriter, VectorSchemaRoot) = { - val schema = results.head.getSchema + results: Iterable[Tuple], + allocator: RootAllocator, + outputStream: OutputStream + ): (ArrowFileWriter, VectorSchemaRoot) = { + val schema = results.head.getSchema val arrowSchema = ArrowUtils.fromTexeraSchema(schema) - val root = VectorSchemaRoot.create(arrowSchema, allocator) - val channel = Channels.newChannel(outputStream) - val writer = new ArrowFileWriter(root, null, channel) + val root = VectorSchemaRoot.create(arrowSchema, allocator) + val channel = Channels.newChannel(outputStream) + val writer = new ArrowFileWriter(root, null, channel) (writer, root) } - private def writeArrowData( - writer: ArrowFileWriter, - root: VectorSchemaRoot, - results: Iterable[Tuple] - ): Unit = { + private def writeArrowData(writer: ArrowFileWriter, root: VectorSchemaRoot, results: Iterable[Tuple]): Unit = { writer.start() - val batchSize = 1000 - - // Convert to Seq to get total size - val resultSeq = results.toSeq - val totalSize = resultSeq.size + val batchSize = 1000 + val resultList = results.toList + val totalSize = resultList.size - // Process in complete batches for (batchStart <- 0 until totalSize by batchSize) { - val batchEnd = Math.min(batchStart + batchSize, totalSize) + val batchEnd = Math.min(batchStart + batchSize, totalSize) val currentBatchSize = batchEnd - batchStart - // Process each tuple in the current batch for (i <- 0 until currentBatchSize) { - val tuple = resultSeq(batchStart + i) + val tuple = resultList(batchStart + i) ArrowUtils.setTexeraTuple(tuple, i, root) } - - // Set the correct row count for this batch and write it root.setRowCount(currentBatchSize) writer.writeBatch() root.clear() } - writer.end() } - private def generateFileName(request: ResultExportRequest, extension: String): String = { + private def generateFileName(request: ResultExportRequest, operatorId: String, extension: String): String = { val latestVersion = - WorkflowVersionResource.getLatestVersion(UInteger.valueOf(request.workflowId)) + WorkflowVersionResource.getLatestVersion(org.jooq.types.UInteger.valueOf(request.workflowId)) val timestamp = LocalDateTime .now() .truncatedTo(ChronoUnit.SECONDS) .format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")) - StringUtils.replaceEach( - s"${request.workflowName}-v$latestVersion-${request.operatorName}-$timestamp.$extension", - Array("/", "\\"), - Array("", "") - ) + + val rawName = s"${request.workflowName}-op$operatorId-v$latestVersion-$timestamp.$extension" + // remove any path separators + StringUtils.replaceEach(rawName, Array("/", "\\"), Array("", "")) } private def saveToDatasets( - request: ResultExportRequest, - user: User, - pipedInputStream: PipedInputStream, - fileName: String - ): Unit = { + request: ResultExportRequest, + user: User, + pipedInputStream: PipedInputStream, + fileName: String + ): Unit = { request.datasetIds.foreach { did => - val datasetPath = PathUtils.getDatasetPath(UInteger.valueOf(did)) - val 
filePath = datasetPath.resolve(fileName) + val datasetPath = PathUtils.getDatasetPath(org.jooq.types.UInteger.valueOf(did)) + val filePath = datasetPath.resolve(fileName) createNewDatasetVersionByAddingFiles( - UInteger.valueOf(did), + org.jooq.types.UInteger.valueOf(did), user, Map(filePath -> pipedInputStream) ) } } - private def convertFieldToBytes(field: Any): Array[Byte] = { - field match { - case data: Array[Byte] => data - case data: String => data.getBytes(StandardCharsets.UTF_8) - case data => data.toString.getBytes(StandardCharsets.UTF_8) + /** + * For local download of a single operator. Streams the data directly. + * We return (StreamingOutput, Some(filename)) on success, or (null, None) on error. + */ + def exportOperatorResultAsStream( + request: ResultExportRequest, + operatorId: String + ): (StreamingOutput, Option[String]) = { + val execIdOpt = getLatestExecutionId(workflowIdentity) + if (execIdOpt.isEmpty) { + return (null, None) + } + + val storageUri = VFSURIFactory.createResultURI( + workflowIdentity, + execIdOpt.get, + OperatorIdentity(operatorId), + PortIdentity() + ) + val operatorResult = + DocumentFactory.openDocument(storageUri)._1.asInstanceOf[VirtualDocument[Tuple]] + if (operatorResult.getCount == 0) { + return (null, None) + } + + val results: Iterable[Tuple] = operatorResult.get().to(Iterable) + val extension: String = request.exportType match { + case "csv" => "csv" + case "arrow" => "arrow" + case "data" => "bin" + case other => "dat" } + + val fileName = generateFileName(request, operatorId, extension) + + val streamingOutput: StreamingOutput = new StreamingOutput { + override def write(out: OutputStream): Unit = { + request.exportType match { + case "csv" => writeCsv(out, results) + case "arrow" => writeArrow(out, results) + case _ => writeCsv(out, results) // fallback + } + } + } + + (streamingOutput, Some(fileName)) } + + /** + * Writes CSV to output stream + */ + private def writeCsv(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { + // for large data, you might want a buffered approach + val csvWriter = CSVWriter.open(outputStream) // Tototoshi CSVWriter can open an OutputStream + val headers = results.head.getSchema.getAttributeNames + csvWriter.writeRow(headers) + results.foreach { tuple => + csvWriter.writeRow(tuple.getFields.toIndexedSeq) + } + csvWriter.close() + } + + /** + * Writes Arrow to output stream + */ + private def writeArrow(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { + if (results.isEmpty) return + + val allocator = new RootAllocator() + Using.Manager { use => + val (writer, root) = createArrowWriter(results, allocator, outputStream) + use(writer) + use(root) + use(allocator) + + writer.start() + val batchSize = 1000 + val resultList = results.toList + val totalSize = resultList.size + + for (batchStart <- 0 until totalSize by batchSize) { + val batchEnd = Math.min(batchStart + batchSize, totalSize) + val currentBatchSize = batchEnd - batchStart + + for (i <- 0 until currentBatchSize) { + val tuple = resultList(batchStart + i) + ArrowUtils.setTexeraTuple(tuple, i, root) + } + root.setRowCount(currentBatchSize) + writer.writeBatch() + root.clear() + } + writer.end() + } + } + + def exportOperatorsAsZip( + user: User, + request: ResultExportRequest + ): (StreamingOutput, Option[String]) = { + if (request.operatorIds.isEmpty) { + return (null, None) + } + + val timestamp = LocalDateTime + .now() + .truncatedTo(ChronoUnit.SECONDS) + .format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")) + val 
zipFileName = s"${request.workflowName}-$timestamp.zip" + + val streamingOutput = new StreamingOutput { + override def write(outputStream: OutputStream): Unit = { + Using.resource(new ZipOutputStream(outputStream)) { zipOut => + request.operatorIds.foreach { opId => + val execIdOpt = getLatestExecutionId(workflowIdentity) + if (execIdOpt.isEmpty) { + throw new WebApplicationException( + s"No execution result for workflow ${request.workflowId}" + ) + } + + val storageUri = VFSURIFactory.createResultURI( + workflowIdentity, + execIdOpt.get, + OperatorIdentity(opId), + PortIdentity() + ) + val operatorResult = + DocumentFactory.openDocument(storageUri)._1.asInstanceOf[VirtualDocument[Tuple]] + + if (operatorResult.getCount == 0) { + // create empty record + zipOut.putNextEntry(new ZipEntry(s"$opId-empty.txt")) + val msg = s"Operator $opId has no results" + zipOut.write(msg.getBytes(StandardCharsets.UTF_8)) + zipOut.closeEntry() + } else { + val results = operatorResult.get().to(Iterable) + val extension = request.exportType match { + case "csv" => "csv" + case "arrow" => "arrow" + case "data" => "bin" + case _ => "dat" + } + val operatorFileName = generateFileName(request, opId, extension) + + zipOut.putNextEntry(new ZipEntry(operatorFileName)) + + // create a non-closing wrapper around zipOut + val nonClosingStream = new NonClosingOutputStream(zipOut) + + request.exportType match { + case "csv" => writeCsv(nonClosingStream, results) + case "arrow" => writeArrow(nonClosingStream, results) + case _ => writeCsv(nonClosingStream, results) + } + zipOut.closeEntry() + } + } + } + } + } + + (streamingOutput, Some(zipFileName)) + } + } diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts index 0a0d460824d..744ad9f3188 100644 --- a/core/gui/src/app/dashboard/service/user/download/download.service.ts +++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts @@ -8,7 +8,7 @@ import { WorkflowPersistService } from "src/app/common/service/workflow-persist/ import * as JSZip from "jszip"; import { Workflow } from "../../../../common/type/workflow"; import { AppSettings } from "../../../../common/app-setting"; -import { HttpClient } from "@angular/common/http"; +import { HttpClient, HttpResponse } from "@angular/common/http"; export const EXPORT_BASE_URL = "result/export"; @@ -16,7 +16,7 @@ interface DownloadableItem { blob: Blob; fileName: string; } - +/* TODO: refactor download service to export */ @Injectable({ providedIn: "root", }) @@ -88,40 +88,78 @@ export class DownloadService { ); } + /** + * Export the workflow result. If destination = "local", the server returns a BLOB (file). + * Otherwise, it returns JSON with a status message. 
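+   * @param destination "local" streams the file back as a blob download; any other value
+   * (e.g. "dataset") saves the result server-side and resolves to a JSON status message.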
+ */ public exportWorkflowResult( exportType: string, workflowId: number, workflowName: string, - operatorId: string, - operatorName: string, + operatorIds: string[], datasetIds: number[], rowIndex: number, columnIndex: number, - filename: string + filename: string, + destination: string // "local" or "dataset" ): Observable { const requestBody = { exportType, workflowId, workflowName, - operatorId, - operatorName, + operatorIds, datasetIds, rowIndex, columnIndex, filename, + destination, }; + if (destination === "local") { + return this.http.post( + `${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, + requestBody, + { + responseType: "blob" as const, + observe: "response", + headers: { + "Content-Type": "application/json", + Accept: "application/octet-stream", + }, + } + ); + } else { + // dataset => return JSON + return this.http.post( + `${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, + requestBody, + { + responseType: "json" as const, + observe: "response", + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + } + ); + } + } - /* - TODO: curently, the response is json because the backend does not return a file and export - the result into the database. Next, we will implement download feature (export to local). - */ - return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { - responseType: "json", - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - }); + /** + * Utility function to download a file from the server from blob object. + */ + public saveBlobFile(response: any, defaultFileName: string): void { + // If the server sets "Content-Disposition: attachment; filename="someName.csv"" header, + // we can parse that out. Otherwise just use defaultFileName. 
+ const contentDisposition = response.headers.get("Content-Disposition"); + let fileName = defaultFileName; + if (contentDisposition) { + const match = contentDisposition.match(/filename="(.+)"/); + if (match && match[1]) { + fileName = match[1]; + } + } + const blob = response.body; // the actual file data + this.fileSaverService.saveAs(blob, fileName); } downloadOperatorsResult( diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html index af8d7b5cecf..d25c4ddbad1 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html @@ -57,6 +57,9 @@ + @@ -96,7 +99,7 @@ nz-button nzType="default" *ngIf="destination === 'local'" - (click)="onClickExportAllResult()"> + (click)="onClickSaveResultFileToLocal()"> Export diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index 9b8ab442013..1a13eafedf4 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -53,19 +53,40 @@ export class ResultExportationComponent implements OnInit { } updateOutputType(): void { - const highlightedOperatorIds = this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs(); - if (highlightedOperatorIds.length === 1) { - const operatorId = highlightedOperatorIds[0]; - const outputTypes = this.workflowResultService.determineOutputTypes(operatorId); - this.isTableOutput = outputTypes.isTableOutput; - this.isVisualizationOutput = outputTypes.isVisualizationOutput; - this.containsBinaryData = outputTypes.containsBinaryData; - } else { - // TODO: handle multiple operators + const highlightedOperatorIds = this.workflowActionService + .getJointGraphWrapper() + .getCurrentHighlightedOperatorIDs(); + + if (highlightedOperatorIds.length === 0) { + // No operators highlighted this.isTableOutput = false; this.isVisualizationOutput = false; this.containsBinaryData = false; + return; } + + // Assume they're all table or visualization + // until we find an operator that isn't + let allTable = true; + let allVisualization = true; + let anyBinaryData = false; + + for (const operatorId of highlightedOperatorIds) { + const outputTypes = this.workflowResultService.determineOutputTypes(operatorId); + if (!outputTypes.isTableOutput) { + allTable = false; + } + if (!outputTypes.isVisualizationOutput) { + allVisualization = false; + } + if (outputTypes.containsBinaryData) { + anyBinaryData = true; + } + } + + this.isTableOutput = allTable; + this.isVisualizationOutput = allVisualization; + this.containsBinaryData = anyBinaryData; } onUserInputDatasetName(event: Event): void { @@ -87,14 +108,24 @@ export class ResultExportationComponent implements OnInit { this.rowIndex, this.columnIndex, this.inputFileName, - this.sourceTriggered === "menu" + this.sourceTriggered === "menu", + "dataset" ); this.modalRef.close(); } } - onClickExportAllResult() { - this.workflowResultExportService.exportOperatorsResultToLocal(this.sourceTriggered === "menu"); + onClickSaveResultFileToLocal() { + this.workflowResultExportService.exportWorkflowExecutionResult( + this.exportType, + 
this.workflowId, // restores the missing workflow id argument (assumed available on this component, as in the dataset path)
+      
this.workflowName, + [], + this.rowIndex, + this.columnIndex, + this.inputFileName, + this.sourceTriggered === "menu", + "local" + ); this.modalRef.close(); } } diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index 9f9750705ae..da4cf59251a 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -134,7 +134,8 @@ export class WorkflowResultExportService { rowIndex: number, columnIndex: number, filename: string, - exportAll: boolean = false + exportAll: boolean = false, + destination: string = "dataset" // default to dataset ): void { if (!environment.exportExecutionResultEnabled) { return; @@ -145,56 +146,60 @@ export class WorkflowResultExportService { return; } - let operatorIds: string[]; - if (!exportAll) + // gather operator IDs + let operatorIds: string[] = []; + if (!exportAll) { operatorIds = [...this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs()]; - else + } else { operatorIds = this.workflowActionService .getTexeraGraph() .getAllOperators() .map(operator => operator.operatorID); + } - this.notificationService.loading("exporting..."); - operatorIds.forEach(operatorId => { - if (!this.workflowResultService.hasAnyResult(operatorId)) { - console.log(`Operator ${operatorId} has no result to export`); - return; - } - const operator = this.workflowActionService.getTexeraGraph().getOperator(operatorId); - const operatorName = operator.customDisplayName ?? operator.operatorType; + if (operatorIds.length === 0) { + console.log("No operators selected to export"); + return; + } - /* - * This function (and service) was previously used to export result - * into the local file system (downloading). Currently it is used to only - * export to the dataset. 
- * TODO: refactor this service to have export namespace and download should be - * an export type (export to local file system) - * TODO: rowIndex and columnIndex can be used to export a specific cells in the result - */ - this.downloadService - .exportWorkflowResult( - exportType, - workflowId, - workflowName, - operatorId, - operatorName, - [...datasetIds], - rowIndex, - columnIndex, - filename - ) - .subscribe({ - next: _ => { - this.notificationService.info("The result has been exported successfully"); - }, - error: (res: unknown) => { - const errorResponse = res as { error: { error: string } }; - this.notificationService.error( - "An error happened in exporting operator results " + errorResponse.error.error - ); - }, - }); - }); + // show loading + this.notificationService.loading("Exporting..."); + + // Make request + this.downloadService + .exportWorkflowResult( + exportType, + workflowId, + workflowName, + operatorIds, + [...datasetIds], + rowIndex, + columnIndex, + filename, + destination + ) + .subscribe({ + next: response => { + if (destination === "local") { + // "local" => response is a blob + // We can parse the file name from header or use fallback + this.downloadService.saveBlobFile(response, filename); + this.notificationService.info("File downloaded successfully"); + } else { + // "dataset" => response is JSON + // The server might return a JSON with {status, message} + const responseBody = response.body; + if (responseBody && responseBody.status === "success") { + this.notificationService.success(responseBody.message); + } else { + this.notificationService.error(responseBody?.message || "An error occurred during export"); + } + } + }, + error: err => { + this.notificationService.error(`An error happened in exporting operator results: ${err?.error?.error || err}`); + }, + }); } /** From 0e2717619528a8908283bc2124414f5972fe501f Mon Sep 17 00:00:00 2001 From: ali Date: Sun, 2 Feb 2025 14:05:05 -0800 Subject: [PATCH 02/16] fix: lint --- .../request/ResultExportRequest.scala | 18 +- .../texera/web/resource/ResultResource.scala | 10 +- .../web/service/ResultExportService.scala | 243 ++++++++++-------- .../service/user/download/download.service.ts | 40 ++- .../result-exportation.component.ts | 4 +- .../workflow-result-export.service.ts | 5 +- 6 files changed, 168 insertions(+), 152 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala index a34fab3d41c..51e47aae780 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/request/ResultExportRequest.scala @@ -1,13 +1,13 @@ package edu.uci.ics.texera.web.model.websocket.request case class ResultExportRequest( - exportType: String, // e.g. "csv", "google_sheet", "arrow", "data" - workflowId: Int, - workflowName: String, - operatorIds: List[String], // changed from single operatorId: String -> List of strings - datasetIds: List[Int], - rowIndex: Int, // used by "data" export - columnIndex: Int, // used by "data" export - filename: String, // optional filename override - destination: String // "dataset" or "local" + exportType: String, // e.g. 
"csv", "google_sheet", "arrow", "data" + workflowId: Int, + workflowName: String, + operatorIds: List[String], // changed from single operatorId: String -> List of strings + datasetIds: List[Int], + rowIndex: Int, // used by "data" export + columnIndex: Int, // used by "data" export + filename: String, // optional filename override + destination: String // "dataset" or "local" ) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala index f0843cd4c20..bc33b64e79b 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala @@ -20,9 +20,9 @@ class ResultResource extends LazyLogging { @POST @Path("/export") def exportResult( - request: ResultExportRequest, - @Auth user: SessionUser - ): Response = { + request: ResultExportRequest, + @Auth user: SessionUser + ): Response = { try { if (request.destination == "local") { // CASE A: multiple operators => produce ZIP @@ -42,7 +42,7 @@ class ResultResource extends LazyLogging { val finalFileName = zipFileNameOpt.getOrElse("operators.zip") return Response .ok(zipStream, "application/zip") - .header("Content-Disposition", s"attachment; filename=\"$finalFileName\"") + .header("Content-Disposition", "attachment; filename=\"" + finalFileName + "\"") .build() } @@ -71,7 +71,7 @@ class ResultResource extends LazyLogging { val finalFileName = fileNameOpt.getOrElse("download.dat") Response .ok(streamingOutput, MediaType.APPLICATION_OCTET_STREAM) - .header("Content-Disposition", s"attachment; filename=\"$finalFileName\"") + .header("Content-Disposition", "attachment; filename=\"" + finalFileName + "\"") .build() } else { diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index d15c008c4c3..a9d5370327a 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -46,9 +46,9 @@ import scala.util.Using import java.io.{FilterOutputStream, IOException, OutputStream} /** - * A simple wrapper that ignores 'close()' calls on the underlying stream. - * This allows each operator's writer to call close() without ending the entire ZipOutputStream. - */ + * A simple wrapper that ignores 'close()' calls on the underlying stream. + * This allows each operator's writer to call close() without ending the entire ZipOutputStream. + */ private class NonClosingOutputStream(os: OutputStream) extends FilterOutputStream(os) { @throws[IOException] override def close(): Unit = { @@ -75,7 +75,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { def exportResult(user: User, request: ResultExportRequest): ResultExportResponse = { val successMessages = new mutable.ListBuffer[String]() - val errorMessages = new mutable.ListBuffer[String]() + val errorMessages = new mutable.ListBuffer[String]() // iterate through all operator IDs request.operatorIds.foreach { opId => @@ -105,14 +105,14 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * Export the result for ONE operator. - * Return (SomeSuccessMessage, SomeErrorMessage) or (None, None) if handled differently - */ + * Export the result for ONE operator. 
+ * Return (SomeSuccessMessage, SomeErrorMessage) or (None, None) if handled differently + */ private def exportSingleOperator( - user: User, - request: ResultExportRequest, - operatorId: String - ): (Option[String], Option[String]) = { + user: User, + request: ResultExportRequest, + operatorId: String + ): (Option[String], Option[String]) = { // Possibly use some caching key val cacheKey = s"${request.exportType}-$operatorId" @@ -141,7 +141,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } val results: Iterable[Tuple] = operatorResult.get().to(Iterable) - val attributeNames = results.head.getSchema.getAttributeNames + val attributeNames = results.head.getSchema.getAttributeNames request.exportType match { case "google_sheet" => @@ -166,15 +166,15 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleCSVRequest( - operatorId: String, - user: User, - request: ResultExportRequest, - results: Iterable[Tuple], - headers: List[String] - ): (Option[String], Option[String]) = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple], + headers: List[String] + ): (Option[String], Option[String]) = { try { val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) + val pipedInputStream = new PipedInputStream(pipedOutputStream) pool.submit(new Runnable { override def run(): Unit = { @@ -197,14 +197,14 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleGoogleSheetRequest( - operatorId: String, - results: Iterable[Tuple], - header: List[String], - request: ResultExportRequest - ): (Option[String], Option[String]) = { + operatorId: String, + results: Iterable[Tuple], + header: List[String], + request: ResultExportRequest + ): (Option[String], Option[String]) = { try { val sheetService: Sheets = GoogleResource.getSheetService - val sheetId: String = createGoogleSheet(sheetService, s"${request.workflowName}-$operatorId") + val sheetId: String = createGoogleSheet(sheetService, s"${request.workflowName}-$operatorId") if (sheetId == null) { return (None, Some(s"Fail to create google sheet for operator $operatorId")) } @@ -239,13 +239,17 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { private def createGoogleSheet(sheetService: Sheets, title: String): String = { val sheetProps = new SpreadsheetProperties().setTitle(title) - val createReq = new Spreadsheet().setProperties(sheetProps) - val target = sheetService.spreadsheets.create(createReq).setFields("spreadsheetId").execute() + val createReq = new Spreadsheet().setProperties(sheetProps) + val target = sheetService.spreadsheets.create(createReq).setFields("spreadsheetId").execute() target.getSpreadsheetId } @tailrec - private def moveToResultFolder(driveService: Drive, sheetId: String, retryOnce: Boolean = true): Unit = { + private def moveToResultFolder( + driveService: Drive, + sheetId: String, + retryOnce: Boolean = true + ): Unit = { val folderId = retrieveResultFolderId(driveService) try { driveService.files().update(sheetId, null).setAddParents(folderId).execute() @@ -260,23 +264,28 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } } - private def retrieveResultFolderId(driveService: Drive): String = synchronized { - val folderResult: FileList = - driveService.files().list() - .setQ(s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'") - .setSpaces("drive") - .execute() - - if 
(folderResult.getFiles.isEmpty) { - val fileMetadata = new File() - fileMetadata.setName(WORKFLOW_RESULT_FOLDER_NAME) - fileMetadata.setMimeType("application/vnd.google-apps.folder") - val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute() - targetFolder.getId - } else { - folderResult.getFiles.get(0).getId + private def retrieveResultFolderId(driveService: Drive): String = + synchronized { + val folderResult: FileList = + driveService + .files() + .list() + .setQ( + s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'" + ) + .setSpaces("drive") + .execute() + + if (folderResult.getFiles.isEmpty) { + val fileMetadata = new File() + fileMetadata.setName(WORKFLOW_RESULT_FOLDER_NAME) + fileMetadata.setMimeType("application/vnd.google-apps.folder") + val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute() + targetFolder.getId + } else { + folderResult.getFiles.get(0).getId + } } - } private def uploadHeader(sheetService: Sheets, sheetId: String, header: List[AnyRef]): Unit = { uploadContent(sheetService, sheetId, List(header.asJava).asJava) @@ -299,45 +308,53 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } } - private def uploadContent(sheetService: Sheets, sheetId: String, content: util.List[util.List[AnyRef]]): Unit = { - val body = new ValueRange().setValues(content) - val range = "A1" + private def uploadContent( + sheetService: Sheets, + sheetId: String, + content: util.List[util.List[AnyRef]] + ): Unit = { + val body = new ValueRange().setValues(content) + val range = "A1" val options = "RAW" retry(attempts = RETRY_ATTEMPTS, baseBackoffTimeInMS = BASE_BACK_OOF_TIME_IN_MS) { - sheetService.spreadsheets.values().append(sheetId, range, body).setValueInputOption(options).execute() + sheetService.spreadsheets + .values() + .append(sheetId, range, body) + .setValueInputOption(options) + .execute() } } private def convertUnsupported(anyVal: Any): AnyRef = { anyVal match { - case null => "" - case s: String => s - case n: Number => n - case other => other.toString + case null => "" + case s: String => s + case n: Number => n + case other => other.toString } } private def handleDataRequest( - operatorId: String, - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): (Option[String], Option[String]) = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { try { - val rowIndex = request.rowIndex + val rowIndex = request.rowIndex val columnIndex = request.columnIndex - val fileName = request.filename + val fileName = request.filename if (rowIndex >= results.size || columnIndex >= results.head.getFields.length) { return (None, Some(s"Invalid rowIndex or columnIndex for operator $operatorId")) } - val selectedRow = results.toSeq(rowIndex) - val field: Any = selectedRow.getField(columnIndex) + val selectedRow = results.toSeq(rowIndex) + val field: Any = selectedRow.getField(columnIndex) val dataBytes: Array[Byte] = convertFieldToBytes(field) val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) + val pipedInputStream = new PipedInputStream(pipedOutputStream) pool.submit(new Runnable { override def run(): Unit = { @@ -363,19 +380,19 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleArrowRequest( - operatorId: String, - user: User, - request: ResultExportRequest, - results: 
Iterable[Tuple] - ): (Option[String], Option[String]) = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { if (results.isEmpty) { return (None, Some(s"No results to export for operator $operatorId")) } try { val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - val allocator = new RootAllocator() + val pipedInputStream = new PipedInputStream(pipedOutputStream) + val allocator = new RootAllocator() pool.submit(() => { Using.Manager { use => @@ -400,26 +417,30 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def createArrowWriter( - results: Iterable[Tuple], - allocator: RootAllocator, - outputStream: OutputStream - ): (ArrowFileWriter, VectorSchemaRoot) = { - val schema = results.head.getSchema + results: Iterable[Tuple], + allocator: RootAllocator, + outputStream: OutputStream + ): (ArrowFileWriter, VectorSchemaRoot) = { + val schema = results.head.getSchema val arrowSchema = ArrowUtils.fromTexeraSchema(schema) - val root = VectorSchemaRoot.create(arrowSchema, allocator) - val channel = Channels.newChannel(outputStream) - val writer = new ArrowFileWriter(root, null, channel) + val root = VectorSchemaRoot.create(arrowSchema, allocator) + val channel = Channels.newChannel(outputStream) + val writer = new ArrowFileWriter(root, null, channel) (writer, root) } - private def writeArrowData(writer: ArrowFileWriter, root: VectorSchemaRoot, results: Iterable[Tuple]): Unit = { + private def writeArrowData( + writer: ArrowFileWriter, + root: VectorSchemaRoot, + results: Iterable[Tuple] + ): Unit = { writer.start() - val batchSize = 1000 + val batchSize = 1000 val resultList = results.toList - val totalSize = resultList.size + val totalSize = resultList.size for (batchStart <- 0 until totalSize by batchSize) { - val batchEnd = Math.min(batchStart + batchSize, totalSize) + val batchEnd = Math.min(batchStart + batchSize, totalSize) val currentBatchSize = batchEnd - batchStart for (i <- 0 until currentBatchSize) { @@ -433,7 +454,11 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { writer.end() } - private def generateFileName(request: ResultExportRequest, operatorId: String, extension: String): String = { + private def generateFileName( + request: ResultExportRequest, + operatorId: String, + extension: String + ): String = { val latestVersion = WorkflowVersionResource.getLatestVersion(org.jooq.types.UInteger.valueOf(request.workflowId)) val timestamp = LocalDateTime @@ -447,14 +472,14 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def saveToDatasets( - request: ResultExportRequest, - user: User, - pipedInputStream: PipedInputStream, - fileName: String - ): Unit = { + request: ResultExportRequest, + user: User, + pipedInputStream: PipedInputStream, + fileName: String + ): Unit = { request.datasetIds.foreach { did => val datasetPath = PathUtils.getDatasetPath(org.jooq.types.UInteger.valueOf(did)) - val filePath = datasetPath.resolve(fileName) + val filePath = datasetPath.resolve(fileName) createNewDatasetVersionByAddingFiles( org.jooq.types.UInteger.valueOf(did), user, @@ -464,13 +489,13 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * For local download of a single operator. Streams the data directly. - * We return (StreamingOutput, Some(filename)) on success, or (null, None) on error. - */ + * For local download of a single operator. 
Streams the data directly. + * We return (StreamingOutput, Some(filename)) on success, or (null, None) on error. + */ def exportOperatorResultAsStream( - request: ResultExportRequest, - operatorId: String - ): (StreamingOutput, Option[String]) = { + request: ResultExportRequest, + operatorId: String + ): (StreamingOutput, Option[String]) = { val execIdOpt = getLatestExecutionId(workflowIdentity) if (execIdOpt.isEmpty) { return (null, None) @@ -501,9 +526,9 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { val streamingOutput: StreamingOutput = new StreamingOutput { override def write(out: OutputStream): Unit = { request.exportType match { - case "csv" => writeCsv(out, results) + case "csv" => writeCsv(out, results) case "arrow" => writeArrow(out, results) - case _ => writeCsv(out, results) // fallback + case _ => writeCsv(out, results) // fallback } } } @@ -512,8 +537,8 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * Writes CSV to output stream - */ + * Writes CSV to output stream + */ private def writeCsv(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { // for large data, you might want a buffered approach val csvWriter = CSVWriter.open(outputStream) // Tototoshi CSVWriter can open an OutputStream @@ -526,8 +551,8 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * Writes Arrow to output stream - */ + * Writes Arrow to output stream + */ private def writeArrow(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { if (results.isEmpty) return @@ -539,12 +564,12 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { use(allocator) writer.start() - val batchSize = 1000 + val batchSize = 1000 val resultList = results.toList - val totalSize = resultList.size + val totalSize = resultList.size for (batchStart <- 0 until totalSize by batchSize) { - val batchEnd = Math.min(batchStart + batchSize, totalSize) + val batchEnd = Math.min(batchStart + batchSize, totalSize) val currentBatchSize = batchEnd - batchStart for (i <- 0 until currentBatchSize) { @@ -560,9 +585,9 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } def exportOperatorsAsZip( - user: User, - request: ResultExportRequest - ): (StreamingOutput, Option[String]) = { + user: User, + request: ResultExportRequest + ): (StreamingOutput, Option[String]) = { if (request.operatorIds.isEmpty) { return (null, None) } @@ -602,10 +627,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } else { val results = operatorResult.get().to(Iterable) val extension = request.exportType match { - case "csv" => "csv" + case "csv" => "csv" case "arrow" => "arrow" - case "data" => "bin" - case _ => "dat" + case "data" => "bin" + case _ => "dat" } val operatorFileName = generateFileName(request, opId, extension) diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts index 744ad9f3188..0a693c613fe 100644 --- a/core/gui/src/app/dashboard/service/user/download/download.service.ts +++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts @@ -115,32 +115,24 @@ export class DownloadService { destination, }; if (destination === "local") { - return this.http.post( - `${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, - requestBody, - { - responseType: "blob" as const, - observe: "response", - headers: { - "Content-Type": "application/json", - Accept: "application/octet-stream", - }, - } - ); + return 
this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { + responseType: "blob" as const, + observe: "response", + headers: { + "Content-Type": "application/json", + Accept: "application/octet-stream", + }, + }); } else { // dataset => return JSON - return this.http.post( - `${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, - requestBody, - { - responseType: "json" as const, - observe: "response", - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - } - ); + return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { + responseType: "json" as const, + observe: "response", + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + }); } } diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index 1a13eafedf4..432b0a7bef9 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -53,9 +53,7 @@ export class ResultExportationComponent implements OnInit { } updateOutputType(): void { - const highlightedOperatorIds = this.workflowActionService - .getJointGraphWrapper() - .getCurrentHighlightedOperatorIDs(); + const highlightedOperatorIds = this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs(); if (highlightedOperatorIds.length === 0) { // No operators highlighted diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index da4cf59251a..33e531dee0b 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -196,8 +196,9 @@ export class WorkflowResultExportService { } } }, - error: err => { - this.notificationService.error(`An error happened in exporting operator results: ${err?.error?.error || err}`); + error: (err: unknown) => { + const errorMessage = (err as any)?.error?.error || (err as any)?.error || err; + this.notificationService.error(`An error happened in exporting operator results: ${errorMessage}`); }, }); } From 98aeef58b8338ed95e610e26bb522571ff05b0d6 Mon Sep 17 00:00:00 2001 From: ali Date: Thu, 13 Feb 2025 20:14:02 -0800 Subject: [PATCH 03/16] fix: comments --- .../texera/web/resource/ResultResource.scala | 90 +++++++++---------- .../service/user/download/download.service.ts | 2 +- .../workflow-result-export.service.ts | 8 +- 3 files changed, 51 insertions(+), 49 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala index bc33b64e79b..2733923a613 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala @@ -24,61 +24,61 @@ class ResultResource extends LazyLogging { @Auth user: SessionUser ): Response = { try { - if (request.destination == "local") { - // CASE A: multiple operators => produce ZIP - if (request.operatorIds.size > 1) { + request.destination match { + case "local" => + // CASE A: multiple operators => produce ZIP 
+          if (request.operatorIds.size > 1) {
+            val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId))
+            val (zipStream, zipFileNameOpt) =
+              resultExportService.exportOperatorsAsZip(user.user, request)
+
+            if (zipStream == null) {
+              return Response
+                .status(Response.Status.INTERNAL_SERVER_ERROR)
+                .`type`(MediaType.APPLICATION_JSON)
+                .entity(Map("error" -> "Failed to export multiple operators as zip").asJava)
+                .build()
+            }
+
+            val finalFileName = zipFileNameOpt.getOrElse("operators.zip")
+            return Response
+              .ok(zipStream, "application/zip")
+              .header("Content-Disposition", "attachment; filename=\"" + finalFileName + "\"")
+              .build()
+          }
+
+          // CASE B: exactly one operator => single file
+          if (request.operatorIds.size != 1) {
+            return Response
+              .status(Response.Status.BAD_REQUEST)
+              .`type`(MediaType.APPLICATION_JSON)
+              .entity(Map("error" -> "Local download requires exactly one operator to be selected.").asJava)
+              .build()
+          }
+          val singleOpId = request.operatorIds.head
+
           val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId))
-          val (zipStream, zipFileNameOpt) =
-            resultExportService.exportOperatorsAsZip(user.user, request)
+          val (streamingOutput, fileNameOpt) =
+            resultExportService.exportOperatorResultAsStream(request, singleOpId)

-          if (zipStream == null) {
+          if (streamingOutput == null) {
             return Response
               .status(Response.Status.INTERNAL_SERVER_ERROR)
               .`type`(MediaType.APPLICATION_JSON)
-              .entity(Map("error" -> "Failed to export multiple operators as zip").asJava)
+              .entity(Map("error" -> "Failed to export operator").asJava)
               .build()
           }

-          val finalFileName = zipFileNameOpt.getOrElse("operators.zip")
-          return Response
-            .ok(zipStream, "application/zip")
+          val finalFileName = fileNameOpt.getOrElse("download.dat")
+          Response
+            .ok(streamingOutput, MediaType.APPLICATION_OCTET_STREAM)
             .header("Content-Disposition", "attachment; filename=\"" + finalFileName + "\"")
             .build()

-        }
-
-        // CASE B: exactly one operator => single file
-        if (request.operatorIds.size != 1) {
-          return Response
-            .status(Response.Status.BAD_REQUEST)
-            .`type`(MediaType.APPLICATION_JSON)
-            .entity(Map("error" -> "Local download requires exactly one operator to be selected.").asJava)
-            .build()
-        }
-        val singleOpId = request.operatorIds.head
-
-        val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId))
-        val (streamingOutput, fileNameOpt) =
-          resultExportService.exportOperatorResultAsStream(request, singleOpId)
-
-        if (streamingOutput == null) {
           return Response
             .status(Response.Status.INTERNAL_SERVER_ERROR)
             .`type`(MediaType.APPLICATION_JSON)
             .entity(Map("error" -> "Failed to export operator").asJava)
             .build()
         }
-        val finalFileName = fileNameOpt.getOrElse("download.dat")
-        Response
-          .ok(streamingOutput, MediaType.APPLICATION_OCTET_STREAM)
+          val finalFileName = fileNameOpt.getOrElse("download.dat")
+          Response
+            .ok(streamingOutput, MediaType.APPLICATION_OCTET_STREAM)
            .header("Content-Disposition", "attachment; filename=\"" + finalFileName + "\"")
            .build()
-
-      } else {
-        // destination == "dataset" etc.
=> old logic - val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId)) - val exportResponse = resultExportService.exportResult(user.user, request) - Response.ok(exportResponse).build() + case _ => + // destination = "dataset" by default + val resultExportService = new ResultExportService(WorkflowIdentity(request.workflowId)) + val exportResponse = resultExportService.exportResult(user.user, request) + Response.ok(exportResponse).build() } } catch { case ex: Exception => diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts index 0a693c613fe..621a186bea7 100644 --- a/core/gui/src/app/dashboard/service/user/download/download.service.ts +++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts @@ -101,7 +101,7 @@ export class DownloadService { rowIndex: number, columnIndex: number, filename: string, - destination: string // "local" or "dataset" + destination: "local" | "dataset" = "dataset" // "local" or "dataset" => default to "dataset" ): Observable { const requestBody = { exportType, diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index 33e531dee0b..a36e6942594 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -134,8 +134,10 @@ export class WorkflowResultExportService { rowIndex: number, columnIndex: number, filename: string, - exportAll: boolean = false, - destination: string = "dataset" // default to dataset + exportAll: boolean = false, // if the user click export button on the top bar (a.k.a menu), + // we should export all operators, otherwise, only highlighted ones + // which means export button is selected from context-menu + destination: "dataset" | "local" = "dataset" // default to dataset ): void { if (!environment.exportExecutionResultEnabled) { return; @@ -187,7 +189,7 @@ export class WorkflowResultExportService { this.notificationService.info("File downloaded successfully"); } else { // "dataset" => response is JSON - // The server might return a JSON with {status, message} + // The server should return a JSON with {status, message} const responseBody = response.body; if (responseBody && responseBody.status === "success") { this.notificationService.success(responseBody.message); From c4b7d4368830f248c67c699076a306353db0a92d Mon Sep 17 00:00:00 2001 From: ali Date: Fri, 14 Feb 2025 10:16:25 -0800 Subject: [PATCH 04/16] fix: merge lakeFS --- .../response/ResultExportResponse.scala | 4 +- .../web/service/ResultExportService.scala | 737 +++++++++++------- 2 files changed, 453 insertions(+), 288 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/response/ResultExportResponse.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/response/ResultExportResponse.scala index 6a7140615ab..cb02d51282e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/response/ResultExportResponse.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/model/websocket/response/ResultExportResponse.scala @@ -1,5 +1,3 @@ package edu.uci.ics.texera.web.model.websocket.response -import edu.uci.ics.texera.web.model.websocket.event.TexeraWebSocketEvent - -case class 
ResultExportResponse(status: String, message: String) extends TexeraWebSocketEvent +case class ResultExportResponse(status: String, message: String) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 2de178e0c86..e0067988073 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -2,7 +2,6 @@ package edu.uci.ics.texera.web.service import com.github.tototoshi.csv.CSVWriter import com.google.api.client.googleapis.json.GoogleJsonResponseException -import com.google.api.client.util.Lists import com.google.api.services.drive.Drive import com.google.api.services.drive.model.{File, FileList, Permission} import com.google.api.services.sheets.v4.Sheets @@ -10,18 +9,16 @@ import com.google.api.services.sheets.v4.model.{Spreadsheet, SpreadsheetProperti import edu.uci.ics.amber.core.storage.DocumentFactory import edu.uci.ics.amber.core.storage.model.VirtualDocument import edu.uci.ics.amber.core.tuple.Tuple +import edu.uci.ics.amber.core.virtualidentity.{OperatorIdentity, WorkflowIdentity} +import edu.uci.ics.amber.core.workflow.PortIdentity import edu.uci.ics.amber.engine.common.Utils.retry import edu.uci.ics.amber.util.PathUtils -import edu.uci.ics.amber.core.virtualidentity.{OperatorIdentity, WorkflowIdentity} import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse import edu.uci.ics.texera.web.resource.GoogleResource import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles -import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{ - WorkflowExecutionsResource, - WorkflowVersionResource -} +import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{WorkflowExecutionsResource, WorkflowVersionResource} import org.jooq.types.UInteger import edu.uci.ics.amber.util.ArrowUtils import edu.uci.ics.texera.web.service.WorkflowExecutionService.getLatestExecutionId @@ -43,8 +40,24 @@ import org.apache.commons.lang3.StringUtils import java.io.OutputStream import java.nio.channels.Channels +import java.util.zip.{ZipEntry, ZipOutputStream} +import javax.ws.rs.WebApplicationException +import javax.ws.rs.core.StreamingOutput import scala.util.Using -import edu.uci.ics.amber.core.workflow.PortIdentity +import java.io.{FilterOutputStream, IOException, OutputStream} + +/** + * A simple wrapper that ignores 'close()' calls on the underlying stream. + * This allows each operator's writer to call close() without ending the entire ZipOutputStream. 
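 * A sketch of the intended use (the `zipOut` name is illustrative; CSVWriter and ZipEntry
 * are the same ones this patch uses in exportOperatorsAsZip):
 * {{{
 *   zipOut.putNextEntry(new ZipEntry("operator-1.csv"))
 *   val writer = CSVWriter.open(new NonClosingOutputStream(zipOut))
 *   writer.writeRow(List("col1", "col2"))
 *   writer.close()      // closes only the wrapper; zipOut is still usable
 *   zipOut.closeEntry() // further entries can still be added
 * }}}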
+ */ +private class NonClosingOutputStream(os: OutputStream) extends FilterOutputStream(os) { + @throws[IOException] + override def close(): Unit = { + // do not actually close the underlying stream + super.flush() + // omit super.close() + } +} object ResultExportService { final private val UPLOAD_BATCH_ROW_COUNT = 10000 @@ -60,323 +73,336 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { import ResultExportService._ private val cache = new mutable.HashMap[String, String] - def exportResult( - user: User, - request: ResultExportRequest - ): ResultExportResponse = { - // retrieve the file link saved in the session if exists - if (cache.contains(request.exportType)) { - return ResultExportResponse( - "success", - s"Link retrieved from cache ${cache(request.exportType)}" - ) - } + private def generateOneOperatorResult(operatorId: String): VirtualDocument[Tuple] = { // By now the workflow should finish running // Only supports external port 0 for now. TODO: support multiple ports val storageUri = WorkflowExecutionsResource.getResultUriByExecutionAndPort( workflowIdentity, getLatestExecutionId(workflowIdentity).get, - OperatorIdentity(request.operatorId), + OperatorIdentity(operatorId), PortIdentity() ) val operatorResult: VirtualDocument[Tuple] = DocumentFactory.openDocument(storageUri.get)._1.asInstanceOf[VirtualDocument[Tuple]] + return operatorResult + } + + def exportResult(user: User, request: ResultExportRequest): ResultExportResponse = { + val successMessages = new mutable.ListBuffer[String]() + val errorMessages = new mutable.ListBuffer[String]() + + // iterate through all operator IDs + request.operatorIds.foreach { opId => + try { + val (messageOpt, errorOpt) = exportSingleOperator(user, request, opId) + messageOpt.foreach(successMessages += _) + errorOpt.foreach(errorMessages += _) + } catch { + case ex: Exception => + // catch any unforeseen exceptions so that other operators can still be attempted + errorMessages += s"Error exporting operator $opId: ${ex.getMessage}" + } + } + + if (errorMessages.isEmpty) { + ResultExportResponse("success", successMessages.mkString("\n")) + } else if (successMessages.isEmpty) { + ResultExportResponse("error", errorMessages.mkString("\n")) + } else { + // partial success + ResultExportResponse( + "partial", + s"Some operators succeeded:\n${successMessages.mkString("\n")}\n\n" + + s"Some operators failed:\n${errorMessages.mkString("\n")}" + ) + } + } + + /** + * Export the result for ONE operator. 
+   * Returns a pair (successMessage, errorMessage); at most one side is expected to be defined.
+   */
+  private def exportSingleOperator(
+      user: User,
+      request: ResultExportRequest,
+      operatorId: String
+  ): (Option[String], Option[String]) = {
+
+    // cache key for links already generated for this export type and operator
+    val cacheKey = s"${request.exportType}-$operatorId"
+    if (cache.contains(cacheKey)) {
+      return (
+        Some(s"Link retrieved from cache for operator $operatorId: ${cache(cacheKey)}"),
+        None
+      )
+    }
+
+    val execIdOpt = getLatestExecutionId(workflowIdentity)
+    if (execIdOpt.isEmpty) {
+      return (None, Some(s"Workflow ${request.workflowId} has no execution result"))
+    }
+
+    val operatorResult = generateOneOperatorResult(operatorId)
     if (operatorResult.getCount == 0) {
-      return ResultExportResponse("error", "The workflow contains no results")
+      return (None, Some("The workflow contains no results"))
     }

     val results: Iterable[Tuple] = operatorResult.get().to(Iterable)
     val attributeNames = results.head.getSchema.getAttributeNames

-    // handle the request according to export type
     request.exportType match {
       case "google_sheet" =>
-        handleGoogleSheetRequest(cache, request, results, attributeNames)
+        val (msg, err) = handleGoogleSheetRequest(operatorId, results, attributeNames, request)
+        (msg, err)
+
       case "csv" =>
-        handleCSVRequest(user, request, results, attributeNames)
+        val (msg, err) = handleCSVRequest(operatorId, user, request, results, attributeNames)
+        (msg, err)
+
       case "data" =>
-        handleDataRequest(user, request, results)
+        val (msg, err) = handleDataRequest(operatorId, user, request, results)
+        (msg, err)
+
       case "arrow" =>
-        handleArrowRequest(user, request, results)
-      case _ =>
-        ResultExportResponse("error", s"Unknown export type: ${request.exportType}")
+        val (msg, err) = handleArrowRequest(operatorId, user, request, results)
+        (msg, err)
+
+      case unknown =>
+        (None, Some(s"Unknown export type: $unknown"))
     }
   }

   private def handleCSVRequest(
-      user: User,
-      request: ResultExportRequest,
-      results: Iterable[Tuple],
-      headers: List[String]
-  ): ResultExportResponse = {
-    val pipedOutputStream = new PipedOutputStream()
-    val pipedInputStream = new PipedInputStream(pipedOutputStream)
-
-    pool.submit(() =>
-      {
-        val writer = CSVWriter.open(pipedOutputStream)
-        writer.writeRow(headers)
-        results.foreach { tuple =>
-          writer.writeRow(tuple.getFields.toIndexedSeq)
+      operatorId: String,
+      user: User,
+      request: ResultExportRequest,
+      results: Iterable[Tuple],
+      headers: List[String]
+  ): (Option[String], Option[String]) = {
+    try {
+      val pipedOutputStream = new PipedOutputStream()
+      val pipedInputStream = new PipedInputStream(pipedOutputStream)
+
+      pool.submit(new Runnable {
+        override def run(): Unit = {
+          val writer = CSVWriter.open(pipedOutputStream)
+          writer.writeRow(headers)
+          results.foreach { tuple =>
+            writer.writeRow(tuple.getFields.toIndexedSeq)
+          }
+          writer.close()
         }
-        writer.close()
-      }.asInstanceOf[Runnable]
-    )
+      })

-    val fileName = generateFileName(request, "csv")
-    saveToDatasets(request, user, pipedInputStream, fileName)
-
-    ResultExportResponse(
-      "success",
-      s"File saved to User Dashboard as $fileName to Datasets ${request.datasetIds.mkString(",")}"
-    )
+      val fileName = generateFileName(request, operatorId, "csv")
+      saveToDatasets(request, user, pipedInputStream, fileName)
+      (Some(s"CSV export done for operator $operatorId -> file: $fileName"), None)
+    } catch {
+      case ex: Exception =>
+        (None, Some(s"CSV export failed for operator $operatorId: ${ex.getMessage}"))
+    }
   }
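+  // Note on the piped-stream pattern used by the handlers above: a PipedOutputStream blocks
+  // once the pipe's internal buffer fills, so the producer must run on a separate thread
+  // (here, the class's `pool`) while the consumer (saveToDatasets) drains the
+  // PipedInputStream. A minimal sketch of the same pattern:
+  //
+  //   val out = new PipedOutputStream()
+  //   val in  = new PipedInputStream(out)
+  //   pool.submit(new Runnable {
+  //     override def run(): Unit = {
+  //       out.write("hello".getBytes(StandardCharsets.UTF_8))
+  //       out.close() // signals end-of-stream to the reader
+  //     }
+  //   })
+  //   saveToDatasets(request, user, in, fileName)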
   private def handleGoogleSheetRequest(
-      exportCache: mutable.HashMap[String, String],
-      request: ResultExportRequest,
-      results: Iterable[Tuple],
-      header: List[String]
-  ): ResultExportResponse = {
-    // create google sheet
-    val sheetService: Sheets = GoogleResource.getSheetService
-    val sheetId: String =
-      createGoogleSheet(sheetService, request.workflowName)
-    if (sheetId == null) {
-      return ResultExportResponse("error", "Fail to create google sheet")
-    }
+      operatorId: String,
+      results: Iterable[Tuple],
+      header: List[String],
+      request: ResultExportRequest
+  ): (Option[String], Option[String]) = {
+    try {
+      val sheetService: Sheets = GoogleResource.getSheetService
+      val sheetId: String = createGoogleSheet(sheetService, s"${request.workflowName}-$operatorId")
+      if (sheetId == null) {
+        return (None, Some(s"Failed to create Google Sheet for operator $operatorId"))
+      }
+
+      val driveService: Drive = GoogleResource.getDriveService
+      moveToResultFolder(driveService, sheetId)
+
+      // share the sheet with anyone who has the link
+      val perm = new Permission().setType("anyone").setRole("reader")
+      driveService.permissions().create(sheetId, perm).execute()

-    val driveService: Drive = GoogleResource.getDriveService
-    moveToResultFolder(driveService, sheetId)
-
-    // allow user to access this sheet in the service account
-    val sharePermission: Permission = new Permission()
-      .setType("anyone")
-      .setRole("reader")
-    driveService
-      .permissions()
-      .create(sheetId, sharePermission)
-      .execute()
-
-    // upload the content asynchronously to avoid long waiting on the user side.
-    pool
-      .submit(() =>
-        {
+      // asynchronously upload data
+      pool.submit(new Runnable {
+        override def run(): Unit = {
           uploadHeader(sheetService, sheetId, header)
           uploadResult(sheetService, sheetId, results)
-        }.asInstanceOf[Runnable]
-      )
-
-    // generate success response
-    val link = s"https://docs.google.com/spreadsheets/d/$sheetId/edit"
-    val message: String =
-      s"Google sheet created. The results may be still uploading.
You can access the sheet $link" - // save the file link in the session cache - exportCache(request.exportType) = link - ResultExportResponse("success", message) - } + } + }) - /** - * create the google sheet and return the sheet Id - */ - private def createGoogleSheet(sheetService: Sheets, workflowName: String): String = { - val createSheetRequest = new Spreadsheet() - .setProperties(new SpreadsheetProperties().setTitle(workflowName)) - val targetSheet: Spreadsheet = sheetService.spreadsheets - .create(createSheetRequest) - .setFields("spreadsheetId") - .execute - targetSheet.getSpreadsheetId - } + val link = s"https://docs.google.com/spreadsheets/d/$sheetId/edit" + // you can store in a small local cache if you want + val cacheKey = s"${request.exportType}-$operatorId" + cache(cacheKey) = link - private def handleDataRequest( - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): ResultExportResponse = { - val rowIndex = request.rowIndex - val columnIndex = request.columnIndex - val filename = request.filename - - if (rowIndex >= results.size || columnIndex >= results.head.getFields.length) { - return ResultExportResponse("error", s"Invalid row or column index") + val msg = s"Google sheet created for operator $operatorId: $link (results are uploading)" + (Some(msg), None) + } catch { + case ex: Exception => + (None, Some(s"Google Sheet export failed for operator $operatorId: ${ex.getMessage}")) } + } - val selectedRow = results.toSeq(rowIndex) - val field: Any = selectedRow.getField(columnIndex) - val dataBytes: Array[Byte] = convertFieldToBytes(field) - - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - - pool.submit(() => - { - pipedOutputStream.write(dataBytes) - pipedOutputStream.close() - }.asInstanceOf[Runnable] - ) - - saveToDatasets(request, user, pipedInputStream, filename) - - ResultExportResponse( - "success", - s"Data file $filename saved to Datasets ${request.datasetIds.mkString(",")}" - ) + private def createGoogleSheet(sheetService: Sheets, title: String): String = { + val sheetProps = new SpreadsheetProperties().setTitle(title) + val createReq = new Spreadsheet().setProperties(sheetProps) + val target = sheetService.spreadsheets.create(createReq).setFields("spreadsheetId").execute() + target.getSpreadsheetId } - /** - * move the workflow results to a specific folder - */ @tailrec private def moveToResultFolder( - driveService: Drive, - sheetId: String, - retry: Boolean = true - ): Unit = { + driveService: Drive, + sheetId: String, + retryOnce: Boolean = true + ): Unit = { val folderId = retrieveResultFolderId(driveService) try { - driveService - .files() - .update(sheetId, null) - .setAddParents(folderId) - .execute() + driveService.files().update(sheetId, null).setAddParents(folderId).execute() } catch { - case exception: GoogleJsonResponseException => - if (retry) { - // This exception maybe caused by the full deletion of the target folder and - // the cached folder id is obsolete. - // * note: by full deletion, the folder has to be deleted from trash as well. - // In this case, try again. - moveToResultFolder(driveService, sheetId, retry = false) + case ex: GoogleJsonResponseException => + if (retryOnce) { + // maybe folder was deleted/trash, so try again + moveToResultFolder(driveService, sheetId, retryOnce = false) } else { - // if the exception continues to show up then just throw it normally. 
- throw exception + throw ex } } } private def retrieveResultFolderId(driveService: Drive): String = synchronized { - val folderResult: FileList = driveService - .files() - .list() - .setQ( - s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'" - ) - .setSpaces("drive") - .execute() + val folderResult: FileList = + driveService + .files() + .list() + .setQ( + s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'" + ) + .setSpaces("drive") + .execute() if (folderResult.getFiles.isEmpty) { - val fileMetadata: File = new File() + val fileMetadata = new File() fileMetadata.setName(WORKFLOW_RESULT_FOLDER_NAME) fileMetadata.setMimeType("application/vnd.google-apps.folder") - val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute + val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute() targetFolder.getId } else { folderResult.getFiles.get(0).getId } } - /** - * upload the result header to the google sheet - */ - private def uploadHeader( - sheetService: Sheets, - sheetId: String, - header: List[AnyRef] - ): Unit = { + private def uploadHeader(sheetService: Sheets, sheetId: String, header: List[AnyRef]): Unit = { uploadContent(sheetService, sheetId, List(header.asJava).asJava) } - /** - * upload the result body to the google sheet - */ - private def uploadResult( - sheetService: Sheets, - sheetId: String, - result: Iterable[Tuple] - ): Unit = { - val content: util.List[util.List[AnyRef]] = - Lists.newArrayListWithCapacity(UPLOAD_BATCH_ROW_COUNT) - // use for loop to avoid copying the whole result at the same time - for (tuple: Tuple <- result) { - - val tupleContent: util.List[AnyRef] = - tuple.getFields - .map(convertUnsupported) - .toArray - .toList - .asJava - content.add(tupleContent) - - if (content.size() == UPLOAD_BATCH_ROW_COUNT) { - uploadContent(sheetService, sheetId, content) - content.clear() + private def uploadResult(sheetService: Sheets, sheetId: String, result: Iterable[Tuple]): Unit = { + val batch = new util.ArrayList[util.List[AnyRef]](UPLOAD_BATCH_ROW_COUNT) + + for (tuple <- result) { + val row: util.List[AnyRef] = tuple.getFields.map(convertUnsupported).toList.asJava + batch.add(row) + + if (batch.size() == UPLOAD_BATCH_ROW_COUNT) { + uploadContent(sheetService, sheetId, batch) + batch.clear() } } + if (!batch.isEmpty) { + uploadContent(sheetService, sheetId, batch) + } + } - if (!content.isEmpty) { - uploadContent(sheetService, sheetId, content) + private def uploadContent( + sheetService: Sheets, + sheetId: String, + content: util.List[util.List[AnyRef]] + ): Unit = { + val body = new ValueRange().setValues(content) + val range = "A1" + val options = "RAW" + retry(attempts = RETRY_ATTEMPTS, baseBackoffTimeInMS = BASE_BACK_OOF_TIME_IN_MS) { + sheetService.spreadsheets + .values() + .append(sheetId, range, body) + .setValueInputOption(options) + .execute() } } - /** - * convert the tuple content into the type the Google Sheet API supports - */ - private def convertUnsupported(content: Any): AnyRef = { - content match { + private def convertUnsupported(anyVal: Any): AnyRef = { + anyVal match { + case null => "" + case s: String => s + case n: Number => n + case other => other.toString + } + } - // if null, use empty string to represent. 
- case null => "" + private def handleDataRequest( + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { + try { + val rowIndex = request.rowIndex + val columnIndex = request.columnIndex + val fileName = request.filename - // Google Sheet API supports String and number(long, int, double and so on) - case _: String | _: Number => content.asInstanceOf[AnyRef] + if (rowIndex >= results.size || columnIndex >= results.head.getFields.length) { + return (None, Some(s"Invalid rowIndex or columnIndex for operator $operatorId")) + } - // convert all the other type into String - case _ => content.toString - } + val selectedRow = results.toSeq(rowIndex) + val field: Any = selectedRow.getField(columnIndex) + val dataBytes: Array[Byte] = convertFieldToBytes(field) - } + val pipedOutputStream = new PipedOutputStream() + val pipedInputStream = new PipedInputStream(pipedOutputStream) - /** - * upload the content to the google sheet - * The type of content is java list because the google API is in java - */ - private def uploadContent( - sheetService: Sheets, - sheetId: String, - content: util.List[util.List[AnyRef]] - ): Unit = { - val body: ValueRange = new ValueRange().setValues(content) - val range: String = "A1" - val valueInputOption: String = "RAW" - - // using retry logic here, to handle possible API errors, i.e., rate limit exceeded. - retry(attempts = RETRY_ATTEMPTS, baseBackoffTimeInMS = BASE_BACK_OOF_TIME_IN_MS) { - sheetService.spreadsheets.values - .append(sheetId, range, body) - .setValueInputOption(valueInputOption) - .execute + pool.submit(new Runnable { + override def run(): Unit = { + pipedOutputStream.write(dataBytes) + pipedOutputStream.close() + } + }) + + saveToDatasets(request, user, pipedInputStream, fileName) + (Some(s"Data export done for operator $operatorId -> file: $fileName"), None) + } catch { + case ex: Exception => + (None, Some(s"Data export failed for operator $operatorId: ${ex.getMessage}")) } + } + private def convertFieldToBytes(field: Any): Array[Byte] = { + field match { + case data: Array[Byte] => data + case data: String => data.getBytes(StandardCharsets.UTF_8) + case other => other.toString.getBytes(StandardCharsets.UTF_8) + } } private def handleArrowRequest( - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): ResultExportResponse = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { if (results.isEmpty) { - return ResultExportResponse("error", "No results to export") + return (None, Some(s"No results to export for operator $operatorId")) } - val pipedOutputStream = new PipedOutputStream() - val pipedInputStream = new PipedInputStream(pipedOutputStream) - val allocator = new RootAllocator() + try { + val pipedOutputStream = new PipedOutputStream() + val pipedInputStream = new PipedInputStream(pipedOutputStream) + val allocator = new RootAllocator() - pool.submit(() => - { + pool.submit(() => { Using.Manager { use => val (writer, root) = createArrowWriter(results, allocator, pipedOutputStream) use(writer) @@ -386,23 +412,23 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { writeArrowData(writer, root, results) } - }.asInstanceOf[Runnable] - ) + }) - val fileName = generateFileName(request, "arrow") - saveToDatasets(request, user, pipedInputStream, fileName) + val fileName = generateFileName(request, operatorId, "arrow") + saveToDatasets(request, user, 
pipedInputStream, fileName) - ResultExportResponse( - "success", - s"Arrow file saved as $fileName to Datasets ${request.datasetIds.mkString(",")}" - ) + (Some(s"Arrow file export done for operator $operatorId -> file: $fileName"), None) + } catch { + case ex: Exception => + (None, Some(s"Arrow export failed for operator $operatorId: ${ex.getMessage}")) + } } private def createArrowWriter( - results: Iterable[Tuple], - allocator: RootAllocator, - outputStream: OutputStream - ): (ArrowFileWriter, VectorSchemaRoot) = { + results: Iterable[Tuple], + allocator: RootAllocator, + outputStream: OutputStream + ): (ArrowFileWriter, VectorSchemaRoot) = { val schema = results.head.getSchema val arrowSchema = ArrowUtils.fromTexeraSchema(schema) val root = VectorSchemaRoot.create(arrowSchema, allocator) @@ -412,73 +438,214 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def writeArrowData( - writer: ArrowFileWriter, - root: VectorSchemaRoot, - results: Iterable[Tuple] - ): Unit = { + writer: ArrowFileWriter, + root: VectorSchemaRoot, + results: Iterable[Tuple] + ): Unit = { writer.start() val batchSize = 1000 + val resultList = results.toList + val totalSize = resultList.size - // Convert to Seq to get total size - val resultSeq = results.toSeq - val totalSize = resultSeq.size - - // Process in complete batches for (batchStart <- 0 until totalSize by batchSize) { val batchEnd = Math.min(batchStart + batchSize, totalSize) val currentBatchSize = batchEnd - batchStart - // Process each tuple in the current batch for (i <- 0 until currentBatchSize) { - val tuple = resultSeq(batchStart + i) + val tuple = resultList(batchStart + i) ArrowUtils.setTexeraTuple(tuple, i, root) } - - // Set the correct row count for this batch and write it root.setRowCount(currentBatchSize) writer.writeBatch() root.clear() } - writer.end() } - private def generateFileName(request: ResultExportRequest, extension: String): String = { + private def generateFileName( + request: ResultExportRequest, + operatorId: String, + extension: String + ): String = { val latestVersion = - WorkflowVersionResource.getLatestVersion(UInteger.valueOf(request.workflowId)) + WorkflowVersionResource.getLatestVersion(org.jooq.types.UInteger.valueOf(request.workflowId)) val timestamp = LocalDateTime .now() .truncatedTo(ChronoUnit.SECONDS) .format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")) - StringUtils.replaceEach( - s"${request.workflowName}-v$latestVersion-${request.operatorName}-$timestamp.$extension", - Array("/", "\\"), - Array("", "") - ) + + val rawName = s"${request.workflowName}-op$operatorId-v$latestVersion-$timestamp.$extension" + // remove any path separators + StringUtils.replaceEach(rawName, Array("/", "\\"), Array("", "")) } private def saveToDatasets( - request: ResultExportRequest, - user: User, - pipedInputStream: PipedInputStream, - fileName: String - ): Unit = { + request: ResultExportRequest, + user: User, + pipedInputStream: PipedInputStream, + fileName: String + ): Unit = { request.datasetIds.foreach { did => - val datasetPath = PathUtils.getDatasetPath(UInteger.valueOf(did)) + val datasetPath = PathUtils.getDatasetPath(org.jooq.types.UInteger.valueOf(did)) val filePath = datasetPath.resolve(fileName) createNewDatasetVersionByAddingFiles( - UInteger.valueOf(did), + org.jooq.types.UInteger.valueOf(did), user, Map(filePath -> pipedInputStream) ) } } - private def convertFieldToBytes(field: Any): Array[Byte] = { - field match { - case data: Array[Byte] => data - case data: String => 
data.getBytes(StandardCharsets.UTF_8) - case data => data.toString.getBytes(StandardCharsets.UTF_8) + /** + * For local download of a single operator. Streams the data directly. + * We return (StreamingOutput, Some(filename)) on success, or (null, None) on error. + */ + def exportOperatorResultAsStream( + request: ResultExportRequest, + operatorId: String + ): (StreamingOutput, Option[String]) = { + val execIdOpt = getLatestExecutionId(workflowIdentity) + if (execIdOpt.isEmpty) { + return (null, None) + } + + val operatorResult = generateOneOperatorResult(operatorId) + if (operatorResult.getCount == 0) { + return (null, None) + } + + val results: Iterable[Tuple] = operatorResult.get().to(Iterable) + val extension: String = request.exportType match { + case "csv" => "csv" + case "arrow" => "arrow" + case "data" => "bin" + case other => "dat" + } + + val fileName = generateFileName(request, operatorId, extension) + + val streamingOutput: StreamingOutput = new StreamingOutput { + override def write(out: OutputStream): Unit = { + request.exportType match { + case "csv" => writeCsv(out, results) + case "arrow" => writeArrow(out, results) + case _ => writeCsv(out, results) // fallback + } + } } + + (streamingOutput, Some(fileName)) + } + + /** + * Writes CSV to output stream + */ + private def writeCsv(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { + // for large data, you might want a buffered approach + val csvWriter = CSVWriter.open(outputStream) // Tototoshi CSVWriter can open an OutputStream + val headers = results.head.getSchema.getAttributeNames + csvWriter.writeRow(headers) + results.foreach { tuple => + csvWriter.writeRow(tuple.getFields.toIndexedSeq) + } + csvWriter.close() } + + /** + * Writes Arrow to output stream + */ + private def writeArrow(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { + if (results.isEmpty) return + + val allocator = new RootAllocator() + Using.Manager { use => + val (writer, root) = createArrowWriter(results, allocator, outputStream) + use(writer) + use(root) + use(allocator) + + writer.start() + val batchSize = 1000 + val resultList = results.toList + val totalSize = resultList.size + + for (batchStart <- 0 until totalSize by batchSize) { + val batchEnd = Math.min(batchStart + batchSize, totalSize) + val currentBatchSize = batchEnd - batchStart + + for (i <- 0 until currentBatchSize) { + val tuple = resultList(batchStart + i) + ArrowUtils.setTexeraTuple(tuple, i, root) + } + root.setRowCount(currentBatchSize) + writer.writeBatch() + root.clear() + } + writer.end() + } + } + + def exportOperatorsAsZip( + user: User, + request: ResultExportRequest + ): (StreamingOutput, Option[String]) = { + if (request.operatorIds.isEmpty) { + return (null, None) + } + + val timestamp = LocalDateTime + .now() + .truncatedTo(ChronoUnit.SECONDS) + .format(DateTimeFormatter.ofPattern("yyyy-MM-dd_HH-mm-ss")) + val zipFileName = s"${request.workflowName}-$timestamp.zip" + + val streamingOutput = new StreamingOutput { + override def write(outputStream: OutputStream): Unit = { + Using.resource(new ZipOutputStream(outputStream)) { zipOut => + request.operatorIds.foreach { opId => + val execIdOpt = getLatestExecutionId(workflowIdentity) + if (execIdOpt.isEmpty) { + throw new WebApplicationException( + s"No execution result for workflow ${request.workflowId}" + ) + } + + val operatorResult = generateOneOperatorResult(opId) + + if (operatorResult.getCount == 0) { + // create empty record + zipOut.putNextEntry(new ZipEntry(s"$opId-empty.txt")) + val msg = 
s"Operator $opId has no results" + zipOut.write(msg.getBytes(StandardCharsets.UTF_8)) + zipOut.closeEntry() + } else { + val results = operatorResult.get().to(Iterable) + val extension = request.exportType match { + case "csv" => "csv" + case "arrow" => "arrow" + case "data" => "bin" + case _ => "dat" + } + val operatorFileName = generateFileName(request, opId, extension) + + zipOut.putNextEntry(new ZipEntry(operatorFileName)) + + // create a non-closing wrapper around zipOut + val nonClosingStream = new NonClosingOutputStream(zipOut) + + request.exportType match { + case "csv" => writeCsv(nonClosingStream, results) + case "arrow" => writeArrow(nonClosingStream, results) + case _ => writeCsv(nonClosingStream, results) + } + zipOut.closeEntry() + } + } + } + } + } + + (streamingOutput, Some(zipFileName)) + } + } From d3625f0c6569b99652113fa65dc1e0ed9e0e8e1e Mon Sep 17 00:00:00 2001 From: ali Date: Fri, 14 Feb 2025 12:24:17 -0800 Subject: [PATCH 05/16] fix: menu bar --- .../result-exportation.component.ts | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index 432b0a7bef9..c642add31a4 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -53,9 +53,20 @@ export class ResultExportationComponent implements OnInit { } updateOutputType(): void { - const highlightedOperatorIds = this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs(); + // Determine if the caller of this component is menu or context menu + // if its menu then we need to export all operators else we need to export only highlighted operators + // TODO: currently, user need to set `view result` to true in order to export result but + // we should allow user to export result without setting `view result` to true + let operatorIds: readonly string[]; + if (this.sourceTriggered === "menu") { + operatorIds = this.workflowActionService.getTexeraGraph().getAllOperators().map(op => op.operatorID); + console.log("operatorIds in menu ", operatorIds); + } else { + operatorIds = this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs(); + console.log("operatorIds in context menu ", operatorIds); + } - if (highlightedOperatorIds.length === 0) { + if (operatorIds.length === 0) { // No operators highlighted this.isTableOutput = false; this.isVisualizationOutput = false; @@ -69,7 +80,7 @@ export class ResultExportationComponent implements OnInit { let allVisualization = true; let anyBinaryData = false; - for (const operatorId of highlightedOperatorIds) { + for (const operatorId of operatorIds) { const outputTypes = this.workflowResultService.determineOutputTypes(operatorId); if (!outputTypes.isTableOutput) { allTable = false; From 3e4f0c2429e0b953cc91f945ba9aeaf5912b8215 Mon Sep 17 00:00:00 2001 From: ali Date: Sat, 15 Feb 2025 15:58:39 -0800 Subject: [PATCH 06/16] fix: type safe --- .../service/user/download/download.interface.ts | 4 ++++ .../service/user/download/download.service.ts | 14 ++++++++------ .../result-exportation.component.ts | 4 ++-- .../workflow-result-export.service.ts | 7 +++++-- 4 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 core/gui/src/app/dashboard/service/user/download/download.interface.ts diff 
--git a/core/gui/src/app/dashboard/service/user/download/download.interface.ts b/core/gui/src/app/dashboard/service/user/download/download.interface.ts new file mode 100644 index 00000000000..d11ed246f71 --- /dev/null +++ b/core/gui/src/app/dashboard/service/user/download/download.interface.ts @@ -0,0 +1,4 @@ +export interface ExportWorkflowJsonResponse { + status: string; + message: string; +} diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts index 621a186bea7..e1f27c4ca64 100644 --- a/core/gui/src/app/dashboard/service/user/download/download.service.ts +++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts @@ -9,6 +9,7 @@ import * as JSZip from "jszip"; import { Workflow } from "../../../../common/type/workflow"; import { AppSettings } from "../../../../common/app-setting"; import { HttpClient, HttpResponse } from "@angular/common/http"; +import { ExportWorkflowJsonResponse } from "./download.interface"; export const EXPORT_BASE_URL = "result/export"; @@ -16,7 +17,7 @@ interface DownloadableItem { blob: Blob; fileName: string; } -/* TODO: refactor download service to export */ + @Injectable({ providedIn: "root", }) @@ -102,7 +103,7 @@ export class DownloadService { columnIndex: number, filename: string, destination: "local" | "dataset" = "dataset" // "local" or "dataset" => default to "dataset" - ): Observable { + ): Observable | HttpResponse> { const requestBody = { exportType, workflowId, @@ -115,8 +116,8 @@ export class DownloadService { destination, }; if (destination === "local") { - return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { - responseType: "blob" as const, + return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { + responseType: "blob", observe: "response", headers: { "Content-Type": "application/json", @@ -125,8 +126,8 @@ export class DownloadService { }); } else { // dataset => return JSON - return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { - responseType: "json" as const, + return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { + responseType: "json", observe: "response", headers: { "Content-Type": "application/json", @@ -134,6 +135,7 @@ export class DownloadService { }, }); } + } /** diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index c642add31a4..611caaaa825 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -110,7 +110,7 @@ export class ResultExportationComponent implements OnInit { onClickSaveResultFileToDatasets(dataset: DashboardDataset) { if (dataset.dataset.did) { - this.workflowResultExportService.exportWorkflowExecutionResult( + this.workflowResultExportService.exportWorkflowExecutionResultToLocal( this.exportType, this.workflowName, [dataset.dataset.did], @@ -125,7 +125,7 @@ export class ResultExportationComponent implements OnInit { } onClickSaveResultFileToLocal() { - this.workflowResultExportService.exportWorkflowExecutionResult( + this.workflowResultExportService.exportWorkflowExecutionResultToLocal( this.exportType, this.workflowName, [], diff --git 
a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index a36e6942594..8be71efa4f4 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -12,6 +12,8 @@ import { filter } from "rxjs/operators"; import { OperatorResultService, WorkflowResultService } from "../workflow-result/workflow-result.service"; import { OperatorPaginationResultService } from "../workflow-result/workflow-result.service"; import { DownloadService } from "../../../dashboard/service/user/download/download.service"; +import { HttpResponse } from "@angular/common/http"; +import { ExportWorkflowJsonResponse } from "../../../dashboard/service/user/download/download.interface"; @Injectable({ providedIn: "root", @@ -127,7 +129,7 @@ export class WorkflowResultExportService { /** * export the workflow execution result according the export type */ - exportWorkflowExecutionResult( + exportWorkflowExecutionResultToLocal( exportType: string, workflowName: string, datasetIds: ReadonlyArray = [], @@ -190,7 +192,8 @@ export class WorkflowResultExportService { } else { // "dataset" => response is JSON // The server should return a JSON with {status, message} - const responseBody = response.body; + const jsonResponse = response as HttpResponse; + const responseBody = jsonResponse.body; if (responseBody && responseBody.status === "success") { this.notificationService.success(responseBody.message); } else { From 6987bd6b4ee4c38d3fbb9d6146bc38bdc001596b Mon Sep 17 00:00:00 2001 From: ali Date: Sat, 15 Feb 2025 16:01:15 -0800 Subject: [PATCH 07/16] fix: type safe --- .../workflow-result-export.service.ts | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index 8be71efa4f4..5eea6d97b35 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -151,15 +151,14 @@ export class WorkflowResultExportService { } // gather operator IDs - let operatorIds: string[] = []; - if (!exportAll) { - operatorIds = [...this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs()]; - } else { - operatorIds = this.workflowActionService - .getTexeraGraph() - .getAllOperators() - .map(operator => operator.operatorID); - } + const operatorIds = exportAll ? 
this.workflowActionService
+          .getTexeraGraph()
+          .getAllOperators()
+          .map(operator => operator.operatorID)
+      : [...this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs()];
+
     if (operatorIds.length === 0) {
       console.log("No operators selected to export");

From c7d16a9a0653d015e703d2cdcdb8f4328a67d531 Mon Sep 17 00:00:00 2001
From: ali
Date: Tue, 18 Feb 2025 11:45:56 -0800
Subject: [PATCH 08/16] fix: error prune operators

---
 .../web/service/ResultExportService.scala     |  25 +--
 core/gui/package.json                         |   2 +
 .../user/download/download.interface.ts       |   4 -
 .../service/user/download/download.service.ts |  18 +-
 .../result-exportation.component.html         |   4 +-
 .../result-exportation.component.ts           |  26 ++-
 .../workflow-result-export.service.spec.ts    | 172 ------------------
 .../workflow-result-export.service.ts         | 150 ++-------------
 .../workflow-result.service.ts                |   2 +
 core/gui/yarn.lock                            |  11 +-
 10 files changed, 75 insertions(+), 339 deletions(-)
 delete mode 100644 core/gui/src/app/dashboard/service/user/download/download.interface.ts

diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
index e0067988073..5c309393506 100644
--- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
+++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala
@@ -83,9 +83,8 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) {
       OperatorIdentity(operatorId),
       PortIdentity()
     )
-    val operatorResult: VirtualDocument[Tuple] =
-      DocumentFactory.openDocument(storageUri.get)._1.asInstanceOf[VirtualDocument[Tuple]]
-    return operatorResult
+    // Return null when no result URI exists, so callers can treat a missing result
+    // the same way as an empty one (check for null or getCount == 0).
+    storageUri.map(uri => DocumentFactory.openDocument(uri)._1.asInstanceOf[VirtualDocument[Tuple]]).orNull
   }

   def exportResult(user: User, request: ResultExportRequest): ResultExportResponse = {
     val successMessages = new mutable.ListBuffer[String]()
     val errorMessages = new mutable.ListBuffer[String]()
@@ -110,12 +109,16 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) {
     } else if (successMessages.isEmpty) {
       ResultExportResponse("error", errorMessages.mkString("\n"))
     } else {
+      ResultExportResponse("success", successMessages.mkString("\n"))
       // partial success
-      ResultExportResponse(
-        "partial",
-        s"Some operators succeeded:\n${successMessages.mkString("\n")}\n\n" +
-          s"Some operators failed:\n${errorMessages.mkString("\n")}"
-      )
+      // TODO: the user should be informed when some operators are not exported due to errors;
+      // for now we assume such errors mean "no result" and report success as long as
+      // at least one operator was exported successfully
+//      ResultExportResponse(
+//        "partial",
+//        s"Some operators succeeded:\n${successMessages.mkString("\n")}\n\n" +
+//          s"Some operators failed:\n${errorMessages.mkString("\n")}"
+//      )
     }
   }
@@ -144,7 +147,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) {
     }

     val operatorResult = generateOneOperatorResult(operatorId)
-    if (operatorResult.getCount == 0) {
+    if (operatorResult == null || operatorResult.getCount == 0) {
       return (None, Some("The workflow contains no results"))
     }
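A possible Option-based alternative to the null return above (a sketch only, not part of this
patch; every name below is one this diff already uses):

    private def findOneOperatorResult(operatorId: String): Option[VirtualDocument[Tuple]] =
      WorkflowExecutionsResource
        .getResultUriByExecutionAndPort(
          workflowIdentity,
          getLatestExecutionId(workflowIdentity).get,
          OperatorIdentity(operatorId),
          PortIdentity()
        )
        .map(uri => DocumentFactory.openDocument(uri)._1.asInstanceOf[VirtualDocument[Tuple]])
        .filter(_.getCount > 0)

@@ -510,7 +513,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) {
     }

     val operatorResult = generateOneOperatorResult(operatorId)
-    if (operatorResult.getCount == 0) {
+    if (operatorResult == null || operatorResult.getCount == 0) {
       return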
(null, None) } @@ -612,7 +615,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { val operatorResult = generateOneOperatorResult(opId) - if (operatorResult.getCount == 0) { + if (operatorResult == null || operatorResult.getCount == 0) { // create empty record zipOut.putNextEntry(new ZipEntry(s"$opId-empty.txt")) val msg = s"Operator $opId has no results" diff --git a/core/gui/package.json b/core/gui/package.json index 6208258a3dd..4c5f27552ed 100644 --- a/core/gui/package.json +++ b/core/gui/package.json @@ -47,6 +47,7 @@ "@types/plotly.js-basic-dist-min": "2.12.4", "ajv": "8.10.0", "backbone": "1.4.1", + "content-disposition": "^0.5.4", "dagre": "0.8.5", "deep-map": "2.0.0", "edit-distance": "1.0.4", @@ -111,6 +112,7 @@ "@nrwl/nx-cloud": "19.1.0", "@nx/angular": "20.0.3", "@types/backbone": "1.4.15", + "@types/content-disposition": "^0", "@types/dagre": "0.7.47", "@types/file-saver": "2.0.5", "@types/graphlib": "2.1.8", diff --git a/core/gui/src/app/dashboard/service/user/download/download.interface.ts b/core/gui/src/app/dashboard/service/user/download/download.interface.ts deleted file mode 100644 index d11ed246f71..00000000000 --- a/core/gui/src/app/dashboard/service/user/download/download.interface.ts +++ /dev/null @@ -1,4 +0,0 @@ -export interface ExportWorkflowJsonResponse { - status: string; - message: string; -} diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts index e1f27c4ca64..0febc5532ec 100644 --- a/core/gui/src/app/dashboard/service/user/download/download.service.ts +++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts @@ -9,7 +9,7 @@ import * as JSZip from "jszip"; import { Workflow } from "../../../../common/type/workflow"; import { AppSettings } from "../../../../common/app-setting"; import { HttpClient, HttpResponse } from "@angular/common/http"; -import { ExportWorkflowJsonResponse } from "./download.interface"; +var contentDisposition = require("content-disposition") export const EXPORT_BASE_URL = "result/export"; @@ -18,6 +18,11 @@ interface DownloadableItem { fileName: string; } +export interface ExportWorkflowJsonResponse { + status: string; + message: string; +} + @Injectable({ providedIn: "root", }) @@ -144,14 +149,13 @@ export class DownloadService { public saveBlobFile(response: any, defaultFileName: string): void { // If the server sets "Content-Disposition: attachment; filename="someName.csv"" header, // we can parse that out. Otherwise just use defaultFileName. 
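    // Illustrative parse (header value assumed): the content-disposition package
    // exposes parse(), which returns an object with `type` and `parameters`, e.g.
    //   contentDisposition.parse('attachment; filename="result.csv"')
    //   // => { type: "attachment", parameters: { filename: "result.csv" } }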
- const contentDisposition = response.headers.get("Content-Disposition"); + const dispositionHeader = response.headers.get("Content-Disposition"); let fileName = defaultFileName; - if (contentDisposition) { - const match = contentDisposition.match(/filename="(.+)"/); - if (match && match[1]) { - fileName = match[1]; - } + if (dispositionHeader) { + const parsed = contentDisposition.parse(dispositionHeader); + fileName = parsed.parameters.filename || defaultFileName; } + const blob = response.body; // the actual file data this.fileSaverService.saveAs(blob, fileName); } diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html index d25c4ddbad1..e1821876b5f 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.html @@ -89,7 +89,7 @@ nz-button nzType="primary" class="dataset-option-link-btn" - (click)="onClickSaveResultFileToDatasets(dataset)"> + (click)="onClickExportResult('dataset', dataset)"> Save @@ -99,7 +99,7 @@ nz-button nzType="default" *ngIf="destination === 'local'" - (click)="onClickSaveResultFileToLocal()"> + (click)="onClickExportResult( 'local')"> Export diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index 611caaaa825..98f104ff6b1 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -60,10 +60,8 @@ export class ResultExportationComponent implements OnInit { let operatorIds: readonly string[]; if (this.sourceTriggered === "menu") { operatorIds = this.workflowActionService.getTexeraGraph().getAllOperators().map(op => op.operatorID); - console.log("operatorIds in menu ", operatorIds); } else { operatorIds = this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs(); - console.log("operatorIds in context menu ", operatorIds); } if (operatorIds.length === 0) { @@ -82,6 +80,9 @@ export class ResultExportationComponent implements OnInit { for (const operatorId of operatorIds) { const outputTypes = this.workflowResultService.determineOutputTypes(operatorId); + if (!outputTypes.hasAnyResult) { + continue; + } if (!outputTypes.isTableOutput) { allTable = false; } @@ -110,7 +111,7 @@ export class ResultExportationComponent implements OnInit { onClickSaveResultFileToDatasets(dataset: DashboardDataset) { if (dataset.dataset.did) { - this.workflowResultExportService.exportWorkflowExecutionResultToLocal( + this.workflowResultExportService.exportWorkflowExecutionResult( this.exportType, this.workflowName, [dataset.dataset.did], @@ -125,7 +126,7 @@ export class ResultExportationComponent implements OnInit { } onClickSaveResultFileToLocal() { - this.workflowResultExportService.exportWorkflowExecutionResultToLocal( + this.workflowResultExportService.exportWorkflowExecutionResult( this.exportType, this.workflowName, [], @@ -137,4 +138,21 @@ export class ResultExportationComponent implements OnInit { ); this.modalRef.close(); } + + onClickExportResult(destination: "dataset" | "local", dataset: DashboardDataset = {} as DashboardDataset) { + const datasetIds = destination === "dataset" ? 
+ [dataset.dataset.did].filter((id): id is number => id !== undefined) : + []; + this.workflowResultExportService.exportWorkflowExecutionResult( + this.exportType, + this.workflowName, + datasetIds, + this.rowIndex, + this.columnIndex, + this.inputFileName, + this.sourceTriggered === "menu", + destination + ); + this.modalRef.close(); + } } diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts index 1159d1a06d5..04fde824191 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts @@ -94,176 +94,4 @@ describe("WorkflowResultExportService", () => { expect(service).toBeTruthy(); }); - it("should export paginated results as CSV for highlighted operators", fakeAsync(() => { - // Arrange - jointGraphWrapperSpy.getCurrentHighlightedOperatorIDs.and.returnValue(["operator1"]); - - const paginatedResultServiceSpy = jasmine.createSpyObj("OperatorPaginationResultService", ["selectPage"]); - - // Mock the paginated result service for 'operator1' - workflowResultServiceSpy.getPaginatedResultService.and.callFake(operatorId => { - if (operatorId === "operator1") { - return paginatedResultServiceSpy; - } - return undefined; - }); - workflowResultServiceSpy.getResultService.and.returnValue(undefined); - - // Mock paginated results for multiple pages - const paginatedResults: PaginatedResultEvent[] = [ - { - requestID: "request1", - operatorID: "operator1", - pageIndex: 0, - table: Array.from({ length: 10 }, (_, i) => ({ column1: `value${i}`, column2: `value${i}` })), - schema: [ - { attributeName: "column1", attributeType: "string" }, - { attributeName: "column2", attributeType: "string" }, - ], - }, - { - requestID: "request1", - operatorID: "operator1", - pageIndex: 1, - table: Array.from({ length: 10 }, (_, i) => ({ column1: `value${i + 10}`, column2: `value${i + 10}` })), - schema: [ - { attributeName: "column1", attributeType: "string" }, - { attributeName: "column2", attributeType: "string" }, - ], - }, - { - requestID: "request1", - operatorID: "operator1", - pageIndex: 2, - table: [{ column1: "value20", column2: "value20" }], - schema: [ - { attributeName: "column1", attributeType: "string" }, - { attributeName: "column2", attributeType: "string" }, - ], - }, - ]; - - paginatedResultServiceSpy.selectPage.and.callFake((page: number, size: any) => { - const index = page - 1; - if (index < paginatedResults.length) { - return of(paginatedResults[index]); - } else { - return EMPTY; - } - }); - - // Act - service.exportOperatorsResultToLocal(false); - - // Simulate asynchronous operations - tick(); - - // Assert - expect(downloadServiceSpy.downloadOperatorsResult).toHaveBeenCalled(); - const args = downloadServiceSpy.downloadOperatorsResult.calls.mostRecent().args; - expect(args[0]).toEqual(jasmine.any(Array)); - expect(args[1]).toEqual(jasmine.objectContaining({ wid: jasmine.any(String) })); - - const resultObservables = args[0]; - resultObservables[0].subscribe(files => { - expect(files[0].filename).toBe("result_operator1.csv"); - expect(files[0].blob).toEqual(jasmine.any(Blob)); - }); - })); - - it("should export a single visualization result as an HTML file when there is only one result", done => { - // Arrange - 
jointGraphWrapperSpy.getCurrentHighlightedOperatorIDs.and.returnValue(["operator2"]); - - const resultServiceSpy = jasmine.createSpyObj("OperatorResultService", ["getCurrentResultSnapshot"]); - - // Mock the result service for 'operator2' - workflowResultServiceSpy.getResultService.and.callFake(operatorId => { - if (operatorId === "operator2") { - return resultServiceSpy; - } - return undefined; - }); - workflowResultServiceSpy.getPaginatedResultService.and.returnValue(undefined); - - // Mock the result snapshot with one result - const resultSnapshot = [{ "html-content": "
Visualization
" }]; - - resultServiceSpy.getCurrentResultSnapshot.and.returnValue(resultSnapshot); - - downloadServiceSpy.downloadOperatorsResult.and.returnValue(of(new Blob())); - - // Act - service.exportOperatorsResultToLocal(false); - - expect(downloadServiceSpy.downloadOperatorsResult).toHaveBeenCalled(); - const args = downloadServiceSpy.downloadOperatorsResult.calls.mostRecent().args; - expect(args[0]).toEqual(jasmine.any(Array)); - expect(args[1]).toEqual(jasmine.objectContaining({ wid: jasmine.any(String) })); - - const resultObservables = args[0]; - resultObservables[0].subscribe(files => { - expect(files[0].filename).toBe("result_operator2_1.html"); - expect(files[0].blob).toEqual(jasmine.any(Blob)); - - const reader = new FileReader(); - reader.onload = () => { - const content = reader.result as string; - expect(content).toBe("
Visualization
"); - done(); - }; - reader.readAsText(files[0].blob); - }); - }); - - it("should export multiple visualization results as a zip file when there are multiple results", done => { - // Arrange - jointGraphWrapperSpy.getCurrentHighlightedOperatorIDs.and.returnValue(["operator2"]); - - const resultServiceSpy = jasmine.createSpyObj("OperatorResultService", ["getCurrentResultSnapshot"]); - - // Mock the result service for 'operator2' - workflowResultServiceSpy.getResultService.and.callFake(operatorId => { - if (operatorId === "operator2") { - return resultServiceSpy; - } - return undefined; - }); - workflowResultServiceSpy.getPaginatedResultService.and.returnValue(undefined); - - // Mock the result snapshot with multiple results - const resultSnapshot = [ - { "html-content": "
Visualization 1
" }, - { "html-content": "
Visualization 2
" }, - ]; - - resultServiceSpy.getCurrentResultSnapshot.and.returnValue(resultSnapshot); - - // Spy on the 'downloadOperatorsResult' method - downloadServiceSpy.downloadOperatorsResult.and.returnValue(of(new Blob())); - - // Act - service.exportOperatorsResultToLocal(false); - - // Assert - expect(downloadServiceSpy.downloadOperatorsResult).toHaveBeenCalled(); - const args = downloadServiceSpy.downloadOperatorsResult.calls.mostRecent().args; - expect(args[0]).toEqual(jasmine.any(Array)); // Check if the first argument is an array - expect(args[1]).toEqual(jasmine.objectContaining({ wid: jasmine.any(String) })); // Check if the second argument is a workflow object - - // Check the content of the observables - const resultObservables = args[0]; - resultObservables[0].subscribe(files => { - expect(files[0].filename).toBe("result_operator2_1.html"); - expect(files[0].blob).toEqual(jasmine.any(Blob)); - - const reader = new FileReader(); - reader.onload = () => { - const content = reader.result as string; - expect(content).toBe("
Visualization 1
"); - done(); - }; - reader.readAsText(files[0].blob); - }); - }); }); diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts index 5eea6d97b35..627d78795cf 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.ts @@ -10,10 +10,9 @@ import { ExecuteWorkflowService } from "../execute-workflow/execute-workflow.ser import { ExecutionState, isNotInExecution } from "../../types/execute-workflow.interface"; import { filter } from "rxjs/operators"; import { OperatorResultService, WorkflowResultService } from "../workflow-result/workflow-result.service"; -import { OperatorPaginationResultService } from "../workflow-result/workflow-result.service"; import { DownloadService } from "../../../dashboard/service/user/download/download.service"; import { HttpResponse } from "@angular/common/http"; -import { ExportWorkflowJsonResponse } from "../../../dashboard/service/user/download/download.interface"; +import { ExportWorkflowJsonResponse } from "../../../dashboard/service/user/download/download.service"; @Injectable({ providedIn: "root", @@ -30,22 +29,9 @@ export class WorkflowResultExportService { private workflowResultService: WorkflowResultService, private downloadService: DownloadService ) { - this.registerResultExportResponseHandler(); this.registerResultToExportUpdateHandler(); } - registerResultExportResponseHandler() { - this.workflowWebsocketService - .subscribeToEvent("ResultExportResponse") - .subscribe((response: ResultExportResponse) => { - if (response.status === "success") { - this.notificationService.success(response.message); - } else { - this.notificationService.error(response.message); - } - }); - } - registerResultToExportUpdateHandler() { merge( this.executeWorkflowService @@ -84,55 +70,13 @@ export class WorkflowResultExportService { }); } - /** - * Export the operator results as files. - * If multiple operatorIds are provided, results are zipped into a single file. 
- */ - exportOperatorsResultToLocal(exportAll: boolean = true): void { - let operatorIds: string[]; - if (!exportAll) - operatorIds = [...this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs()]; - else - operatorIds = this.workflowActionService - .getTexeraGraph() - .getAllOperators() - .map(operator => operator.operatorID); - - const resultObservables: Observable[] = []; - - operatorIds.forEach(operatorId => { - const resultService = this.workflowResultService.getResultService(operatorId); - const paginatedResultService = this.workflowResultService.getPaginatedResultService(operatorId); - - if (paginatedResultService) { - const observable = this.fetchAllPaginatedResultsAsCSV(paginatedResultService, operatorId); - resultObservables.push(observable); - } else if (resultService) { - const observable = this.fetchVisualizationResultsAsHTML(resultService, operatorId); - resultObservables.push(observable); - } - }); - - if (resultObservables.length === 0) { - return; - } - - this.downloadService - .downloadOperatorsResult(resultObservables, this.workflowActionService.getWorkflow()) - .subscribe({ - error: (error: unknown) => { - console.error("Error exporting operator results:", error); - }, - }); - } - /** * export the workflow execution result according the export type */ - exportWorkflowExecutionResultToLocal( + exportWorkflowExecutionResult( exportType: string, workflowName: string, - datasetIds: ReadonlyArray = [], + datasetIds: number[], rowIndex: number, columnIndex: number, filename: string, @@ -151,17 +95,14 @@ export class WorkflowResultExportService { } // gather operator IDs - const operatorIds = exportAll ? this.workflowActionService - .getTexeraGraph() - .getAllOperators() - .map(operator => operator.operatorID) : - [...this.workflowActionService - .getJointGraphWrapper() - .getCurrentHighlightedOperatorIDs()]; - + const operatorIds = exportAll + ? this.workflowActionService + .getTexeraGraph() + .getAllOperators() + .map(operator => operator.operatorID) + : [...this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs()]; if (operatorIds.length === 0) { - console.log("No operators selected to export"); return; } @@ -187,93 +128,26 @@ export class WorkflowResultExportService { // "local" => response is a blob // We can parse the file name from header or use fallback this.downloadService.saveBlobFile(response, filename); - this.notificationService.info("File downloaded successfully"); + this.notificationService.info("Files downloaded successfully"); } else { // "dataset" => response is JSON // The server should return a JSON with {status, message} const jsonResponse = response as HttpResponse; const responseBody = jsonResponse.body; if (responseBody && responseBody.status === "success") { - this.notificationService.success(responseBody.message); + this.notificationService.success("Result exported successfully"); } else { this.notificationService.error(responseBody?.message || "An error occurred during export"); } } }, error: (err: unknown) => { - const errorMessage = (err as any)?.error?.error || (err as any)?.error || err; + const errorMessage = (err as any)?.error?.message || (err as any)?.error || err; this.notificationService.error(`An error happened in exporting operator results: ${errorMessage}`); }, }); } - /** - * Helper method to fetch all paginated results and convert them to a CSV Blob. 
- */ - private fetchAllPaginatedResultsAsCSV( - paginatedResultService: OperatorPaginationResultService, - operatorId: string - ): Observable<{ filename: string; blob: Blob }[]> { - return new Observable(observer => { - const results: any[] = []; - let currentPage = 1; - const pageSize = 10; - - paginatedResultService - .selectPage(currentPage, pageSize) - .pipe( - expand((pageData: PaginatedResultEvent) => { - results.push(...pageData.table); - if (pageData.table.length === pageSize) { - currentPage++; - return paginatedResultService.selectPage(currentPage, pageSize); - } else { - return EMPTY; - } - }), - finalize(() => { - const { filename, blob } = this.createCSVBlob(results, operatorId); - observer.next([{ filename, blob }]); - observer.complete(); - }) - ) - .subscribe(); - }); - } - - /** - * Helper method to fetch visualization results and convert them to HTML Blobs. - */ - private fetchVisualizationResultsAsHTML( - resultService: OperatorResultService, - operatorId: string - ): Observable<{ filename: string; blob: Blob }[]> { - return new Observable(observer => { - const snapshot = resultService.getCurrentResultSnapshot(); - const files: { filename: string; blob: Blob }[] = []; - - snapshot?.forEach((s: any, index: number) => { - const fileContent = Object(s)["html-content"]; - const blob = new Blob([fileContent], { type: "text/html;charset=utf-8" }); - const filename = `result_${operatorId}_${index + 1}.html`; - files.push({ filename, blob }); - }); - - observer.next(files); - observer.complete(); - }); - } - - /** - * Convert the results array into CSV format and create a Blob. - */ - private createCSVBlob(results: any[], operatorId: string): { filename: string; blob: Blob } { - const csv = Papa.unparse(results); // Convert array of objects to CSV - const blob = new Blob([csv], { type: "text/csv;charset=utf-8" }); - const filename = `result_${operatorId}.csv`; - return { filename, blob }; - } - /** * Reset flags if the user leave workspace */ diff --git a/core/gui/src/app/workspace/service/workflow-result/workflow-result.service.ts b/core/gui/src/app/workspace/service/workflow-result/workflow-result.service.ts index c27d0b35f47..6fccb8ad637 100644 --- a/core/gui/src/app/workspace/service/workflow-result/workflow-result.service.ts +++ b/core/gui/src/app/workspace/service/workflow-result/workflow-result.service.ts @@ -175,6 +175,7 @@ export class WorkflowResultService { } public determineOutputTypes(operatorId: string): { + hasAnyResult: boolean; isTableOutput: boolean; isVisualizationOutput: boolean; containsBinaryData: boolean; @@ -183,6 +184,7 @@ export class WorkflowResultService { const paginatedResultService = this.getPaginatedResultService(operatorId); return { + hasAnyResult: this.hasAnyResult(operatorId), isTableOutput: this.hasTableOutput(paginatedResultService), containsBinaryData: this.hasBinaryData(paginatedResultService), isVisualizationOutput: this.hasVisualizationOutput(resultService, paginatedResultService), diff --git a/core/gui/yarn.lock b/core/gui/yarn.lock index 8bbe7249723..5786a5e2d09 100644 --- a/core/gui/yarn.lock +++ b/core/gui/yarn.lock @@ -5038,6 +5038,13 @@ __metadata: languageName: node linkType: hard +"@types/content-disposition@npm:^0": + version: 0.5.8 + resolution: "@types/content-disposition@npm:0.5.8" + checksum: 10c0/f10baeab2ec44579012c1170763851687e740ea30531a80cd7a403475730ce7d7ead4f88927cea6970cc2d5e74fa7af38cdf4f039c5f115fba1bb98ec0014977 + languageName: node + linkType: hard + "@types/cookie@npm:^0.4.1": version: 0.4.1 resolution: 
"@types/cookie@npm:0.4.1" @@ -7724,7 +7731,7 @@ __metadata: languageName: node linkType: hard -"content-disposition@npm:0.5.4, content-disposition@npm:~0.5.2": +"content-disposition@npm:0.5.4, content-disposition@npm:^0.5.4, content-disposition@npm:~0.5.2": version: 0.5.4 resolution: "content-disposition@npm:0.5.4" dependencies: @@ -11111,6 +11118,7 @@ __metadata: "@nx/angular": "npm:20.0.3" "@stoplight/json-ref-resolver": "npm:3.1.5" "@types/backbone": "npm:1.4.15" + "@types/content-disposition": "npm:^0" "@types/dagre": "npm:0.7.47" "@types/file-saver": "npm:2.0.5" "@types/graphlib": "npm:2.1.8" @@ -11130,6 +11138,7 @@ __metadata: babel-plugin-dynamic-import-node: "npm:2.3.3" backbone: "npm:1.4.1" concurrently: "npm:7.4.0" + content-disposition: "npm:^0.5.4" dagre: "npm:0.8.5" deep-map: "npm:2.0.0" edit-distance: "npm:1.0.4" From 671c51767e385a4e14671f98150a08feaed0a0f7 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 18 Feb 2025 12:56:19 -0800 Subject: [PATCH 09/16] fix: lint --- .../web/service/ResultExportService.scala | 147 +++++++++--------- .../service/user/download/download.service.ts | 25 +-- .../result-exportation.component.ts | 10 +- .../workflow-result-export.service.spec.ts | 1 - 4 files changed, 96 insertions(+), 87 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 5c309393506..6d719edbf2e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -18,7 +18,10 @@ import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse import edu.uci.ics.texera.web.resource.GoogleResource import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles -import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{WorkflowExecutionsResource, WorkflowVersionResource} +import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{ + WorkflowExecutionsResource, + WorkflowVersionResource +} import org.jooq.types.UInteger import edu.uci.ics.amber.util.ArrowUtils import edu.uci.ics.texera.web.service.WorkflowExecutionService.getLatestExecutionId @@ -47,9 +50,9 @@ import scala.util.Using import java.io.{FilterOutputStream, IOException, OutputStream} /** - * A simple wrapper that ignores 'close()' calls on the underlying stream. - * This allows each operator's writer to call close() without ending the entire ZipOutputStream. - */ + * A simple wrapper that ignores 'close()' calls on the underlying stream. + * This allows each operator's writer to call close() without ending the entire ZipOutputStream. 
+ */ private class NonClosingOutputStream(os: OutputStream) extends FilterOutputStream(os) { @throws[IOException] override def close(): Unit = { @@ -84,7 +87,9 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { PortIdentity() ) /* Fix the error, I want to return an empty VirtualDocument so caller of this function can check its empty by .getCount() */ - storageUri.map(uri => DocumentFactory.openDocument(uri)._1.asInstanceOf[VirtualDocument[Tuple]]).orNull + storageUri + .map(uri => DocumentFactory.openDocument(uri)._1.asInstanceOf[VirtualDocument[Tuple]]) + .orNull } def exportResult(user: User, request: ResultExportRequest): ResultExportResponse = { @@ -123,14 +128,14 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * Export the result for ONE operator. - * Return (SomeSuccessMessage, SomeErrorMessage) or (None, None) if handled differently - */ + * Export the result for ONE operator. + * Return (SomeSuccessMessage, SomeErrorMessage) or (None, None) if handled differently + */ private def exportSingleOperator( - user: User, - request: ResultExportRequest, - operatorId: String - ): (Option[String], Option[String]) = { + user: User, + request: ResultExportRequest, + operatorId: String + ): (Option[String], Option[String]) = { // Possibly use some caching key val cacheKey = s"${request.exportType}-$operatorId" @@ -177,12 +182,12 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleCSVRequest( - operatorId: String, - user: User, - request: ResultExportRequest, - results: Iterable[Tuple], - headers: List[String] - ): (Option[String], Option[String]) = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple], + headers: List[String] + ): (Option[String], Option[String]) = { try { val pipedOutputStream = new PipedOutputStream() val pipedInputStream = new PipedInputStream(pipedOutputStream) @@ -208,11 +213,11 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleGoogleSheetRequest( - operatorId: String, - results: Iterable[Tuple], - header: List[String], - request: ResultExportRequest - ): (Option[String], Option[String]) = { + operatorId: String, + results: Iterable[Tuple], + header: List[String], + request: ResultExportRequest + ): (Option[String], Option[String]) = { try { val sheetService: Sheets = GoogleResource.getSheetService val sheetId: String = createGoogleSheet(sheetService, s"${request.workflowName}-$operatorId") @@ -257,10 +262,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { @tailrec private def moveToResultFolder( - driveService: Drive, - sheetId: String, - retryOnce: Boolean = true - ): Unit = { + driveService: Drive, + sheetId: String, + retryOnce: Boolean = true + ): Unit = { val folderId = retrieveResultFolderId(driveService) try { driveService.files().update(sheetId, null).setAddParents(folderId).execute() @@ -320,10 +325,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def uploadContent( - sheetService: Sheets, - sheetId: String, - content: util.List[util.List[AnyRef]] - ): Unit = { + sheetService: Sheets, + sheetId: String, + content: util.List[util.List[AnyRef]] + ): Unit = { val body = new ValueRange().setValues(content) val range = "A1" val options = "RAW" @@ -346,11 +351,11 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleDataRequest( - operatorId: String, - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): 
(Option[String], Option[String]) = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { try { val rowIndex = request.rowIndex val columnIndex = request.columnIndex @@ -391,11 +396,11 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def handleArrowRequest( - operatorId: String, - user: User, - request: ResultExportRequest, - results: Iterable[Tuple] - ): (Option[String], Option[String]) = { + operatorId: String, + user: User, + request: ResultExportRequest, + results: Iterable[Tuple] + ): (Option[String], Option[String]) = { if (results.isEmpty) { return (None, Some(s"No results to export for operator $operatorId")) } @@ -428,10 +433,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def createArrowWriter( - results: Iterable[Tuple], - allocator: RootAllocator, - outputStream: OutputStream - ): (ArrowFileWriter, VectorSchemaRoot) = { + results: Iterable[Tuple], + allocator: RootAllocator, + outputStream: OutputStream + ): (ArrowFileWriter, VectorSchemaRoot) = { val schema = results.head.getSchema val arrowSchema = ArrowUtils.fromTexeraSchema(schema) val root = VectorSchemaRoot.create(arrowSchema, allocator) @@ -441,10 +446,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def writeArrowData( - writer: ArrowFileWriter, - root: VectorSchemaRoot, - results: Iterable[Tuple] - ): Unit = { + writer: ArrowFileWriter, + root: VectorSchemaRoot, + results: Iterable[Tuple] + ): Unit = { writer.start() val batchSize = 1000 val resultList = results.toList @@ -466,10 +471,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def generateFileName( - request: ResultExportRequest, - operatorId: String, - extension: String - ): String = { + request: ResultExportRequest, + operatorId: String, + extension: String + ): String = { val latestVersion = WorkflowVersionResource.getLatestVersion(org.jooq.types.UInteger.valueOf(request.workflowId)) val timestamp = LocalDateTime @@ -483,11 +488,11 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } private def saveToDatasets( - request: ResultExportRequest, - user: User, - pipedInputStream: PipedInputStream, - fileName: String - ): Unit = { + request: ResultExportRequest, + user: User, + pipedInputStream: PipedInputStream, + fileName: String + ): Unit = { request.datasetIds.foreach { did => val datasetPath = PathUtils.getDatasetPath(org.jooq.types.UInteger.valueOf(did)) val filePath = datasetPath.resolve(fileName) @@ -500,13 +505,13 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * For local download of a single operator. Streams the data directly. - * We return (StreamingOutput, Some(filename)) on success, or (null, None) on error. - */ + * For local download of a single operator. Streams the data directly. + * We return (StreamingOutput, Some(filename)) on success, or (null, None) on error. 
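    * A sketch of the lazy-write contract (names from this file): the returned
    * StreamingOutput defers serialization until the JAX-RS container invokes
    * write(out), so rows stream to the HTTP response without buffering the
    * whole result in memory:
    *   val streamingOutput: StreamingOutput = out => writeCsv(out, results)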
+ */ def exportOperatorResultAsStream( - request: ResultExportRequest, - operatorId: String - ): (StreamingOutput, Option[String]) = { + request: ResultExportRequest, + operatorId: String + ): (StreamingOutput, Option[String]) = { val execIdOpt = getLatestExecutionId(workflowIdentity) if (execIdOpt.isEmpty) { return (null, None) @@ -541,8 +546,8 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * Writes CSV to output stream - */ + * Writes CSV to output stream + */ private def writeCsv(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { // for large data, you might want a buffered approach val csvWriter = CSVWriter.open(outputStream) // Tototoshi CSVWriter can open an OutputStream @@ -555,8 +560,8 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } /** - * Writes Arrow to output stream - */ + * Writes Arrow to output stream + */ private def writeArrow(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { if (results.isEmpty) return @@ -589,9 +594,9 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { } def exportOperatorsAsZip( - user: User, - request: ResultExportRequest - ): (StreamingOutput, Option[String]) = { + user: User, + request: ResultExportRequest + ): (StreamingOutput, Option[String]) = { if (request.operatorIds.isEmpty) { return (null, None) } diff --git a/core/gui/src/app/dashboard/service/user/download/download.service.ts b/core/gui/src/app/dashboard/service/user/download/download.service.ts index 0febc5532ec..f08d3a5dabd 100644 --- a/core/gui/src/app/dashboard/service/user/download/download.service.ts +++ b/core/gui/src/app/dashboard/service/user/download/download.service.ts @@ -9,7 +9,7 @@ import * as JSZip from "jszip"; import { Workflow } from "../../../../common/type/workflow"; import { AppSettings } from "../../../../common/app-setting"; import { HttpClient, HttpResponse } from "@angular/common/http"; -var contentDisposition = require("content-disposition") +var contentDisposition = require("content-disposition"); export const EXPORT_BASE_URL = "result/export"; @@ -121,7 +121,7 @@ export class DownloadService { destination, }; if (destination === "local") { - return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { + return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { responseType: "blob", observe: "response", headers: { @@ -131,16 +131,19 @@ export class DownloadService { }); } else { // dataset => return JSON - return this.http.post(`${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, requestBody, { - responseType: "json", - observe: "response", - headers: { - "Content-Type": "application/json", - Accept: "application/json", - }, - }); + return this.http.post( + `${AppSettings.getApiEndpoint()}/${EXPORT_BASE_URL}`, + requestBody, + { + responseType: "json", + observe: "response", + headers: { + "Content-Type": "application/json", + Accept: "application/json", + }, + } + ); } - } /** diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index 98f104ff6b1..1ba889a67f2 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -59,7 +59,10 @@ export class ResultExportationComponent implements OnInit { // we should allow user to export result without 
setting `view result` to true let operatorIds: readonly string[]; if (this.sourceTriggered === "menu") { - operatorIds = this.workflowActionService.getTexeraGraph().getAllOperators().map(op => op.operatorID); + operatorIds = this.workflowActionService + .getTexeraGraph() + .getAllOperators() + .map(op => op.operatorID); } else { operatorIds = this.workflowActionService.getJointGraphWrapper().getCurrentHighlightedOperatorIDs(); } @@ -140,9 +143,8 @@ export class ResultExportationComponent implements OnInit { } onClickExportResult(destination: "dataset" | "local", dataset: DashboardDataset = {} as DashboardDataset) { - const datasetIds = destination === "dataset" ? - [dataset.dataset.did].filter((id): id is number => id !== undefined) : - []; + const datasetIds = + destination === "dataset" ? [dataset.dataset.did].filter((id): id is number => id !== undefined) : []; this.workflowResultExportService.exportWorkflowExecutionResult( this.exportType, this.workflowName, diff --git a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts index 04fde824191..b863bf5c641 100644 --- a/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts +++ b/core/gui/src/app/workspace/service/workflow-result-export/workflow-result-export.service.spec.ts @@ -93,5 +93,4 @@ describe("WorkflowResultExportService", () => { it("should be created", () => { expect(service).toBeTruthy(); }); - }); From eb994396bfe0a1e6d43fc8d745178fa9ed709998 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 18 Feb 2025 12:59:57 -0800 Subject: [PATCH 10/16] fix: comment --- .../edu/uci/ics/texera/web/resource/ResultResource.scala | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala index 2733923a613..b1e79c3a01c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala @@ -33,11 +33,7 @@ class ResultResource extends LazyLogging { resultExportService.exportOperatorsAsZip(user.user, request) if (zipStream == null) { - return Response - .status(Response.Status.INTERNAL_SERVER_ERROR) - .`type`(MediaType.APPLICATION_JSON) - .entity(Map("error" -> "Failed to export multiple operators as zip").asJava) - .build() + throw new RuntimeException("Zip stream is null") } val finalFileName = zipFileNameOpt.getOrElse("operators.zip") From b4d33a17b9e9d52a9e0108cf45b5766d77cd1fcc Mon Sep 17 00:00:00 2001 From: ali Date: Sun, 2 Mar 2025 13:34:16 -0800 Subject: [PATCH 11/16] fix: export service --- .../web/service/ResultExportService.scala | 148 +----------------- 1 file changed, 5 insertions(+), 143 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index fec4e6a29dd..a915a1dd75c 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -1,11 +1,6 @@ package edu.uci.ics.texera.web.service import com.github.tototoshi.csv.CSVWriter -import com.google.api.client.googleapis.json.GoogleJsonResponseException -import 
com.google.api.services.drive.Drive -import com.google.api.services.drive.model.{File, FileList, Permission} -import com.google.api.services.sheets.v4.Sheets -import com.google.api.services.sheets.v4.model.{Spreadsheet, SpreadsheetProperties, ValueRange} import edu.uci.ics.amber.core.storage.DocumentFactory import edu.uci.ics.amber.core.storage.model.VirtualDocument import edu.uci.ics.amber.core.tuple.Tuple @@ -16,7 +11,6 @@ import edu.uci.ics.amber.util.{ArrowUtils, PathUtils} import edu.uci.ics.texera.dao.jooq.generated.tables.pojos.User import edu.uci.ics.texera.web.model.websocket.request.ResultExportRequest import edu.uci.ics.texera.web.model.websocket.response.ResultExportResponse -import edu.uci.ics.texera.web.resource.GoogleResource import edu.uci.ics.texera.web.resource.dashboard.user.dataset.DatasetResource.createNewDatasetVersionByAddingFiles import edu.uci.ics.texera.web.resource.dashboard.user.workflow.{ WorkflowExecutionsResource, @@ -158,9 +152,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { val attributeNames = results.head.getSchema.getAttributeNames request.exportType match { - case "google_sheet" => - handleGoogleSheetRequest(operatorId, results, attributeNames, request) - case "csv" => handleCSVRequest(operatorId, user, request, results, attributeNames) @@ -335,135 +326,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { writer.end() } - /** - * Handle exporting to Google Sheets. - */ - private def handleGoogleSheetRequest( - operatorId: String, - results: Iterable[Tuple], - header: List[String], - request: ResultExportRequest - ): (Option[String], Option[String]) = { - try { - val sheetService: Sheets = GoogleResource.getSheetService - val sheetId: String = createGoogleSheet(sheetService, s"${request.workflowName}-$operatorId") - if (sheetId == null) { - return (None, Some(s"Fail to create google sheet for operator $operatorId")) - } - - val driveService: Drive = GoogleResource.getDriveService - moveToResultFolder(driveService, sheetId) - - // share: set "anyone" as reader - val perm = new Permission().setType("anyone").setRole("reader") - driveService.permissions().create(sheetId, perm).execute() - - // asynchronously upload data - pool.submit(new Runnable { - override def run(): Unit = { - uploadHeader(sheetService, sheetId, header) - uploadResult(sheetService, sheetId, results) - } - }) - - val link = s"https://docs.google.com/spreadsheets/d/$sheetId/edit" - val cacheKey = s"${request.exportType}-$operatorId" - cache(cacheKey) = link - - val msg = s"Google sheet created for operator $operatorId: $link (results are uploading)" - (Some(msg), None) - } catch { - case ex: Exception => - (None, Some(s"Google Sheet export failed for operator $operatorId: ${ex.getMessage}")) - } - } - - private def createGoogleSheet(sheetService: Sheets, title: String): String = { - val sheetProps = new SpreadsheetProperties().setTitle(title) - val createReq = new Spreadsheet().setProperties(sheetProps) - val target = sheetService.spreadsheets.create(createReq).setFields("spreadsheetId").execute() - target.getSpreadsheetId - } - - @tailrec - private def moveToResultFolder( - driveService: Drive, - sheetId: String, - retryOnce: Boolean = true - ): Unit = { - val folderId = retrieveResultFolderId(driveService) - try { - driveService.files().update(sheetId, null).setAddParents(folderId).execute() - } catch { - case ex: GoogleJsonResponseException => - if (retryOnce) { - // possibly folder was removed or not found, re-check and retry - 
moveToResultFolder(driveService, sheetId, retryOnce = false) - } else { - throw ex - } - } - } - - private def retrieveResultFolderId(driveService: Drive): String = synchronized { - val folderResult: FileList = - driveService - .files() - .list() - .setQ( - s"mimeType = 'application/vnd.google-apps.folder' and name='$WORKFLOW_RESULT_FOLDER_NAME'" - ) - .setSpaces("drive") - .execute() - - if (folderResult.getFiles.isEmpty) { - val fileMetadata = new File() - fileMetadata.setName(WORKFLOW_RESULT_FOLDER_NAME) - fileMetadata.setMimeType("application/vnd.google-apps.folder") - val targetFolder: File = driveService.files.create(fileMetadata).setFields("id").execute() - targetFolder.getId - } else { - folderResult.getFiles.get(0).getId - } - } - - private def uploadHeader(sheetService: Sheets, sheetId: String, header: List[AnyRef]): Unit = { - uploadContent(sheetService, sheetId, List(header.asJava).asJava) - } - - private def uploadResult(sheetService: Sheets, sheetId: String, result: Iterable[Tuple]): Unit = { - val batch = new util.ArrayList[util.List[AnyRef]](UPLOAD_BATCH_ROW_COUNT) - for (tuple <- result) { - val row: util.List[AnyRef] = tuple.getFields.map(convertUnsupported).toList.asJava - batch.add(row) - - if (batch.size() == UPLOAD_BATCH_ROW_COUNT) { - uploadContent(sheetService, sheetId, batch) - batch.clear() - } - } - if (!batch.isEmpty) { - uploadContent(sheetService, sheetId, batch) - } - } - - private def uploadContent( - sheetService: Sheets, - sheetId: String, - content: util.List[util.List[AnyRef]] - ): Unit = { - val body = new ValueRange().setValues(content) - val range = "A1" - val options = "RAW" - retry(attempts = RETRY_ATTEMPTS, baseBackoffTimeInMS = BASE_BACK_OOF_TIME_IN_MS) { - sheetService.spreadsheets - .values() - .append(sheetId, range, body) - .setValueInputOption(options) - .execute() - } - } - private def convertUnsupported(anyVal: Any): AnyRef = { anyVal match { case null => "" @@ -483,7 +345,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { extension: String ): String = { val latestVersion = - WorkflowVersionResource.getLatestVersion(UInteger.valueOf(request.workflowId)) + WorkflowVersionResource.getLatestVersion(request.workflowId) val timestamp = LocalDateTime .now() .truncatedTo(ChronoUnit.SECONDS) @@ -504,10 +366,10 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { fileName: String ): Unit = { request.datasetIds.foreach { did => - val datasetPath = PathUtils.getDatasetPath(UInteger.valueOf(did)) + val datasetPath = PathUtils.getDatasetPath(did) val filePath = datasetPath.resolve(fileName) createNewDatasetVersionByAddingFiles( - UInteger.valueOf(did), + did, user, Map(filePath -> pipedInputStream) ) @@ -567,7 +429,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { private def writeArrow(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { if (results.isEmpty) return - + println("Check results ", results) val allocator = new RootAllocator() Using.Manager { use => val (writer, root) = createArrowWriter(results, allocator, outputStream) @@ -583,9 +445,9 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { for (batchStart <- 0 until totalSize by batchSize) { val batchEnd = Math.min(batchStart + batchSize, totalSize) val currentBatchSize = batchEnd - batchStart - for (i <- 0 until currentBatchSize) { val tuple = resultList(batchStart + i) + println("Check tuple: " + tuple) ArrowUtils.setTexeraTuple(tuple, i, root) } root.setRowCount(currentBatchSize) From 
e29366bd403ccee39652771ac4d3c7b4a42c73f8 Mon Sep 17 00:00:00 2001 From: ali Date: Sun, 2 Mar 2025 15:04:00 -0800 Subject: [PATCH 12/16] fix: remove unused cache --- .../ics/texera/web/service/ResultExportService.scala | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index a915a1dd75c..315f0b08579 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -67,8 +67,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { import ResultExportService._ - private val cache = new mutable.HashMap[String, String] - /** * Generate the VirtualDocument for one operator's result. * Incorporates the remote code's extra parameter `None` for sub-operator ID. @@ -129,15 +127,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { operatorId: String ): (Option[String], Option[String]) = { - // Possibly use a unique cache key for operator + export type - val cacheKey = s"${request.exportType}-$operatorId" - if (cache.contains(cacheKey)) { - return ( - Some(s"Link retrieved from cache for operator $operatorId: ${cache(cacheKey)}"), - None - ) - } - val execIdOpt = getLatestExecutionId(workflowIdentity) if (execIdOpt.isEmpty) { return (None, Some(s"Workflow ${request.workflowId} has no execution result")) From 8ae0b75843ebf0245186ba4fec1ad76bdb3933dd Mon Sep 17 00:00:00 2001 From: ali Date: Sun, 2 Mar 2025 15:08:14 -0800 Subject: [PATCH 13/16] fix: remove print --- .../edu/uci/ics/texera/web/service/ResultExportService.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 315f0b08579..6cbef25cc1e 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -418,7 +418,7 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { private def writeArrow(outputStream: OutputStream, results: Iterable[Tuple]): Unit = { if (results.isEmpty) return - println("Check results ", results) + val allocator = new RootAllocator() Using.Manager { use => val (writer, root) = createArrowWriter(results, allocator, outputStream) @@ -436,7 +436,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { val currentBatchSize = batchEnd - batchStart for (i <- 0 until currentBatchSize) { val tuple = resultList(batchStart + i) - println("Check tuple: " + tuple) ArrowUtils.setTexeraTuple(tuple, i, root) } root.setRowCount(currentBatchSize) From 2aecf63851340695cb83769ad399b3848cb7955e Mon Sep 17 00:00:00 2001 From: ali Date: Sun, 2 Mar 2025 15:17:24 -0800 Subject: [PATCH 14/16] fix: duplication --- .../result-exportation.component.ts | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts index 1ba889a67f2..755f321760b 100644 --- a/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts +++ 
b/core/gui/src/app/workspace/component/result-exportation/result-exportation.component.ts @@ -112,36 +112,6 @@ export class ResultExportationComponent implements OnInit { } } - onClickSaveResultFileToDatasets(dataset: DashboardDataset) { - if (dataset.dataset.did) { - this.workflowResultExportService.exportWorkflowExecutionResult( - this.exportType, - this.workflowName, - [dataset.dataset.did], - this.rowIndex, - this.columnIndex, - this.inputFileName, - this.sourceTriggered === "menu", - "dataset" - ); - this.modalRef.close(); - } - } - - onClickSaveResultFileToLocal() { - this.workflowResultExportService.exportWorkflowExecutionResult( - this.exportType, - this.workflowName, - [], - this.rowIndex, - this.columnIndex, - this.inputFileName, - this.sourceTriggered === "menu", - "local" - ); - this.modalRef.close(); - } - onClickExportResult(destination: "dataset" | "local", dataset: DashboardDataset = {} as DashboardDataset) { const datasetIds = destination === "dataset" ? [dataset.dataset.did].filter((id): id is number => id !== undefined) : []; From 881e1655c171362a1286ce581bd8d5974eacf996 Mon Sep 17 00:00:00 2001 From: ali Date: Mon, 3 Mar 2025 12:42:00 -0800 Subject: [PATCH 15/16] fix: remove extra parts --- .../ics/texera/web/service/ResultExportService.scala | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 6cbef25cc1e..081528c33be 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -53,10 +53,6 @@ private class NonClosingOutputStream(os: OutputStream) extends FilterOutputStrea } object ResultExportService { - final private val UPLOAD_BATCH_ROW_COUNT = 10000 - final private val RETRY_ATTEMPTS = 7 - final private val BASE_BACK_OOF_TIME_IN_MS = 1000 - final private val WORKFLOW_RESULT_FOLDER_NAME = "workflow_results" // Matches the remote's approach for a thread pool final private val pool: ThreadPoolExecutor = @@ -315,14 +311,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { writer.end() } - private def convertUnsupported(anyVal: Any): AnyRef = { - anyVal match { - case null => "" - case s: String => s - case n: Number => n - case other => other.toString - } - } /** * Generate a file name for an operator's exported file. 
    */
From db55beb6a89f4b746876bfb888ee0d531bb5dd76 Mon Sep 17 00:00:00 2001 From: ali Date: Tue, 4 Mar 2025 08:21:10 -0800 Subject: [PATCH 16/16] fix: versioning and format --- .../edu/uci/ics/texera/web/resource/ResultResource.scala | 8 ++++++++ .../uci/ics/texera/web/service/ResultExportService.scala | 1 - core/gui/package.json | 4 ++-- core/gui/yarn.lock | 8 ++++---- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala index b1e79c3a01c..0eeada8d644 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/resource/ResultResource.scala @@ -23,6 +23,14 @@ class ResultResource extends LazyLogging { request: ResultExportRequest, @Auth user: SessionUser ): Response = { + + if (request.operatorIds.size <= 0) + Response + .status(Response.Status.BAD_REQUEST) + .`type`(MediaType.APPLICATION_JSON) + .entity(Map("error" -> "No operator selected").asJava) + .build() + try { request.destination match { case "local" => diff --git a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala index 081528c33be..021eb1b7cc9 100644 --- a/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala +++ b/core/amber/src/main/scala/edu/uci/ics/texera/web/service/ResultExportService.scala @@ -311,7 +311,6 @@ class ResultExportService(workflowIdentity: WorkflowIdentity) { writer.end() } - /** * Generate a file name for an operator's exported file. * Preserves your logic: uses operatorId in the name. 
    */
diff --git a/core/gui/package.json b/core/gui/package.json index 4fac27a2441..59a9aa1b645 100644 --- a/core/gui/package.json +++ b/core/gui/package.json @@ -47,7 +47,7 @@ "@types/plotly.js-basic-dist-min": "2.12.4", "ajv": "8.10.0", "backbone": "1.4.1", - "content-disposition": "^0.5.4", + "content-disposition": "0.5.4", "dagre": "0.8.5", "deep-map": "2.0.0", "edit-distance": "1.0.4", @@ -112,7 +112,7 @@ "@nrwl/nx-cloud": "19.1.0", "@nx/angular": "20.0.3", "@types/backbone": "1.4.15", - "@types/content-disposition": "^0", + "@types/content-disposition": "0", "@types/dagre": "0.7.47", "@types/file-saver": "2.0.5", "@types/graphlib": "2.1.8", diff --git a/core/gui/yarn.lock b/core/gui/yarn.lock index 7e43fc36104..050ab8bfee1 100644 --- a/core/gui/yarn.lock +++ b/core/gui/yarn.lock @@ -5038,7 +5038,7 @@ __metadata: languageName: node linkType: hard -"@types/content-disposition@npm:^0": +"@types/content-disposition@npm:0": version: 0.5.8 resolution: "@types/content-disposition@npm:0.5.8" checksum: 10c0/f10baeab2ec44579012c1170763851687e740ea30531a80cd7a403475730ce7d7ead4f88927cea6970cc2d5e74fa7af38cdf4f039c5f115fba1bb98ec0014977 @@ -7731,7 +7731,7 @@ __metadata: languageName: node linkType: hard -"content-disposition@npm:0.5.4, content-disposition@npm:^0.5.4, content-disposition@npm:~0.5.2": +"content-disposition@npm:0.5.4, content-disposition@npm:~0.5.2": version: 0.5.4 resolution: "content-disposition@npm:0.5.4" dependencies: @@ -11118,7 +11118,7 @@ __metadata: "@nx/angular": "npm:20.0.3" "@stoplight/json-ref-resolver": "npm:3.1.5" "@types/backbone": "npm:1.4.15" - "@types/content-disposition": "npm:^0" + "@types/content-disposition": "npm:0" "@types/dagre": "npm:0.7.47" "@types/file-saver": "npm:2.0.5" "@types/graphlib": "npm:2.1.8" @@ -11138,7 +11138,7 @@ __metadata: babel-plugin-dynamic-import-node: "npm:2.3.3" backbone: "npm:1.4.1" concurrently: "npm:7.4.0" - content-disposition: "npm:^0.5.4" + content-disposition: "npm:0.5.4" dagre: "npm:0.8.5" deep-map: "npm:2.0.0" edit-distance: "npm:1.0.4"
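A note on the operatorIds guard introduced in patch 16/16: in Scala, `if` is an
expression, so a branch that merely evaluates `Response...build()` without
`return` builds the 400 response and then discards it, and the handler keeps
executing. A minimal short-circuiting sketch, using only identifiers that
already appear in this series:

    def exportResult(
        request: ResultExportRequest,
        @Auth user: SessionUser
    ): Response = {
      if (request.operatorIds.isEmpty) {
        // actually send the 400 instead of discarding it
        return Response
          .status(Response.Status.BAD_REQUEST)
          .`type`(MediaType.APPLICATION_JSON)
          .entity(Map("error" -> "No operator selected").asJava)
          .build()
      }
      // ... the existing "local" vs "dataset" dispatch continues unchanged ...
    }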