diff --git a/docs/api.yaml b/docs/api.yaml index 650ba51f8..d0f4c1b40 100644 --- a/docs/api.yaml +++ b/docs/api.yaml @@ -3892,7 +3892,13 @@ paths: - Individual Form summary: Downloading a Form Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given. + To download a single file, use this endpoint. + + The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` will be given. `Content-Type` will be either: + + 1. the type supplied at upload time, or if not supplied + 2. a type derived from the file extension, or if none found + 3. `application/octet-stream` This endpoint supports `ETag`, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, it returns a value in `ETag` header, you can pass this value in the header `If-None-Match` of subsequent requests. If the file has not been changed since the previous request, you will receive `304 Not Modified` response otherwise you'll get the latest file. operationId: Downloading a Form Attachment @@ -5123,7 +5129,13 @@ paths: - Published Form Versions summary: Downloading a Form Version Attachment description: |- - To download a single file, use this endpoint. The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` (based on the type supplied at upload time, or `application/octet-stream` if none was supplied) will be given. + To download a single file, use this endpoint. + + The appropriate `Content-Disposition` (attachment with a filename) and `Content-Type` will be given. `Content-Type` will be either: + + 1. the type supplied at upload time, or if not supplied + 2. a type derived from the file extension, or if none found + 3. `application/octet-stream` This endpoint supports `ETag` header, which can be used to avoid downloading the same content more than once. When an API consumer calls this endpoint, the endpoint returns a value in `ETag` header. If you pass that value in the `If-None-Match` header of a subsequent request, then if the file has not been changed since the previous request, you will receive `304 Not Modified` response; otherwise you'll get the latest file. operationId: Downloading a Form Version Attachment diff --git a/lib/model/query/submission-attachments.js b/lib/model/query/submission-attachments.js index aaf5657fc..0f90b73c8 100644 --- a/lib/model/query/submission-attachments.js +++ b/lib/model/query/submission-attachments.js @@ -20,7 +20,7 @@ const { insertMany, QueryOptions } = require('../../util/db'); const { resolve } = require('../../util/promise'); const { isBlank, construct } = require('../../util/util'); const { traverseXml, findAll, root, node, text } = require('../../util/xml'); -const { streamBlobs } = require('../../util/blob'); +const { defaultMimetypeFor, streamBlobs } = require('../../util/blob'); //////////////////////////////////////////////////////////////////////////////// @@ -29,7 +29,7 @@ const { streamBlobs } = require('../../util/blob'); const _makeAttachment = (ensure, submissionDefId, name, file = null, deprecated = null, index = null, isClientAudit = null) => { const data = { submissionDefId, name, index, isClientAudit, blobId: deprecated }; return (file == null) ? resolve(new Submission.Attachment(data)) - : ensure(Blob.fromBuffer(file.buffer, file.mimetype)).then((blobId) => { + : ensure(Blob.fromBuffer(file.buffer, file.mimetype || defaultMimetypeFor(name))).then((blobId) => { data.blobId = blobId; return new Submission.Attachment(data); }); @@ -106,7 +106,7 @@ const upsert = (def, files) => ({ Blobs, SubmissionAttachments }) => const lookup = new Set(expecteds.map((att) => att.name)); const present = files.filter((file) => lookup.has(file.fieldname)); return Promise.all(present - .map((file) => Blobs.ensure(Blob.fromBuffer(file.buffer, file.mimetype)) + .map((file) => Blobs.ensure(Blob.fromBuffer(file.buffer, file.mimetype || defaultMimetypeFor(file.fieldname))) .then((blobId) => SubmissionAttachments.attach(def.id, file.fieldname, blobId)))); }); diff --git a/lib/resources/forms.js b/lib/resources/forms.js index 15cf8f585..a37b9d16f 100644 --- a/lib/resources/forms.js +++ b/lib/resources/forms.js @@ -13,7 +13,7 @@ const { Blob, Form } = require('../model/frames'); const { ensureDef } = require('../model/frame'); const { QueryOptions } = require('../util/db'); const { isTrue, xml, contentDisposition, withEtag } = require('../util/http'); -const { blobResponse } = require('../util/blob'); +const { blobResponse, defaultMimetypeFor } = require('../util/blob'); const Problem = require('../util/problem'); const { sanitizeFieldsForOdata, setVersion } = require('../data/schema'); const { getOrNotFound, reject, resolve, rejectIf } = require('../util/promise'); @@ -380,7 +380,7 @@ module.exports = (service, endpoint) => { .then(getOrNotFound) .then((form) => auth.canOrReject('form.update', form)) .then((form) => Promise.all([ - Blob.fromStream(request, headers['content-type']).then((blob) => Blobs.ensure(blob)), + Blob.fromStream(request, headers['content-type'] || defaultMimetypeFor(params.name)).then((blob) => Blobs.ensure(blob)), FormAttachments.getByFormDefIdAndName(form.draftDefId, params.name).then(getOrNotFound) ]) .then(([ blobId, attachment ]) => FormAttachments.update(form, attachment, blobId, null)) diff --git a/lib/resources/submissions.js b/lib/resources/submissions.js index 390e402bb..c8a39ffc0 100644 --- a/lib/resources/submissions.js +++ b/lib/resources/submissions.js @@ -16,7 +16,7 @@ const { createdMessage } = require('../formats/openrosa'); const { getOrNotFound, getOrReject, rejectIf, reject } = require('../util/promise'); const { QueryOptions } = require('../util/db'); const { success, xml, isFalse, contentDisposition, redirect, url } = require('../util/http'); -const { blobResponse } = require('../util/blob'); +const { blobResponse, defaultMimetypeFor } = require('../util/blob'); const Problem = require('../util/problem'); const { streamBriefcaseCsvs } = require('../data/briefcase'); const { streamAttachments } = require('../data/attachments'); @@ -445,7 +445,7 @@ module.exports = (service, endpoint) => { .then((defId) => SubmissionAttachments.getBySubmissionDefIdAndName(defId, params.name) // just for audit logging .then(getOrNotFound) .then((oldAttachment) => [ form, defId, oldAttachment ]))), - Blob.fromStream(request, headers['content-type']).then(Blobs.ensure) + Blob.fromStream(request, headers['content-type'] || defaultMimetypeFor(params.name)).then(Blobs.ensure) ]) .then(([ [ form, defId, oldAttachment ], blobId ]) => Promise.all([ SubmissionAttachments.attach(defId, params.name, blobId), diff --git a/lib/util/blob.js b/lib/util/blob.js index 099f5aa14..5362dc331 100644 --- a/lib/util/blob.js +++ b/lib/util/blob.js @@ -7,6 +7,7 @@ // including this file, may be copied, modified, propagated, or distributed // except according to the terms contained in the LICENSE file. +const { extname } = require('node:path'); const { Transform } = require('stream'); const { PartialPipe } = require('./stream'); const { contentDisposition, redirect, withEtag } = require('./http'); @@ -67,4 +68,13 @@ async function blobResponse(s3, filename, blob) { } } -module.exports = { blobContent, blobResponse, streamBlobs, streamEncBlobs }; +function defaultMimetypeFor(filename) { + if (!filename) return null; + switch (extname(filename).toLowerCase()) { + case '.csv': return 'text/csv'; // eslint-disable-line no-multi-spaces + case '.geojson': return 'application/geo+json'; + default: return null; // eslint-disable-line no-multi-spaces + } +} + +module.exports = { blobContent, blobResponse, defaultMimetypeFor, streamBlobs, streamEncBlobs }; diff --git a/test/data/xml.js b/test/data/xml.js index ba2c2b082..8328edce3 100644 --- a/test/data/xml.js +++ b/test/data/xml.js @@ -637,6 +637,7 @@ module.exports = { two: instance('binaryType', 'btwo', 'here_is_file2.jpg'), both: instance('binaryType', 'both', 'my_file1.mp4here_is_file2.jpg'), unicode: instance('binaryType', 'both', 'fîlé2f😂le3صادق'), + withFile: (filename) => instance('binaryType', 'with-file', `${filename}`), }, encrypted: { // TODO: the jpg binary associated with this sample blob is >3MB. will replace diff --git a/test/integration/api/submissions.js b/test/integration/api/submissions.js index 0c21d0076..8f795da01 100644 --- a/test/integration/api/submissions.js +++ b/test/integration/api/submissions.js @@ -4419,6 +4419,87 @@ one,h,/data/h,2000-01-01T00:06,2000-01-01T00:07,-5,-6,,ee,ff body.toString().should.equal('testvideo'); }))))))); + [ + // express ALWAYS adds "charset=..." suffix to text-based Content-Type response headers + // See: https://github.com/expressjs/express/issues/2654 + [ 'CSV', 'myfile.csv', 'text/csv; charset=utf-8', 'a,b,c' ], // eslint-disable-line no-multi-spaces + [ 'GeoJSON', 'myfile.geojson', 'application/geo+json', '{}' ], // eslint-disable-line no-multi-spaces + [ 'Custom', 'myfile.custom1', 'application/octet-stream', 'anything' ], // eslint-disable-line no-multi-spaces + ].forEach(([ humanType, filename, officialContentType, fileContents ]) => { + describe(`special handling for ${humanType}`, () => { + it('should attach the given file and serve it with supplied mime type', testService((service) => + service.login('alice', (asAlice) => + asAlice.post('/v1/projects/1/forms?publish=true') + .set('Content-Type', 'application/xml') + .send(testData.forms.binaryType) + .expect(200) + .then(() => asAlice.post('/v1/projects/1/forms/binaryType/submissions') + .send(testData.instances.binaryType.withFile(filename)) + .set('Content-Type', 'text/xml') + .expect(200) + .then(() => asAlice.post(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .set('Content-Type', 'application/x-abiword') + .send(fileContents) + .expect(200) + .then(() => asAlice.get(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .expect(200) + .then(({ headers, text }) => { + headers['content-type'].should.equal('application/x-abiword'); + text.toString().should.equal(fileContents); // use 'text' instead of 'body' to avoid supertest response parsing + }))))))); + + it(`should attach a given ${humanType} file with empty Content-Type and serve it with mime type ${officialContentType}`, testService((service) => + service.login('alice', (asAlice) => + asAlice.post('/v1/projects/1/forms?publish=true') + .set('Content-Type', 'application/xml') + .send(testData.forms.binaryType) + .expect(200) + .then(() => asAlice.post('/v1/projects/1/forms/binaryType/submissions') + .send(testData.instances.binaryType.withFile(filename)) + .set('Content-Type', 'text/xml') + .expect(200) + .then(() => asAlice.post(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .send(fileContents) + .set('Content-Type', '') // N.B. must be called _after_ send() + .expect(200) + .then(() => asAlice.get(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .expect(200) + .then(({ headers, body, text }) => { + headers['content-type'].should.equal(officialContentType); + + // Both body & text must be checked here to avoid supertest response parsing: + // * for JSON-based formats, body will contain parsed JSON objects + // * for general binary formats, text will be undefined + (text ?? body).toString().should.equal(fileContents); + }))))))); + + it(`should attach a given ${humanType} file with missing Content-Type and serve it with mime type ${officialContentType}`, testService((service) => + service.login('alice', (asAlice) => + asAlice.post('/v1/projects/1/forms?publish=true') + .set('Content-Type', 'application/xml') + .send(testData.forms.binaryType) + .expect(200) + .then(() => asAlice.post('/v1/projects/1/forms/binaryType/submissions') + .send(testData.instances.binaryType.withFile(filename)) + .set('Content-Type', 'text/xml') + .expect(200) + .then(() => asAlice.post(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .send(fileContents) + .unset('Content-Type') // N.B. must be called _after_ send() + .expect(200) + .then(() => asAlice.get(`/v1/projects/1/forms/binaryType/submissions/with-file/attachments/${filename}`) + .expect(200) + .then(({ headers, body, text }) => { + headers['content-type'].should.equal(officialContentType); + + // Both body & text must be checked here to avoid supertest response parsing: + // * for JSON-based formats, body will contain parsed JSON objects + // * for general binary formats, text will be undefined + (text ?? body).toString().should.equal(fileContents); + }))))))); + }); + }); + it('should log an audit entry about initial attachment', testService((service, { Audits, Forms, Submissions, SubmissionAttachments }) => service.login('alice', (asAlice) => asAlice.post('/v1/projects/1/forms?publish=true')