Skip to content

Commit

Permalink
Attempt to address gnarroway#55 by calculating the content-length of …
Browse files Browse the repository at this point in the history
…the multipart body (where appropriate)

- This introduces a new protocol called MultipartParam which can be extended to support additional
content types. By default we're matching essentially what io/input-stream does by default to
minimize any backwards incompatible changes here
- When we know the content-length of the multipart body, supply it to the HttpClient
  • Loading branch information
Vincent Pizzo committed May 19, 2023
1 parent 460b6c5 commit 580844b
Show file tree
Hide file tree
Showing 3 changed files with 198 additions and 68 deletions.
14 changes: 12 additions & 2 deletions src/hato/middleware.clj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
[hato.conversion :as conversion])
(:import
(hato.conversion DefaultDecoder)
(java.net.http HttpRequest$BodyPublishers)
(java.util
Base64)
(java.io
Expand All @@ -19,6 +20,7 @@
URLDecoder
URLEncoder
URL)
(java.util.function Supplier)
(java.util.zip
GZIPInputStream InflaterInputStream ZipException Inflater)))

Expand Down Expand Up @@ -695,10 +697,18 @@
"Adds appropriate body and header if making a multipart request."
[{:keys [multipart] :as req}]
(if multipart
(let [b (multipart/boundary)]
(let [b (multipart/boundary)
segments (multipart/raw-multipart-payload-segments multipart b)
input-stream (multipart/body segments)
content-length (multipart/content-length segments)]
(-> req
(dissoc :multipart)
(assoc :body (multipart/body multipart b))
(assoc :body
(if (= -1 content-length)
input-stream
(HttpRequest$BodyPublishers/fromPublisher
(HttpRequest$BodyPublishers/ofInputStream (reify Supplier (get [_] input-stream)))
content-length)))
(update :headers assoc "content-type" (str "multipart/form-data; boundary=" b))))
req))

Expand Down
186 changes: 131 additions & 55 deletions src/hato/multipart.clj
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,10 @@
(:refer-clojure :exclude [get])
(:require [clojure.java.io :as io]
[clojure.spec.alpha :as s])
(:import (java.io ByteArrayInputStream File SequenceInputStream)
(:import (java.io ByteArrayInputStream File InputStream SequenceInputStream)
(java.net Socket URI URL)
(java.nio.charset Charset StandardCharsets)
(java.nio.file Files)
(java.nio.file Files Path)
(java.util Collections)))

;;; Helpers
Expand Down Expand Up @@ -83,23 +84,94 @@
(take 30)
(apply str "hatoBoundary")))

(defmulti multipart-content->input-stream
(fn [content _opts]
(type content)))

(defmethod multipart-content->input-stream String [^String content opts]
(ByteArrayInputStream. (.getBytes content (charset-encoding opts))))

(defmethod multipart-content->input-stream :default [content _opts]
(io/input-stream content))
(defprotocol MultipartParam
(input-stream [content opts])
(length [content opts]))

(extend-protocol MultipartParam
String
(input-stream [^String content opts]
(io/input-stream (.getBytes content (charset-encoding opts))))
(length [^String content opts]
(count (.getBytes content (charset-encoding opts))))

File
(input-stream [^File content _opts]
(io/input-stream content))
(length [^File content _opts]
(.length content))

Path
(input-stream [^Path content _opts]
(io/input-stream (.toFile content)))
(length [^Path content opts]
(length (.toFile content) opts))

InputStream
(input-stream [^InputStream content _opts]
content)
(length [^InputStream _content opts]
(or (:content-length opts) -1))

URL
(input-stream [^URL content _opts]
(io/input-stream content))
(length [^URL content opts]
(if (= "file" (.getProtocol content))
(length (io/file content) opts)
(or (:content-length opts) -1)))

URI
(input-stream [^URI content _opts]
(io/input-stream content))
(length [^URI content opts]
(length (.toURL content) opts))

Socket
(input-stream [^InputStream content _opts]
(io/input-stream content))
(length [^InputStream _content opts]
(or (:content-length opts) -1)))

;; See https://clojure.atlassian.net/browse/CLJ-1381 for why these are defined separately

(extend-protocol MultipartParam
(Class/forName "[B")
(input-stream [^bytes content _opts]
(io/input-stream content))
(length [^bytes content _opts]
(count content)))

(defn raw-multipart-payload-segments
"Given a collection of multipart parts, return a collection of tuples containing a segment of data
representing the multipart body. Each tuple is the segment's content and options for the specific
part (if applicable). These individual segments may be used to compute the content length or construct
an InputStream.
By default, a part's :content type must extend the MultipartParam protocol above!"
[parts boundary]
(let [payload-end-signal (.getBytes (str "--" boundary "--" line-break) StandardCharsets/UTF_8)]
(concat
(for [part parts
raw-segment [[(.getBytes (str "--"
boundary
line-break
(content-disposition part)
line-break
(content-type part)
line-break
(content-transfer-encoding part)
line-break
line-break)
StandardCharsets/UTF_8) nil]
[(:content part) (dissoc part :content)]
[(.getBytes line-break StandardCharsets/UTF_8) nil]]]
raw-segment)
[[payload-end-signal nil]])))

(defn body
"Returns an InputStream from the multipart inputs.
This is achieved by combining all the inputs/parts into a new InputStream. Parts that
are not a string should be coercible to an InputStream via clojure.java.io/input-stream
or by extending `multipart-content->input-stream`. Ideally this input stream is lazy
for parts with contents of a File/InputStream/URL/URI/Socket/etc.
"Returns an InputStream from the supplied multipart parts. See raw-multipart-payload-segments
for more information.
Output looks something like:
Expand All @@ -111,43 +183,47 @@
Some Content\r
--hatoBoundary....\r
...more components
--hatoBoundary....--\r
"
[ms b]
(SequenceInputStream.
(Collections/enumeration
(concat (for [m ms
s [(ByteArrayInputStream. (.getBytes (str "--"
b
line-break
(content-disposition m)
line-break
(content-type m)
line-break
(content-transfer-encoding m)
line-break
line-break)
StandardCharsets/UTF_8))
(multipart-content->input-stream (:content m) m)
(ByteArrayInputStream. (.getBytes line-break StandardCharsets/UTF_8))]]
s)
[(ByteArrayInputStream. (.getBytes (str "--" b "--" line-break) StandardCharsets/UTF_8))]))))
--hatoBoundary....--\r"
([raw-multipart-payload-segments]
(->> raw-multipart-payload-segments
(mapv (fn [[content opts]]
(input-stream content opts)))
Collections/enumeration
(SequenceInputStream.)))
([ms b]
(body (raw-multipart-payload-segments ms b))))

(defn content-length
"Returns the content length from the supplied multipart parts. If any of the parts return a
content length of -1, return -1 indicating that we can't determine the appropriate size of
the multipart body. See raw-multipart-payload-segments for more information."
([raw-multipart-payload-segments]
(reduce
(fn [acc [content opts]]
(let [len (length content opts)]
(if (= -1 len)
(reduced -1)
(+ acc len))))
0
raw-multipart-payload-segments))
([ms b]
(content-length (raw-multipart-payload-segments ms b))))

(comment
(def b (boundary))
(def ms [{:name "title" :content "My Awesome Picture"}
{:name "Content/type" :content "image/jpeg"}
{:name "foo.txt" :part-name "eggplant" :content "Eggplants"}
{:name "file" :content (io/file ".nrepl-port")}])

; Create the body
(body ms b)

; Copy to out for testing
(with-open [xin (io/input-stream *1)
xout (java.io.ByteArrayOutputStream.)]
(io/copy xin xout)
(.toByteArray xout))

; Print as string
(String. *1))
(def b (boundary))
(def ms [{:name "title" :content "My Awesome Picture"}
{:name "Content/type" :content "image/jpeg"}
{:name "foo.txt" :part-name "eggplant" :content "Eggplants"}
{:name "file" :content (io/file ".nrepl-port")}])

; Create the body
(body ms b)

; Copy to out for testing
(with-open [xin (io/input-stream *1)
xout (java.io.ByteArrayOutputStream.)]
(io/copy xin xout)
(.toByteArray xout))

; Print as string
(String. *1))
66 changes: 55 additions & 11 deletions test/hato/multipart_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,28 @@
(:import (java.io ByteArrayInputStream ByteArrayOutputStream)
(java.nio.charset StandardCharsets)))

(defn- make-test-segments
[]
(spit (io/file ".test-data") "[\"hello world\"]")
[{:name "title" :content "My Awesome Picture"}
{:name "diffCs" :content "custom cs" :content-type "text/plain; charset=\"iso-8859-1\""}
{:name "expandedCs" :content "expanded cs" :content-type {:mime-type "text/plain" :charset StandardCharsets/US_ASCII}}
{:name "expandedCsStr" :content "expanded cs str" :content-type {:mime-type "text/plain" :charset "utf-8"}}
{:name "Content/type" :content "image/jpeg"}
{:name "foo.txt" :part-name "eggplant" :content "Eggplants"}
{:name "uri" :content (.toURI (io/file ".test-data"))}
{:name "url" :content (.toURL (.toURI (io/file ".test-data")))}
{:name "file" :content (io/file ".test-data")}
{:name "is" :content (ByteArrayInputStream. (.getBytes ".test-data" "UTF-8")) :file-name "data.info" :content-length (count (.getBytes ".test-data" "UTF-8"))}
{:name "data" :content (.getBytes "hi" "UTF-8") :content-type "text/plain" :file-name "data.txt"}
{:name "jsonParam" :content (io/file ".test-data") :content-type "application/json" :file-name "data.json"}])

(deftest test-boundary
(let [b (boundary)]
(is (nil? (s/explain-data ::multipart/boundary b)))))

(deftest test-body
(let [_ (spit (io/file ".test-data") "[\"hello world\"]")
ms [{:name "title" :content "My Awesome Picture"}
{:name "diffCs" :content "custom cs" :content-type "text/plain; charset=\"iso-8859-1\""}
{:name "expandedCs" :content "expanded cs" :content-type {:mime-type "text/plain" :charset StandardCharsets/US_ASCII}}
{:name "expandedCsStr" :content "expanded cs str" :content-type {:mime-type "text/plain" :charset "utf-8"}}
{:name "Content/type" :content "image/jpeg"}
{:name "foo.txt" :part-name "eggplant" :content "Eggplants"}
{:name "file" :content (io/file ".test-data")}
{:name "is" :content (ByteArrayInputStream. (.getBytes ".test-data" "UTF-8")) :file-name "data.info"}
{:name "data" :content (.getBytes "hi" "UTF-8") :content-type "text/plain" :file-name "data.txt"}
{:name "jsonParam" :content (io/file ".test-data") :content-type "application/json" :file-name "data.json"}]
(let [ms (make-test-segments)
b (body ms "boundary")
out-string (with-open [xin (io/input-stream b)
xout (ByteArrayOutputStream.)]
Expand Down Expand Up @@ -70,6 +76,20 @@
"\r\n"
"Eggplants\r\n"

"--boundary\r\n"
"Content-Disposition: form-data; name=\"uri\"\r\n"
"Content-Type: application/octet-stream\r\n"
"Content-Transfer-Encoding: binary\r\n"
"\r\n"
"[\"hello world\"]\r\n"

"--boundary\r\n"
"Content-Disposition: form-data; name=\"url\"\r\n"
"Content-Type: application/octet-stream\r\n"
"Content-Transfer-Encoding: binary\r\n"
"\r\n"
"[\"hello world\"]\r\n"

"--boundary\r\n"
"Content-Disposition: form-data; name=\"file\"; filename=\".test-data\"\r\n"
"Content-Type: application/octet-stream\r\n"
Expand Down Expand Up @@ -99,5 +119,29 @@
"[\"hello world\"]\r\n"
"--boundary--\r\n") out-string))))

(deftest test-content-length
(testing "when all segment content lengths are known, the computed length matches the actual body length"
(let [ms (make-test-segments)
segments (raw-multipart-payload-segments ms "boundary")
b (body segments)
computed-length (content-length segments)
actual-length (with-open [xin (io/input-stream b)
xout (ByteArrayOutputStream.)]
(io/copy xin xout)
(count (.toByteArray xout)))]
(is (= computed-length actual-length))))
(testing "when a single segment's length is unknown, computed length is -1"
(let [ms (concat (make-test-segments)
[{:name "is" :content (ByteArrayInputStream. (.getBytes ".test-data" "UTF-8")) :file-name "data.info"}])
segments (raw-multipart-payload-segments ms "boundary")
b (body segments)
computed-length (content-length segments)
actual-length (with-open [xin (io/input-stream b)
xout (ByteArrayOutputStream.)]
(io/copy xin xout)
(count (.toByteArray xout)))]
(is (not= computed-length actual-length))
(is (= computed-length -1)))))

(comment
(run-tests))

0 comments on commit 580844b

Please sign in to comment.