Skip to content

Commit

Permalink
Body encoding now always defaults to UTF-8
Browse files Browse the repository at this point in the history
Previously the body was always encoded using the system default character
encoding, which is UTF-8 except on Windows, where it is windows-1252.

Now:
- encoding body to requested body encoding
- and defaulting to UTF-8 if no body encoding requested

Also:
- Added extended characters to http bodies in tests to exercise
body encoding/decoding
- Verified :auto body decoding with an extra test

Note that :auto body decoding might not be entirely correct.
It will only look at content-type charset if content-type starts with
"text/". Diagnosing/fixing this is out of scope for this PR.

Fixes clj-commons#18
  • Loading branch information
lread committed Aug 13, 2022
1 parent 09b4e0e commit 81235c7
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 24 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
### Unreleased

- Body encoding now respects requested encoding and defaults to UTF-8 ([#18](https://github.com/clj-commons/clj-http-lite/issues/18)) ([@lread](https://github.com/lread))
- Quality
- Automated CI testing added for Windows ([#21](https://github.com/clj-commons/clj-http-lite/issues/21)) ([@lread](https://github.com/lread))

### 0.4.384

- Support self-signed certificates via `:insecure? true` option
Expand Down
16 changes: 9 additions & 7 deletions src/clj_http/lite/client.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
[clj-http.lite.util :as util]
[clojure.java.io :as io]
[clojure.string :as str])
(:import (java.net UnknownHostException))
(:import (java.net UnknownHostException)
(java.nio.charset Charset))
(:refer-clojure :exclude (get update)))

(set! *warn-on-reflection* true)
Expand Down Expand Up @@ -106,12 +107,13 @@
(fn [{:keys [body body-encoding _length] :as req}]
(if body
(cond
(string? body)
(client (-> req (assoc :body (.getBytes ^String body)
:character-encoding (or body-encoding
"UTF-8"))))
:else
(client req))
(string? body)
(let [encoding-name (or body-encoding "UTF-8")
charset (Charset/forName encoding-name)]
(client (-> req (assoc :body (.getBytes ^String body charset)
:character-encoding encoding-name))))
:else
(client req))
(client req))))

(defn content-type-value [type]
Expand Down
38 changes: 22 additions & 16 deletions test/clj_http/test/client.clj
Original file line number Diff line number Diff line change
Expand Up @@ -93,21 +93,21 @@
(let [client (fn [req]
(is (= "gzip, deflate"
(get-in req [:headers "Accept-Encoding"])))
{:body (util/gzip (util/utf8-bytes "foofoofoo"))
{:body (util/gzip (util/utf8-bytes "foofoofooƒ⊙⊙"))
:headers {"Content-Encoding" "gzip"}})
c-client (client/wrap-decompression client)
resp (c-client {})]
(is (= "foofoofoo" (util/utf8-string (:body resp))))))
(is (= "foofoofooƒ⊙⊙" (util/utf8-string (:body resp))))))

(deftest apply-on-deflated
(let [client (fn [req]
(is (= "gzip, deflate"
(get-in req [:headers "Accept-Encoding"])))
{:body (util/deflate (util/utf8-bytes "barbarbar"))
{:body (util/deflate (util/utf8-bytes "barbarbar⒝⒜⒭"))
:headers {"Content-Encoding" "deflate"}})
c-client (client/wrap-decompression client)
resp (c-client {})]
(is (= "barbarbar" (util/utf8-string (:body resp))))))
(is (= "barbarbar⒝⒜⒭" (util/utf8-string (:body resp))))))

(deftest pass-on-non-compressed
(let [c-client (client/wrap-decompression (fn [req] {:body "foo"}))
Expand Down Expand Up @@ -141,11 +141,18 @@
(is-passed client/wrap-accept-encoding
{:uri "/foo"}))

(deftest apply-on-output-coercion
(let [client (fn [req] {:body (util/utf8-bytes "foo")})
(deftest apply-on-utf8-output-coercion
(let [client (fn [req] {:body (util/utf8-bytes "fooⓕⓞⓞ")})
o-client (client/wrap-output-coercion client)
resp (o-client {:uri "/foo"})]
(is (= "foo" (:body resp)))))
(is (= "fooⓕⓞⓞ" (:body resp)))))

;; Checks output coercion for a non-UTF-8 response: the stubbed response body
;; is windows-1252 encoded bytes, the response content-type header declares
;; charset=windows-1252, and the request asks for `:as :auto`. The coerced
;; :body is expected to equal the original string.
(deftest apply-on-other-output-coercion
;; Stub client: ignores the request and returns a fixed response whose body
;; bytes were produced with the windows-1252 charset.
(let [client (fn [req] {:body (.getBytes "sõme ßÒññÝ chÀråcters" "windows-1252")
;; NOTE(review): the "text/" prefix looks deliberate -- per the commit notes,
;; :auto only consults the charset when content-type starts with "text/";
;; confirm against wrap-output-coercion.
:headers {"content-type" "text/foo;charset=windows-1252"}})
o-client (client/wrap-output-coercion client)
;; :as :auto requests automatic body decoding.
resp (o-client {:uri "/foo" :as :auto})]
(is (= "sõme ßÒññÝ chÀråcters" (:body resp)))))

(deftest pass-on-no-output-coercion
(let [client (fn [req] {:body nil})
Expand All @@ -158,15 +165,14 @@
(is (= :thebytes (:body resp)))))

(deftest apply-on-input-coercion
(let [i-client (client/wrap-input-coercion identity)
resp (i-client {:body "foo"})
resp2 (i-client {:body "foo2" :body-encoding "ASCII"})
data (slurp (:body resp))
data2 (slurp (:body resp2))]
(is (= "UTF-8" (:character-encoding resp)))
(is (= "foo" data))
(is (= "ASCII" (:character-encoding resp2)))
(is (= "foo2" data2))))
(let [i-client (client/wrap-input-coercion identity)]
(doseq [[in-body encoding expected-encoding] [["μτƒ8 нαs мαηλ ςнαяαςτεяs ൠ" nil "UTF-8"]
["plain text" "ASCII" "ASCII"]
["sõme ßÒññÝ chÀråcters" "windows-1252" "windows-1252"]]]
(let [resp (i-client {:body in-body :body-encoding encoding})
decoded-body (slurp (:body resp) :encoding expected-encoding)]
(is (= expected-encoding (:character-encoding resp)) "character encoding")
(is (= in-body decoded-body) "body")))))

(deftest pass-on-no-input-coercion
(is-passed client/wrap-input-coercion
Expand Down
2 changes: 1 addition & 1 deletion test/clj_http/test/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@

(deftest ^{:integration true} t-save-request-obj
(let [resp (request {:request-method :post :uri "/post"
:body (.getBytes "foo bar")
:body (.getBytes "foo bar" "UTF-8")
:save-request? true})]
(is (= 200 (:status resp)))
(is (= {:scheme :http
Expand Down

0 comments on commit 81235c7

Please sign in to comment.