From 259cb46ff1d7007ab04d126106aa59752b2d1f00 Mon Sep 17 00:00:00 2001
From: Daniel Janus
Date: Wed, 10 Aug 2022 19:05:51 +0200
Subject: [PATCH] Strip quotes in detect-charset

RFC 7231 Section 3.1.1.1 allows the charset specified in the
Content-Type header to be a quoted string. We want detect-charset to
return unquoted charset names, so that they can be passed to, say,
java.nio.charset.Charset/forName without postprocessing.

This commit fixes that, adds a test for a quoted charset, and rewrites
t-detect-charset-by-content-type to use clojure.test/are to reduce
repetition and read more nicely.

Quotes are only stripped from values of at least two characters, so a
malformed header such as `charset="` yields the lone quote unchanged
instead of throwing StringIndexOutOfBoundsException from subs.
---
 src/clj_http/client.clj            | 13 ++++++++++++-
 test/clj_http/test/client_test.clj | 19 ++++++++++---------
 2 files changed, 22 insertions(+), 10 deletions(-)

diff --git a/src/clj_http/client.clj b/src/clj_http/client.clj
index 8c38cf0c..18d395cc 100644
--- a/src/clj_http/client.clj
+++ b/src/clj_http/client.clj
@@ -756,13 +756,24 @@
     ([req respond raise]
      (client (accept-encoding-request req) respond raise))))
 
+(defn- strip-quotes
+  "If s has at least two characters and both starts and ends with \",
+  returns s with the surrounding quotes stripped, else returns it unchanged.
+  The length check keeps a lone \" from reaching subs with end < start."
+  [^String s]
+  (if (and (< 1 (count s))
+           (.startsWith s "\"")
+           (.endsWith s "\""))
+    (subs s 1 (dec (count s)))
+    s))
+
 (defn detect-charset
   "Given a charset header, detect the charset, returns UTF-8 if not found."
   [content-type]
   (or
    (when-let [found (when content-type
                       (re-find #"(?i)charset\s*=\s*([^\s]+)" content-type))]
-     (second found))
+     (strip-quotes (second found)))
    "UTF-8"))
 
 (defn- multi-param-entries [key values multi-param-style encoding]
diff --git a/test/clj_http/test/client_test.clj b/test/clj_http/test/client_test.clj
index d2c2c59b..3298dd8c 100644
--- a/test/clj_http/test/client_test.clj
+++ b/test/clj_http/test/client_test.clj
@@ -1633,15 +1633,16 @@
                 client/wrap-request-timing))))
 
 (deftest t-detect-charset-by-content-type
-  (is (= "UTF-8" (client/detect-charset nil)))
-  (is (= "UTF-8"(client/detect-charset "application/json")))
-  (is (= "UTF-8"(client/detect-charset "text/html")))
-  (is (= "GBK"(client/detect-charset "application/json; charset=GBK")))
-  (is (= "ISO-8859-1" (client/detect-charset
-                       "application/json; charset=ISO-8859-1")))
-  (is (= "ISO-8859-1" (client/detect-charset
-                       "application/json; charset = ISO-8859-1")))
-  (is (= "GB2312" (client/detect-charset "text/html; Charset=GB2312"))))
+  (are [content-type expected-charset] (= expected-charset (client/detect-charset content-type))
+    nil                                      "UTF-8"
+    "application/json"                       "UTF-8"
+    "text/html"                              "UTF-8"
+    "application/json; charset=GBK"          "GBK"
+    "application/json; charset=ISO-8859-1"   "ISO-8859-1"
+    "application/json; charset = ISO-8859-1" "ISO-8859-1"
+    "text/html; Charset=GB2312"              "GB2312"
+    "text/html; charset=\"ISO-8859-1\""      "ISO-8859-1"
+    "text/html; charset=\""                  "\""))
 
 (deftest ^:integration customMethodTest
   (run-server)