From de1c700d5e093addce50657802e0b8d0386fdc3f Mon Sep 17 00:00:00 2001
From: Matthew Phillips
Date: Thu, 15 Dec 2011 15:21:00 +1030
Subject: [PATCH] Fix write-json-string for strings with codepoints outside the BMP.

Was not iterating over surrogate characters correctly: the old loop
visited every UTF-16 char index, so a surrogate pair was written once as
a code point and then again as a stray low surrogate.

The string is now walked one code point at a time. In addition:

* With escape-unicode on, a code point above U+FFFF is written as its
  escaped surrogate pair (\ud83d\ude03) rather than as a five-digit
  \u1f603, which is not a valid JSON escape.
* The index advances by Character/charCount, so an unpaired high
  surrogate no longer swallows the character that follows it.
* codepoints is lazy at every step, so long strings cannot overflow
  the stack, and it accepts any CharSequence, matching its caller.
---
 src/main/clojure/clojure/data/json.clj      |   59 ++++++++++++++++++---------
 src/test/clojure/clojure/data/json_test.clj |    6 +++
 2 files changed, 45 insertions(+), 20 deletions(-)

diff --git a/src/main/clojure/clojure/data/json.clj b/src/main/clojure/clojure/data/json.clj
index dbf8282..112db34 100644
--- a/src/main/clojure/clojure/data/json.clj
+++ b/src/main/clojure/clojure/data/json.clj
@@ -198,29 +198,48 @@
   (write-json [object out escape-unicode?]
     "Print object to PrintWriter out as JSON"))
 
+;; NB adapted from contrib.string, which is no longer available in 1.3
+(defn- codepoints
+  "Returns a lazy sequence of the integer Unicode code points in s.
+  A well-formed surrogate pair yields a single supplementary code
+  point (above U+FFFF); an unpaired surrogate yields its own value
+  and consumes only one char, so no following character is skipped."
+  [^CharSequence s]
+  (let [len (.length s)
+        step (fn step [i]
+               ;; lazy-seq around every step keeps the recursion off
+               ;; the call stack, so long strings cannot overflow it.
+               (lazy-seq
+                 (when (< i len)
+                   (let [cp (Character/codePointAt s (int i))]
+                     (cons cp (step (+ i (Character/charCount cp))))))))]
+    (step 0)))
+
 (defn- write-json-string [^CharSequence s ^PrintWriter out escape-unicode?]
   (let [sb (StringBuilder. ^Integer (count s))]
     (.append sb \")
-    (dotimes [i (count s)]
-      (let [cp (Character/codePointAt s i)]
-        (cond
-         ;; Handle printable JSON escapes before ASCII
-         (= cp 34) (.append sb "\\\"")
-         (= cp 92) (.append sb "\\\\")
-         (= cp 47) (.append sb "\\/")
-         ;; Print simple ASCII characters
-         (< 31 cp 127) (.append sb (.charAt s i))
-         ;; Handle non-printable JSON escapes
-         (= cp 8) (.append sb "\\b")
-         (= cp 12) (.append sb "\\f")
-         (= cp 10) (.append sb "\\n")
-         (= cp 13) (.append sb "\\r")
-         (= cp 9) (.append sb "\\t")
-         ;; Any other character is Unicode
-         :else (if escape-unicode?
-                 ;; Hexadecimal-escaped
-                 (.append sb (format "\\u%04x" cp))
-                 (.appendCodePoint sb cp)))))
+    (doseq [cp (codepoints s)]
+      (cond
+       ;; Handle printable JSON escapes before ASCII
+       (= cp 34) (.append sb "\\\"")
+       (= cp 92) (.append sb "\\\\")
+       (= cp 47) (.append sb "\\/")
+       ;; Print simple ASCII characters
+       (< 31 cp 127) (.appendCodePoint sb cp)
+       ;; Handle non-printable JSON escapes
+       (= cp 8) (.append sb "\\b")
+       (= cp 12) (.append sb "\\f")
+       (= cp 10) (.append sb "\\n")
+       (= cp 13) (.append sb "\\r")
+       (= cp 9) (.append sb "\\t")
+       ;; Any other character is Unicode
+       :else (if escape-unicode?
+               ;; Hexadecimal-escaped, one \uXXXX per UTF-16 code unit:
+               ;; JSON escapes are exactly four hex digits, so a code
+               ;; point above U+FFFF is written as its surrogate pair.
+               (doseq [c (Character/toChars cp)]
+                 (.append sb (format "\\u%04x" (int c))))
+               (.appendCodePoint sb cp))))
     (.append sb \")
     (.print out (str sb))))
 
diff --git a/src/test/clojure/clojure/data/json_test.clj b/src/test/clojure/clojure/data/json_test.clj
index 1805928..4b0085b 100644
--- a/src/test/clojure/clojure/data/json_test.clj
+++ b/src/test/clojure/clojure/data/json_test.clj
@@ -27,6 +27,12 @@
 (deftest handles-unicode-escapes
   (is (= " \u0beb " (read-json "\" \\u0bEb \""))))
 
+(deftest handles-unicode-outside-bmp
+  (is (= "\"smiling face: \uD83D\uDE03\""
+         (json-str "smiling face: \uD83D\uDE03" :escape-unicode false)))
+  (is (= "\"smiling face: \\ud83d\\ude03\""
+         (json-str "smiling face: \uD83D\uDE03" :escape-unicode true))))
+
 (deftest handles-escaped-whitespace
   (is (= "foo\nbar" (read-json "\"foo\\nbar\"")))
   (is (= "foo\rbar" (read-json "\"foo\\rbar\"")))
-- 
1.7.5.4