From 54116100dd7efbff02362038a3a1f5efb7bd9142 Mon Sep 17 00:00:00 2001 From: Andy Fingerhut Date: Wed, 15 Aug 2012 09:14:34 -0700 Subject: [PATCH] Fix for CLJ-753, CLJ-870, and CLJ-905. --- src/clj/clojure/string.clj | 86 ++++++++++++++++++++++++++-------- test/clojure/test_clojure/string.clj | 30 ++++++++++-- 2 files changed, 94 insertions(+), 22 deletions(-) diff --git a/src/clj/clojure/string.clj b/src/clj/clojure/string.clj index 188b518..21aa622 100644 --- a/src/clj/clojure/string.clj +++ b/src/clj/clojure/string.clj @@ -36,11 +36,11 @@ Design notes for clojure.string: general than String. In ordinary usage you will almost always pass concrete strings. If you are doing something unusual, e.g. passing a mutable implementation of CharSequence, then - thead-safety is your responsibility." + thread-safety is your responsibility." :author "Stuart Sierra, Stuart Halloway, David Liebke"} clojure.string (:refer-clojure :exclude (replace reverse)) - (:import (java.util.regex Pattern) + (:import (java.util.regex Pattern Matcher) clojure.lang.LazilyPersistentVector)) (defn ^String reverse @@ -49,16 +49,26 @@ Design notes for clojure.string: [^CharSequence s] (.toString (.reverse (StringBuilder. s)))) +(defn ^String re-quote-replacement + "Given a replacement string that you wish to be a literal + replacement for a pattern match in replace or replace-first, do the + necessary escaping of special characters in the replacement." + {:added "1.4"} + [^CharSequence replacement] + (Matcher/quoteReplacement (.toString ^CharSequence replacement))) + (defn- replace-by [^CharSequence s re f] (let [m (re-matcher re s)] - (let [buffer (StringBuffer. (.length s))] - (loop [] - (if (.find m) - (do (.appendReplacement m buffer (f (re-groups m))) - (recur)) - (do (.appendTail m buffer) - (.toString buffer))))))) + (if (.find m) + (let [buffer (StringBuffer. (.length s))] + (loop [found true] + (if found + (do (.appendReplacement m buffer (Matcher/quoteReplacement (f (re-groups m)))) + (recur (.find m))) + (do (.appendTail m buffer) + (.toString buffer))))) + s))) (defn ^String replace "Replaces all instance of match with replacement in s. @@ -69,7 +79,21 @@ Design notes for clojure.string: char / char pattern / (string or function of match). - See also replace-first." + See also replace-first. + + The replacement is literal (i.e. none of its characters are treated + specially) for all cases above except pattern / string. + + For pattern / string, $1, $2, etc. in the replacement string are + substituted with the string that matched the corresponding + parenthesized group in the pattern. If you wish your replacement + string r to be used literally, use (re-quote-replacement r) as the + replacement argument. See also documentation for + java.util.regex.Matcher's appendReplacement method. + + Example: + (clojure.string/replace \"Almost Pig Latin\" #\"\\b(\\w)(\\w+)\\b\" \"$2$1ay\") + -> \"lmostAay igPay atinLay\"" {:added "1.2"} [^CharSequence s match replacement] (let [s (.toString s)] @@ -85,12 +109,13 @@ Design notes for clojure.string: (defn- replace-first-by [^CharSequence s ^Pattern re f] (let [m (re-matcher re s)] - (let [buffer (StringBuffer. (.length s))] - (if (.find m) - (let [rep (f (re-groups m))] - (.appendReplacement m buffer rep) - (.appendTail m buffer) - (str buffer)))))) + (if (.find m) + (let [buffer (StringBuffer. (.length s)) + rep (Matcher/quoteReplacement (f (re-groups m)))] + (.appendReplacement m buffer rep) + (.appendTail m buffer) + (str buffer)) + s))) (defn- replace-first-char [^CharSequence s ^Character match replace] @@ -100,6 +125,14 @@ Design notes for clojure.string: s (str (subs s 0 i) replace (subs s (inc i)))))) +(defn- replace-first-str + [^CharSequence s ^String match ^String replace] + (let [^String s (.toString s) + i (.indexOf s match)] + (if (= -1 i) + s + (str (subs s 0 i) replace (subs s (+ i (.length match))))))) + (defn ^String replace-first "Replaces the first instance of match with replacement in s. @@ -109,7 +142,22 @@ Design notes for clojure.string: string / string pattern / (string or function of match). - See also replace-all." + See also replace. + + The replacement is literal (i.e. none of its characters are treated + specially) for all cases above except pattern / string. + + For pattern / string, $1, $2, etc. in the replacement string are + substituted with the string that matched the corresponding + parenthesized group in the pattern. If you wish your replacement + string r to be used literally, use (re-quote-replacement r) as the + replacement argument. See also documentation for + java.util.regex.Matcher's appendReplacement method. + + Example: + (clojure.string/replace-first \"swap first two words\" + #\"(\\w+)(\\s+)(\\w+)\" \"$3$2$1\") + -> \"first swap two words\"" {:added "1.2"} [^CharSequence s match replacement] (let [s (.toString s)] @@ -117,8 +165,8 @@ Design notes for clojure.string: (instance? Character match) (replace-first-char s match replacement) (instance? CharSequence match) - (.replaceFirst s (Pattern/quote (.toString ^CharSequence match)) - (.toString ^CharSequence replacement)) + (replace-first-str s (.toString ^CharSequence match) + (.toString ^CharSequence replacement)) (instance? Pattern match) (if (instance? CharSequence replacement) (.replaceFirst (re-matcher ^Pattern match s) diff --git a/test/clojure/test_clojure/string.clj b/test/clojure/test_clojure/string.clj index d6f6469..c6672cd 100644 --- a/test/clojure/test_clojure/string.clj +++ b/test/clojure/test_clojure/string.clj @@ -12,14 +12,36 @@ (deftest t-replace (is (= "faabar" (s/replace "foobar" \o \a))) + (is (= "foobar" (s/replace "foobar" \z \a))) (is (= "barbarbar" (s/replace "foobarfoo" "foo" "bar"))) - (is (= "FOObarFOO" (s/replace "foobarfoo" #"foo" s/upper-case)))) + (is (= "foobarfoo" (s/replace "foobarfoo" "baz" "bar"))) + (is (= "f$$d" (s/replace "food" "o" "$"))) + (is (= "f\\\\d" (s/replace "food" "o" "\\"))) + (is (= "barbarbar" (s/replace "foobarfoo" #"foo" "bar"))) + (is (= "foobarfoo" (s/replace "foobarfoo" #"baz" "bar"))) + (is (= "f$$d" (s/replace "food" #"o" (s/re-quote-replacement "$")))) + (is (= "f\\\\d" (s/replace "food" #"o" (s/re-quote-replacement "\\")))) + (is (= "FOObarFOO" (s/replace "foobarfoo" #"foo" s/upper-case))) + (is (= "foobarfoo" (s/replace "foobarfoo" #"baz" s/upper-case))) + (is (= "OObarOO" (s/replace "foobarfoo" #"f(o+)" (fn [[m g1]] (s/upper-case g1))))) + (is (= "baz\\bang\\" (s/replace "bazslashbangslash" #"slash" (constantly "\\"))))) (deftest t-replace-first + (is (= "faobar" (s/replace-first "foobar" \o \a))) + (is (= "foobar" (s/replace-first "foobar" \z \a))) + (is (= "z.ology" (s/replace-first "zoology" \o \.))) (is (= "barbarfoo" (s/replace-first "foobarfoo" "foo" "bar"))) + (is (= "foobarfoo" (s/replace-first "foobarfoo" "baz" "bar"))) + (is (= "f$od" (s/replace-first "food" "o" "$"))) + (is (= "f\\od" (s/replace-first "food" "o" "\\"))) (is (= "barbarfoo" (s/replace-first "foobarfoo" #"foo" "bar"))) - (is (= "z.ology" (s/replace-first "zoology" \o \.))) - (is (= "FOObarfoo" (s/replace-first "foobarfoo" #"foo" s/upper-case)))) + (is (= "foobarfoo" (s/replace-first "foobarfoo" #"baz" "bar"))) + (is (= "f$od" (s/replace-first "food" #"o" (s/re-quote-replacement "$")))) + (is (= "f\\od" (s/replace-first "food" #"o" (s/re-quote-replacement "\\")))) + (is (= "FOObarfoo" (s/replace-first "foobarfoo" #"foo" s/upper-case))) + (is (= "foobarfoo" (s/replace-first "foobarfoo" #"baz" s/upper-case))) + (is (= "OObarfoo" (s/replace-first "foobarfoo" #"f(o+)" (fn [[m g1]] (s/upper-case g1))))) + (is (= "baz\\bangslash" (s/replace-first "bazslashbangslash" #"slash" (constantly "\\"))))) (deftest t-join (are [x coll] (= x (s/join coll)) @@ -65,6 +87,7 @@ s/reverse [nil] s/replace [nil #"foo" "bar"] s/replace-first [nil #"foo" "bar"] + s/re-quote-replacement [nil] s/capitalize [nil] s/upper-case [nil] s/lower-case [nil] @@ -85,6 +108,7 @@ "baz::quux" s/replace-first ["baz--quux" #"--" "::"] "baz::quux" s/replace-first ["baz--quux" (StringBuffer. "--") (StringBuffer. "::")] "zim-zam" s/replace-first ["zim zam" #" " (StringBuffer. "-")] + "\\\\ \\$" s/re-quote-replacement ["\\ $"] "Pow" s/capitalize ["POW"] "BOOM" s/upper-case ["boom"] "whimper" s/lower-case ["whimPER"] -- 1.7.10