From ae1ea5637a71dab1478a9c1dc6162d1b18486cc9 Mon Sep 17 00:00:00 2001 From: Mike Anderson Date: Sun, 4 Dec 2011 22:58:50 -0800 Subject: [PATCH] Improvements to reduce performance Convert reduce to use internal-reduce for both arities, backed by IReduce in Java --- src/clj/clojure/core.clj | 7 +- src/clj/clojure/core/protocols.clj | 117 ++++++++++++++++----------- src/jvm/clojure/lang/ArrayChunk.java | 14 +++- src/jvm/clojure/lang/ArraySeq.java | 2 +- src/jvm/clojure/lang/IReduce.java | 7 +- src/jvm/clojure/lang/PersistentHashMap.java | 64 ++++++++++++++- src/jvm/clojure/lang/PersistentVector.java | 35 +++++++- src/jvm/clojure/lang/RT.java | 38 +++++++++ src/jvm/clojure/lang/StringSeq.java | 21 +++++- src/script/run_tests.clj | 1 + test/clojure/test_clojure/reduce.clj | 56 +++++++++++++ 11 files changed, 295 insertions(+), 67 deletions(-) create mode 100644 test/clojure/test_clojure/reduce.clj diff --git a/src/clj/clojure/core.clj b/src/clj/clojure/core.clj index 1dc12c3..6bed802 100644 --- a/src/clj/clojure/core.clj +++ b/src/clj/clojure/core.clj @@ -6016,12 +6016,9 @@ items, returns val and f is not called." {:added "1.0"} ([f coll] - (if-let [s (seq coll)] - (reduce f (first s) (next s)) - (f))) + (clojure.core.protocols/internal-reduce coll f)) ([f val coll] - (let [s (seq coll)] - (clojure.core.protocols/internal-reduce s f val)))) + (clojure.core.protocols/internal-reduce coll f val))) (defn into "Returns a new coll consisting of to-coll with all of the items of diff --git a/src/clj/clojure/core/protocols.clj b/src/clj/clojure/core/protocols.clj index bcf4f7b..ac0b66a 100644 --- a/src/clj/clojure/core/protocols.clj +++ b/src/clj/clojure/core/protocols.clj @@ -9,71 +9,95 @@ (ns clojure.core.protocols) (defprotocol InternalReduce - "Protocol for concrete seq types that can reduce themselves + "Protocol for concrete types that can reduce themselves faster than first/next recursion. Called by clojure.core/reduce." - (internal-reduce [seq f start])) + (internal-reduce + [seq f] + [seq f start])) (extend-protocol InternalReduce nil (internal-reduce - [s f val] - val) + ([s f] + (f)) + ([s f val] + val)) - ;; handles vectors and ranges - clojure.lang.IChunkedSeq - (internal-reduce - [s f val] - (if-let [s (seq s)] - (if (chunked-seq? s) - (recur (chunk-next s) - f - (.reduce (chunk-first s) f val)) - (internal-reduce s f val)) - val)) - - clojure.lang.StringSeq + ;; handle reducible collection types directly + clojure.lang.IReduce (internal-reduce - [str-seq f val] - (let [s (.s str-seq)] - (loop [i (.i str-seq) - val val] - (if (< i (.length s)) - (recur (inc i) (f val (.charAt s i))) - val)))) + ([s f] + (.reduce s f)) + ([s f val] + (.reduce s f val))) - clojure.lang.ArraySeq + ;; special case handling for strings + java.lang.String (internal-reduce - [a-seq f val] - (let [^objects arr (.array a-seq)] - (loop [i (.index a-seq) - val val] - (if (< i (alength arr)) - (recur (inc i) (f val (aget arr i))) - val)))) + ([s f] + (if (= 0 (.length s)) + (f) + (loop [i 1 + val ^Object (.charAt s 0)] + (if (< i (.length s)) + (recur (inc i) (f val (.charAt s i))) + val)))) + ([s f val] + (loop [i 0 + val val] + (if (< i (.length s)) + (recur (inc i) (f val (.charAt s i))) + val)))) - java.lang.Object + clojure.lang.ISeq (internal-reduce - [s f val] - (loop [cls (class s) - s s - f f - val val] - (if-let [s (seq s)] - ;; roll over to faster implementation if underlying seq changes type - (if (identical? (class s) cls) - (recur cls (next s) f (f val (first s))) - (internal-reduce s f val)) - val)))) + ([s f] + (internal-reduce (next s) f (first s))) + ([s f val] + (loop [cls (class s) + s (seq s) + f f + val val] + (if s + ;; roll over to faster implementation if underlying seq changes type + (if (identical? (class s) cls) + (recur cls (next s) f (f val (first s))) + (internal-reduce s f val)) + val)))) + + java.lang.Object + (internal-reduce + ([s f] + (if-let [ss ^clojure.lang.ISeq (seq s)] + (internal-reduce (next ss) f (first ss)) + (f))) + ([s f val] + (if-let [ss ^clojure.lang.ISeq (seq s)] + (internal-reduce ss f val) + val))) + + ) (def arr-impl '(internal-reduce - [a-seq f val] + ([a-seq f] + (let [arr (.array a-seq) + end (alength arr) + i (.index a-seq)] + (if (< i end) + (loop [offset (inc i) + val ^Object (aget arr i)] + (if (< offset end) + (recur (inc offset) (f val (aget arr offset))) + val)) + (f)))) + ([a-seq f val] (let [arr (.array a-seq)] (loop [i (.index a-seq) val val] (if (< i (alength arr)) (recur (inc i) (f val (aget arr i))) - val))))) + val)))))) (defn- emit-array-impls* [syms] @@ -91,4 +115,3 @@ ~@(emit-array-impls* syms))) (emit-array-impls int long float double byte char boolean) - diff --git a/src/jvm/clojure/lang/ArrayChunk.java b/src/jvm/clojure/lang/ArrayChunk.java index 76e818c..ad72b94 100644 --- a/src/jvm/clojure/lang/ArrayChunk.java +++ b/src/jvm/clojure/lang/ArrayChunk.java @@ -14,7 +14,7 @@ package clojure.lang; import java.io.Serializable; -public final class ArrayChunk implements IChunk, Serializable { +public final class ArrayChunk implements IChunk, Serializable, IReduce { final Object[] array; final int off; @@ -55,9 +55,17 @@ public IChunk dropFirst(){ } public Object reduce(IFn f, Object start) { - Object ret = f.invoke(start, array[off]); - for(int x = off + 1; x < end; x++) + Object ret = start; + for(int x = off; x < end; x++) ret = f.invoke(ret, array[x]); return ret; } + +public Object reduce(IFn f) { + if (off>=end) return f.invoke(); + Object ret = array[off]; + for(int x = off+1; x < end; x++) + ret = f.invoke(ret, array[x]); + return ret; +} } diff --git a/src/jvm/clojure/lang/ArraySeq.java b/src/jvm/clojure/lang/ArraySeq.java index f6f0dd7..249d52d 100644 --- a/src/jvm/clojure/lang/ArraySeq.java +++ b/src/jvm/clojure/lang/ArraySeq.java @@ -14,7 +14,7 @@ package clojure.lang; import java.lang.reflect.Array; -public class ArraySeq extends ASeq implements IndexedSeq, IReduce{ +public class ArraySeq extends ASeq implements IndexedSeq, IReduce { public final Object array; final int i; final Object[] oa; diff --git a/src/jvm/clojure/lang/IReduce.java b/src/jvm/clojure/lang/IReduce.java index 364225b..f51d1c1 100644 --- a/src/jvm/clojure/lang/IReduce.java +++ b/src/jvm/clojure/lang/IReduce.java @@ -12,8 +12,7 @@ package clojure.lang; -public interface IReduce{ -Object reduce(IFn f) ; - -Object reduce(IFn f, Object start) ; +public interface IReduce { + Object reduce(IFn f) ; + Object reduce(IFn f, Object start); } diff --git a/src/jvm/clojure/lang/PersistentHashMap.java b/src/jvm/clojure/lang/PersistentHashMap.java index 928b123..94796ee 100644 --- a/src/jvm/clojure/lang/PersistentHashMap.java +++ b/src/jvm/clojure/lang/PersistentHashMap.java @@ -26,7 +26,7 @@ import java.util.concurrent.atomic.AtomicReference; Any errors are my own */ -public class PersistentHashMap extends APersistentMap implements IEditableCollection, IObj { +public class PersistentHashMap extends APersistentMap implements IEditableCollection, IObj, IReduce { final int count; final INode root; @@ -187,6 +187,14 @@ public ISeq seq(){ return hasNull ? new Cons(new MapEntry(null, nullValue), s) : s; } +public Object reduce(IFn function, Object value) { + return root.reduce(function,value); +} + +public Object reduce(IFn function) { + return root.reduce(function); +} + public IPersistentCollection empty(){ return EMPTY.withMeta(meta()); } @@ -300,6 +308,10 @@ static final class TransientHashMap extends ATransientMap { static interface INode extends Serializable { INode assoc(int shift, int hash, Object key, Object val, Box addedLeaf); + Object reduce(IFn function); + + Object reduce(IFn function, Object value); + INode without(int shift, int hash, Object key); IMapEntry find(int shift, int hash, Object key); @@ -476,6 +488,23 @@ final static class ArrayNode implements INode{ } } + + public Object reduce(IFn function, Object value) { + for (int i=0; i edit; @@ -232,7 +233,7 @@ public ISeq seq(){ return chunkedSeq(); } -static public final class ChunkedSeq extends ASeq implements IChunkedSeq{ +static public final class ChunkedSeq extends ASeq implements IChunkedSeq, IReduce { public final PersistentVector vec; final Object[] node; @@ -263,13 +264,13 @@ static public final class ChunkedSeq extends ASeq implements IChunkedSeq{ public IChunk chunkedFirst() { return new ArrayChunk(node, offset); - } + } - public ISeq chunkedNext(){ + public ChunkedSeq chunkedNext(){ if(i + node.length < vec.cnt) return new ChunkedSeq(vec,i+ node.length,0); return null; - } + } public ISeq chunkedMore(){ ISeq s = chunkedNext(); @@ -293,6 +294,30 @@ static public final class ChunkedSeq extends ASeq implements IChunkedSeq{ return new ChunkedSeq(vec, node, i, offset + 1); return chunkedNext(); } + + public Object reduce(IFn f) { + Object val=node[offset]; + + for (int j=offset+1; j=length) return f.invoke(); + Object val=s.charAt(i); + for (int j=i+1; j