<!-- 
RSS generated by JIRA (4.4#649-r158309) at Mon May 20 16:26:44 CDT 2013

It is possible to restrict the fields that are returned in this document by specifying the 'field' parameter in your request.
For example, to request only the issue key and summary add field=key&field=summary to the URL of your request.
For example:
http://dev.clojure.org/jira/si/jira.issueviews:issue-xml/CLJ-1000/CLJ-1000.xml?field=key&field=summary
-->
<rss version="0.92" >
<channel>
    <title>Clojure JIRA</title>
    <link>http://dev.clojure.org/jira</link>
    <description>This file is an XML representation of an issue</description>
    <language>en-us</language>    <build-info>
        <version>4.4</version>
        <build-number>649</build-number>
        <build-date>25-07-2011</build-date>
    </build-info>

<item>
            <title>[CLJ-1000] Performance drop in PersistentHashMap.valAt(...) in v.1.4 -- Util.hasheq(...) ?</title>
                <link>http://dev.clojure.org/jira/browse/CLJ-1000</link>
                <project id="10010" key="CLJ">Clojure</project>
                        <description>&lt;p&gt;It seems there is a 30-40% performance degradation of PersistentHashMap.valAt(...) in Clojure 1.4.&lt;br/&gt;
Possibly due to references to new CPU-hungry implementation of Util.hasheq(...).&lt;/p&gt;

&lt;p&gt;I have created a demo project with more details and some profiling information here:&lt;br/&gt;
&lt;a href=&quot;https://github.com/oshyshko/clj-perf&quot;&gt;https://github.com/oshyshko/clj-perf&lt;/a&gt;&lt;/p&gt;</description>
                <environment>Java(TM) SE Runtime Environment (build 1.7.0_04-b21)&lt;br/&gt;
Java HotSpot(TM) 64-Bit Server VM (build 23.0-b21, mixed mode)</environment>
            <key id="15462">CLJ-1000</key>
            <summary>Performance drop in PersistentHashMap.valAt(...) in v.1.4 -- Util.hasheq(...) ?</summary>
                <type id="4" iconUrl="http://dev.clojure.org/jira/images/icons/improvement.gif">Enhancement</type>
                                <priority id="3" iconUrl="http://dev.clojure.org/jira/images/icons/priority_major.gif">Major</priority>
                    <status id="6" iconUrl="http://dev.clojure.org/jira/images/icons/status_closed.gif">Closed</status>
                    <resolution id="1">Completed</resolution>
                                <assignee username="halgari">Timothy Baldridge</assignee>
                                <reporter username="oshyshko">Oleksandr Shyshko</reporter>
                        <labels>
                        <label>performance</label>
                    </labels>
                <created>Mon, 21 May 2012 22:06:54 -0500</created>
                <updated>Fri, 1 Mar 2013 09:49:21 -0600</updated>
                    <resolved>Tue, 11 Dec 2012 11:14:49 -0600</resolved>
                            <version>Release 1.4</version>
                                <fixVersion>Release 1.5</fixVersion>
                                        <due></due>
                    <votes>1</votes>
                        <watches>2</watches>
                        <comments>
                    <comment id="30047" author="cgrand" created="Tue, 27 Nov 2012 08:30:26 -0600"  >&lt;p&gt;I added a patch consisting of three commits:&lt;/p&gt;
&lt;ul&gt;
	&lt;li&gt;one which adds caching to seqs, sets, maps, vectors and queues&lt;/li&gt;
	&lt;li&gt;one that aligns the shape of Util/hasheq with that of Util/equiv (to get consistent behavior from the JIT compiler: without that, deoptimization was more penalizing for hasheq than for equiv)&lt;/li&gt;
	&lt;li&gt;one that fixes hasheq on records (which was inconsistent with its equiv impl.) &amp;#8211; and this commit relies on a static method introduced in the &quot;caching hasheq&quot; commit&lt;/li&gt;
&lt;/ul&gt;
</comment>
                    <comment id="30112" author="halgari" created="Fri, 30 Nov 2012 12:10:26 -0600"  >&lt;p&gt;In the process of screening this, I&apos;m not seeing much of a performance difference after applying the patch. &lt;/p&gt;

&lt;p&gt;before patch:&lt;br/&gt;
user=&amp;gt; (-main)&lt;/p&gt;

&lt;p&gt;Version:  1.5.0-master-SNAPSHOT&lt;br/&gt;
&quot;Elapsed time: 6373.752 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6578.037 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6476.399 msecs&quot;&lt;/p&gt;


&lt;p&gt;after patch:&lt;br/&gt;
user=&amp;gt; (-main)&lt;/p&gt;

&lt;p&gt;Version:  1.5.0-master-SNAPSHOT&lt;br/&gt;
&quot;Elapsed time: 6182.699 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6548.086 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6496.711 msecs&quot;&lt;/p&gt;


&lt;p&gt;clojure 1.4:&lt;br/&gt;
user=&amp;gt; (-main)&lt;/p&gt;

&lt;p&gt;Version:  1.4.0&lt;br/&gt;
&quot;Elapsed time: 6484.234 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6243.672 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6248.898 msecs&quot;&lt;/p&gt;

&lt;p&gt;clojure 1.3&lt;br/&gt;
user=&amp;gt; (-main)&lt;/p&gt;

&lt;p&gt;Version:  1.3.0&lt;br/&gt;
&quot;Elapsed time: 3584.966 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3618.189 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3372.979 msecs&quot;&lt;/p&gt;


&lt;p&gt;I blew away my local clojure repo and re-applied the patch just to make sure, but the results are the same. Does this fix not optimize the case given in the original test project? &lt;/p&gt;

&lt;p&gt;For reference I&apos;m running this code:&lt;/p&gt;

&lt;p&gt;(defn -main&lt;br/&gt;
  &lt;span class=&quot;error&quot;&gt;&amp;#91;&amp;amp; args&amp;#93;&lt;/span&gt;&lt;/p&gt;

&lt;p&gt;  (println)&lt;br/&gt;
  (println &quot;Version: &quot; (clojure-version))&lt;/p&gt;

&lt;p&gt;  (def mm 10000)&lt;/p&gt;

&lt;p&gt;  (def str-keys (map str (range mm)))&lt;br/&gt;
  (def m (zipmap str-keys (range mm)))&lt;br/&gt;
  (time (dotimes &lt;span class=&quot;error&quot;&gt;&amp;#91;i mm&amp;#93;&lt;/span&gt; (doseq &lt;span class=&quot;error&quot;&gt;&amp;#91;k str-keys&amp;#93;&lt;/span&gt; (m k))))&lt;/p&gt;

&lt;p&gt;  (def kw-keys (map #(keyword (str %)) (range mm)))&lt;br/&gt;
  (def m (zipmap kw-keys (range mm)))&lt;br/&gt;
  (time (dotimes &lt;span class=&quot;error&quot;&gt;&amp;#91;i mm&amp;#93;&lt;/span&gt; (doseq &lt;span class=&quot;error&quot;&gt;&amp;#91;k kw-keys&amp;#93;&lt;/span&gt; (m k))))&lt;/p&gt;

&lt;p&gt;  (def sym-keys (map #(symbol (str %)) (range mm)))&lt;br/&gt;
  (def m (zipmap sym-keys (range mm)))&lt;br/&gt;
  (time (dotimes &lt;span class=&quot;error&quot;&gt;&amp;#91;i mm&amp;#93;&lt;/span&gt; (doseq &lt;span class=&quot;error&quot;&gt;&amp;#91;k sym-keys&amp;#93;&lt;/span&gt; (m k))))&lt;/p&gt;

&lt;p&gt;  (println))&lt;/p&gt;</comment>
                    <comment id="30116" author="cgrand" created="Fri, 30 Nov 2012 14:10:19 -0600"  >&lt;p&gt;Sorry, I was too quick to react on the ML (someone said it was related to hasheq caching and since I had the patch almost ready: on a project I noticed too much time spent computing hasheq on vectors).&lt;br/&gt;
So no, my patch doesn&apos;t improve anything with kws, syms or strs as keys. However when keys are collections, it fares better.&lt;/p&gt;

&lt;p&gt;In 1.4, for a &quot;regular&quot; object, it must fail two instanceof tests before calling .hashCode().&lt;br/&gt;
If we make keywords and symbols implement IHashEq and reverse the test order (first IHashEq, then Number) it should improve the performance of the above benchmark &amp;#8211; except for Strings.&lt;/p&gt;</comment>
                    <comment id="30117" author="halgari" created="Fri, 30 Nov 2012 14:16:42 -0600"  >&lt;p&gt;Marking as incomplete, should we also delete the patch as it seems like it should be in a different ticket?&lt;/p&gt;</comment>
                    <comment id="30153" author="cgrand" created="Mon, 3 Dec 2012 10:00:07 -0600"  >&lt;p&gt;In 1.3, #&apos;hash was going through Object.hashCode and thus was simple and fast. Plus collections hashes were cached.&lt;br/&gt;
In 1.4 and master, #&apos;hash now goes through two instanceof tests (Number and IHashEq, in this order) before trying Object.hashCode as a last resort. Plus collections hashes are not cached.&lt;br/&gt;
As such I&apos;m not sure Util.hasheq&apos;s inherent slowness (compared to Util.hash) and hasheq caching should be separated into two issues.&lt;/p&gt;

&lt;p&gt;The caching-hasheq-v2.diff patchset reintroduces hashes caching for collections/hasheq and reorders the instanceof tests (to test for IHashEq before Number) and makes Keyword and Symbol implement IHashEq to branch fast in Util.hasheq.&lt;/p&gt;

&lt;p&gt;I recommend adding a collection test to the current benchmark:&lt;/p&gt;

&lt;div class=&quot;code panel&quot; style=&quot;border-width: 1px;&quot;&gt;&lt;div class=&quot;codeContent panelContent&quot;&gt;
&lt;pre class=&quot;code-java&quot;&gt;(defn -main
[&amp;amp; args]

(println)
(println &lt;span class=&quot;code-quote&quot;&gt;&quot;Version: &quot;&lt;/span&gt; (clojure-version))

(def mm 10000)

(def str-keys (map str (range mm)))
(def m (zipmap str-keys (range mm)))
(time (dotimes [i mm] (doseq [k str-keys] (m k))))

(def kw-keys (map #(keyword (str %)) (range mm)))
(def m (zipmap kw-keys (range mm)))
(time (dotimes [i mm] (doseq [k kw-keys] (m k))))

(def sym-keys (map #(symbol (str %)) (range mm)))
(def m (zipmap sym-keys (range mm)))
(time (dotimes [i mm] (doseq [k sym-keys] (m k))))

(def vec-keys (map (comp (juxt keyword symbol identity) str) (range mm)))
(def m (zipmap vec-keys (range mm)))
(time (dotimes [i mm] (doseq [k vec-keys] (m k))))

(println))&lt;/pre&gt;
&lt;/div&gt;&lt;/div&gt;</comment>
                    <comment id="30158" author="halgari" created="Mon, 3 Dec 2012 10:38:45 -0600"  >&lt;p&gt;For some reason I can&apos;t get v2 to build against master. It applies cleanly, but fails to build.&lt;/p&gt;</comment>
                    <comment id="30165" author="cgrand" created="Mon, 3 Dec 2012 11:30:04 -0600"  >&lt;p&gt;Timothy: I inadvertently deleted a &quot;public&quot; modifier before committing... fixed in caching-hasheq-v3.diff&lt;/p&gt;</comment>
                    <comment id="30203" author="halgari" created="Mon, 10 Dec 2012 11:00:29 -0600"  >&lt;p&gt;I now get the following results:&lt;/p&gt;

&lt;p&gt;Version:  1.4.0&lt;br/&gt;
&quot;Elapsed time: 6281.345 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6344.321 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6108.55 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 36172.135 msecs&quot;&lt;/p&gt;

&lt;p&gt;Version:  1.5.0-master-SNAPSHOT (pre-patch)&lt;br/&gt;
&quot;Elapsed time: 6126.337 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6320.857 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 6237.251 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 18167.05 msecs&quot;&lt;/p&gt;

&lt;p&gt;Version:  1.5.0-master-SNAPSHOT (post-patch)&lt;br/&gt;
&quot;Elapsed time: 6501.929 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3861.987 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3871.557 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 5049.067 msecs&quot;&lt;/p&gt;


&lt;p&gt;Marking as screened&lt;/p&gt;</comment>
                    <comment id="30206" author="oshyshko" created="Mon, 10 Dec 2012 15:53:06 -0600"  >&lt;p&gt;Please, could you add as a comment the bench result using 1.3 vs 1.5-master-post-patch?&lt;/p&gt;</comment>
                    <comment id="30217" author="oshyshko" created="Tue, 11 Dec 2012 14:13:31 -0600"  >&lt;p&gt;The performance with 1.5-master is now very close to 1.3 for 3/4 of the benchmark.&lt;/p&gt;

&lt;p&gt;However, this code is still showing a 43% performance drop (3411 ms VS 6030 ms &amp;#8211; 1.3 VS 1.5-master):&lt;/p&gt;

&lt;p&gt;(def str-keys (map str (range mm)))&lt;br/&gt;
(def m (zipmap str-keys (range mm)))&lt;br/&gt;
(time (dotimes &lt;span class=&quot;error&quot;&gt;&amp;#91;i mm&amp;#93;&lt;/span&gt; (doseq &lt;span class=&quot;error&quot;&gt;&amp;#91;k str-keys&amp;#93;&lt;/span&gt; (m k))))&lt;/p&gt;


&lt;p&gt;Version:  1.3.0&lt;br/&gt;
&quot;Elapsed time: 3411.353 msecs&quot;  &amp;lt;---&lt;br/&gt;
&quot;Elapsed time: 3459.992 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3365.182 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3813.637 msecs&quot;&lt;/p&gt;

&lt;p&gt;Version:  1.4.0&lt;br/&gt;
&quot;Elapsed time: 5710.073 msecs&quot; &amp;lt;---&lt;br/&gt;
&quot;Elapsed time: 5817.356 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 5774.856 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 18754.482 msecs&quot;&lt;/p&gt;

&lt;p&gt;Version:  1.5.0-master-SNAPSHOT&lt;br/&gt;
&quot;Elapsed time: 6030.247 msecs&quot; &amp;lt;---&lt;br/&gt;
&quot;Elapsed time: 3372.637 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3267.481 msecs&quot;&lt;br/&gt;
&quot;Elapsed time: 3852.927 msecs&quot;&lt;/p&gt;


&lt;p&gt;To reproduce:&lt;br/&gt;
$ git clone &lt;a href=&quot;https://github.com/clojure/clojure.git&quot;&gt;https://github.com/clojure/clojure.git&lt;/a&gt;&lt;br/&gt;
$ cd clojure&lt;br/&gt;
$ mvn install -Dmaven.test.skip=true&lt;/p&gt;

&lt;p&gt;$ cd ..&lt;/p&gt;

&lt;p&gt;$ git clone &lt;a href=&quot;https://github.com/oshyshko/clj-perf.git&quot;&gt;https://github.com/oshyshko/clj-perf.git&lt;/a&gt;&lt;br/&gt;
$ cd clj-perf&lt;br/&gt;
$ lein run-all&lt;/p&gt;</comment>
                </comments>
                    <attachments>
                    <attachment id="11743" name="caching-hasheq-v3.diff" size="10183" author="cgrand" created="Mon, 3 Dec 2012 11:30:04 -0600" />
                    <attachment id="11240" name="clj_13.png" size="139196" author="oshyshko" created="Mon, 21 May 2012 22:06:54 -0500" />
                    <attachment id="11241" name="clj_14.png" size="143427" author="oshyshko" created="Mon, 21 May 2012 22:06:54 -0500" />
                </attachments>
            <subtasks>
        </subtasks>
                <customfields>
                                <customfield id="customfield_10002" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                <customfieldname>Approval</customfieldname>
                <customfieldvalues>
                        <customfieldvalue key="10007">Ok</customfieldvalue>

                </customfieldvalues>
            </customfield>
                                                                                    <customfield id="customfield_10010" key="com.pyxis.greenhopper.jira:gh-global-rank">
                <customfieldname>Global Rank</customfieldname>
                <customfieldvalues>
                    
                </customfieldvalues>
            </customfield>
                                            <customfield id="customfield_10000" key="com.atlassian.jira.plugin.system.customfieldtypes:select">
                <customfieldname>Patch</customfieldname>
                <customfieldvalues>
                        <customfieldvalue key="10001">Code</customfieldvalue>

                </customfieldvalues>
            </customfield>
                                                                                        </customfields>
    </item>
</channel>
</rss>