/*
 * Copyright (C) 2011-2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "DFGCSEPhase.h"

#if ENABLE(DFG_JIT)

#include "DFGAbstractHeap.h"
#include "DFGBlockMapInlines.h"
#include "DFGClobberSet.h"
#include "DFGClobberize.h"
#include "DFGEdgeUsesStructure.h"
#include "JSCInlines.h"

#include <wtf/FastBitVector.h>
namespace JSC { namespace DFG {
// This file contains two CSE implementations: local and global. LocalCSE typically runs when we're
// in DFG mode, i.e. we want to compile quickly. LocalCSE contains a lot of optimizations for
// compile time. GlobalCSE, on the other hand, is fairly straightforward. It will find more
// optimization opportunities by virtue of being global.
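//
// As a rough illustration (schematic IR, not taken from a real compilation), CSE turns a
// fragment like
//
//     a: GetByOffset(@o, "f")
//     b: GetByOffset(@o, "f")
//     c: ArithAdd(@a, @b)
//
// into
//
//     a: GetByOffset(@o, "f")
//     c: ArithAdd(@a, @a)
//
// provided that nothing between @a and @b writes to the abstract heap that @a reads. LocalCSE
// does this within a single basic block; GlobalCSE also reuses defs from dominating blocks.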
namespace {

const bool verbose = false;
typedef HashMap<HeapLocation, LazyNode> ImpureMap;
typedef HashMap<PureValue, Vector<Node*>> PureMultiMap;

class ClobberFilter {
public:
    ClobberFilter(AbstractHeap heap)
        : m_heap(heap)
    {
    }

    bool operator()(const ImpureMap::KeyValuePairType& pair) const
    {
        return m_heap.overlaps(pair.key.heap());
    }

private:
    AbstractHeap m_heap;
};
inline void clobber(ImpureMap& map, AbstractHeap heap)
{
    ClobberFilter filter(heap);
    map.removeIf(filter);
}
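
// A hypothetical illustration of what clobber() is used for: if an impure map holds
//
//     NamedPropertyLoc(@o, "f")  -> @x
//     IndexedPropertyLoc(@a, @i) -> @y
//
// and a node write()s an abstract heap that overlaps only named properties, the first entry is
// removed and the second survives, so later reads of @a[@i] can still be CSE'd.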
class LocalCSEPhase : public Phase {
public:
    LocalCSEPhase(Graph& graph)
        : Phase(graph, "local common subexpression elimination")
        , m_smallBlock(graph)
        , m_largeBlock(graph)
    {
    }

    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == ThreadedCPS || m_graph.m_form == LoadStore);

        bool changed = false;
        m_graph.clearReplacements();
        for (BlockIndex blockIndex = m_graph.numBlocks(); blockIndex--;) {
            BasicBlock* block = m_graph.block(blockIndex);
            if (!block)
                continue;

            if (block->size() <= SmallMaps::capacity)
                changed |= m_smallBlock.run(block);
            else
                changed |= m_largeBlock.run(block);
        }

        return changed;
    }

private:
    class SmallMaps {
    public:
        // This permits SmallMaps to be used for blocks that have up to 100 nodes. In practice,
        // fewer than half of the nodes in a block have pure defs, and even fewer have impure defs.
        // Thus, a capacity limit of 100 probably means that somewhere around ~40 things may end up
        // in one of these "small" list-based maps. That number still seems largeish, except that
        // the overhead of HashMaps can be quite high currently: clearing them, or even removing
        // enough things from them, deletes (or resizes) their backing store eagerly. Hence
        // HashMaps induce a lot of malloc traffic.
        static const unsigned capacity = 100;
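
        // A consequence of the fixed-capacity array layout (see m_pureMap/m_impureMap below) is
        // that clearing a SmallMaps instance can simply reset the two length counters; there is
        // no backing store to free or rehash, which is exactly the malloc traffic the comment
        // above is trying to avoid.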
        void write(AbstractHeap heap)
        {
            for (unsigned i = 0; i < m_impureLength; ++i) {
                if (heap.overlaps(m_impureMap[i].key.heap()))
                    m_impureMap[i--] = m_impureMap[--m_impureLength];
            }
        }
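
        // Note on the eviction loop above: the map is unordered, so an overlapping entry is
        // removed by overwriting it with the last entry and shrinking m_impureLength; the i--
        // makes the loop re-examine whatever was just moved into slot i.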
        Node* addPure(PureValue value, Node* node)
        {
            for (unsigned i = m_pureLength; i--;) {
                if (m_pureMap[i].key == value)
                    return m_pureMap[i].value;
            }

            ASSERT(m_pureLength < capacity);
            m_pureMap[m_pureLength++] = WTF::KeyValuePair<PureValue, Node*>(value, node);
            return nullptr;
        }
        LazyNode findReplacement(HeapLocation location)
        {
            for (unsigned i = m_impureLength; i--;) {
                if (m_impureMap[i].key == location)
                    return m_impureMap[i].value;
            }
            return nullptr;
        }
        LazyNode addImpure(HeapLocation location, LazyNode node)
        {
            // FIXME: If we are using small maps, we must not def() derived values.
            // For now the only derived values we def() are constant-based.
            if (location.index() && !location.index().isNode())
                return nullptr;

            if (LazyNode result = findReplacement(location))
                return result;

            ASSERT(m_impureLength < capacity);
            m_impureMap[m_impureLength++] = WTF::KeyValuePair<HeapLocation, LazyNode>(location, node);
            return nullptr;
        }
        WTF::KeyValuePair<PureValue, Node*> m_pureMap[capacity];
        WTF::KeyValuePair<HeapLocation, LazyNode> m_impureMap[capacity];
        unsigned m_pureLength;
        unsigned m_impureLength;
    };

    class LargeMaps {
    public:
        void write(AbstractHeap heap)
        {
            clobber(m_impureMap, heap);
        }
        Node* addPure(PureValue value, Node* node)
        {
            auto result = m_pureMap.add(value, node);
            if (result.isNewEntry)
                return nullptr;

            return result.iterator->value;
        }
        LazyNode findReplacement(HeapLocation location)
        {
            return m_impureMap.get(location);
        }
        LazyNode addImpure(HeapLocation location, LazyNode node)
        {
            auto result = m_impureMap.add(location, node);
            if (result.isNewEntry)
                return nullptr;

            return result.iterator->value;
        }
        HashMap<PureValue, Node*> m_pureMap;
        HashMap<HeapLocation, LazyNode> m_impureMap;
    };
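
    // SmallMaps and LargeMaps expose the same addPure/addImpure/findReplacement/write interface,
    // which is what lets BlockCSE below be a template over the map flavor instead of branching on
    // block size at every node.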
    template<typename Maps>
    class BlockCSE {
    public:
        BlockCSE(Graph& graph)
            : m_graph(graph)
            , m_insertionSet(graph)
        {
        }
        bool run(BasicBlock* block)
        {
            m_maps.clear();
            m_changed = false;
            m_block = block;

            for (unsigned nodeIndex = 0; nodeIndex < block->size(); ++nodeIndex) {
                m_node = block->at(nodeIndex);
                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity) {
                    m_node->replaceWith(m_node->child1().node());
                    m_changed = true;
                } else {
                    // This rule only makes sense for local CSE, since in SSA form we have already
                    // factored the bounds check out of the PutByVal. It's kind of gross, but we
                    // still have reason to believe that PutByValAlias is a good optimization and
                    // that it's better to do it with a single node rather than separating out the
                    // bounds check.
                    if (m_node->op() == PutByVal || m_node->op() == PutByValDirect) {
                        HeapLocation heap;

                        Node* base = m_graph.varArgChild(m_node, 0).node();
                        Node* index = m_graph.varArgChild(m_node, 1).node();

                        ArrayMode mode = m_node->arrayMode();
                        switch (mode.type()) {
                        case Array::Int32:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                IndexedPropertyLoc, IndexedInt32Properties, base, index);
                            break;

                        case Array::Double:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                IndexedPropertyLoc, IndexedDoubleProperties, base, index);
                            break;

                        case Array::Contiguous:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                IndexedPropertyLoc, IndexedContiguousProperties, base, index);
                            break;

                        case Array::Int8Array:
                        case Array::Int16Array:
                        case Array::Int32Array:
                        case Array::Uint8Array:
                        case Array::Uint8ClampedArray:
                        case Array::Uint16Array:
                        case Array::Uint32Array:
                        case Array::Float32Array:
                        case Array::Float64Array:
                            if (!mode.isInBounds())
                                break;
                            heap = HeapLocation(
                                IndexedPropertyLoc, TypedArrayProperties, base, index);
                            break;

                        default:
                            break;
                        }
                        if (!!heap && m_maps.findReplacement(heap))
                            m_node->setOp(PutByValAlias);
                    }

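                    // Schematic example (hypothetical IR) of the rule handled above: given
                    //
                    //     a: GetByVal(@array, @i)      (in-bounds, defs IndexedPropertyLoc(@array, @i))
                    //     b: PutByVal(@array, @i, @v)  (in-bounds, same base and index)
                    //
                    // the heap location is still in m_maps when we reach @b, so @b becomes
                    // PutByValAlias and does not have to redo the checking that @a's in-bounds
                    // access already established.
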
                    clobberize(m_graph, m_node, *this);
                }
            }

            m_insertionSet.execute(block);

            return m_changed;
        }

        void read(AbstractHeap) { }

        void write(AbstractHeap heap)
        {
            m_maps.write(heap);
        }

        void def(PureValue value)
        {
            Node* match = m_maps.addPure(value, m_node);
            if (!match)
                return;

            m_node->replaceWith(match);
            m_changed = true;
        }
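
        // For example (schematic IR), two identical pure nodes in one block, such as
        //
        //     a: ArithAdd(@x, @y)
        //     ...
        //     b: ArithAdd(@x, @y)
        //
        // produce equal PureValues, so the second one is replaced with the first here. Pure
        // values never need write() invalidation, which is why addPure() has no eviction path.
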
        void def(HeapLocation location, LazyNode value)
        {
            LazyNode match = m_maps.addImpure(location, value);
            if (!match)
                return;

            if (m_node->op() == GetLocal) {
                // Usually the CPS rethreading phase does this. But it's OK for us to mess with
                // locals so long as:
                //
                // - We dethread the graph. Any changes we make may invalidate the assumptions of
                //   our CPS form, particularly if this GetLocal is linked to the variablesAtTail.
                //
                // - We don't introduce a Phantom for the child of the GetLocal. This wouldn't be
                //   totally wrong but it would pessimize the code. Just because there is a
                //   GetLocal doesn't mean that the child was live. Simply rerouting all uses
                //   of this GetLocal will preserve the live-at-exit information just fine.
                //
                // We accomplish the latter by just clearing the child; then the Phantom that we
                // introduce won't have children and so it will eventually just be deleted.
                m_node->child1() = Edge();
                m_graph.dethread();
            }

            if (value.isNode() && value.asNode() == m_node) {
                match.ensureIsNode(m_insertionSet, m_block, 0)->owner = m_block;
                ASSERT(match.isNode());
                m_node->replaceWith(match.asNode());
                m_changed = true;
            }
        }
    private:
        Graph& m_graph;

        bool m_changed;
        Node* m_node;
        BasicBlock* m_block;

        Maps m_maps;

        InsertionSet m_insertionSet;
    };
    BlockCSE<SmallMaps> m_smallBlock;
    BlockCSE<LargeMaps> m_largeBlock;
};
class GlobalCSEPhase : public Phase {
public:
    GlobalCSEPhase(Graph& graph)
        : Phase(graph, "global common subexpression elimination")
        , m_impureDataMap(graph)
        , m_insertionSet(graph)
    {
    }
    bool run()
    {
        ASSERT(m_graph.m_fixpointState == FixpointNotConverged);
        ASSERT(m_graph.m_form == SSA);

        m_graph.initializeNodeOwners();
        m_graph.m_dominators.computeIfNecessary(m_graph);
        m_preOrder = m_graph.blocksInPreOrder();
        // First figure out what gets clobbered by blocks. Note that this uses the preOrder list
        // for convenience only.
        for (unsigned i = m_preOrder.size(); i--;) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            for (unsigned nodeIndex = m_block->size(); nodeIndex--;)
                addWrites(m_graph, m_block->at(nodeIndex), m_impureData->writes);
        }
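
        // At this point each block's ImpureBlockData::writes summarizes every abstract heap the
        // block clobbers. The backward search in findReplacement() consults these per-block
        // summaries rather than re-walking the block's nodes.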
        // Based on my experience doing this before, what follows might have to be made iterative.
        // Right now it doesn't have to be iterative because everything is dominator-based. But when
        // validation is enabled, we check if iterating would find new CSE opportunities.
        bool changed = iterate();
        // FIXME: It should be possible to assert that CSE will not find any new opportunities if you
        // run it a second time. Unfortunately, we cannot assert this right now. Note that if we did
        // this, we'd have to first reset all of our state.
        // https://bugs.webkit.org/show_bug.cgi?id=145853

        return changed;
    }

    bool iterate()
    {
        if (verbose)
            dataLog("Performing iteration.\n");

        m_changed = false;
        m_graph.clearReplacements();
        for (unsigned i = 0; i < m_preOrder.size(); ++i) {
            m_block = m_preOrder[i];
            m_impureData = &m_impureDataMap[m_block];
            m_writesSoFar.clear();

            if (verbose)
                dataLog("Processing block ", *m_block, ":\n");
            for (unsigned nodeIndex = 0; nodeIndex < m_block->size(); ++nodeIndex) {
                m_nodeIndex = nodeIndex;
                m_node = m_block->at(nodeIndex);
                if (verbose)
                    dataLog("  Looking at node ", m_node, ":\n");
                m_graph.performSubstitution(m_node);

                if (m_node->op() == Identity) {
                    m_node->replaceWith(m_node->child1().node());
                    m_changed = true;
                } else
                    clobberize(m_graph, m_node, *this);
            }
            m_insertionSet.execute(m_block);

            m_impureData->didVisit = true;
        }

        return m_changed;
    }

    void read(AbstractHeap) { }

    void write(AbstractHeap heap)
    {
        clobber(m_impureData->availableAtTail, heap);
        m_writesSoFar.add(heap);
        if (verbose)
            dataLog("    Clobbered, new tail map: ", mapDump(m_impureData->availableAtTail), "\n");
    }

    void def(PureValue value)
    {
        // With pure values we do not have to worry about the possibility of some control flow path
        // clobbering the value. So, we just search for all of the like values that have been
        // computed. We pick one that is in a block that dominates ours. Note that this means that
        // a PureValue will map to a list of nodes, since there may be many places in the control
        // flow graph that compute a value but only one of them that dominates us. We may build up
        // a large list of nodes that compute some value in the case of gnarly control flow. This
        // is probably OK.
        auto result = m_pureValues.add(value, Vector<Node*>());
        if (result.isNewEntry) {
            result.iterator->value.append(m_node);
            return;
        }
        for (unsigned i = result.iterator->value.size(); i--;) {
            Node* candidate = result.iterator->value[i];
            if (m_graph.m_dominators.dominates(candidate->owner, m_block)) {
                m_node->replaceWith(candidate);
                m_changed = true;
                return;
            }
        }

        result.iterator->value.append(m_node);
    }
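
    // Schematic CFG for the dominance rule above: with
    //
    //        A
    //       / \
    //      B   C
    //       \ /
    //        D
    //
    // a pure value computed in both B and C cannot be reused by D, because neither occurrence
    // dominates D; both just sit in the PureValue's node list. An occurrence in A, however, does
    // dominate D and would be picked up by the loop above.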
    LazyNode findReplacement(HeapLocation location)
    {
        // At this instant, our "availableAtTail" reflects the set of things that are available in
        // this block so far. We check this map to find block-local CSE opportunities before doing
        // a global search.
        LazyNode match = m_impureData->availableAtTail.get(location);
        if (!!match) {
            if (verbose)
                dataLog("    Found local match: ", match, "\n");
            return match;
        }
        // If it's not available at this point in the block, and at some prior point in the block
        // we have clobbered this heap location, then there is no point in doing a global search.
        if (m_writesSoFar.overlaps(location.heap())) {
            if (verbose)
                dataLog("    Not looking globally because of local clobber: ", m_writesSoFar, "\n");
            return nullptr;
        }
        // This performs a backward search over the control flow graph to find some possible
        // non-local def() that matches our heap location. Such a match is only valid if there does
        // not exist any path from that def() to our block that contains a write() that overlaps
        // our heap. This algorithm looks for both of these things (the matching def and the
        // overlapping writes) in one backwards DFS pass.
        //
        // This starts by looking at the starting block's predecessors, and then it continues along
        // their predecessors. As soon as this finds a possible def() - one that defines the heap
        // location we want while dominating our starting block - it assumes that this one must be
        // the match. It then lets the DFS over predecessors complete, but it doesn't add the
        // def()'s predecessors; this ensures that any blocks we visit thereafter are on some path
        // from the def() to us. As soon as the DFS finds a write() that overlaps the location's
        // heap, it stops, assuming that there is no possible match. Note that the write() case may
        // trigger before we find a def(), or after. Either way, the write() case causes this
        // function to immediately return nullptr.
        //
        // If the write() is found before we find the def(), then we know that any def() we would
        // find would have a path to us that trips over the write() and hence becomes invalid. This
        // is just a direct outcome of us looking for a def() that dominates us. Given a block A
        // that dominates block B - so that A is the one that would have the def() and B is our
        // starting block - we know that any other block must either be on the path from A to B, or
        // it must be on a path from the root to A, but not both. So, if we haven't found A yet but
        // we already have found a block C that has a write(), then C must be on some path from A
        // to B, which means that A's def() is invalid for our purposes. Hence, before we find the
        // def(), stopping on write() is the right thing to do.
        //
        // Stopping on write() is also the right thing to do after we find the def(). After we find
        // the def(), we don't add that block's predecessors to the search worklist. That means
        // that henceforth the only blocks we will see in the search are blocks on the path from
        // the def() to us. If any such block has a write() that clobbers our heap then we should
        // give up.
        //
        // Hence this graph search algorithm ends up being deceptively simple: any overlapping
        // write() causes us to immediately return nullptr, and a matching def() means that we just
        // record it and neglect to visit its predecessors.
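        //
        // A schematic example of the search: in
        //
        //        A   (availableAtTail has our location)
        //       / \
        //      C   D
        //       \ /
        //        B   (our block)
        //
        // starting from B we enqueue C and D. Neither strictly dominates B, so we only check their
        // write sets and enqueue A. A strictly dominates B and has a match, so we record it and do
        // not enqueue A's predecessors. If C or D had written an overlapping heap, we would have
        // returned nullptr instead, because that write lies on a path from A to B.
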
        Vector<BasicBlock*, 8> worklist;
        Vector<BasicBlock*, 8> seenList;
        for (unsigned i = m_block->predecessors.size(); i--;) {
            BasicBlock* predecessor = m_block->predecessors[i];
            if (!seen.get(predecessor->index)) {
                worklist.append(predecessor);
                seen.set(predecessor->index);
            }
        }
        while (!worklist.isEmpty()) {
            BasicBlock* block = worklist.takeLast();
            seenList.append(block);
            if (verbose)
                dataLog("    Searching in block ", *block, "\n");
            ImpureBlockData& data = m_impureDataMap[block];
            // We require strict domination because this would only see things in our own block if
            // they came *after* our position in the block. Clearly, while our block dominates
            // itself, the things in the block after us don't dominate us.
            if (m_graph.m_dominators.strictlyDominates(block, m_block)) {
                if (verbose)
                    dataLog("      It strictly dominates.\n");
                DFG_ASSERT(m_graph, m_node, data.didVisit);
                DFG_ASSERT(m_graph, m_node, !match);
                if (verbose)
                    dataLog("      Availability map: ", mapDump(data.availableAtTail), "\n");
                match = data.availableAtTail.get(location);
                if (verbose)
                    dataLog("      Availability: ", match, "\n");
                if (!!match) {
                    // Don't examine the predecessors of a match. At this point we just want to
                    // establish that other blocks on the path from here to there don't clobber
                    // the location we're interested in.
                    continue;
                }
            }
            if (verbose)
                dataLog("      Dealing with write set ", data.writes, "\n");
            if (data.writes.overlaps(location.heap())) {
                if (verbose)
                    dataLog("      Clobbered.\n");
                return nullptr;
            }
            for (unsigned i = block->predecessors.size(); i--;) {
                BasicBlock* predecessor = block->predecessors[i];
                if (!seen.get(predecessor->index)) {
                    worklist.append(predecessor);
                    seen.set(predecessor->index);
                }
            }
        }

        if (!match)
            return nullptr;
        // Cache the results for next time. We cache them both for this block and for all of our
        // predecessors, since even though we've already visited our predecessors, our predecessors
        // probably have successors other than us.
        // FIXME: Consider caching failed searches as well, when match is null. It's not clear that
        // the reduction in compile time would warrant the increase in complexity, though.
        // https://bugs.webkit.org/show_bug.cgi?id=134876
        for (BasicBlock* block : seenList)
            m_impureDataMap[block].availableAtTail.add(location, match);
        m_impureData->availableAtTail.add(location, match);

        return match;
    }

    void def(HeapLocation location, LazyNode value)
    {
        if (verbose)
            dataLog("    Got heap location def: ", location, " -> ", value, "\n");
        LazyNode match = findReplacement(location);
        if (verbose)
            dataLog("    Got match: ", match, "\n");

        if (!match) {
            if (verbose)
                dataLog("    Adding at-tail mapping: ", location, " -> ", value, "\n");
            auto result = m_impureData->availableAtTail.add(location, value);
            ASSERT_UNUSED(result, result.isNewEntry);
            return;
        }

        if (value.isNode() && value.asNode() == m_node) {
            if (!match.isNode()) {
                // We need to properly record the constant in order to use an existing one if applicable.
                // This ensures that re-running GCSE will not find new optimizations.
                match.ensureIsNode(m_insertionSet, m_block, m_nodeIndex)->owner = m_block;
                auto result = m_pureValues.add(PureValue(match.asNode(), match->constant()), Vector<Node*>());
                bool replaced = false;
                if (!result.isNewEntry) {
                    for (unsigned i = result.iterator->value.size(); i--;) {
                        Node* candidate = result.iterator->value[i];
                        if (m_graph.m_dominators.dominates(candidate->owner, m_block)) {
                            match->replaceWith(candidate);
                            match.setNode(candidate);
                            replaced = true;
                            break;
                        }
                    }
                }
                if (!replaced)
                    result.iterator->value.append(match.asNode());
            }
            ASSERT(match.asNode());
            m_node->replaceWith(match.asNode());
            m_changed = true;
        }
    }
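
    // Note on the LazyNode handling above: a LazyNode may describe a constant that has not been
    // materialized as a Node yet. ensureIsNode() inserts the materialized constant at the current
    // position, and the m_pureValues lookup then lets us reuse an equivalent, dominating constant
    // node if one already exists, so that re-running GCSE does not keep minting fresh constants.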
    struct ImpureBlockData {
        ClobberSet writes;
        bool didVisit { false };
        ImpureMap availableAtTail;
    };
    Vector<BasicBlock*> m_preOrder;

    PureMultiMap m_pureValues;
    BlockMap<ImpureBlockData> m_impureDataMap;

    BasicBlock* m_block;
    Node* m_node;
    unsigned m_nodeIndex;
    ImpureBlockData* m_impureData;
    ClobberSet m_writesSoFar;
    InsertionSet m_insertionSet;

    bool m_changed;
};
} // anonymous namespace
bool performLocalCSE(Graph& graph)
{
    SamplingRegion samplingRegion("DFG LocalCSE Phase");
    return runPhase<LocalCSEPhase>(graph);
}
bool performGlobalCSE(Graph& graph)
{
    SamplingRegion samplingRegion("DFG GlobalCSE Phase");
    return runPhase<GlobalCSEPhase>(graph);
}
} } // namespace JSC::DFG

#endif // ENABLE(DFG_JIT)