/*
 * Copyright (C) 2013, 2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "config.h"
#include "BinarySwitch.h"

#if ENABLE(JIT)

#include "JSCInlines.h"

namespace JSC {
static unsigned globalCounter; // We use a different seed every time we are invoked.
BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type)
    : m_value(value)
    , m_weakRandom(globalCounter++)
    , m_index(0)
    , m_caseIndex(UINT_MAX)
    , m_type(type)
{
    if (cases.isEmpty())
        return;

    for (unsigned i = 0; i < cases.size(); ++i)
        m_cases.append(Case(cases[i], i));

    std::sort(m_cases.begin(), m_cases.end());

    for (unsigned i = 1; i < m_cases.size(); ++i)
        RELEASE_ASSERT(m_cases[i - 1] < m_cases[i]);

    build(0, false, m_cases.size());
}

BinarySwitch::~BinarySwitch()
{
}

bool BinarySwitch::advance(MacroAssembler& jit)
{
    if (m_cases.isEmpty()) {
        m_fallThrough.append(jit.jump());
        return false;
    }

    if (m_index == m_branches.size()) {
        RELEASE_ASSERT(m_jumpStack.isEmpty());
        return false;
    }

    for (;;) {
        const BranchCode& code = m_branches[m_index++];
        switch (code.kind) {
        case NotEqualToFallThrough:
            switch (m_type) {
            case Int32:
                m_fallThrough.append(jit.branch32(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_fallThrough.append(jit.branchPtr(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case NotEqualToPush:
            switch (m_type) {
            case Int32:
                m_jumpStack.append(jit.branch32(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_jumpStack.append(jit.branchPtr(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case LessThanToPush:
            switch (m_type) {
            case Int32:
                m_jumpStack.append(jit.branch32(
                    MacroAssembler::LessThan, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_jumpStack.append(jit.branchPtr(
                    MacroAssembler::LessThan, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case Pop:
            m_jumpStack.takeLast().link(&jit);
            break;
        case ExecuteCase:
            m_caseIndex = code.index;
            return true;
        }
    }
}
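
// A minimal usage sketch for the loop above, kept in a comment since it belongs to callers
// rather than to this file. It assumes the caseIndex() and fallThrough() accessors declared
// in BinarySwitch.h; emitCodeForCase is a hypothetical helper standing in for whatever the
// caller wants to do when a case matches:
//
//     BinarySwitch binarySwitch(valueGPR, caseValues, BinarySwitch::Int32);
//     while (binarySwitch.advance(jit))
//         emitCodeForCase(jit, binarySwitch.caseIndex()); // hypothetical per-case emitter
//     binarySwitch.fallThrough().link(&jit); // no case matched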

void BinarySwitch::build(unsigned start, bool hardStart, unsigned end)
{
    unsigned size = end - start;

    RELEASE_ASSERT(size);

    // This code uses some random numbers to keep things balanced. It's important to keep in mind
    // that this does not improve average-case throughput under the assumption that all cases fire
    // with equal probability. It just ensures that there will not be some switch structure that,
    // when combined with some input, will always produce pathologically good or pathologically bad
    // performance.

    const unsigned leafThreshold = 3;

    if (size <= leafThreshold) {
        // It turns out that for exactly three cases or fewer, it's better to just compare each
        // case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in
        // extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches.
        //
        // This assumes that we care about the cost of hitting some case more than we care about
        // bottoming out in a default case. I believe that in most places where we use switch
        // statements, we are more likely to hit one of the cases than we are to fall through to
        // default. Intuitively, if we wanted to improve the performance of default, we would
        // reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion.
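
        // To make the savings claim concrete, under the unit-cost model used in the longer
        // discussion below: a 3-case leaf costs (1 + 2 + 3) / 3 = 2 compare+branches on average
        // to hit a case, while splitting those 3 cases first (i.e. leafThreshold = 2) costs a
        // less-than branch plus 1 or 2 equality branches, (2 + 2 + 3) / 3 = 7/3 on average.
        // That 1/3-of-a-branch gap is the extreme case mentioned above.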

        bool allConsecutive = false;

        if ((hardStart || (start && m_cases[start - 1].value == m_cases[start].value - 1))
            && start + size < m_cases.size()
            && m_cases[start + size - 1].value == m_cases[start + size].value - 1) {
            allConsecutive = true;
            for (unsigned i = 0; i < size - 1; ++i) {
                if (m_cases[start + i].value + 1 != m_cases[start + i + 1].value) {
                    allConsecutive = false;
                    break;
                }
            }
        }
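
        // Note on the guard above: when this leaf's cases form a consecutive run, and the values
        // just outside the run are themselves cases (or the start is "hard" because a less-than
        // split already bounds the value), any value that reaches this leaf must equal one of its
        // cases. That is what lets the code below skip the final NotEqualToFallThrough check.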

        Vector<unsigned, 3> localCaseIndices;
        for (unsigned i = 0; i < size; ++i)
            localCaseIndices.append(start + i);

        std::random_shuffle(
            localCaseIndices.begin(), localCaseIndices.end(),
            [this] (unsigned n) {
                // We use modulo to get a random number in the range we want, fully knowing that
                // this introduces a tiny amount of bias, but we're fine with such tiny bias.
                return m_weakRandom.getUint32() % n;
            });
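
        // To make "tiny amount of bias" concrete: for n = 3, getUint32() % 3 yields 0 from
        // 1431655766 of the 2^32 possible inputs but 1 or 2 from only 1431655765 each, a
        // relative bias on the order of 2^-32, which is irrelevant for balancing purposes.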

        for (unsigned i = 0; i < size - 1; ++i) {
            m_branches.append(BranchCode(NotEqualToPush, localCaseIndices[i]));
            m_branches.append(BranchCode(ExecuteCase, localCaseIndices[i]));
            m_branches.append(BranchCode(Pop));
        }

        if (!allConsecutive)
            m_branches.append(BranchCode(NotEqualToFallThrough, localCaseIndices.last()));

        m_branches.append(BranchCode(ExecuteCase, localCaseIndices.last()));

        return;
    }

    // There are two different strategies we could consider here:
    //
    // Isolate median and split: pick a median and check if the comparison value is equal to it;
    // if so, execute the median case. Otherwise check if the value is less than the median, and
    // recurse left or right based on this. This has two subvariants: we could either first test
    // equality for the median and then do the less-than, or we could first do the less-than and
    // then check equality on the not-less-than path.
    //
    // Ignore median and split: do a less-than comparison on a value that splits the cases in two
    // equal-sized halves. Recurse left or right based on the comparison. Do not test for equality
    // against the median (or anything else); let the recursion handle those equality comparisons
    // once we bottom out in a list that has 3 cases or fewer (see above).
    //
    // I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate
    // would be faster since it leads to less branching for some lucky cases. It turns out that
    // Isolate is almost a total fail in the average, assuming all cases are equally likely. How
    // bad Isolate is depends on whether you believe that doing two consecutive branches based on
    // the same comparison is cheaper than doing the compare/branches separately. This is
    // difficult to evaluate. For small immediates that aren't blinded, we just care about
    // avoiding a second compare instruction. For large immediates or when blinding is in play, we
    // also care about the instructions used to materialize the immediate a second time. Isolate
    // can help with both costs since it involves first doing a < compare+branch on some value,
    // followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a <
    // compare+branch on some value, and then the == compare+branch on that same value will happen
    // much later.
    //
    // To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming
    // that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost
    // of a compare+branch on some value that you've just done another compare+branch for. These
    // recurrence relations compute the total cost incurred if you executed the switch statement
    // on each matching value. So the average cost of hitting some case can be computed as
    // Isolate[n]/n or Ignore[n]/n, respectively for the two relations.
    //
    // Isolate[1] = ComparisonCost
    // Isolate[2] = (2 + 1) * ComparisonCost
    // Isolate[3] = (3 + 2 + 1) * ComparisonCost
    // Isolate[n_] := With[
    //     {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]},
    //     ComparisonCost + ChainedComparisonCost +
    //     (ComparisonCost * (medianIndex - 1) + Isolate[medianIndex - 1]) +
    //     (2 * ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])]
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 1) * ComparisonCost
    // Ignore[3] = (3 + 2 + 1) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This does not account for the average cost of hitting the default case. See further below
    // for a discussion of that.
    //
    // It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always
    // better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for
    // switch statements that have 20 cases or fewer, though the margin of victory is never large
    // - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements,
    // we see divergence between the two with Ignore winning. This is of course rather
    // unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we
    // see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see
    // divergence for large switches with Ignore winning, for example if a switch statement has
    // 100 cases then Ignore saves one branch on average.
    //
    // Our current JIT backends don't provide for optimization for chained comparisons, except for
    // reducing the code for materializing the immediate if the immediates are large or blinding
    // comes into play. Probably our JIT backends live somewhere north of
    // ChainedComparisonCost = 0.5.
    //
    // This implies that using the Ignore strategy is likely better. If we wanted to incorporate
    // the Isolate strategy, we'd want to determine the switch size threshold at which the two
    // cross over and then use Isolate for switches that are smaller than that size.
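    //
    // As a sanity check, the Ignore recurrence is easy to evaluate outside Mathematica as well.
    // A minimal C++ sketch (hypothetical, not part of this file), matching the definitions above
    // and averaging the randomized median choice for odd n:
    //
    //     double ignoreCost(unsigned n, double comparisonCost)
    //     {
    //         if (n <= 3)
    //             return comparisonCost * n * (n + 1) / 2; // the base cases above
    //         auto costOfSplit = [&] (unsigned medianIndex) {
    //             return medianIndex * comparisonCost + ignoreCost(medianIndex, comparisonCost)
    //                 + (n - medianIndex) * comparisonCost + ignoreCost(n - medianIndex, comparisonCost);
    //         };
    //         if (!(n & 1))
    //             return costOfSplit(n / 2);
    //         return (costOfSplit(n / 2) + costOfSplit(n / 2 + 1)) / 2; // average the coin flip
    //     }
    //
    // ignoreCost(n, 1) / n then gives the average number of branches taken to hit a case.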
    //
    // The average cost of hitting the default case is similar, but involves a different cost for
    // the base cases: you have to assume that you will always fail each branch. For the Ignore
    // strategy we would get this recurrence relation; the same kind of thing happens to the
    // Isolate strategy:
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 2) * ComparisonCost
    // Ignore[3] = (3 + 3 + 3) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This means that if we cared about the default case more, we would likely reduce
    // leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3
    // in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also
    // increase the average cost of taking one of the non-default cases by 1/3. Typically the
    // difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe
    // that the default case is more important, then we would want leafThreshold to be 2, and the
    // default case would become 1/6 faster on average. But we believe that most switch statements
    // are more likely to take one of the cases than the default, so we use leafThreshold = 3
    // and get a 1/6 speed-up on average for taking an explicit case.
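    //
    // Concretely, in the extreme 3-case example: with leafThreshold = 3 a default value fails
    // all 3 equality branches, while with leafThreshold = 2 it would take the less-than branch
    // and then fail only 1 or 2 equality branches, so its best path drops from 3 branches to 2
    // - the 1/3 reduction above - at the cost of making the matched cases correspondingly slower.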

    unsigned medianIndex = (start + end) / 2;

    // We want medianIndex to point to the thing we will do a less-than compare against. We want
    // this less-than compare to split the current sublist into equal-sized sublists, or
    // nearly-equal-sized with some randomness if we're in the odd case. With the above
    // calculation, in the odd case we will have medianIndex pointing at either the element we
    // want or the element to the left of the one we want. Consider the case of five elements:
    //
    //     0 1 2 3 4
    //
    // start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do
    // value < 2, then we will split the list into 2 elements on the left and three on the right.
    // That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure
    // that we don't become unbalanced on the right. This does not improve throughput since one
    // side will always get shafted, and that side might still be odd, in which case it will also
    // have two sides and one of them will get shafted - and so on. We just want to avoid
    // deterministic pathologies.
    //
    // In the even case, we will always end up pointing at the element we want:
    //
    //     0 1 2 3
    //
    // start will be 0, end will be 4. So, the average is 2, which is what we'd like.

    if (size & 1) {
        RELEASE_ASSERT(medianIndex - start + 1 == end - medianIndex);
        medianIndex += m_weakRandom.getUint32() & 1;
    } else
        RELEASE_ASSERT(medianIndex - start == end - medianIndex);

    RELEASE_ASSERT(medianIndex > start);
    RELEASE_ASSERT(medianIndex + 1 < end);

    m_branches.append(BranchCode(LessThanToPush, medianIndex));
    build(medianIndex, true, end);
    m_branches.append(BranchCode(Pop));
    build(start, hardStart, medianIndex);
}

} // namespace JSC

#endif // ENABLE(JIT)