/*
 * Copyright (C) 2013, 2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
25 | ||
#include "config.h"
#include "BinarySwitch.h"

#if ENABLE(JIT)

#include "JSCInlines.h"
#include <algorithm>
#include <utility>
32 | ||
33 | namespace JSC { | |
34 | ||
35 | static unsigned globalCounter; // We use a different seed every time we are invoked. | |
36 | ||
37 | BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type) | |
38 | : m_value(value) | |
39 | , m_weakRandom(globalCounter++) | |
40 | , m_index(0) | |
41 | , m_caseIndex(UINT_MAX) | |
42 | , m_type(type) | |
43 | { | |
44 | if (cases.isEmpty()) | |
45 | return; | |
46 | ||
47 | for (unsigned i = 0; i < cases.size(); ++i) | |
48 | m_cases.append(Case(cases[i], i)); | |
49 | ||
50 | std::sort(m_cases.begin(), m_cases.end()); | |
51 | ||
52 | for (unsigned i = 1; i < m_cases.size(); ++i) | |
53 | RELEASE_ASSERT(m_cases[i - 1] < m_cases[i]); | |
54 | ||
55 | build(0, false, m_cases.size()); | |
56 | } | |
57 | ||
58 | BinarySwitch::~BinarySwitch() | |
59 | { | |
60 | } | |
61 | ||
62 | bool BinarySwitch::advance(MacroAssembler& jit) | |
63 | { | |
64 | if (m_cases.isEmpty()) { | |
65 | m_fallThrough.append(jit.jump()); | |
66 | return false; | |
67 | } | |
68 | ||
69 | if (m_index == m_branches.size()) { | |
70 | RELEASE_ASSERT(m_jumpStack.isEmpty()); | |
71 | return false; | |
72 | } | |
73 | ||
74 | for (;;) { | |
75 | const BranchCode& code = m_branches[m_index++]; | |
76 | switch (code.kind) { | |
77 | case NotEqualToFallThrough: | |
78 | switch (m_type) { | |
79 | case Int32: | |
80 | m_fallThrough.append(jit.branch32( | |
81 | MacroAssembler::NotEqual, m_value, | |
82 | MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value)))); | |
83 | break; | |
84 | case IntPtr: | |
85 | m_fallThrough.append(jit.branchPtr( | |
86 | MacroAssembler::NotEqual, m_value, | |
87 | MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value))))); | |
88 | break; | |
89 | } | |
90 | break; | |
91 | case NotEqualToPush: | |
92 | switch (m_type) { | |
93 | case Int32: | |
94 | m_jumpStack.append(jit.branch32( | |
95 | MacroAssembler::NotEqual, m_value, | |
96 | MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value)))); | |
97 | break; | |
98 | case IntPtr: | |
99 | m_jumpStack.append(jit.branchPtr( | |
100 | MacroAssembler::NotEqual, m_value, | |
101 | MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value))))); | |
102 | break; | |
103 | } | |
104 | break; | |
105 | case LessThanToPush: | |
106 | switch (m_type) { | |
107 | case Int32: | |
108 | m_jumpStack.append(jit.branch32( | |
109 | MacroAssembler::LessThan, m_value, | |
110 | MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value)))); | |
111 | break; | |
112 | case IntPtr: | |
113 | m_jumpStack.append(jit.branchPtr( | |
114 | MacroAssembler::LessThan, m_value, | |
115 | MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value))))); | |
116 | break; | |
117 | } | |
118 | break; | |
119 | case Pop: | |
120 | m_jumpStack.takeLast().link(&jit); | |
121 | break; | |
122 | case ExecuteCase: | |
123 | m_caseIndex = code.index; | |
124 | return true; | |
125 | } | |
126 | } | |
127 | } | |
128 | ||
129 | void BinarySwitch::build(unsigned start, bool hardStart, unsigned end) | |
130 | { | |
131 | unsigned size = end - start; | |
132 | ||
133 | RELEASE_ASSERT(size); | |
134 | ||
135 | // This code uses some random numbers to keep things balanced. It's important to keep in mind | |
136 | // that this does not improve average-case throughput under the assumption that all cases fire | |
137 | // with equal probability. It just ensures that there will not be some switch structure that | |
138 | // when combined with some input will always produce pathologically good or pathologically bad | |
139 | // performance. | |
140 | ||
141 | const unsigned leafThreshold = 3; | |
142 | ||
143 | if (size <= leafThreshold) { | |
144 | // It turns out that for exactly three cases or less, it's better to just compare each | |
145 | // case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in | |
146 | // extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches. | |
147 | // | |
148 | // This assumes that we care about the cost of hitting some case more than we care about | |
149 | // bottoming out in a default case. I believe that in most places where we use switch | |
150 | // statements, we are more likely to hit one of the cases than we are to fall through to | |
151 | // default. Intuitively, if we wanted to improve the performance of default, we would | |
152 | // reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion. | |
153 | ||
154 | bool allConsecutive = false; | |
155 | ||
156 | if ((hardStart || (start && m_cases[start - 1].value == m_cases[start].value - 1)) | |
157 | && start + size < m_cases.size() | |
158 | && m_cases[start + size - 1].value == m_cases[start + size].value - 1) { | |
159 | allConsecutive = true; | |
160 | for (unsigned i = 0; i < size - 1; ++i) { | |
161 | if (m_cases[i].value + 1 != m_cases[i + 1].value) { | |
162 | allConsecutive = false; | |
163 | break; | |
164 | } | |
165 | } | |
166 | } | |
167 | ||
168 | Vector<unsigned, 3> localCaseIndices; | |
169 | for (unsigned i = 0; i < size; ++i) | |
170 | localCaseIndices.append(start + i); | |
171 | ||
172 | std::random_shuffle( | |
173 | localCaseIndices.begin(), localCaseIndices.end(), | |
174 | [this] (unsigned n) { | |
175 | // We use modulo to get a random number in the range we want fully knowing that | |
176 | // this introduces a tiny amount of bias, but we're fine with such tiny bias. | |
177 | return m_weakRandom.getUint32() % n; | |
178 | }); | |
179 | ||
180 | for (unsigned i = 0; i < size - 1; ++i) { | |
181 | m_branches.append(BranchCode(NotEqualToPush, localCaseIndices[i])); | |
182 | m_branches.append(BranchCode(ExecuteCase, localCaseIndices[i])); | |
183 | m_branches.append(BranchCode(Pop)); | |
184 | } | |
185 | ||
186 | if (!allConsecutive) | |
187 | m_branches.append(BranchCode(NotEqualToFallThrough, localCaseIndices.last())); | |
188 | ||
189 | m_branches.append(BranchCode(ExecuteCase, localCaseIndices.last())); | |
190 | return; | |
191 | } | |
192 | ||
193 | // There are two different strategies we could consider here: | |
194 | // | |
195 | // Isolate median and split: pick a median and check if the comparison value is equal to it; | |
196 | // if so, execute the median case. Otherwise check if the value is less than the median, and | |
197 | // recurse left or right based on this. This has two subvariants: we could either first test | |
198 | // equality for the median and then do the less-than, or we could first do the less-than and | |
199 | // then check equality on the not-less-than path. | |
200 | // | |
201 | // Ignore median and split: do a less-than comparison on a value that splits the cases in two | |
202 | // equal-sized halves. Recurse left or right based on the comparison. Do not test for equality | |
203 | // against the median (or anything else); let the recursion handle those equality comparisons | |
204 | // once we bottom out in a list that case 3 cases or less (see above). | |
205 | // | |
206 | // I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate | |
207 | // would be faster since it leads to less branching for some lucky cases. It turns out that | |
208 | // Isolate is almost a total fail in the average, assuming all cases are equally likely. How | |
209 | // bad Isolate is depends on whether you believe that doing two consecutive branches based on | |
210 | // the same comparison is cheaper than doing the compare/branches separately. This is | |
211 | // difficult to evaluate. For small immediates that aren't blinded, we just care about | |
212 | // avoiding a second compare instruction. For large immediates or when blinding is in play, we | |
213 | // also care about the instructions used to materialize the immediate a second time. Isolate | |
214 | // can help with both costs since it involves first doing a < compare+branch on some value, | |
215 | // followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a < | |
216 | // compare+branch on some value, and then the == compare+branch on that same value will happen | |
217 | // much later. | |
218 | // | |
219 | // To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming | |
220 | // that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost | |
221 | // of a compare+branch on some value that you've just done another compare+branch for. These | |
222 | // recurrence relations compute the total cost incurred if you executed the switch statement | |
223 | // on each matching value. So the average cost of hitting some case can be computed as | |
224 | // Isolate[n]/n or Ignore[n]/n, respectively for the two relations. | |
225 | // | |
226 | // Isolate[1] = ComparisonCost | |
227 | // Isolate[2] = (2 + 1) * ComparisonCost | |
228 | // Isolate[3] = (3 + 2 + 1) * ComparisonCost | |
229 | // Isolate[n_] := With[ | |
230 | // {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]}, | |
231 | // ComparisonCost + ChainedComparisonCost + | |
232 | // (ComparisonCost * (medianIndex - 1) + Isolate[medianIndex - 1]) + | |
233 | // (2 * ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])] | |
234 | // | |
235 | // Ignore[1] = ComparisonCost | |
236 | // Ignore[2] = (2 + 1) * ComparisonCost | |
237 | // Ignore[3] = (3 + 2 + 1) * ComparisonCost | |
238 | // Ignore[n_] := With[ | |
239 | // {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]}, | |
240 | // (medianIndex * ComparisonCost + Ignore[medianIndex]) + | |
241 | // ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])] | |
242 | // | |
243 | // This does not account for the average cost of hitting the default case. See further below | |
244 | // for a discussion of that. | |
245 | // | |
246 | // It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always | |
247 | // better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for | |
248 | // switch statements that have 20 cases or fewer, though the margin of victory is never large | |
249 | // - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements, | |
250 | // we see divergence between the two with Ignore winning. This is of course rather | |
251 | // unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we | |
252 | // see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see | |
253 | // divergence for large switches with Ignore winning, for example if a switch statement has | |
254 | // 100 cases then Ignore saves one branch on average. | |
255 | // | |
256 | // Our current JIT backends don't provide for optimization for chained comparisons, except for | |
257 | // reducing the code for materializing the immediate if the immediates are large or blinding | |
258 | // comes into play. Probably our JIT backends live somewhere north of | |
259 | // ChainedComparisonCost = 0.5. | |
260 | // | |
261 | // This implies that using the Ignore strategy is likely better. If we wanted to incorporate | |
262 | // the Isolate strategy, we'd want to determine the switch size threshold at which the two | |
263 | // cross over and then use Isolate for switches that are smaller than that size. | |
264 | // | |
265 | // The average cost of hitting the default case is similar, but involves a different cost for | |
266 | // the base cases: you have to assume that you will always fail each branch. For the Ignore | |
267 | // strategy we would get this recurrence relation; the same kind of thing happens to the | |
268 | // Isolate strategy: | |
269 | // | |
270 | // Ignore[1] = ComparisonCost | |
271 | // Ignore[2] = (2 + 2) * ComparisonCost | |
272 | // Ignore[3] = (3 + 3 + 3) * ComparisonCost | |
273 | // Ignore[n_] := With[ | |
274 | // {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]}, | |
275 | // (medianIndex * ComparisonCost + Ignore[medianIndex]) + | |
276 | // ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])] | |
277 | // | |
278 | // This means that if we cared about the default case more, we would likely reduce | |
279 | // leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3 | |
280 | // in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also | |
281 | // increase the average cost of taking one of the non-default cases by 1/3. Typically the | |
282 | // difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe | |
283 | // that the default case is more important then we would want leafThreshold to be 2, and the | |
284 | // default case would become 1/6 faster on average. But we believe that most switch statements | |
285 | // are more likely to take one of the cases than the default, so we use leafThreshold = 3 | |
286 | // and get a 1/6 speed-up on average for taking an explicit case. | |
287 | ||
288 | unsigned medianIndex = (start + end) / 2; | |
289 | ||
290 | // We want medianIndex to point to the thing we will do a less-than compare against. We want | |
291 | // this less-than compare to split the current sublist into equal-sized sublists, or | |
292 | // nearly-equal-sized with some randomness if we're in the odd case. With the above | |
293 | // calculation, in the odd case we will have medianIndex pointing at either the element we | |
294 | // want or the element to the left of the one we want. Consider the case of five elements: | |
295 | // | |
296 | // 0 1 2 3 4 | |
297 | // | |
298 | // start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do | |
299 | // value < 2, then we will split the list into 2 elements on the left and three on the right. | |
300 | // That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure | |
301 | // that we don't become unbalanced on the right. This does not improve throughput since one | |
302 | // side will always get shafted, and that side might still be odd, in which case it will also | |
303 | // have two sides and one of them will get shafted - and so on. We just want to avoid | |
304 | // deterministic pathologies. | |
305 | // | |
306 | // In the even case, we will always end up pointing at the element we want: | |
307 | // | |
308 | // 0 1 2 3 | |
309 | // | |
310 | // start will be 0, end will be 4. So, the average is 2, which is what we'd like. | |
311 | if (size & 1) { | |
312 | RELEASE_ASSERT(medianIndex - start + 1 == end - medianIndex); | |
313 | medianIndex += m_weakRandom.getUint32() & 1; | |
314 | } else | |
315 | RELEASE_ASSERT(medianIndex - start == end - medianIndex); | |
316 | ||
317 | RELEASE_ASSERT(medianIndex > start); | |
318 | RELEASE_ASSERT(medianIndex + 1 < end); | |
319 | ||
320 | m_branches.append(BranchCode(LessThanToPush, medianIndex)); | |
321 | build(medianIndex, true, end); | |
322 | m_branches.append(BranchCode(Pop)); | |
323 | build(start, hardStart, medianIndex); | |
324 | } | |
325 | ||
326 | } // namespace JSC | |
327 | ||
328 | #endif // ENABLE(JIT) | |
329 |