/*
 * Copyright (C) 2013, 2015 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "config.h"
#include "BinarySwitch.h"

#if ENABLE(JIT)

#include "JSCInlines.h"

namespace JSC {
static unsigned globalCounter; // We use a different seed every time we are invoked.
BinarySwitch::BinarySwitch(GPRReg value, const Vector<int64_t>& cases, Type type)
    : m_value(value)
    , m_weakRandom(globalCounter++)
    , m_index(0)
    , m_caseIndex(UINT_MAX)
    , m_type(type)
{
    if (cases.isEmpty())
        return;

    for (unsigned i = 0; i < cases.size(); ++i)
        m_cases.append(Case(cases[i], i));

    std::sort(m_cases.begin(), m_cases.end());

    for (unsigned i = 1; i < m_cases.size(); ++i)
        RELEASE_ASSERT(m_cases[i - 1] < m_cases[i]);

    build(0, false, m_cases.size());
}

BinarySwitch::~BinarySwitch()
{
}

bool BinarySwitch::advance(MacroAssembler& jit)
{
    if (m_cases.isEmpty()) {
        m_fallThrough.append(jit.jump());
        return false;
    }

    if (m_index == m_branches.size()) {
        RELEASE_ASSERT(m_jumpStack.isEmpty());
        return false;
    }

    for (;;) {
        const BranchCode& code = m_branches[m_index++];
        switch (code.kind) {
        case NotEqualToFallThrough:
            switch (m_type) {
            case Int32:
                m_fallThrough.append(jit.branch32(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_fallThrough.append(jit.branchPtr(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case NotEqualToPush:
            switch (m_type) {
            case Int32:
                m_jumpStack.append(jit.branch32(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_jumpStack.append(jit.branchPtr(
                    MacroAssembler::NotEqual, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case LessThanToPush:
            switch (m_type) {
            case Int32:
                m_jumpStack.append(jit.branch32(
                    MacroAssembler::LessThan, m_value,
                    MacroAssembler::Imm32(static_cast<int32_t>(m_cases[code.index].value))));
                break;
            case IntPtr:
                m_jumpStack.append(jit.branchPtr(
                    MacroAssembler::LessThan, m_value,
                    MacroAssembler::ImmPtr(bitwise_cast<const void*>(static_cast<intptr_t>(m_cases[code.index].value)))));
                break;
            }
            break;
        case Pop:
            m_jumpStack.takeLast().link(&jit);
            break;
        case ExecuteCase:
            m_caseIndex = code.index;
            return true;
        }
    }
}
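
// A minimal usage sketch for the loop above, kept in a comment since it belongs to callers
// rather than to this file. It assumes the caseIndex() and fallThrough() accessors declared
// in BinarySwitch.h; emitCodeForCase is a hypothetical helper standing in for whatever the
// caller wants to do when a case matches:
//
//     BinarySwitch binarySwitch(valueGPR, caseValues, BinarySwitch::Int32);
//     while (binarySwitch.advance(jit))
//         emitCodeForCase(jit, binarySwitch.caseIndex()); // hypothetical per-case emitter
//     binarySwitch.fallThrough().link(&jit); // no case matched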

void BinarySwitch::build(unsigned start, bool hardStart, unsigned end)
{
    unsigned size = end - start;

    RELEASE_ASSERT(size);

    // This code uses some random numbers to keep things balanced. It's important to keep in mind
    // that this does not improve average-case throughput under the assumption that all cases fire
    // with equal probability. It just ensures that there will not be some switch structure that,
    // when combined with some input, will always produce pathologically good or pathologically bad
    // performance.

    const unsigned leafThreshold = 3;

    if (size <= leafThreshold) {
        // It turns out that for exactly three cases or fewer, it's better to just compare each
        // case individually. This saves 1/6 of a branch on average, and up to 1/3 of a branch in
        // extreme cases where the divide-and-conquer bottoms out in a lot of 3-case subswitches.
        //
        // This assumes that we care about the cost of hitting some case more than we care about
        // bottoming out in a default case. I believe that in most places where we use switch
        // statements, we are more likely to hit one of the cases than we are to fall through to
        // default. Intuitively, if we wanted to improve the performance of default, we would
        // reduce the value of leafThreshold to 2 or even to 1. See below for a deeper discussion.
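
        // To make the savings claim concrete, under the unit-cost model used in the longer
        // discussion below: a 3-case leaf costs (1 + 2 + 3) / 3 = 2 compare+branches on average
        // to hit a case, while splitting those 3 cases first (i.e. leafThreshold = 2) costs a
        // less-than branch plus 1 or 2 equality branches, (2 + 2 + 3) / 3 = 7/3 on average.
        // That 1/3-of-a-branch gap is the extreme case mentioned above.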

        bool allConsecutive = false;

        if ((hardStart || (start && m_cases[start - 1].value == m_cases[start].value - 1))
            && start + size < m_cases.size()
            && m_cases[start + size - 1].value == m_cases[start + size].value - 1) {
            allConsecutive = true;
            for (unsigned i = 0; i < size - 1; ++i) {
                if (m_cases[start + i].value + 1 != m_cases[start + i + 1].value) {
                    allConsecutive = false;
                    break;
                }
            }
        }
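
        // Note on the guard above: when this leaf's cases form a consecutive run, and the values
        // just outside the run are themselves cases (or the start is "hard" because a less-than
        // split already bounds the value), any value that reaches this leaf must equal one of its
        // cases. That is what lets the code below skip the final NotEqualToFallThrough check.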

        Vector<unsigned, 3> localCaseIndices;
        for (unsigned i = 0; i < size; ++i)
            localCaseIndices.append(start + i);

        std::random_shuffle(
            localCaseIndices.begin(), localCaseIndices.end(),
            [this] (unsigned n) {
                // We use modulo to get a random number in the range we want, fully knowing that
                // this introduces a tiny amount of bias, but we're fine with such tiny bias.
                return m_weakRandom.getUint32() % n;
            });
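
        // To make "tiny amount of bias" concrete: for n = 3, getUint32() % 3 yields 0 from
        // 1431655766 of the 2^32 possible inputs but 1 or 2 from only 1431655765 each, a
        // relative bias on the order of 2^-32, which is irrelevant for balancing purposes.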

        for (unsigned i = 0; i < size - 1; ++i) {
            m_branches.append(BranchCode(NotEqualToPush, localCaseIndices[i]));
            m_branches.append(BranchCode(ExecuteCase, localCaseIndices[i]));
            m_branches.append(BranchCode(Pop));
        }

        if (!allConsecutive)
            m_branches.append(BranchCode(NotEqualToFallThrough, localCaseIndices.last()));

        m_branches.append(BranchCode(ExecuteCase, localCaseIndices.last()));

        return;
    }

    // There are two different strategies we could consider here:
    //
    // Isolate median and split: pick a median and check if the comparison value is equal to it;
    // if so, execute the median case. Otherwise check if the value is less than the median, and
    // recurse left or right based on this. This has two subvariants: we could either first test
    // equality for the median and then do the less-than, or we could first do the less-than and
    // then check equality on the not-less-than path.
    //
    // Ignore median and split: do a less-than comparison on a value that splits the cases in two
    // equal-sized halves. Recurse left or right based on the comparison. Do not test for equality
    // against the median (or anything else); let the recursion handle those equality comparisons
    // once we bottom out in a list that has 3 cases or fewer (see above).
    //
    // I'll refer to these strategies as Isolate and Ignore. I initially believed that Isolate
    // would be faster since it leads to less branching for some lucky cases. It turns out that
    // Isolate is almost a total fail in the average, assuming all cases are equally likely. How
    // bad Isolate is depends on whether you believe that doing two consecutive branches based on
    // the same comparison is cheaper than doing the compare/branches separately. This is
    // difficult to evaluate. For small immediates that aren't blinded, we just care about
    // avoiding a second compare instruction. For large immediates or when blinding is in play, we
    // also care about the instructions used to materialize the immediate a second time. Isolate
    // can help with both costs since it involves first doing a < compare+branch on some value,
    // followed by a == compare+branch on the same exact value (or vice-versa). Ignore will do a <
    // compare+branch on some value, and then the == compare+branch on that same value will happen
    // much later.
    //
    // To evaluate these costs, I wrote the recurrence relation for Isolate and Ignore, assuming
    // that ComparisonCost is the cost of a compare+branch and ChainedComparisonCost is the cost
    // of a compare+branch on some value that you've just done another compare+branch for. These
    // recurrence relations compute the total cost incurred if you executed the switch statement
    // on each matching value. So the average cost of hitting some case can be computed as
    // Isolate[n]/n or Ignore[n]/n, respectively for the two relations.
    //
    // Isolate[1] = ComparisonCost
    // Isolate[2] = (2 + 1) * ComparisonCost
    // Isolate[3] = (3 + 2 + 1) * ComparisonCost
    // Isolate[n_] := With[
    //     {medianIndex = Floor[n/2] + If[EvenQ[n], RandomInteger[], 1]},
    //     ComparisonCost + ChainedComparisonCost +
    //     (ComparisonCost * (medianIndex - 1) + Isolate[medianIndex - 1]) +
    //     (2 * ComparisonCost * (n - medianIndex) + Isolate[n - medianIndex])]
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 1) * ComparisonCost
    // Ignore[3] = (3 + 2 + 1) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This does not account for the average cost of hitting the default case. See further below
    // for a discussion of that.
    //
    // It turns out that for ComparisonCost = 1 and ChainedComparisonCost = 1, Ignore is always
    // better than Isolate. If we assume that ChainedComparisonCost = 0, then Isolate wins for
    // switch statements that have 20 cases or fewer, though the margin of victory is never large
    // - it might sometimes save an average of 0.3 ComparisonCost. For larger switch statements,
    // we see divergence between the two with Ignore winning. This is of course rather
    // unrealistic since the chained comparison is never free. For ChainedComparisonCost = 0.5, we
    // see Isolate winning for 10 cases or fewer, by maybe 0.2 ComparisonCost. Again we see
    // divergence for large switches with Ignore winning, for example if a switch statement has
    // 100 cases then Ignore saves one branch on average.
    //
    // Our current JIT backends don't provide for optimization for chained comparisons, except for
    // reducing the code for materializing the immediate if the immediates are large or blinding
    // comes into play. Probably our JIT backends live somewhere north of
    // ChainedComparisonCost = 0.5.
    //
    // This implies that using the Ignore strategy is likely better. If we wanted to incorporate
    // the Isolate strategy, we'd want to determine the switch size threshold at which the two
    // cross over and then use Isolate for switches that are smaller than that size.
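    //
    // As a sanity check, the Ignore recurrence is easy to evaluate outside Mathematica as well.
    // A minimal C++ sketch (hypothetical, not part of this file), matching the definitions above
    // and averaging the randomized median choice for odd n:
    //
    //     double ignoreCost(unsigned n, double comparisonCost)
    //     {
    //         if (n <= 3)
    //             return comparisonCost * n * (n + 1) / 2; // the base cases above
    //         auto costOfSplit = [&] (unsigned medianIndex) {
    //             return medianIndex * comparisonCost + ignoreCost(medianIndex, comparisonCost)
    //                 + (n - medianIndex) * comparisonCost + ignoreCost(n - medianIndex, comparisonCost);
    //         };
    //         if (!(n & 1))
    //             return costOfSplit(n / 2);
    //         return (costOfSplit(n / 2) + costOfSplit(n / 2 + 1)) / 2; // average the coin flip
    //     }
    //
    // ignoreCost(n, 1) / n then gives the average number of branches taken to hit a case.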
    //
    // The average cost of hitting the default case is similar, but involves a different cost for
    // the base cases: you have to assume that you will always fail each branch. For the Ignore
    // strategy we would get this recurrence relation; the same kind of thing happens to the
    // Isolate strategy:
    //
    // Ignore[1] = ComparisonCost
    // Ignore[2] = (2 + 2) * ComparisonCost
    // Ignore[3] = (3 + 3 + 3) * ComparisonCost
    // Ignore[n_] := With[
    //     {medianIndex = If[EvenQ[n], n/2, Floor[n/2] + RandomInteger[]]},
    //     (medianIndex * ComparisonCost + Ignore[medianIndex]) +
    //     ((n - medianIndex) * ComparisonCost + Ignore[n - medianIndex])]
    //
    // This means that if we cared about the default case more, we would likely reduce
    // leafThreshold. Reducing it to 2 would reduce the average cost of the default case by 1/3
    // in the most extreme cases (num switch cases = 3, 6, 12, 24, ...). But it would also
    // increase the average cost of taking one of the non-default cases by 1/3. Typically the
    // difference is 1/6 in either direction. This makes it a very simple trade-off: if we believe
    // that the default case is more important, then we would want leafThreshold to be 2, and the
    // default case would become 1/6 faster on average. But we believe that most switch statements
    // are more likely to take one of the cases than the default, so we use leafThreshold = 3
    // and get a 1/6 speed-up on average for taking an explicit case.
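    //
    // Concretely, in the extreme 3-case example: with leafThreshold = 3 a default value fails
    // all 3 equality branches, while with leafThreshold = 2 it would take the less-than branch
    // and then fail only 1 or 2 equality branches, so its best path drops from 3 branches to 2
    // - the 1/3 reduction above - at the cost of making the matched cases correspondingly slower.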

    unsigned medianIndex = (start + end) / 2;

    // We want medianIndex to point to the thing we will do a less-than compare against. We want
    // this less-than compare to split the current sublist into equal-sized sublists, or
    // nearly-equal-sized with some randomness if we're in the odd case. With the above
    // calculation, in the odd case we will have medianIndex pointing at either the element we
    // want or the element to the left of the one we want. Consider the case of five elements:
    //
    //     0 1 2 3 4
    //
    // start will be 0, end will be 5. The average is 2.5, which rounds down to 2. If we do
    // value < 2, then we will split the list into 2 elements on the left and three on the right.
    // That's pretty good, but in this odd case we'd like to at random choose 3 instead to ensure
    // that we don't become unbalanced on the right. This does not improve throughput since one
    // side will always get shafted, and that side might still be odd, in which case it will also
    // have two sides and one of them will get shafted - and so on. We just want to avoid
    // deterministic pathologies.
    //
    // In the even case, we will always end up pointing at the element we want:
    //
    //     0 1 2 3
    //
    // start will be 0, end will be 4. So, the average is 2, which is what we'd like.

    if (size & 1) {
        RELEASE_ASSERT(medianIndex - start + 1 == end - medianIndex);
        medianIndex += m_weakRandom.getUint32() & 1;
    } else
        RELEASE_ASSERT(medianIndex - start == end - medianIndex);

    RELEASE_ASSERT(medianIndex > start);
    RELEASE_ASSERT(medianIndex + 1 < end);

    m_branches.append(BranchCode(LessThanToPush, medianIndex));
    build(medianIndex, true, end);
    m_branches.append(BranchCode(Pop));
    build(start, hardStart, medianIndex);
}

} // namespace JSC

#endif // ENABLE(JIT)