/*
 * Copyright (C) 2011, 2013, 2014 Apple Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "config.h"
#include "DFGOSRExitCompiler.h"

#if ENABLE(DFG_JIT) && USE(JSVALUE64)

#include "DFGOperations.h"
#include "DFGOSRExitCompilerCommon.h"
#include "DFGSpeculativeJIT.h"
#include "JSCInlines.h"
#include "VirtualRegister.h"

#include <wtf/DataLog.h>

namespace JSC { namespace DFG {

void OSRExitCompiler::compileExit(const OSRExit& exit, const Operands<ValueRecovery>& operands, SpeculationRecovery* recovery)
{
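    // Note: jitAssertTagsInPlace() is expected to emit checks only in ASSERT-enabled builds,
    // verifying that the pinned tag registers still hold their constants on entry to this stub.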
    m_jit.jitAssertTagsInPlace();

    // 1) Pro-forma stuff.
    if (Options::printEachOSRExit()) {
        SpeculationFailureDebugInfo* debugInfo = new SpeculationFailureDebugInfo;
        debugInfo->codeBlock = m_jit.codeBlock();
        debugInfo->kind = exit.m_kind;
        debugInfo->bytecodeOffset = exit.m_codeOrigin.bytecodeIndex;

        m_jit.debugCall(debugOperationPrintSpeculationFailure, debugInfo);
    }

    // Need to ensure that the stack pointer accounts for the worst-case stack usage at exit.
    m_jit.addPtr(
        CCallHelpers::TrustedImm32(
            -m_jit.codeBlock()->jitCode()->dfgCommon()->requiredRegisterCountForExit * sizeof(Register)),
        CCallHelpers::framePointerRegister, CCallHelpers::stackPointerRegister);

    // 2) Perform speculation recovery. This only comes into play when an operation
    //    starts mutating state before verifying the speculation it has already made.
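    //    For example, a SpeculativeAdd recovery undoes the add with sub32() and then
    //    re-tags the destination as an int32 by OR-ing in the tag held in
    //    tagTypeNumberRegister.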

    if (recovery) {
        switch (recovery->type()) {
        case SpeculativeAdd:
            m_jit.sub32(recovery->src(), recovery->dest());
            m_jit.or64(GPRInfo::tagTypeNumberRegister, recovery->dest());
            break;

        case BooleanSpeculationCheck:
            m_jit.xor64(AssemblyHelpers::TrustedImm32(static_cast<int32_t>(ValueFalse)), recovery->dest());
            break;

        default:
            break;
        }
    }

    // 3) Refine some array and/or value profile, if appropriate.

    if (!!exit.m_jsValueSource) {
        if (exit.m_kind == BadCache || exit.m_kind == BadIndexingType) {
            // If the instruction that this originated from has an array profile, then
            // refine it. If it doesn't, then do nothing. The latter could happen for
            // hoisted checks, or checks emitted for operations that didn't have array
            // profiling - either ops that aren't array accesses at all, or weren't
            // known to be array accesses in the bytecode. The latter case is a FIXME
            // while the former case is an outcome of a CheckStructure not knowing why
            // it was emitted (could be either due to an inline cache of a property
            // access, or due to an array profile).

            CodeOrigin codeOrigin = exit.m_codeOriginForExitProfile;
            if (ArrayProfile* arrayProfile = m_jit.baselineCodeBlockFor(codeOrigin)->getArrayProfile(codeOrigin.bytecodeIndex)) {
                GPRReg usedRegister;
                if (exit.m_jsValueSource.isAddress())
                    usedRegister = exit.m_jsValueSource.base();
                else
                    usedRegister = exit.m_jsValueSource.gpr();

                GPRReg scratch1;
                GPRReg scratch2;
                scratch1 = AssemblyHelpers::selectScratchGPR(usedRegister);
                scratch2 = AssemblyHelpers::selectScratchGPR(usedRegister, scratch1);

#if CPU(ARM64)
                m_jit.pushToSave(scratch1);
                m_jit.pushToSave(scratch2);
#else
                m_jit.push(scratch1);
                m_jit.push(scratch2);
#endif

                GPRReg value;
                if (exit.m_jsValueSource.isAddress()) {
                    value = scratch1;
                    m_jit.loadPtr(AssemblyHelpers::Address(exit.m_jsValueSource.asAddress()), value);
                } else
                    value = exit.m_jsValueSource.gpr();

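                // Record the last-seen structure and set the (1 << indexingType) bit in the
                // profile's observed ArrayModes.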
                m_jit.load32(AssemblyHelpers::Address(value, JSCell::structureIDOffset()), scratch1);
                m_jit.store32(scratch1, arrayProfile->addressOfLastSeenStructureID());
                m_jit.load8(AssemblyHelpers::Address(value, JSCell::indexingTypeOffset()), scratch1);
                m_jit.move(AssemblyHelpers::TrustedImm32(1), scratch2);
                m_jit.lshift32(scratch1, scratch2);
                m_jit.or32(scratch2, AssemblyHelpers::AbsoluteAddress(arrayProfile->addressOfArrayModes()));

#if CPU(ARM64)
                m_jit.popToRestore(scratch2);
                m_jit.popToRestore(scratch1);
#else
                m_jit.pop(scratch2);
                m_jit.pop(scratch1);
#endif
            }
        }

        if (!!exit.m_valueProfile) {
            EncodedJSValue* bucket = exit.m_valueProfile.getSpecFailBucket(0);

            if (exit.m_jsValueSource.isAddress()) {
                // We can't be sure that we have a spare register. So use the tagTypeNumberRegister,
                // since we know how to restore it.
                m_jit.load64(AssemblyHelpers::Address(exit.m_jsValueSource.asAddress()), GPRInfo::tagTypeNumberRegister);
                m_jit.store64(GPRInfo::tagTypeNumberRegister, bucket);
                m_jit.move(AssemblyHelpers::TrustedImm64(TagTypeNumber), GPRInfo::tagTypeNumberRegister);
            } else
                m_jit.store64(exit.m_jsValueSource.gpr(), bucket);
        }
    }

    // What follows is an intentionally simple OSR exit implementation that generates
    // fairly poor code but is very easy to hack. In particular, it dumps all state that
    // needs conversion into a scratch buffer so that in step 7, where we actually do the
    // conversions, we know that all temp registers are free to use and the variable is
    // definitely in a well-known spot in the scratch buffer regardless of whether it had
    // originally been in a register or spilled. This allows us to decouple "where was
    // the variable" from "how was it represented". Consider the
    // Int32DisplacedInJSStack recovery: it tells us that the value is in a
    // particular place and that that place holds an unboxed int32. We have two different
    // places that a value could be (displaced, register) and a bunch of different
    // ways of representing a value. The number of recoveries is two * a bunch. The code
    // below means that we have to have two + a bunch cases rather than two * a bunch.
    // Once we have loaded the value from wherever it was, the reboxing is the same
    // regardless of its location. Likewise, before we do the reboxing, the way we get to
    // the value (i.e. where we load it from) is the same regardless of its type. Because
    // the code below always dumps everything into a scratch buffer first, the two
    // questions become orthogonal, which simplifies adding new types and adding new
    // locations.
    //
    // This raises the question: does using such a suboptimal implementation of OSR exit,
    // where we always emit code to dump all state into a scratch buffer only to then
    // dump it right back into the stack, hurt us in any way? The answer is that OSR exits
    // are rare. Our tiering strategy ensures this. This is because if an OSR exit is
    // taken more than ~100 times, we jettison the DFG code block along with all of its
    // exits. It is impossible for an OSR exit - i.e. the code we compile below - to
    // execute frequently enough for the codegen to matter that much. It probably matters
    // enough that we don't want to turn this into some super-slow function call, but so
    // long as we're generating straight-line code, that code can be pretty bad. Also
    // because we tend to exit only along one OSR exit from any DFG code block - that's an
    // empirical result that we're extremely confident about - the code size of this
    // doesn't matter much. Hence any attempt to optimize the codegen here is just purely
    // harmful to the system: it probably won't reduce either net memory usage or net
    // execution time. It will only prevent us from cleanly decoupling "where was the
    // variable" from "how was it represented", which will make it more difficult to add
    // features in the future and it will make it harder to reason about bugs.

    // 4) Save all state from GPRs into the scratch buffer.

    ScratchBuffer* scratchBuffer = m_jit.vm()->scratchBufferForSize(sizeof(EncodedJSValue) * operands.size());
    EncodedJSValue* scratch = scratchBuffer ? static_cast<EncodedJSValue*>(scratchBuffer->dataBuffer()) : 0;

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];

        switch (recovery.technique()) {
        case InGPR:
        case UnboxedInt32InGPR:
        case UnboxedInt52InGPR:
        case UnboxedStrictInt52InGPR:
        case UnboxedCellInGPR:
            m_jit.store64(recovery.gpr(), scratch + index);
            break;

        default:
            break;
        }
    }

    // And voila, all GPRs are free to reuse.

    // 5) Save all state from FPRs into the scratch buffer.

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];

        switch (recovery.technique()) {
        case InFPR:
            m_jit.move(AssemblyHelpers::TrustedImmPtr(scratch + index), GPRInfo::regT0);
            m_jit.storeDouble(recovery.fpr(), MacroAssembler::Address(GPRInfo::regT0));
            break;

        default:
            break;
        }
    }

    // Now, all FPRs are also free.

    // 6) Save all state from the stack into the scratch buffer. For simplicity we
    //    do this even for state that's already in the right place on the stack.
    //    It makes things simpler later.

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];

        switch (recovery.technique()) {
        case DisplacedInJSStack:
        case CellDisplacedInJSStack:
        case BooleanDisplacedInJSStack:
        case Int32DisplacedInJSStack:
        case DoubleDisplacedInJSStack:
        case Int52DisplacedInJSStack:
        case StrictInt52DisplacedInJSStack:
            m_jit.load64(AssemblyHelpers::addressFor(recovery.virtualRegister()), GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, scratch + index);
            break;

        default:
            break;
        }
    }
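
    // At this point every value that was live in a register or in a stack slot has a copy at
    // scratch[index]; constants and not-yet-created arguments are handled directly in step 7.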

    // 7) Do all data format conversions and store the results into the stack.

    bool haveArguments = false;

    for (size_t index = 0; index < operands.size(); ++index) {
        const ValueRecovery& recovery = operands[index];
        int operand = operands.operandForIndex(index);

        switch (recovery.technique()) {
        case InGPR:
        case UnboxedCellInGPR:
        case DisplacedInJSStack:
        case CellDisplacedInJSStack:
        case BooleanDisplacedInJSStack:
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;
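
        // Unboxed int32 values are reboxed by zero-extending the 32-bit payload and OR-ing in
        // the number tag held in tagTypeNumberRegister (the JSVALUE64 encoding of an int32).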
        case UnboxedInt32InGPR:
        case Int32DisplacedInJSStack:
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.zeroExtend32ToPtr(GPRInfo::regT0, GPRInfo::regT0);
            m_jit.or64(GPRInfo::tagTypeNumberRegister, GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;
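
        // Int52 values are kept left-shifted by JSValue::int52ShiftAmount; shift back down to
        // the strict representation before boxing.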
        case UnboxedInt52InGPR:
        case Int52DisplacedInJSStack:
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.rshift64(
                AssemblyHelpers::TrustedImm32(JSValue::int52ShiftAmount), GPRInfo::regT0);
            m_jit.boxInt52(GPRInfo::regT0, GPRInfo::regT0, GPRInfo::regT1, FPRInfo::fpRegT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;
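
        // Strict int52 values are already unshifted, so they can be boxed directly.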
        case UnboxedStrictInt52InGPR:
        case StrictInt52DisplacedInJSStack:
            m_jit.load64(scratch + index, GPRInfo::regT0);
            m_jit.boxInt52(GPRInfo::regT0, GPRInfo::regT0, GPRInfo::regT1, FPRInfo::fpRegT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;
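
        // Doubles are purified (NaNs canonicalized) before boxing so that an impure NaN never
        // leaks out as a boxed JSValue.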
        case InFPR:
        case DoubleDisplacedInJSStack:
            m_jit.move(AssemblyHelpers::TrustedImmPtr(scratch + index), GPRInfo::regT0);
            m_jit.loadDouble(MacroAssembler::Address(GPRInfo::regT0), FPRInfo::fpRegT0);
            m_jit.purifyNaN(FPRInfo::fpRegT0);
            m_jit.boxDouble(FPRInfo::fpRegT0, GPRInfo::regT0);
            m_jit.store64(GPRInfo::regT0, AssemblyHelpers::addressFor(operand));
            break;
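
        // Constants never went through the scratch buffer; store their encoded form directly.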
        case Constant:
            m_jit.store64(
                AssemblyHelpers::TrustedImm64(JSValue::encode(recovery.constant())),
                AssemblyHelpers::addressFor(operand));
            break;

        case ArgumentsThatWereNotCreated:
            haveArguments = true;
            // We can't restore this yet but we can make sure that the stack appears
            // sane.
            m_jit.store64(
                AssemblyHelpers::TrustedImm64(JSValue::encode(JSValue())),
                AssemblyHelpers::addressFor(operand));
            break;

        default:
            break;
        }
    }

    // 8) Adjust the old JIT's execute counter. Since we are exiting OSR, we know
    //    that all new calls into this code will go to the new JIT, so the execute
    //    counter only affects call frames that performed OSR exit and call frames
    //    that were still executing the old JIT at the time of another call frame's
    //    OSR exit. We want to ensure that the following is true:
    //
    //    (a) Code that performs an OSR exit gets a chance to reenter optimized
    //        code eventually, since optimized code is faster. But we don't
    //        want to do such reentry too aggressively (see (c) below).
    //
    //    (b) If there is code on the call stack that is still running the old
    //        JIT's code and has never OSR'd, then it should get a chance to
    //        perform OSR entry despite the fact that we've exited.
    //
    //    (c) Code that performs an OSR exit should not immediately retry OSR
    //        entry, since both forms of OSR are expensive. OSR entry is
    //        particularly expensive.
    //
    //    (d) Frequent OSR failures, even those that do not result in the code
    //        running in a hot loop, result in recompilation getting triggered.
    //
    //    To ensure (c), we'd like to set the execute counter to
    //    counterValueForOptimizeAfterWarmUp(). This seems like it would endanger
    //    (a) and (b), since then every OSR exit would delay the opportunity for
    //    every call frame to perform OSR entry. Essentially, if OSR exit happens
    //    frequently and the function has few loops, then the counter will never
    //    become non-negative and OSR entry will never be triggered. OSR entry
    //    will only happen if a loop gets hot in the old JIT, which does a pretty
    //    good job of ensuring (a) and (b). But that doesn't take care of (d),
    //    since each speculation failure would reset the execute counter.
    //    So we check here if the number of speculation failures is significantly
    //    larger than the number of successes (we want 90% success rate), and if
    //    there have been a large enough number of failures. If so, we set the
    //    counter to 0; otherwise we set the counter to
    //    counterValueForOptimizeAfterWarmUp().

    handleExitCounts(m_jit, exit);

    // 9) Reify inlined call frames.

    reifyInlinedCallFrames(m_jit, exit);

    // 10) Create arguments if necessary and place them into the appropriate aliased
    //     registers.

    if (haveArguments) {
        ArgumentsRecoveryGenerator argumentsRecovery;

        for (size_t index = 0; index < operands.size(); ++index) {
            const ValueRecovery& recovery = operands[index];
            if (recovery.technique() != ArgumentsThatWereNotCreated)
                continue;
            argumentsRecovery.generateFor(
                operands.operandForIndex(index), exit.m_codeOrigin, m_jit);
        }
    }

    adjustAndJumpToTarget(m_jit, exit);
}

} } // namespace JSC::DFG

#endif // ENABLE(DFG_JIT) && USE(JSVALUE64)