/*
- * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2009, 2013 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
#include "config.h"
#include "YarrJIT.h"
-#include "ASCIICType.h"
+#include <wtf/ASCIICType.h>
#include "LinkBuffer.h"
+#include "Options.h"
#include "Yarr.h"
+#include "YarrCanonicalizeUCS2.h"
#if ENABLE(YARR_JIT)
namespace JSC { namespace Yarr {
+template<YarrJITCompileMode compileMode>
class YarrGenerator : private MacroAssembler {
- friend void jitCompile(JSGlobalData*, YarrCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
+ friend void jitCompile(VM*, YarrCodeBlock& jitObject, const String& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
#if CPU(ARM)
static const RegisterID input = ARMRegisters::r0;
static const RegisterID index = ARMRegisters::r1;
static const RegisterID length = ARMRegisters::r2;
- static const RegisterID output = ARMRegisters::r4;
+ static const RegisterID output = ARMRegisters::r3;
- static const RegisterID regT0 = ARMRegisters::r5;
- static const RegisterID regT1 = ARMRegisters::r6;
+ static const RegisterID regT0 = ARMRegisters::r4;
+ static const RegisterID regT1 = ARMRegisters::r5;
static const RegisterID returnRegister = ARMRegisters::r0;
+ static const RegisterID returnRegister2 = ARMRegisters::r1;
+#elif CPU(ARM64)
+ static const RegisterID input = ARM64Registers::x0;
+ static const RegisterID index = ARM64Registers::x1;
+ static const RegisterID length = ARM64Registers::x2;
+ static const RegisterID output = ARM64Registers::x3;
+
+ static const RegisterID regT0 = ARM64Registers::x4;
+ static const RegisterID regT1 = ARM64Registers::x5;
+
+ static const RegisterID returnRegister = ARM64Registers::x0;
+ static const RegisterID returnRegister2 = ARM64Registers::x1;
#elif CPU(MIPS)
static const RegisterID input = MIPSRegisters::a0;
static const RegisterID index = MIPSRegisters::a1;
static const RegisterID regT1 = MIPSRegisters::t5;
static const RegisterID returnRegister = MIPSRegisters::v0;
+ static const RegisterID returnRegister2 = MIPSRegisters::v1;
#elif CPU(SH4)
static const RegisterID input = SH4Registers::r4;
static const RegisterID index = SH4Registers::r5;
static const RegisterID regT1 = SH4Registers::r1;
static const RegisterID returnRegister = SH4Registers::r0;
+ static const RegisterID returnRegister2 = SH4Registers::r1;
#elif CPU(X86)
static const RegisterID input = X86Registers::eax;
static const RegisterID index = X86Registers::edx;
static const RegisterID regT1 = X86Registers::esi;
static const RegisterID returnRegister = X86Registers::eax;
+ static const RegisterID returnRegister2 = X86Registers::edx;
#elif CPU(X86_64)
+#if !OS(WINDOWS)
static const RegisterID input = X86Registers::edi;
static const RegisterID index = X86Registers::esi;
static const RegisterID length = X86Registers::edx;
static const RegisterID output = X86Registers::ecx;
+#else
+ // If the return value doesn't fit in 64 bits, its destination is pointed to by rcx and the parameters are shifted.
+ // http://msdn.microsoft.com/en-us/library/7572ztz4.aspx
+ COMPILE_ASSERT(sizeof(MatchResult) > sizeof(void*), MatchResult_does_not_fit_in_64bits);
+ static const RegisterID input = X86Registers::edx;
+ static const RegisterID index = X86Registers::r8;
+ static const RegisterID length = X86Registers::r9;
+ static const RegisterID output = X86Registers::r10;
+#endif
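// A standalone sketch, not part of this patch, of the Win64 convention the
// #else branch above accounts for: a by-value return wider than 64 bits is
// written through a hidden pointer passed in rcx, so the declared parameters
// shift into rdx/r8/r9. The struct and function names here are hypothetical.
struct WideResult { void* start; void* end; }; // wider than 64 bits

WideResult demoReturn(void* input, unsigned index, unsigned length)
{
    // MSVC x64: rcx = hidden &result, rdx = input, r8d = index, r9d = length.
    WideResult result = { input, nullptr };
    return result; // copied out through the hidden pointer; rax holds that pointer
}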
static const RegisterID regT0 = X86Registers::eax;
static const RegisterID regT1 = X86Registers::ebx;
static const RegisterID returnRegister = X86Registers::eax;
+ static const RegisterID returnRegister2 = X86Registers::edx;
#endif
void optimizeAlternative(PatternAlternative* alternative)
void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
{
if (charClass->m_table) {
- ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table->m_table));
- matchDest.append(branchTest8(charClass->m_table->m_inverted ? Zero : NonZero, tableEntry));
+ ExtendedAddress tableEntry(character, reinterpret_cast<intptr_t>(charClass->m_table));
+ matchDest.append(branchTest8(charClass->m_tableInverted ? Zero : NonZero, tableEntry));
return;
}
Jump unicodeFail;
return branch32(NotEqual, index, length);
}
- Jump jumpIfCharEquals(UChar ch, int inputPosition)
+ Jump jumpIfCharNotEquals(UChar ch, int inputPosition, RegisterID character)
{
- return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
- }
+ readCharacter(inputPosition, character);
- Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
- {
- return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ // For case-insensitive compares, non-ASCII characters that have different
+ // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ or32(TrustedImm32(0x20), character);
+ ch |= 0x20;
+ }
+
+ return branch32(NotEqual, character, Imm32(ch));
}
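// A minimal sketch, not part of this patch, of the ASCII trick used above:
// upper- and lower-case letters differ only in bit 0x20, so OR-ing 0x20 into
// both sides folds case in a single compare. The helper is hypothetical; the
// trick is only safe when the pattern character is known to be an ASCII
// letter, which the isASCIIAlpha(ch) guard above ensures.
inline bool equalIgnoringASCIICaseSketch(char a, char b)
{
    return (a | 0x20) == (b | 0x20); // 'A' (0x41) | 0x20 == 'a' (0x61)
}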
void readCharacter(int inputPosition, RegisterID reg)
{
- load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, index, TimesOne, inputPosition * sizeof(char)), reg);
+ else
+ load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
}
void storeToFrame(RegisterID reg, unsigned frameLocation)
jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
}
+ unsigned alignCallFrameSizeInBytes(unsigned callFrameSize)
+ {
+ callFrameSize *= sizeof(void*);
+ if (callFrameSize / sizeof(void*) != m_pattern.m_body->m_callFrameSize)
+ CRASH();
+ callFrameSize = (callFrameSize + 0x3f) & ~0x3f;
+ if (!callFrameSize)
+ CRASH();
+ return callFrameSize;
+ }
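// A worked example, not part of this patch: (n + 0x3f) & ~0x3f rounds a byte
// count up to the next multiple of 64, keeping the JIT frame 64-byte aligned.
// The two CRASH() calls above guard against overflow in the multiply and in
// the round-up. The helper name is hypothetical.
inline unsigned roundUpTo64(unsigned bytes)
{
    return (bytes + 0x3f) & ~0x3fu;
}
// roundUpTo64(40) == 64, roundUpTo64(64) == 64, roundUpTo64(65) == 128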
+ void initCallFrame()
+ {
+ unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
+ if (callFrameSize)
+ subPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister);
+ }
+ void removeCallFrame()
+ {
+ unsigned callFrameSize = m_pattern.m_body->m_callFrameSize;
+ if (callFrameSize)
+ addPtr(Imm32(alignCallFrameSizeInBytes(callFrameSize)), stackPointerRegister);
+ }
+
+ // Used to record subpatterns; should only be called if compileMode is IncludeSubpatterns.
+ void setSubpatternStart(RegisterID reg, unsigned subpattern)
+ {
+ ASSERT(subpattern);
+ // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
+ store32(reg, Address(output, (subpattern << 1) * sizeof(int)));
+ }
+ void setSubpatternEnd(RegisterID reg, unsigned subpattern)
+ {
+ ASSERT(subpattern);
+ // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
+ store32(reg, Address(output, ((subpattern << 1) + 1) * sizeof(int)));
+ }
+ void clearSubpatternStart(unsigned subpattern)
+ {
+ ASSERT(subpattern);
+ // FIXME: should be able to ASSERT(compileMode == IncludeSubpatterns), but then this function is conditionally NORETURN. :-(
+ store32(TrustedImm32(-1), Address(output, (subpattern << 1) * sizeof(int)));
+ }
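// A sketch, not part of this patch, of the output vector layout the three
// helpers above assume: subpattern n's bounds occupy ints 2n and 2n+1 (slots
// 0 and 1 hold the overall match), and -1 marks an unmatched capture. The
// accessor names are hypothetical.
inline int captureStart(const int* output, unsigned subpattern)
{
    return output[subpattern << 1]; // what setSubpatternStart() stored
}
inline int captureEnd(const int* output, unsigned subpattern)
{
    return output[(subpattern << 1) + 1]; // what setSubpatternEnd() stored
}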
+
+ // We use one of three different strategies to track the start of the current match,
+ // while matching.
+ // 1) If the pattern has a fixed size, do nothing! - we calculate the value lazily
+ // at the end of matching. This is irrespective of compileMode, and in this case
+ // these methods should never be called.
+ // 2) If we're compiling IncludeSubpatterns, 'output' contains a pointer to an output
+ // vector, store the match start in the output vector.
+ // 3) If we're compiling MatchOnly, 'output' is unused, store the match start directly
+ // in this register.
+ void setMatchStart(RegisterID reg)
+ {
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ if (compileMode == IncludeSubpatterns)
+ store32(reg, output);
+ else
+ move(reg, output);
+ }
+ void getMatchStart(RegisterID reg)
+ {
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ if (compileMode == IncludeSubpatterns)
+ load32(output, reg);
+ else
+ move(output, reg);
+ }
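// A plain C++ restatement, not part of this patch, of the three strategies
// described above; the helper is hypothetical. Strategy 1 stores nothing and
// derives the start from the end, as the fixed-size return path below does:
inline unsigned matchStartForFixedSizePattern(unsigned endIndex, unsigned minimumSize)
{
    return endIndex - minimumSize; // strategy 1: computed lazily at match end
}
// Strategy 2 (IncludeSubpatterns) is store32(reg, output) - write through the
// output vector pointer; strategy 3 (MatchOnly) is move(reg, output) - keep
// the start directly in the otherwise-unused 'output' register.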
+
enum YarrOpCode {
// These nodes wrap body alternatives - those in the main disjunction,
// rather than subpatterns or assertions. These are chained together in
Label m_reentry;
JumpList m_jumps;
+ // Used for backtracking when the prior alternative did not consume any
+ // characters but matched.
+ Jump m_zeroLengthMatch;
+
// This flag is used to null out the second pattern character, when
// two are fused to match a pair together.
bool m_isDeadCode;
{
YarrOp& op = m_ops[opIndex];
+ if (op.m_isDeadCode)
+ return;
+
// m_ops always ends with an OpBodyAlternativeEnd or OpMatchFailed
// node, so there must always be at least one more node.
ASSERT(opIndex + 1 < m_ops.size());
- YarrOp& nextOp = m_ops[opIndex + 1];
-
- if (op.m_isDeadCode)
- return;
+ YarrOp* nextOp = &m_ops[opIndex + 1];
PatternTerm* term = op.m_term;
UChar ch = term->patternCharacter;
- const RegisterID character = regT0;
+ if ((ch > 0xff) && (m_charSize == Char8)) {
+ // We have a 16-bit pattern character and an 8-bit string - short circuit
+ op.m_jumps.append(jump());
+ return;
+ }
- if (nextOp.m_op == OpTerm) {
- PatternTerm* nextTerm = nextOp.m_term;
- if (nextTerm->type == PatternTerm::TypePatternCharacter
- && nextTerm->quantityType == QuantifierFixedCount
- && nextTerm->quantityCount == 1
- && nextTerm->inputPosition == (term->inputPosition + 1)) {
+ const RegisterID character = regT0;
+ int maxCharactersAtOnce = m_charSize == Char8 ? 4 : 2;
+ unsigned ignoreCaseMask = 0;
+#if CPU(BIG_ENDIAN)
+ int allCharacters = ch << (m_charSize == Char8 ? 24 : 16);
+#else
+ int allCharacters = ch;
+#endif
+ int numberCharacters;
+ int startTermPosition = term->inputPosition;
+
+ // For case-insensitive compares, non-ASCII characters that have different
+ // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
+
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch))
+#if CPU(BIG_ENDIAN)
+ ignoreCaseMask |= 32 << (m_charSize == Char8 ? 24 : 16);
+#else
+ ignoreCaseMask |= 32;
+#endif
- UChar ch2 = nextTerm->patternCharacter;
+ for (numberCharacters = 1; numberCharacters < maxCharactersAtOnce && nextOp->m_op == OpTerm; ++numberCharacters, nextOp = &m_ops[opIndex + numberCharacters]) {
+ PatternTerm* nextTerm = nextOp->m_term;
+
+ if (nextTerm->type != PatternTerm::TypePatternCharacter
+ || nextTerm->quantityType != QuantifierFixedCount
+ || nextTerm->quantityCount != 1
+ || nextTerm->inputPosition != (startTermPosition + numberCharacters))
+ break;
- int mask = 0;
- int chPair = ch | (ch2 << 16);
+ nextOp->m_isDeadCode = true;
- if (m_pattern.m_ignoreCase) {
- if (isASCIIAlpha(ch))
- mask |= 32;
- if (isASCIIAlpha(ch2))
- mask |= 32 << 16;
- }
+#if CPU(BIG_ENDIAN)
+ int shiftAmount = (m_charSize == Char8 ? 24 : 16) - ((m_charSize == Char8 ? 8 : 16) * numberCharacters);
+#else
+ int shiftAmount = (m_charSize == Char8 ? 8 : 16) * numberCharacters;
+#endif
- BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
- if (mask) {
- load32WithUnalignedHalfWords(address, character);
- or32(Imm32(mask), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
- } else
- op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
+ UChar currentCharacter = nextTerm->patternCharacter;
- nextOp.m_isDeadCode = true;
+ if ((currentCharacter > 0xff) && (m_charSize == Char8)) {
+ // We have a 16-bit pattern character and an 8-bit string - short circuit
+ op.m_jumps.append(jump());
return;
}
+
+ // For case-insensitive compares, non-ASCII characters that have different
+ // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(currentCharacter) || isCanonicallyUnique(currentCharacter));
+
+ allCharacters |= (currentCharacter << shiftAmount);
+
+ if ((m_pattern.m_ignoreCase) && (isASCIIAlpha(currentCharacter)))
+ ignoreCaseMask |= 32 << shiftAmount;
}
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(term->inputPosition - m_checked, character);
- or32(TrustedImm32(32), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ if (m_charSize == Char8) {
+ switch (numberCharacters) {
+ case 1:
+ op.m_jumps.append(jumpIfCharNotEquals(ch, startTermPosition - m_checked, character));
+ return;
+ case 2: {
+ BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+ load16Unaligned(address, character);
+ break;
+ }
+ case 3: {
+ BaseIndex highAddress(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+ load16Unaligned(highAddress, character);
+ if (ignoreCaseMask)
+ or32(Imm32(ignoreCaseMask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32((allCharacters & 0xffff) | ignoreCaseMask)));
+ op.m_jumps.append(jumpIfCharNotEquals(allCharacters >> 16, startTermPosition + 2 - m_checked, character));
+ return;
+ }
+ case 4: {
+ BaseIndex address(input, index, TimesOne, (startTermPosition - m_checked) * sizeof(LChar));
+ load32WithUnalignedHalfWords(address, character);
+ break;
+ }
+ }
} else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
+ switch (numberCharacters) {
+ case 1:
+ op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+ return;
+ case 2: {
+ BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
+ load32WithUnalignedHalfWords(address, character);
+ break;
+ }
+ }
}
+
+ if (ignoreCaseMask)
+ or32(Imm32(ignoreCaseMask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(allCharacters | ignoreCaseMask)));
+ return;
}
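// A worked example, not part of this patch, of the fusion above, assuming a
// little-endian target and an 8-bit string: two adjacent fixed-count pattern
// characters, "ab", matched case-insensitively, reduce to one 16-bit compare.
// The function is hypothetical.
inline bool matchABIgnoringCase(const unsigned char* p)
{
    unsigned chars = p[0] | (p[1] << 8); // as load16Unaligned() above
    chars |= 0x2020;                     // ignoreCaseMask: both characters are ASCII alpha
    return chars == (0x6261 | 0x2020);   // allCharacters = 'a' | ('b' << 8)
}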
void backtrackPatternCharacterOnce(size_t opIndex)
{
sub32(Imm32(term->quantityCount.unsafeGet()), countRegister);
Label loop(this);
- BaseIndex address(input, countRegister, TimesTwo, ((term->inputPosition - m_checked + Checked<int>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet());
+ BaseIndex address(input, countRegister, m_charScale, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(m_charSize == Char8 ? sizeof(char) : sizeof(UChar))).unsafeGet());
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ if (m_charSize == Char8)
+ load8(address, character);
+ else
load16(address, character);
- or32(TrustedImm32(32), character);
- op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
+
+ // For case-insensitive compares, non-ASCII characters that have different
+ // upper & lower case representations are converted to a character class.
+ ASSERT(!m_pattern.m_ignoreCase || isASCIIAlpha(ch) || isCanonicallyUnique(ch));
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ or32(TrustedImm32(0x20), character);
+ ch |= 0x20;
}
+
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(ch)));
add32(TrustedImm32(1), countRegister);
branch32(NotEqual, countRegister, index).linkTo(loop, this);
}
move(TrustedImm32(0), countRegister);
- JumpList failures;
- Label loop(this);
- failures.append(atEndOfInput());
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(term->inputPosition - m_checked, character);
- or32(TrustedImm32(32), character);
- failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
- }
-
- add32(TrustedImm32(1), countRegister);
- add32(TrustedImm32(1), index);
- if (term->quantityCount == quantifyInfinite)
- jump(loop);
- else
- branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
+ // Unless we have a 16-bit pattern character and an 8-bit string - short circuit
+ if (!((ch > 0xff) && (m_charSize == Char8))) {
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+ failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
+
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
+ if (term->quantityCount == quantifyInfinite)
+ jump(loop);
+ else
+ branch32(NotEqual, countRegister, Imm32(term->quantityCount.unsafeGet())).linkTo(loop, this);
- failures.link(this);
+ failures.link(this);
+ }
op.m_reentry = label();
storeToFrame(countRegister, term->frameLocation);
-
}
void backtrackPatternCharacterGreedy(size_t opIndex)
{
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
- JumpList nonGreedyFailures;
-
m_backtrackingState.link(this);
loadFromFrame(term->frameLocation, countRegister);
- nonGreedyFailures.append(atEndOfInput());
- if (term->quantityCount != quantifyInfinite)
- nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
- if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(term->inputPosition - m_checked, character);
- or32(TrustedImm32(32), character);
- nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
- } else {
- ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
- }
+ // Unless we have a 16-bit pattern character and an 8-bit string - short circuit
+ if (!((ch > 0xff) && (m_charSize == Char8))) {
+ JumpList nonGreedyFailures;
+ nonGreedyFailures.append(atEndOfInput());
+ if (term->quantityCount != quantifyInfinite)
+ nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount.unsafeGet())));
+ nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked, character));
- add32(TrustedImm32(1), countRegister);
- add32(TrustedImm32(1), index);
+ add32(TrustedImm32(1), countRegister);
+ add32(TrustedImm32(1), index);
- jump(op.m_reentry);
+ jump(op.m_reentry);
+ nonGreedyFailures.link(this);
+ }
- nonGreedyFailures.link(this);
sub32(countRegister, index);
m_backtrackingState.fallthrough();
}
Label loop(this);
JumpList matchDest;
- load16(BaseIndex(input, countRegister, TimesTwo, ((term->inputPosition - m_checked + Checked<int>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, countRegister, TimesOne, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(char))).unsafeGet()), character);
+ else
+ load16(BaseIndex(input, countRegister, TimesTwo, (Checked<int>(term->inputPosition - m_checked + Checked<int64_t>(term->quantityCount)) * static_cast<int>(sizeof(UChar))).unsafeGet()), character);
matchCharacterClass(character, matchDest, term->characterClass);
if (term->invert())
m_backtrackingState.link(this);
- Label backtrackBegin(this);
loadFromFrame(term->frameLocation, countRegister);
nonGreedyFailures.append(atEndOfInput());
JumpList saveStartIndex;
JumpList foundEndingNewLine;
- if (m_pattern.m_body->m_hasFixedSize) {
- move(index, matchPos);
- sub32(Imm32(m_checked), matchPos);
- } else
- load32(Address(output), matchPos);
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ getMatchStart(matchPos);
saveStartIndex.append(branchTest32(Zero, matchPos));
Label findBOLLoop(this);
sub32(TrustedImm32(1), matchPos);
- load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, matchPos, TimesOne, 0), character);
+ else
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
matchCharacterClass(character, foundBeginningNewLine, m_pattern.newlineCharacterClass());
branchTest32(NonZero, matchPos).linkTo(findBOLLoop, this);
saveStartIndex.append(jump());
if (!m_pattern.m_multiline && term->anchors.bolAnchor)
op.m_jumps.append(branchTest32(NonZero, matchPos));
- store32(matchPos, Address(output));
+ ASSERT(!m_pattern.m_body->m_hasFixedSize);
+ setMatchStart(matchPos);
move(index, matchPos);
Label findEOLLoop(this);
foundEndingNewLine.append(branch32(Equal, matchPos, length));
- load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
+ if (m_charSize == Char8)
+ load8(BaseIndex(input, matchPos, TimesOne, 0), character);
+ else
+ load16(BaseIndex(input, matchPos, TimesTwo, 0), character);
matchCharacterClass(character, foundEndingNewLine, m_pattern.newlineCharacterClass());
add32(TrustedImm32(1), matchPos);
jump(findEOLLoop);
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
- ASSERT_NOT_REACHED();
+ RELEASE_ASSERT_NOT_REACHED();
case PatternTerm::TypeBackReference:
m_shouldFallBack = true;
break;
case PatternTerm::TypeParenthesesSubpattern:
case PatternTerm::TypeParentheticalAssertion:
- ASSERT_NOT_REACHED();
+ RELEASE_ASSERT_NOT_REACHED();
case PatternTerm::TypeDotStarEnclosure:
backtrackDotStarEnclosure(opIndex);
// If we get here, the prior alternative matched - return success.
// Adjust the stack pointer to remove the pattern's frame.
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+ removeCallFrame();
// Load appropriate values into the return register and the first output
// slot, and return. In the case of a pattern with a fixed size, we will
move(index, returnRegister);
if (priorAlternative->m_minimumSize)
sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
- store32(returnRegister, output);
+ if (compileMode == IncludeSubpatterns)
+ store32(returnRegister, output);
} else
- load32(Address(output), returnRegister);
- store32(index, Address(output, 4));
+ getMatchStart(returnRegister);
+ if (compileMode == IncludeSubpatterns)
+ store32(index, Address(output, 4));
+ move(index, returnRegister2);
+
generateReturn();
// This is the divide between the tail of the prior alternative, above, and
sub32(Imm32(priorAlternative->m_minimumSize - alternative->m_minimumSize), index);
} else if (op.m_nextOp == notFound) {
// This is the reentry point for the End of 'once through' alternatives,
- // jumped to when the las alternative fails to match.
+ // jumped to when the last alternative fails to match.
op.m_reentry = label();
sub32(Imm32(priorAlternative->m_minimumSize), index);
}
op.m_checkAdjust -= disjunction->m_minimumSize;
if (op.m_checkAdjust)
op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
-
+
m_checked += op.m_checkAdjust;
break;
}
op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
}
+ if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
+ // If the previous alternative matched without consuming characters then
+ // backtrack to try to match while consuming some input.
+ op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ }
+
// If we reach here then the last alternative has matched - jump to the
// End node, to skip over any further alternatives.
//
op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
}
+ if (term->quantityType != QuantifierFixedCount && !m_ops[op.m_previousOp].m_alternative->m_minimumSize) {
+ // If the previous alternative matched without consuming characters then
+ // backtrack to try to match while consuming some input.
+ op.m_zeroLengthMatch = branch32(Equal, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ }
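// A sketch, not part of this patch, of the hazard the check above prevents,
// with a hypothetical helper: a quantified alternative that can match the
// empty string, e.g. the empty branch of /(?:a|)+/, must backtrack rather
// than loop when an iteration matched without consuming any input.
inline bool matchedWithoutConsuming(unsigned index, unsigned indexSavedAtIterationStart)
{
    return index == indexSavedAtIterationStart; // true => take the backtrack path
}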
+
// If this set of alternatives contains more than one alternative,
// then the Next nodes will have planted jumps to the End, and added
// them to this node's m_jumps list.
// FIXME: could avoid offsetting this value in JIT code, apply
// offsets only afterwards, at the point the results array is
// being accessed.
- if (term->capture()) {
- int offsetId = term->parentheses.subpatternId << 1;
+ if (term->capture() && compileMode == IncludeSubpatterns) {
int inputOffset = term->inputPosition - m_checked;
if (term->quantityType == QuantifierFixedCount)
inputOffset -= term->parentheses.disjunction->m_minimumSize;
if (inputOffset) {
move(index, indexTemporary);
add32(Imm32(inputOffset), indexTemporary);
- store32(indexTemporary, Address(output, offsetId * sizeof(int)));
+ setSubpatternStart(indexTemporary, term->parentheses.subpatternId);
} else
- store32(index, Address(output, offsetId * sizeof(int)));
+ setSubpatternStart(index, term->parentheses.subpatternId);
}
break;
}
case OpParenthesesSubpatternOnceEnd: {
PatternTerm* term = op.m_term;
- unsigned parenthesesFrameLocation = term->frameLocation;
const RegisterID indexTemporary = regT0;
ASSERT(term->quantityCount == 1);
- // For Greedy/NonGreedy quantified parentheses, we must reject zero length
- // matches. If the minimum size is know to be non-zero we need not check.
- if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
- op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
+ // Runtime ASSERT to make sure that the nested alternative handled the
+ // "no input consumed" check.
+ if (!ASSERT_DISABLED && term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize) {
+ Jump pastBreakpoint;
+ pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ abortWithReason(YARRNoInputConsumed);
+ pastBreakpoint.link(this);
+ }
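// The instructions emitted above amount to the following runtime assert,
// sketched here with a hypothetical helper, not part of this patch: branch
// past a trap when input was consumed, otherwise abort.
inline void assertInputConsumed(unsigned index, unsigned savedIndex)
{
    if (index == savedIndex) // i.e. pastBreakpoint was NOT taken
        CRASH();             // stands in for abortWithReason(YARRNoInputConsumed)
}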
// If the parentheses are capturing, store the ending index value to the
// captures array, offsetting as necessary.
// FIXME: could avoid offsetting this value in JIT code, apply
// offsets only afterwards, at the point the results array is
// being accessed.
- if (term->capture()) {
- int offsetId = (term->parentheses.subpatternId << 1) + 1;
+ if (term->capture() && compileMode == IncludeSubpatterns) {
int inputOffset = term->inputPosition - m_checked;
if (inputOffset) {
move(index, indexTemporary);
add32(Imm32(inputOffset), indexTemporary);
- store32(indexTemporary, Address(output, offsetId * sizeof(int)));
+ setSubpatternEnd(indexTemporary, term->parentheses.subpatternId);
} else
- store32(index, Address(output, offsetId * sizeof(int)));
+ setSubpatternEnd(index, term->parentheses.subpatternId);
}
// If the parentheses are quantified Greedy then add a label to jump back
break;
}
case OpParenthesesSubpatternTerminalEnd: {
- PatternTerm* term = op.m_term;
-
- // Check for zero length matches - if the match is non-zero, then we
- // can accept it & loop back up to the head of the subpattern.
YarrOp& beginOp = m_ops[op.m_previousOp];
- branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
+ if (!ASSERT_DISABLED) {
+ PatternTerm* term = op.m_term;
+
+ // Runtime ASSERT to make sure that the nested alternative handled the
+ // "no input consumed" check.
+ Jump pastBreakpoint;
+ pastBreakpoint = branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)));
+ abortWithReason(YARRNoInputConsumed);
+ pastBreakpoint.link(this);
+ }
- // Reject the match - backtrack back into the subpattern.
- op.m_jumps.append(jump());
+ // We know that the match is non-zero, so we can accept it and
+ // loop back up to the head of the subpattern.
+ jump(beginOp.m_reentry);
// This is the entry point to jump to when we stop matching - we will
// do so once the subpattern cannot match any more.
}
case OpMatchFailed:
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
- move(TrustedImm32(-1), returnRegister);
+ removeCallFrame();
+ move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
+ move(TrustedImm32(0), returnRegister2);
generateReturn();
break;
}
// If the pattern size is not fixed, then store the start index, for use if we match.
if (!m_pattern.m_body->m_hasFixedSize) {
if (alternative->m_minimumSize == 1)
- store32(index, Address(output));
+ setMatchStart(index);
else {
move(index, regT0);
if (alternative->m_minimumSize)
sub32(Imm32(alternative->m_minimumSize - 1), regT0);
else
- add32(Imm32(1), regT0);
- store32(regT0, Address(output));
+ add32(TrustedImm32(1), regT0);
+ setMatchStart(regT0);
}
}
// disjunction is 0, e.g. /a*|b/).
if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
// index is already incremented by 1, so just store it now!
- store32(index, Address(output));
+ setMatchStart(index);
needsToUpdateMatchStart = false;
}
if (alternative->m_minimumSize == m_pattern.m_body->m_minimumSize) {
// If the last alternative had the same minimum size as the disjunction,
// just simply increment input pos by 1, no adjustment based on minimum size.
- add32(Imm32(1), index);
+ add32(TrustedImm32(1), index);
} else {
// If the minimum for the last alternative was one greater than that
// for the disjunction, we're already progressed by 1, nothing to do!
if (needsToUpdateMatchStart) {
if (!m_pattern.m_body->m_minimumSize)
- store32(index, Address(output));
+ setMatchStart(index);
else {
move(index, regT0);
sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
- store32(regT0, Address(output));
+ setMatchStart(regT0);
}
}
// run any matches, and need to return a failure state from JIT code.
matchFailed.link(this);
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
- move(TrustedImm32(-1), returnRegister);
+ removeCallFrame();
+ move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
+ move(TrustedImm32(0), returnRegister2);
generateReturn();
break;
}
// An alternative that is not the last should jump to its successor.
jump(nextOp.m_reentry);
} else if (!isBegin) {
- // The last of more than one alternatives must jump back to the begnning.
+ // The last of more than one alternatives must jump back to the beginning.
nextOp.m_jumps.append(jump());
} else {
// A single alternative on its own can fall through.
// An alternative that is not the last should jump to its successor.
m_backtrackingState.linkTo(nextOp.m_reentry, this);
} else if (!isBegin) {
- // The last of more than one alternatives must jump back to the begnning.
+ // The last of more than one alternatives must jump back to the beginning.
m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
}
// In the case of a single alternative on its own do nothing - it can fall through.
}
+ // If there is a backtrack jump from a zero-length match, link it here.
+ if (op.m_zeroLengthMatch.isSet())
+ m_backtrackingState.append(op.m_zeroLengthMatch);
+
// At this point we've handled the backtracking back into this node.
// Now link any backtracks that need to jump to here.
case OpNestedAlternativeEnd: {
PatternTerm* term = op.m_term;
+ // If there is a backtrack jump from a zero-length match, link it here.
+ if (op.m_zeroLengthMatch.isSet())
+ m_backtrackingState.append(op.m_zeroLengthMatch);
+
// If we backtrack into the end of a simple subpattern do nothing;
// just continue through into the last alternative. If we backtrack
// into the end of a non-simple set of alternatives we need to jump
ASSERT(term->quantityCount == 1);
// We only need to backtrack to this point if capturing or greedy.
- if (term->capture() || term->quantityType == QuantifierGreedy) {
+ if ((term->capture() && compileMode == IncludeSubpatterns) || term->quantityType == QuantifierGreedy) {
m_backtrackingState.link(this);
// If capturing, clear the capture (we only need to reset start).
- if (term->capture())
- store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
+ if (term->capture() && compileMode == IncludeSubpatterns)
+ clearSubpatternStart(term->parentheses.subpatternId);
// If Greedy, jump to the end.
if (term->quantityType == QuantifierGreedy) {
m_ops.append(alternativeBeginOpCode);
m_ops.last().m_previousOp = notFound;
m_ops.last().m_term = term;
- Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
+ Vector<OwnPtr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
for (unsigned i = 0; i < alternatives.size(); ++i) {
size_t lastOpIndex = m_ops.size() - 1;
- PatternAlternative* nestedAlternative = alternatives[i];
+ PatternAlternative* nestedAlternative = alternatives[i].get();
opCompileAlternative(nestedAlternative);
size_t thisOpIndex = m_ops.size();
m_ops.append(OpSimpleNestedAlternativeBegin);
m_ops.last().m_previousOp = notFound;
m_ops.last().m_term = term;
- Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
+ Vector<OwnPtr<PatternAlternative>>& alternatives = term->parentheses.disjunction->m_alternatives;
for (unsigned i = 0; i < alternatives.size(); ++i) {
size_t lastOpIndex = m_ops.size() - 1;
- PatternAlternative* nestedAlternative = alternatives[i];
+ PatternAlternative* nestedAlternative = alternatives[i].get();
opCompileAlternative(nestedAlternative);
size_t thisOpIndex = m_ops.size();
// to return the failing result.
void opCompileBody(PatternDisjunction* disjunction)
{
- Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
+ Vector<OwnPtr<PatternAlternative>>& alternatives = disjunction->m_alternatives;
size_t currentAlternativeIndex = 0;
// Emit the 'once through' alternatives.
do {
size_t lastOpIndex = m_ops.size() - 1;
- PatternAlternative* alternative = alternatives[currentAlternativeIndex];
+ PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
opCompileAlternative(alternative);
size_t thisOpIndex = m_ops.size();
m_ops.last().m_previousOp = notFound;
do {
size_t lastOpIndex = m_ops.size() - 1;
- PatternAlternative* alternative = alternatives[currentAlternativeIndex];
+ PatternAlternative* alternative = alternatives[currentAlternativeIndex].get();
ASSERT(!alternative->onceThrough());
opCompileAlternative(alternative);
push(X86Registers::ebp);
move(stackPointerRegister, X86Registers::ebp);
push(X86Registers::ebx);
+ // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
+ zeroExtend32ToPtr(index, index);
+ zeroExtend32ToPtr(length, length);
+#if OS(WINDOWS)
+ if (compileMode == IncludeSubpatterns)
+ loadPtr(Address(X86Registers::ebp, 6 * sizeof(void*)), output);
+#endif
#elif CPU(X86)
push(X86Registers::ebp);
move(stackPointerRegister, X86Registers::ebp);
loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), input);
loadPtr(Address(X86Registers::ebp, 3 * sizeof(void*)), index);
loadPtr(Address(X86Registers::ebp, 4 * sizeof(void*)), length);
- loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
+ if (compileMode == IncludeSubpatterns)
+ loadPtr(Address(X86Registers::ebp, 5 * sizeof(void*)), output);
#else
- loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
+ if (compileMode == IncludeSubpatterns)
+ loadPtr(Address(X86Registers::ebp, 2 * sizeof(void*)), output);
#endif
+#elif CPU(ARM64)
+ // The ABI doesn't guarantee the upper bits are zero on unsigned arguments, so clear them ourselves.
+ zeroExtend32ToPtr(index, index);
+ zeroExtend32ToPtr(length, length);
#elif CPU(ARM)
push(ARMRegisters::r4);
push(ARMRegisters::r5);
push(ARMRegisters::r6);
-#if CPU(ARM_TRADITIONAL)
- push(ARMRegisters::r8); // scratch register
-#endif
- move(ARMRegisters::r3, output);
#elif CPU(SH4)
push(SH4Registers::r11);
push(SH4Registers::r13);
void generateReturn()
{
#if CPU(X86_64)
+#if OS(WINDOWS)
+ // Store the return value in the allocated space pointed to by rcx.
+ store64(returnRegister, Address(X86Registers::ecx));
+ store64(returnRegister2, Address(X86Registers::ecx, sizeof(void*)));
+ move(X86Registers::ecx, returnRegister);
+#endif
pop(X86Registers::ebx);
pop(X86Registers::ebp);
#elif CPU(X86)
pop(X86Registers::ebx);
pop(X86Registers::ebp);
#elif CPU(ARM)
-#if CPU(ARM_TRADITIONAL)
- pop(ARMRegisters::r8); // scratch register
-#endif
pop(ARMRegisters::r6);
pop(ARMRegisters::r5);
pop(ARMRegisters::r4);
}
public:
- YarrGenerator(YarrPattern& pattern)
+ YarrGenerator(YarrPattern& pattern, YarrCharSize charSize)
: m_pattern(pattern)
+ , m_charSize(charSize)
+ , m_charScale(m_charSize == Char8 ? TimesOne : TimesTwo)
, m_shouldFallBack(false)
, m_checked(0)
{
}
- void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
+ void compile(VM* vm, YarrCodeBlock& jitObject)
{
generateEnter();
+ Jump hasInput = checkInput();
+ move(TrustedImmPtr((void*)WTF::notFound), returnRegister);
+ move(TrustedImm32(0), returnRegister2);
+ generateReturn();
+ hasInput.link(this);
+
+ if (compileMode == IncludeSubpatterns) {
+ for (unsigned i = 0; i < m_pattern.m_numSubpatterns + 1; ++i)
+ store32(TrustedImm32(-1), Address(output, (i << 1) * sizeof(int)));
+ }
+
if (!m_pattern.m_body->m_hasFixedSize)
- store32(index, Address(output));
+ setMatchStart(index);
- if (m_pattern.m_body->m_callFrameSize)
- subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+ initCallFrame();
// Compile the pattern to the internal 'YarrOp' representation.
opCompileBody(m_pattern.m_body);
backtrack();
// Link & finalize the code.
- LinkBuffer linkBuffer(*globalData, this, globalData->regexAllocator);
+ LinkBuffer linkBuffer(*vm, *this, REGEXP_CODE_ID);
m_backtrackingState.linkDataLabels(linkBuffer);
- jitObject.set(linkBuffer.finalizeCode());
+
+ if (compileMode == MatchOnly) {
+ if (m_charSize == Char8)
+ jitObject.set8BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 8-bit regular expression")));
+ else
+ jitObject.set16BitCodeMatchOnly(FINALIZE_CODE(linkBuffer, ("Match-only 16-bit regular expression")));
+ } else {
+ if (m_charSize == Char8)
+ jitObject.set8BitCode(FINALIZE_CODE(linkBuffer, ("8-bit regular expression")));
+ else
+ jitObject.set16BitCode(FINALIZE_CODE(linkBuffer, ("16-bit regular expression")));
+ }
jitObject.setFallBack(m_shouldFallBack);
}
private:
YarrPattern& m_pattern;
+ YarrCharSize m_charSize;
+
+ Scale m_charScale;
+
// Used to detect regular expression constructs that are not currently
// supported in the JIT; fall back to the interpreter when this is detected.
bool m_shouldFallBack;
BacktrackingState m_backtrackingState;
};
-void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)
-{
- YarrGenerator(pattern).compile(globalData, jitObject);
-}
-
-int execute(YarrCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output)
+void jitCompile(YarrPattern& pattern, YarrCharSize charSize, VM* vm, YarrCodeBlock& jitObject, YarrJITCompileMode mode)
{
- return jitObject.execute(input, start, length, output);
+ if (mode == MatchOnly)
+ YarrGenerator<MatchOnly>(pattern, charSize).compile(vm, jitObject);
+ else
+ YarrGenerator<IncludeSubpatterns>(pattern, charSize).compile(vm, jitObject);
}
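// A hypothetical call site, not part of this patch, based only on the
// signature above; a YarrCodeBlock holds separate 8-bit and 16-bit code,
// each with or without subpattern capture.
static void compileForBothCharSizes(YarrPattern& pattern, VM* vm, YarrCodeBlock& jitObject)
{
    jitCompile(pattern, Char8, vm, jitObject, MatchOnly);           // fills set8BitCodeMatchOnly()
    jitCompile(pattern, Char16, vm, jitObject, IncludeSubpatterns); // fills set16BitCode()
}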
}}