2 * Copyright (C) 2008 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "CharacterClassConstructor.h"
32 #include "Interpreter.h"
33 #include "WRECFunctors.h"
34 #include "WRECParser.h"
35 #include "pcre_internal.h"
39 namespace JSC
{ namespace WREC
{
41 void Generator::generateEnter()
44 // On x86-64 edi and esi are caller preserved, so nothing to do here.
45 // The four arguments have been passed in the registers %rdi, %rsi,
46 // %rdx, %rcx - shuffle these into the expected locations.
47 move(X86::edi
, input
); // (arg 1) edi -> eax
48 move(X86::ecx
, output
); // (arg 4) ecx -> edi
49 move(X86::edx
, length
); // (arg 3) edx -> ecx
50 move(X86::esi
, index
); // (arg 2) esi -> edx
53 // On x86 edi & esi are callee preserved registers.
58 // Move the arguments into registers.
64 // On gcc the function is regparm(3), so the input, index, and length registers
65 // (eax, edx, and ecx respectively) already contain the appropriate values.
66 // Just load the fourth argument (output) into edi
72 // ASSERT that the output register is not null.
73 Jump outputNotNull
= jnzPtr(output
);
75 outputNotNull
.link(this);
79 void Generator::generateReturnSuccess()
82 pop(X86::eax
); // match begin
83 store32(X86::eax
, output
);
84 store32(index
, Address(output
, 4)); // match end
86 // Restore callee save registers.
94 void Generator::generateSaveIndex()
99 void Generator::generateIncrementIndex(Jump
* failure
)
103 *failure
= je32(length
, index
);
104 add32(Imm32(1), index
);
108 void Generator::generateLoadCharacter(JumpList
& failures
)
110 failures
.append(je32(length
, index
));
111 load16(BaseIndex(input
, index
, TimesTwo
), character
);
114 // For the sake of end-of-line assertions, we treat one-past-the-end as if it
115 // were part of the input string.
116 void Generator::generateJumpIfNotEndOfInput(Label target
)
118 jle32(index
, length
, target
);
121 void Generator::generateReturnFailure()
124 move(Imm32(-1), X86::eax
);
125 #if !PLATFORM(X86_64)
132 void Generator::generateBacktrack1()
134 sub32(Imm32(1), index
);
137 void Generator::generateBacktrackBackreference(unsigned subpatternId
)
139 sub32(Address(output
, (2 * subpatternId
+ 1) * sizeof(int)), index
);
140 add32(Address(output
, (2 * subpatternId
) * sizeof(int)), index
);
143 void Generator::generateBackreferenceQuantifier(JumpList
& failures
, Quantifier::Type quantifierType
, unsigned subpatternId
, unsigned min
, unsigned max
)
145 GenerateBackreferenceFunctor
functor(subpatternId
);
147 load32(Address(output
, (2 * subpatternId
) * sizeof(int)), character
);
148 Jump skipIfEmpty
= je32(Address(output
, ((2 * subpatternId
) + 1) * sizeof(int)), character
);
150 ASSERT(quantifierType
== Quantifier::Greedy
|| quantifierType
== Quantifier::NonGreedy
);
151 if (quantifierType
== Quantifier::Greedy
)
152 generateGreedyQuantifier(failures
, functor
, min
, max
);
154 generateNonGreedyQuantifier(failures
, functor
, min
, max
);
156 skipIfEmpty
.link(this);
159 void Generator::generateNonGreedyQuantifier(JumpList
& failures
, GenerateAtomFunctor
& functor
, unsigned min
, unsigned max
)
161 JumpList atomFailedList
;
162 JumpList alternativeFailedList
;
164 // (0) Setup: Save, then init repeatCount.
166 move(Imm32(0), repeatCount
);
169 // (4) Quantifier failed: No more atom reading possible.
170 Label
quantifierFailed(this);
172 failures
.append(jump());
174 // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again.
175 Label
alternativeFailed(this);
177 if (max
!= Quantifier::Infinity
)
178 je32(repeatCount
, Imm32(max
), quantifierFailed
);
183 Label
readAtom(this);
184 functor
.generateAtom(this, atomFailedList
);
185 atomFailedList
.linkTo(quantifierFailed
, this);
186 add32(Imm32(1), repeatCount
);
188 // (2) Keep reading if we're under the minimum.
190 jl32(repeatCount
, Imm32(min
), readAtom
);
192 // (3) Test the rest of the alternative.
196 m_parser
.parseAlternative(alternativeFailedList
);
197 alternativeFailedList
.linkTo(alternativeFailed
, this);
203 void Generator::generateGreedyQuantifier(JumpList
& failures
, GenerateAtomFunctor
& functor
, unsigned min
, unsigned max
)
208 JumpList doneReadingAtomsList
;
209 JumpList alternativeFailedList
;
211 // (0) Setup: Save, then init repeatCount.
213 move(Imm32(0), repeatCount
);
215 // (1) Greedily read as many copies of the atom as possible, then jump to (2).
216 Label
readAtom(this);
217 functor
.generateAtom(this, doneReadingAtomsList
);
218 add32(Imm32(1), repeatCount
);
219 if (max
== Quantifier::Infinity
)
222 doneReadingAtomsList
.append(jump());
224 jne32(repeatCount
, Imm32(max
), readAtom
);
225 doneReadingAtomsList
.append(jump());
228 // (5) Quantifier failed: No more backtracking possible.
229 Label
quantifierFailed(this);
231 failures
.append(jump());
233 // (4) Alternative failed: Backtrack, then fall through to (2) to try again.
234 Label
alternativeFailed(this);
236 functor
.backtrack(this);
237 sub32(Imm32(1), repeatCount
);
239 // (2) Verify that we have enough atoms.
240 doneReadingAtomsList
.link(this);
241 jl32(repeatCount
, Imm32(min
), quantifierFailed
);
243 // (3) Test the rest of the alternative.
245 m_parser
.parseAlternative(alternativeFailedList
);
246 alternativeFailedList
.linkTo(alternativeFailed
, this);
252 void Generator::generatePatternCharacterSequence(JumpList
& failures
, int* sequence
, size_t count
)
254 for (size_t i
= 0; i
< count
;) {
256 if (generatePatternCharacterPair(failures
, sequence
[i
], sequence
[i
+ 1])) {
262 generatePatternCharacter(failures
, sequence
[i
]);
267 bool Generator::generatePatternCharacterPair(JumpList
& failures
, int ch1
, int ch2
)
269 if (m_parser
.ignoreCase()) {
270 // Non-trivial case folding requires more than one test, so we can't
271 // test as a pair with an adjacent character.
272 if (!isASCII(ch1
) && Unicode::toLower(ch1
) != Unicode::toUpper(ch1
))
274 if (!isASCII(ch2
) && Unicode::toLower(ch2
) != Unicode::toUpper(ch2
))
278 // Optimistically consume 2 characters.
279 add32(Imm32(2), index
);
280 failures
.append(jg32(index
, length
));
282 // Load the characters we just consumed, offset -2 characters from index.
283 load32(BaseIndex(input
, index
, TimesTwo
, -2 * 2), character
);
285 if (m_parser
.ignoreCase()) {
286 // Convert ASCII alphabet characters to upper case before testing for
287 // equality. (ASCII non-alphabet characters don't require upper-casing
288 // because they have no uppercase equivalents. Unicode characters don't
289 // require upper-casing because we only handle Unicode characters whose
290 // upper and lower cases are equal.)
292 if (isASCIIAlpha(ch1
)) {
298 if (isASCIIAlpha(ch2
)) {
303 int mask
= ch1Mask
| (ch2Mask
<< 16);
305 or32(Imm32(mask
), character
);
307 int pair
= ch1
| (ch2
<< 16);
309 failures
.append(jne32(character
, Imm32(pair
)));
313 void Generator::generatePatternCharacter(JumpList
& failures
, int ch
)
315 generateLoadCharacter(failures
);
317 // used for unicode case insensitive
318 bool hasUpper
= false;
321 // if case insensitive match
322 if (m_parser
.ignoreCase()) {
325 // check for ascii case sensitive characters
326 if (isASCIIAlpha(ch
)) {
327 or32(Imm32(32), character
);
329 } else if (!isASCII(ch
) && ((lower
= Unicode::toLower(ch
)) != (upper
= Unicode::toUpper(ch
)))) {
330 // handle unicode case sentitive characters - branch to success on upper
331 isUpper
= je32(character
, Imm32(upper
));
337 // checks for ch, or lower case version of ch, if insensitive
338 failures
.append(jne32(character
, Imm32((unsigned short)ch
)));
340 if (m_parser
.ignoreCase() && hasUpper
) {
341 // for unicode case insensitive matches, branch here if upper matches.
345 // on success consume the char
346 add32(Imm32(1), index
);
349 void Generator::generateCharacterClassInvertedRange(JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
352 // pick which range we're going to generate
353 int which
= count
>> 1;
354 char lo
= ranges
[which
].begin
;
355 char hi
= ranges
[which
].end
;
357 // check if there are any ranges or matches below lo. If not, just jl to failure -
358 // if there is anything else to check, check that first, if it falls through jmp to failure.
359 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
360 Jump loOrAbove
= jge32(character
, Imm32((unsigned short)lo
));
362 // generate code for all ranges before this one
364 generateCharacterClassInvertedRange(failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
366 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
367 matchDest
.append(je32(character
, Imm32((unsigned short)matches
[*matchIndex
])));
370 failures
.append(jump());
372 loOrAbove
.link(this);
374 Jump loOrAbove
= jge32(character
, Imm32((unsigned short)lo
));
376 generateCharacterClassInvertedRange(failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
377 failures
.append(jump());
379 loOrAbove
.link(this);
381 failures
.append(jl32(character
, Imm32((unsigned short)lo
)));
383 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
386 matchDest
.append(jle32(character
, Imm32((unsigned short)hi
)));
387 // fall through to here, the value is above hi.
389 // shuffle along & loop around if there are any more matches to handle.
390 unsigned next
= which
+ 1;
396 void Generator::generateCharacterClassInverted(JumpList
& matchDest
, const CharacterClass
& charClass
)
399 if (charClass
.numMatchesUnicode
|| charClass
.numRangesUnicode
) {
400 Jump isAscii
= jle32(character
, Imm32(0x7f));
402 if (charClass
.numMatchesUnicode
) {
403 for (unsigned i
= 0; i
< charClass
.numMatchesUnicode
; ++i
) {
404 UChar ch
= charClass
.matchesUnicode
[i
];
405 matchDest
.append(je32(character
, Imm32(ch
)));
409 if (charClass
.numRangesUnicode
) {
410 for (unsigned i
= 0; i
< charClass
.numRangesUnicode
; ++i
) {
411 UChar lo
= charClass
.rangesUnicode
[i
].begin
;
412 UChar hi
= charClass
.rangesUnicode
[i
].end
;
414 Jump below
= jl32(character
, Imm32(lo
));
415 matchDest
.append(jle32(character
, Imm32(hi
)));
420 unicodeFail
= jump();
424 if (charClass
.numRanges
) {
425 unsigned matchIndex
= 0;
427 generateCharacterClassInvertedRange(failures
, matchDest
, charClass
.ranges
, charClass
.numRanges
, &matchIndex
, charClass
.matches
, charClass
.numMatches
);
428 while (matchIndex
< charClass
.numMatches
)
429 matchDest
.append(je32(character
, Imm32((unsigned short)charClass
.matches
[matchIndex
++])));
432 } else if (charClass
.numMatches
) {
433 // optimization: gather 'a','A' etc back together, can mask & test once.
434 Vector
<char> matchesAZaz
;
436 for (unsigned i
= 0; i
< charClass
.numMatches
; ++i
) {
437 char ch
= charClass
.matches
[i
];
438 if (m_parser
.ignoreCase()) {
439 if (isASCIILower(ch
)) {
440 matchesAZaz
.append(ch
);
443 if (isASCIIUpper(ch
))
446 matchDest
.append(je32(character
, Imm32((unsigned short)ch
)));
449 if (unsigned countAZaz
= matchesAZaz
.size()) {
450 or32(Imm32(32), character
);
451 for (unsigned i
= 0; i
< countAZaz
; ++i
)
452 matchDest
.append(je32(character
, Imm32(matchesAZaz
[i
])));
456 if (charClass
.numMatchesUnicode
|| charClass
.numRangesUnicode
)
457 unicodeFail
.link(this);
460 void Generator::generateCharacterClass(JumpList
& failures
, const CharacterClass
& charClass
, bool invert
)
462 generateLoadCharacter(failures
);
465 generateCharacterClassInverted(failures
, charClass
);
468 generateCharacterClassInverted(successes
, charClass
);
469 failures
.append(jump());
470 successes
.link(this);
473 add32(Imm32(1), index
);
476 void Generator::generateParenthesesAssertion(JumpList
& failures
)
478 JumpList disjunctionFailed
;
481 m_parser
.parseDisjunction(disjunctionFailed
);
482 Jump success
= jump();
484 disjunctionFailed
.link(this);
486 failures
.append(jump());
492 void Generator::generateParenthesesInvertedAssertion(JumpList
& failures
)
494 JumpList disjunctionFailed
;
497 m_parser
.parseDisjunction(disjunctionFailed
);
499 // If the disjunction succeeded, the inverted assertion failed.
501 failures
.append(jump());
503 // If the disjunction failed, the inverted assertion succeeded.
504 disjunctionFailed
.link(this);
508 void Generator::generateParenthesesNonGreedy(JumpList
& failures
, Label start
, Jump success
, Jump fail
)
512 failures
.append(fail
);
515 Generator::Jump
Generator::generateParenthesesResetTrampoline(JumpList
& newFailures
, unsigned subpatternIdBefore
, unsigned subpatternIdAfter
)
518 newFailures
.link(this);
519 for (unsigned i
= subpatternIdBefore
+ 1; i
<= subpatternIdAfter
; ++i
) {
520 store32(Imm32(-1), Address(output
, (2 * i
) * sizeof(int)));
521 store32(Imm32(-1), Address(output
, (2 * i
+ 1) * sizeof(int)));
524 Jump newFailJump
= jump();
530 void Generator::generateAssertionBOL(JumpList
& failures
)
532 if (m_parser
.multiline()) {
533 JumpList previousIsNewline
;
535 // begin of input == success
536 previousIsNewline
.append(je32(index
, Imm32(0)));
538 // now check prev char against newline characters.
539 load16(BaseIndex(input
, index
, TimesTwo
, -2), character
);
540 generateCharacterClassInverted(previousIsNewline
, CharacterClass::newline());
542 failures
.append(jump());
544 previousIsNewline
.link(this);
546 failures
.append(jne32(index
, Imm32(0)));
549 void Generator::generateAssertionEOL(JumpList
& failures
)
551 if (m_parser
.multiline()) {
552 JumpList nextIsNewline
;
554 generateLoadCharacter(nextIsNewline
); // end of input == success
555 generateCharacterClassInverted(nextIsNewline
, CharacterClass::newline());
556 failures
.append(jump());
557 nextIsNewline
.link(this);
559 failures
.append(jne32(length
, index
));
563 void Generator::generateAssertionWordBoundary(JumpList
& failures
, bool invert
)
565 JumpList wordBoundary
;
566 JumpList notWordBoundary
;
568 // (1) Check if the previous value was a word char
570 // (1.1) check for begin of input
571 Jump atBegin
= je32(index
, Imm32(0));
572 // (1.2) load the last char, and chck if is word character
573 load16(BaseIndex(input
, index
, TimesTwo
, -2), character
);
574 JumpList previousIsWord
;
575 generateCharacterClassInverted(previousIsWord
, CharacterClass::wordchar());
576 // (1.3) if we get here, previous is not a word char
579 // (2) Handle situation where previous was NOT a \w
581 generateLoadCharacter(notWordBoundary
);
582 generateCharacterClassInverted(wordBoundary
, CharacterClass::wordchar());
583 // (2.1) If we get here, neither chars are word chars
584 notWordBoundary
.append(jump());
586 // (3) Handle situation where previous was a \w
588 // (3.0) link success in first match to here
589 previousIsWord
.link(this);
590 generateLoadCharacter(wordBoundary
);
591 generateCharacterClassInverted(notWordBoundary
, CharacterClass::wordchar());
592 // (3.1) If we get here, this is an end of a word, within the input.
594 // (4) Link everything up
597 // handle the fall through case
598 wordBoundary
.append(jump());
600 // looking for non word boundaries, so link boundary fails to here.
601 notWordBoundary
.link(this);
603 failures
.append(wordBoundary
);
605 // looking for word boundaries, so link successes here.
606 wordBoundary
.link(this);
608 failures
.append(notWordBoundary
);
612 void Generator::generateBackreference(JumpList
& failures
, unsigned subpatternId
)
617 // get the start pos of the backref into repeatCount (multipurpose!)
618 load32(Address(output
, (2 * subpatternId
) * sizeof(int)), repeatCount
);
620 Jump skipIncrement
= jump();
621 Label
topOfLoop(this);
623 add32(Imm32(1), index
);
624 add32(Imm32(1), repeatCount
);
625 skipIncrement
.link(this);
627 // check if we're at the end of backref (if we are, success!)
628 Jump endOfBackRef
= je32(Address(output
, ((2 * subpatternId
) + 1) * sizeof(int)), repeatCount
);
630 load16(BaseIndex(input
, repeatCount
, MacroAssembler::TimesTwo
), character
);
632 // check if we've run out of input (this would be a can o'fail)
633 Jump endOfInput
= je32(length
, index
);
635 je16(character
, BaseIndex(input
, index
, TimesTwo
), topOfLoop
);
637 endOfInput
.link(this);
642 failures
.append(jump());
645 endOfBackRef
.link(this);
650 void Generator::terminateAlternative(JumpList
& successes
, JumpList
& failures
)
652 successes
.append(jump());
658 void Generator::terminateDisjunction(JumpList
& successes
)
660 successes
.link(this);
663 } } // namespace JSC::WREC
665 #endif // ENABLE(WREC)