2 * Copyright (C) 2008 Apple Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #include "CharacterClassConstructor.h"
32 #include "Interpreter.h"
33 #include "WRECFunctors.h"
34 #include "WRECParser.h"
35 #include "pcre_internal.h"
39 namespace JSC
{ namespace WREC
{
41 void Generator::generateEnter()
44 // On x86 edi & esi are callee preserved registers.
49 // Move the arguments into registers.
55 // On gcc the function is regparm(3), so the input, index, and length registers
56 // (eax, edx, and ecx respectively) already contain the appropriate values.
57 // Just load the fourth argument (output) into edi
63 void Generator::generateReturnSuccess()
65 ASSERT(returnRegister
!= index
);
66 ASSERT(returnRegister
!= output
);
69 pop(returnRegister
); // match begin
70 store32(returnRegister
, output
);
71 store32(index
, Address(output
, 4)); // match end
73 // Restore callee save registers.
81 void Generator::generateSaveIndex()
86 void Generator::generateIncrementIndex(Jump
* failure
)
90 *failure
= branch32(Equal
, length
, index
);
91 add32(Imm32(1), index
);
95 void Generator::generateLoadCharacter(JumpList
& failures
)
97 failures
.append(branch32(Equal
, length
, index
));
98 load16(BaseIndex(input
, index
, TimesTwo
), character
);
101 // For the sake of end-of-line assertions, we treat one-past-the-end as if it
102 // were part of the input string.
103 void Generator::generateJumpIfNotEndOfInput(Label target
)
105 branch32(LessThanOrEqual
, index
, length
, target
);
108 void Generator::generateReturnFailure()
111 move(Imm32(-1), returnRegister
);
120 void Generator::generateBacktrack1()
122 sub32(Imm32(1), index
);
125 void Generator::generateBacktrackBackreference(unsigned subpatternId
)
127 sub32(Address(output
, (2 * subpatternId
+ 1) * sizeof(int)), index
);
128 add32(Address(output
, (2 * subpatternId
) * sizeof(int)), index
);
131 void Generator::generateBackreferenceQuantifier(JumpList
& failures
, Quantifier::Type quantifierType
, unsigned subpatternId
, unsigned min
, unsigned max
)
133 GenerateBackreferenceFunctor
functor(subpatternId
);
135 load32(Address(output
, (2 * subpatternId
) * sizeof(int)), character
);
136 Jump skipIfEmpty
= branch32(Equal
, Address(output
, ((2 * subpatternId
) + 1) * sizeof(int)), character
);
138 ASSERT(quantifierType
== Quantifier::Greedy
|| quantifierType
== Quantifier::NonGreedy
);
139 if (quantifierType
== Quantifier::Greedy
)
140 generateGreedyQuantifier(failures
, functor
, min
, max
);
142 generateNonGreedyQuantifier(failures
, functor
, min
, max
);
144 skipIfEmpty
.link(this);
147 void Generator::generateNonGreedyQuantifier(JumpList
& failures
, GenerateAtomFunctor
& functor
, unsigned min
, unsigned max
)
149 JumpList atomFailedList
;
150 JumpList alternativeFailedList
;
152 // (0) Setup: Save, then init repeatCount.
154 move(Imm32(0), repeatCount
);
157 // (4) Quantifier failed: No more atom reading possible.
158 Label
quantifierFailed(this);
160 failures
.append(jump());
162 // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again.
163 Label
alternativeFailed(this);
165 if (max
!= Quantifier::Infinity
)
166 branch32(Equal
, repeatCount
, Imm32(max
), quantifierFailed
);
171 Label
readAtom(this);
172 functor
.generateAtom(this, atomFailedList
);
173 atomFailedList
.linkTo(quantifierFailed
, this);
174 add32(Imm32(1), repeatCount
);
176 // (2) Keep reading if we're under the minimum.
178 branch32(LessThan
, repeatCount
, Imm32(min
), readAtom
);
180 // (3) Test the rest of the alternative.
184 m_parser
.parseAlternative(alternativeFailedList
);
185 alternativeFailedList
.linkTo(alternativeFailed
, this);
191 void Generator::generateGreedyQuantifier(JumpList
& failures
, GenerateAtomFunctor
& functor
, unsigned min
, unsigned max
)
196 JumpList doneReadingAtomsList
;
197 JumpList alternativeFailedList
;
199 // (0) Setup: Save, then init repeatCount.
201 move(Imm32(0), repeatCount
);
203 // (1) Greedily read as many copies of the atom as possible, then jump to (2).
204 Label
readAtom(this);
205 functor
.generateAtom(this, doneReadingAtomsList
);
206 add32(Imm32(1), repeatCount
);
207 if (max
== Quantifier::Infinity
)
210 doneReadingAtomsList
.append(jump());
212 branch32(NotEqual
, repeatCount
, Imm32(max
), readAtom
);
213 doneReadingAtomsList
.append(jump());
216 // (5) Quantifier failed: No more backtracking possible.
217 Label
quantifierFailed(this);
219 failures
.append(jump());
221 // (4) Alternative failed: Backtrack, then fall through to (2) to try again.
222 Label
alternativeFailed(this);
224 functor
.backtrack(this);
225 sub32(Imm32(1), repeatCount
);
227 // (2) Verify that we have enough atoms.
228 doneReadingAtomsList
.link(this);
229 branch32(LessThan
, repeatCount
, Imm32(min
), quantifierFailed
);
231 // (3) Test the rest of the alternative.
233 m_parser
.parseAlternative(alternativeFailedList
);
234 alternativeFailedList
.linkTo(alternativeFailed
, this);
240 void Generator::generatePatternCharacterSequence(JumpList
& failures
, int* sequence
, size_t count
)
242 for (size_t i
= 0; i
< count
;) {
244 if (generatePatternCharacterPair(failures
, sequence
[i
], sequence
[i
+ 1])) {
250 generatePatternCharacter(failures
, sequence
[i
]);
255 bool Generator::generatePatternCharacterPair(JumpList
& failures
, int ch1
, int ch2
)
257 if (m_parser
.ignoreCase()) {
258 // Non-trivial case folding requires more than one test, so we can't
259 // test as a pair with an adjacent character.
260 if (!isASCII(ch1
) && Unicode::toLower(ch1
) != Unicode::toUpper(ch1
))
262 if (!isASCII(ch2
) && Unicode::toLower(ch2
) != Unicode::toUpper(ch2
))
266 // Optimistically consume 2 characters.
267 add32(Imm32(2), index
);
268 failures
.append(branch32(GreaterThan
, index
, length
));
270 // Load the characters we just consumed, offset -2 characters from index.
271 load32(BaseIndex(input
, index
, TimesTwo
, -2 * 2), character
);
273 if (m_parser
.ignoreCase()) {
274 // Convert ASCII alphabet characters to upper case before testing for
275 // equality. (ASCII non-alphabet characters don't require upper-casing
276 // because they have no uppercase equivalents. Unicode characters don't
277 // require upper-casing because we only handle Unicode characters whose
278 // upper and lower cases are equal.)
280 if (isASCIIAlpha(ch1
)) {
286 if (isASCIIAlpha(ch2
)) {
291 int mask
= ch1Mask
| (ch2Mask
<< 16);
293 or32(Imm32(mask
), character
);
295 int pair
= ch1
| (ch2
<< 16);
297 failures
.append(branch32(NotEqual
, character
, Imm32(pair
)));
301 void Generator::generatePatternCharacter(JumpList
& failures
, int ch
)
303 generateLoadCharacter(failures
);
305 // used for unicode case insensitive
306 bool hasUpper
= false;
309 // if case insensitive match
310 if (m_parser
.ignoreCase()) {
313 // check for ascii case sensitive characters
314 if (isASCIIAlpha(ch
)) {
315 or32(Imm32(32), character
);
317 } else if (!isASCII(ch
) && ((lower
= Unicode::toLower(ch
)) != (upper
= Unicode::toUpper(ch
)))) {
318 // handle unicode case sentitive characters - branch to success on upper
319 isUpper
= branch32(Equal
, character
, Imm32(upper
));
325 // checks for ch, or lower case version of ch, if insensitive
326 failures
.append(branch32(NotEqual
, character
, Imm32((unsigned short)ch
)));
328 if (m_parser
.ignoreCase() && hasUpper
) {
329 // for unicode case insensitive matches, branch here if upper matches.
333 // on success consume the char
334 add32(Imm32(1), index
);
337 void Generator::generateCharacterClassInvertedRange(JumpList
& failures
, JumpList
& matchDest
, const CharacterRange
* ranges
, unsigned count
, unsigned* matchIndex
, const UChar
* matches
, unsigned matchCount
)
340 // pick which range we're going to generate
341 int which
= count
>> 1;
342 char lo
= ranges
[which
].begin
;
343 char hi
= ranges
[which
].end
;
345 // check if there are any ranges or matches below lo. If not, just jl to failure -
346 // if there is anything else to check, check that first, if it falls through jmp to failure.
347 if ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
348 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
350 // generate code for all ranges before this one
352 generateCharacterClassInvertedRange(failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
354 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] < lo
)) {
355 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)matches
[*matchIndex
])));
358 failures
.append(jump());
360 loOrAbove
.link(this);
362 Jump loOrAbove
= branch32(GreaterThanOrEqual
, character
, Imm32((unsigned short)lo
));
364 generateCharacterClassInvertedRange(failures
, matchDest
, ranges
, which
, matchIndex
, matches
, matchCount
);
365 failures
.append(jump());
367 loOrAbove
.link(this);
369 failures
.append(branch32(LessThan
, character
, Imm32((unsigned short)lo
)));
371 while ((*matchIndex
< matchCount
) && (matches
[*matchIndex
] <= hi
))
374 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32((unsigned short)hi
)));
375 // fall through to here, the value is above hi.
377 // shuffle along & loop around if there are any more matches to handle.
378 unsigned next
= which
+ 1;
384 void Generator::generateCharacterClassInverted(JumpList
& matchDest
, const CharacterClass
& charClass
)
387 if (charClass
.numMatchesUnicode
|| charClass
.numRangesUnicode
) {
388 Jump isAscii
= branch32(LessThanOrEqual
, character
, Imm32(0x7f));
390 if (charClass
.numMatchesUnicode
) {
391 for (unsigned i
= 0; i
< charClass
.numMatchesUnicode
; ++i
) {
392 UChar ch
= charClass
.matchesUnicode
[i
];
393 matchDest
.append(branch32(Equal
, character
, Imm32(ch
)));
397 if (charClass
.numRangesUnicode
) {
398 for (unsigned i
= 0; i
< charClass
.numRangesUnicode
; ++i
) {
399 UChar lo
= charClass
.rangesUnicode
[i
].begin
;
400 UChar hi
= charClass
.rangesUnicode
[i
].end
;
402 Jump below
= branch32(LessThan
, character
, Imm32(lo
));
403 matchDest
.append(branch32(LessThanOrEqual
, character
, Imm32(hi
)));
408 unicodeFail
= jump();
412 if (charClass
.numRanges
) {
413 unsigned matchIndex
= 0;
415 generateCharacterClassInvertedRange(failures
, matchDest
, charClass
.ranges
, charClass
.numRanges
, &matchIndex
, charClass
.matches
, charClass
.numMatches
);
416 while (matchIndex
< charClass
.numMatches
)
417 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)charClass
.matches
[matchIndex
++])));
420 } else if (charClass
.numMatches
) {
421 // optimization: gather 'a','A' etc back together, can mask & test once.
422 Vector
<char> matchesAZaz
;
424 for (unsigned i
= 0; i
< charClass
.numMatches
; ++i
) {
425 char ch
= charClass
.matches
[i
];
426 if (m_parser
.ignoreCase()) {
427 if (isASCIILower(ch
)) {
428 matchesAZaz
.append(ch
);
431 if (isASCIIUpper(ch
))
434 matchDest
.append(branch32(Equal
, character
, Imm32((unsigned short)ch
)));
437 if (unsigned countAZaz
= matchesAZaz
.size()) {
438 or32(Imm32(32), character
);
439 for (unsigned i
= 0; i
< countAZaz
; ++i
)
440 matchDest
.append(branch32(Equal
, character
, Imm32(matchesAZaz
[i
])));
444 if (charClass
.numMatchesUnicode
|| charClass
.numRangesUnicode
)
445 unicodeFail
.link(this);
448 void Generator::generateCharacterClass(JumpList
& failures
, const CharacterClass
& charClass
, bool invert
)
450 generateLoadCharacter(failures
);
453 generateCharacterClassInverted(failures
, charClass
);
456 generateCharacterClassInverted(successes
, charClass
);
457 failures
.append(jump());
458 successes
.link(this);
461 add32(Imm32(1), index
);
464 void Generator::generateParenthesesAssertion(JumpList
& failures
)
466 JumpList disjunctionFailed
;
469 m_parser
.parseDisjunction(disjunctionFailed
);
470 Jump success
= jump();
472 disjunctionFailed
.link(this);
474 failures
.append(jump());
480 void Generator::generateParenthesesInvertedAssertion(JumpList
& failures
)
482 JumpList disjunctionFailed
;
485 m_parser
.parseDisjunction(disjunctionFailed
);
487 // If the disjunction succeeded, the inverted assertion failed.
489 failures
.append(jump());
491 // If the disjunction failed, the inverted assertion succeeded.
492 disjunctionFailed
.link(this);
496 void Generator::generateParenthesesNonGreedy(JumpList
& failures
, Label start
, Jump success
, Jump fail
)
500 failures
.append(fail
);
503 Generator::Jump
Generator::generateParenthesesResetTrampoline(JumpList
& newFailures
, unsigned subpatternIdBefore
, unsigned subpatternIdAfter
)
506 newFailures
.link(this);
507 for (unsigned i
= subpatternIdBefore
+ 1; i
<= subpatternIdAfter
; ++i
) {
508 store32(Imm32(-1), Address(output
, (2 * i
) * sizeof(int)));
509 store32(Imm32(-1), Address(output
, (2 * i
+ 1) * sizeof(int)));
512 Jump newFailJump
= jump();
518 void Generator::generateAssertionBOL(JumpList
& failures
)
520 if (m_parser
.multiline()) {
521 JumpList previousIsNewline
;
523 // begin of input == success
524 previousIsNewline
.append(branch32(Equal
, index
, Imm32(0)));
526 // now check prev char against newline characters.
527 load16(BaseIndex(input
, index
, TimesTwo
, -2), character
);
528 generateCharacterClassInverted(previousIsNewline
, CharacterClass::newline());
530 failures
.append(jump());
532 previousIsNewline
.link(this);
534 failures
.append(branch32(NotEqual
, index
, Imm32(0)));
537 void Generator::generateAssertionEOL(JumpList
& failures
)
539 if (m_parser
.multiline()) {
540 JumpList nextIsNewline
;
542 generateLoadCharacter(nextIsNewline
); // end of input == success
543 generateCharacterClassInverted(nextIsNewline
, CharacterClass::newline());
544 failures
.append(jump());
545 nextIsNewline
.link(this);
547 failures
.append(branch32(NotEqual
, length
, index
));
551 void Generator::generateAssertionWordBoundary(JumpList
& failures
, bool invert
)
553 JumpList wordBoundary
;
554 JumpList notWordBoundary
;
556 // (1) Check if the previous value was a word char
558 // (1.1) check for begin of input
559 Jump atBegin
= branch32(Equal
, index
, Imm32(0));
560 // (1.2) load the last char, and chck if is word character
561 load16(BaseIndex(input
, index
, TimesTwo
, -2), character
);
562 JumpList previousIsWord
;
563 generateCharacterClassInverted(previousIsWord
, CharacterClass::wordchar());
564 // (1.3) if we get here, previous is not a word char
567 // (2) Handle situation where previous was NOT a \w
569 generateLoadCharacter(notWordBoundary
);
570 generateCharacterClassInverted(wordBoundary
, CharacterClass::wordchar());
571 // (2.1) If we get here, neither chars are word chars
572 notWordBoundary
.append(jump());
574 // (3) Handle situation where previous was a \w
576 // (3.0) link success in first match to here
577 previousIsWord
.link(this);
578 generateLoadCharacter(wordBoundary
);
579 generateCharacterClassInverted(notWordBoundary
, CharacterClass::wordchar());
580 // (3.1) If we get here, this is an end of a word, within the input.
582 // (4) Link everything up
585 // handle the fall through case
586 wordBoundary
.append(jump());
588 // looking for non word boundaries, so link boundary fails to here.
589 notWordBoundary
.link(this);
591 failures
.append(wordBoundary
);
593 // looking for word boundaries, so link successes here.
594 wordBoundary
.link(this);
596 failures
.append(notWordBoundary
);
600 void Generator::generateBackreference(JumpList
& failures
, unsigned subpatternId
)
605 // get the start pos of the backref into repeatCount (multipurpose!)
606 load32(Address(output
, (2 * subpatternId
) * sizeof(int)), repeatCount
);
608 Jump skipIncrement
= jump();
609 Label
topOfLoop(this);
611 add32(Imm32(1), index
);
612 add32(Imm32(1), repeatCount
);
613 skipIncrement
.link(this);
615 // check if we're at the end of backref (if we are, success!)
616 Jump endOfBackRef
= branch32(Equal
, Address(output
, ((2 * subpatternId
) + 1) * sizeof(int)), repeatCount
);
618 load16(BaseIndex(input
, repeatCount
, MacroAssembler::TimesTwo
), character
);
620 // check if we've run out of input (this would be a can o'fail)
621 Jump endOfInput
= branch32(Equal
, length
, index
);
623 branch16(Equal
, BaseIndex(input
, index
, TimesTwo
), character
, topOfLoop
);
625 endOfInput
.link(this);
630 failures
.append(jump());
633 endOfBackRef
.link(this);
638 void Generator::terminateAlternative(JumpList
& successes
, JumpList
& failures
)
640 successes
.append(jump());
646 void Generator::terminateDisjunction(JumpList
& successes
)
648 successes
.link(this);
651 } } // namespace JSC::WREC
653 #endif // ENABLE(WREC)