]> git.saurik.com Git - apple/javascriptcore.git/blame_incremental - wrec/WRECGenerator.cpp
JavaScriptCore-521.tar.gz
[apple/javascriptcore.git] / wrec / WRECGenerator.cpp
... / ...
CommitLineData
1/*
2 * Copyright (C) 2008 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#include "config.h"
27#include "WREC.h"
28
29#if ENABLE(WREC)
30
31#include "CharacterClassConstructor.h"
32#include "Interpreter.h"
33#include "WRECFunctors.h"
34#include "WRECParser.h"
35#include "pcre_internal.h"
36
37using namespace WTF;
38
39namespace JSC { namespace WREC {
40
41void Generator::generateEnter()
42{
43#if PLATFORM(X86_64)
44 // On x86-64 edi and esi are caller preserved, so nothing to do here.
45 // The four arguments have been passed in the registers %rdi, %rsi,
46 // %rdx, %rcx - shuffle these into the expected locations.
47 move(X86::edi, input); // (arg 1) edi -> eax
48 move(X86::ecx, output); // (arg 4) ecx -> edi
49 move(X86::edx, length); // (arg 3) edx -> ecx
50 move(X86::esi, index); // (arg 2) esi -> edx
51
52#else
53 // On x86 edi & esi are callee preserved registers.
54 push(X86::edi);
55 push(X86::esi);
56
57#if COMPILER(MSVC)
58 // Move the arguments into registers.
59 peek(input, 3);
60 peek(index, 4);
61 peek(length, 5);
62 peek(output, 6);
63#else
64 // On gcc the function is regparm(3), so the input, index, and length registers
65 // (eax, edx, and ecx respectively) already contain the appropriate values.
66 // Just load the fourth argument (output) into edi
67 peek(output, 3);
68#endif
69#endif
70
71#ifndef NDEBUG
72 // ASSERT that the output register is not null.
73 Jump outputNotNull = jnzPtr(output);
74 breakpoint();
75 outputNotNull.link(this);
76#endif
77}
78
79void Generator::generateReturnSuccess()
80{
81 // Set return value.
82 pop(X86::eax); // match begin
83 store32(X86::eax, output);
84 store32(index, Address(output, 4)); // match end
85
86 // Restore callee save registers.
87#if !PLATFORM(X86_64)
88 pop(X86::esi);
89 pop(X86::edi);
90#endif
91 ret();
92}
93
94void Generator::generateSaveIndex()
95{
96 push(index);
97}
98
99void Generator::generateIncrementIndex(Jump* failure)
100{
101 peek(index);
102 if (failure)
103 *failure = je32(length, index);
104 add32(Imm32(1), index);
105 poke(index);
106}
107
108void Generator::generateLoadCharacter(JumpList& failures)
109{
110 failures.append(je32(length, index));
111 load16(BaseIndex(input, index, TimesTwo), character);
112}
113
114// For the sake of end-of-line assertions, we treat one-past-the-end as if it
115// were part of the input string.
116void Generator::generateJumpIfNotEndOfInput(Label target)
117{
118 jle32(index, length, target);
119}
120
121void Generator::generateReturnFailure()
122{
123 pop();
124 move(Imm32(-1), X86::eax);
125#if !PLATFORM(X86_64)
126 pop(X86::esi);
127 pop(X86::edi);
128#endif
129 ret();
130}
131
132void Generator::generateBacktrack1()
133{
134 sub32(Imm32(1), index);
135}
136
137void Generator::generateBacktrackBackreference(unsigned subpatternId)
138{
139 sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index);
140 add32(Address(output, (2 * subpatternId) * sizeof(int)), index);
141}
142
143void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max)
144{
145 GenerateBackreferenceFunctor functor(subpatternId);
146
147 load32(Address(output, (2 * subpatternId) * sizeof(int)), character);
148 Jump skipIfEmpty = je32(Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character);
149
150 ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy);
151 if (quantifierType == Quantifier::Greedy)
152 generateGreedyQuantifier(failures, functor, min, max);
153 else
154 generateNonGreedyQuantifier(failures, functor, min, max);
155
156 skipIfEmpty.link(this);
157}
158
159void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
160{
161 JumpList atomFailedList;
162 JumpList alternativeFailedList;
163
164 // (0) Setup: Save, then init repeatCount.
165 push(repeatCount);
166 move(Imm32(0), repeatCount);
167 Jump start = jump();
168
169 // (4) Quantifier failed: No more atom reading possible.
170 Label quantifierFailed(this);
171 pop(repeatCount);
172 failures.append(jump());
173
174 // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again.
175 Label alternativeFailed(this);
176 pop(index);
177 if (max != Quantifier::Infinity)
178 je32(repeatCount, Imm32(max), quantifierFailed);
179
180 // (1) Read an atom.
181 if (min)
182 start.link(this);
183 Label readAtom(this);
184 functor.generateAtom(this, atomFailedList);
185 atomFailedList.linkTo(quantifierFailed, this);
186 add32(Imm32(1), repeatCount);
187
188 // (2) Keep reading if we're under the minimum.
189 if (min > 1)
190 jl32(repeatCount, Imm32(min), readAtom);
191
192 // (3) Test the rest of the alternative.
193 if (!min)
194 start.link(this);
195 push(index);
196 m_parser.parseAlternative(alternativeFailedList);
197 alternativeFailedList.linkTo(alternativeFailed, this);
198
199 pop();
200 pop(repeatCount);
201}
202
203void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max)
204{
205 if (!max)
206 return;
207
208 JumpList doneReadingAtomsList;
209 JumpList alternativeFailedList;
210
211 // (0) Setup: Save, then init repeatCount.
212 push(repeatCount);
213 move(Imm32(0), repeatCount);
214
215 // (1) Greedily read as many copies of the atom as possible, then jump to (2).
216 Label readAtom(this);
217 functor.generateAtom(this, doneReadingAtomsList);
218 add32(Imm32(1), repeatCount);
219 if (max == Quantifier::Infinity)
220 jump(readAtom);
221 else if (max == 1)
222 doneReadingAtomsList.append(jump());
223 else {
224 jne32(repeatCount, Imm32(max), readAtom);
225 doneReadingAtomsList.append(jump());
226 }
227
228 // (5) Quantifier failed: No more backtracking possible.
229 Label quantifierFailed(this);
230 pop(repeatCount);
231 failures.append(jump());
232
233 // (4) Alternative failed: Backtrack, then fall through to (2) to try again.
234 Label alternativeFailed(this);
235 pop(index);
236 functor.backtrack(this);
237 sub32(Imm32(1), repeatCount);
238
239 // (2) Verify that we have enough atoms.
240 doneReadingAtomsList.link(this);
241 jl32(repeatCount, Imm32(min), quantifierFailed);
242
243 // (3) Test the rest of the alternative.
244 push(index);
245 m_parser.parseAlternative(alternativeFailedList);
246 alternativeFailedList.linkTo(alternativeFailed, this);
247
248 pop();
249 pop(repeatCount);
250}
251
252void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count)
253{
254 for (size_t i = 0; i < count;) {
255 if (i < count - 1) {
256 if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) {
257 i += 2;
258 continue;
259 }
260 }
261
262 generatePatternCharacter(failures, sequence[i]);
263 ++i;
264 }
265}
266
267bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2)
268{
269 if (m_parser.ignoreCase()) {
270 // Non-trivial case folding requires more than one test, so we can't
271 // test as a pair with an adjacent character.
272 if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1))
273 return false;
274 if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2))
275 return false;
276 }
277
278 // Optimistically consume 2 characters.
279 add32(Imm32(2), index);
280 failures.append(jg32(index, length));
281
282 // Load the characters we just consumed, offset -2 characters from index.
283 load32(BaseIndex(input, index, TimesTwo, -2 * 2), character);
284
285 if (m_parser.ignoreCase()) {
286 // Convert ASCII alphabet characters to upper case before testing for
287 // equality. (ASCII non-alphabet characters don't require upper-casing
288 // because they have no uppercase equivalents. Unicode characters don't
289 // require upper-casing because we only handle Unicode characters whose
290 // upper and lower cases are equal.)
291 int ch1Mask = 0;
292 if (isASCIIAlpha(ch1)) {
293 ch1 |= 32;
294 ch1Mask = 32;
295 }
296
297 int ch2Mask = 0;
298 if (isASCIIAlpha(ch2)) {
299 ch2 |= 32;
300 ch2Mask = 32;
301 }
302
303 int mask = ch1Mask | (ch2Mask << 16);
304 if (mask)
305 or32(Imm32(mask), character);
306 }
307 int pair = ch1 | (ch2 << 16);
308
309 failures.append(jne32(character, Imm32(pair)));
310 return true;
311}
312
313void Generator::generatePatternCharacter(JumpList& failures, int ch)
314{
315 generateLoadCharacter(failures);
316
317 // used for unicode case insensitive
318 bool hasUpper = false;
319 Jump isUpper;
320
321 // if case insensitive match
322 if (m_parser.ignoreCase()) {
323 UChar lower, upper;
324
325 // check for ascii case sensitive characters
326 if (isASCIIAlpha(ch)) {
327 or32(Imm32(32), character);
328 ch |= 32;
329 } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) {
330 // handle unicode case sentitive characters - branch to success on upper
331 isUpper = je32(character, Imm32(upper));
332 hasUpper = true;
333 ch = lower;
334 }
335 }
336
337 // checks for ch, or lower case version of ch, if insensitive
338 failures.append(jne32(character, Imm32((unsigned short)ch)));
339
340 if (m_parser.ignoreCase() && hasUpper) {
341 // for unicode case insensitive matches, branch here if upper matches.
342 isUpper.link(this);
343 }
344
345 // on success consume the char
346 add32(Imm32(1), index);
347}
348
349void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
350{
351 do {
352 // pick which range we're going to generate
353 int which = count >> 1;
354 char lo = ranges[which].begin;
355 char hi = ranges[which].end;
356
357 // check if there are any ranges or matches below lo. If not, just jl to failure -
358 // if there is anything else to check, check that first, if it falls through jmp to failure.
359 if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
360 Jump loOrAbove = jge32(character, Imm32((unsigned short)lo));
361
362 // generate code for all ranges before this one
363 if (which)
364 generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
365
366 while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
367 matchDest.append(je32(character, Imm32((unsigned short)matches[*matchIndex])));
368 ++*matchIndex;
369 }
370 failures.append(jump());
371
372 loOrAbove.link(this);
373 } else if (which) {
374 Jump loOrAbove = jge32(character, Imm32((unsigned short)lo));
375
376 generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount);
377 failures.append(jump());
378
379 loOrAbove.link(this);
380 } else
381 failures.append(jl32(character, Imm32((unsigned short)lo)));
382
383 while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
384 ++*matchIndex;
385
386 matchDest.append(jle32(character, Imm32((unsigned short)hi)));
387 // fall through to here, the value is above hi.
388
389 // shuffle along & loop around if there are any more matches to handle.
390 unsigned next = which + 1;
391 ranges += next;
392 count -= next;
393 } while (count);
394}
395
396void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass)
397{
398 Jump unicodeFail;
399 if (charClass.numMatchesUnicode || charClass.numRangesUnicode) {
400 Jump isAscii = jle32(character, Imm32(0x7f));
401
402 if (charClass.numMatchesUnicode) {
403 for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) {
404 UChar ch = charClass.matchesUnicode[i];
405 matchDest.append(je32(character, Imm32(ch)));
406 }
407 }
408
409 if (charClass.numRangesUnicode) {
410 for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) {
411 UChar lo = charClass.rangesUnicode[i].begin;
412 UChar hi = charClass.rangesUnicode[i].end;
413
414 Jump below = jl32(character, Imm32(lo));
415 matchDest.append(jle32(character, Imm32(hi)));
416 below.link(this);
417 }
418 }
419
420 unicodeFail = jump();
421 isAscii.link(this);
422 }
423
424 if (charClass.numRanges) {
425 unsigned matchIndex = 0;
426 JumpList failures;
427 generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches);
428 while (matchIndex < charClass.numMatches)
429 matchDest.append(je32(character, Imm32((unsigned short)charClass.matches[matchIndex++])));
430
431 failures.link(this);
432 } else if (charClass.numMatches) {
433 // optimization: gather 'a','A' etc back together, can mask & test once.
434 Vector<char> matchesAZaz;
435
436 for (unsigned i = 0; i < charClass.numMatches; ++i) {
437 char ch = charClass.matches[i];
438 if (m_parser.ignoreCase()) {
439 if (isASCIILower(ch)) {
440 matchesAZaz.append(ch);
441 continue;
442 }
443 if (isASCIIUpper(ch))
444 continue;
445 }
446 matchDest.append(je32(character, Imm32((unsigned short)ch)));
447 }
448
449 if (unsigned countAZaz = matchesAZaz.size()) {
450 or32(Imm32(32), character);
451 for (unsigned i = 0; i < countAZaz; ++i)
452 matchDest.append(je32(character, Imm32(matchesAZaz[i])));
453 }
454 }
455
456 if (charClass.numMatchesUnicode || charClass.numRangesUnicode)
457 unicodeFail.link(this);
458}
459
460void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert)
461{
462 generateLoadCharacter(failures);
463
464 if (invert)
465 generateCharacterClassInverted(failures, charClass);
466 else {
467 JumpList successes;
468 generateCharacterClassInverted(successes, charClass);
469 failures.append(jump());
470 successes.link(this);
471 }
472
473 add32(Imm32(1), index);
474}
475
476void Generator::generateParenthesesAssertion(JumpList& failures)
477{
478 JumpList disjunctionFailed;
479
480 push(index);
481 m_parser.parseDisjunction(disjunctionFailed);
482 Jump success = jump();
483
484 disjunctionFailed.link(this);
485 pop(index);
486 failures.append(jump());
487
488 success.link(this);
489 pop(index);
490}
491
492void Generator::generateParenthesesInvertedAssertion(JumpList& failures)
493{
494 JumpList disjunctionFailed;
495
496 push(index);
497 m_parser.parseDisjunction(disjunctionFailed);
498
499 // If the disjunction succeeded, the inverted assertion failed.
500 pop(index);
501 failures.append(jump());
502
503 // If the disjunction failed, the inverted assertion succeeded.
504 disjunctionFailed.link(this);
505 pop(index);
506}
507
508void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail)
509{
510 jump(start);
511 success.link(this);
512 failures.append(fail);
513}
514
515Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter)
516{
517 Jump skip = jump();
518 newFailures.link(this);
519 for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) {
520 store32(Imm32(-1), Address(output, (2 * i) * sizeof(int)));
521 store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int)));
522 }
523
524 Jump newFailJump = jump();
525 skip.link(this);
526
527 return newFailJump;
528}
529
530void Generator::generateAssertionBOL(JumpList& failures)
531{
532 if (m_parser.multiline()) {
533 JumpList previousIsNewline;
534
535 // begin of input == success
536 previousIsNewline.append(je32(index, Imm32(0)));
537
538 // now check prev char against newline characters.
539 load16(BaseIndex(input, index, TimesTwo, -2), character);
540 generateCharacterClassInverted(previousIsNewline, CharacterClass::newline());
541
542 failures.append(jump());
543
544 previousIsNewline.link(this);
545 } else
546 failures.append(jne32(index, Imm32(0)));
547}
548
549void Generator::generateAssertionEOL(JumpList& failures)
550{
551 if (m_parser.multiline()) {
552 JumpList nextIsNewline;
553
554 generateLoadCharacter(nextIsNewline); // end of input == success
555 generateCharacterClassInverted(nextIsNewline, CharacterClass::newline());
556 failures.append(jump());
557 nextIsNewline.link(this);
558 } else {
559 failures.append(jne32(length, index));
560 }
561}
562
563void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert)
564{
565 JumpList wordBoundary;
566 JumpList notWordBoundary;
567
568 // (1) Check if the previous value was a word char
569
570 // (1.1) check for begin of input
571 Jump atBegin = je32(index, Imm32(0));
572 // (1.2) load the last char, and chck if is word character
573 load16(BaseIndex(input, index, TimesTwo, -2), character);
574 JumpList previousIsWord;
575 generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar());
576 // (1.3) if we get here, previous is not a word char
577 atBegin.link(this);
578
579 // (2) Handle situation where previous was NOT a \w
580
581 generateLoadCharacter(notWordBoundary);
582 generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar());
583 // (2.1) If we get here, neither chars are word chars
584 notWordBoundary.append(jump());
585
586 // (3) Handle situation where previous was a \w
587
588 // (3.0) link success in first match to here
589 previousIsWord.link(this);
590 generateLoadCharacter(wordBoundary);
591 generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar());
592 // (3.1) If we get here, this is an end of a word, within the input.
593
594 // (4) Link everything up
595
596 if (invert) {
597 // handle the fall through case
598 wordBoundary.append(jump());
599
600 // looking for non word boundaries, so link boundary fails to here.
601 notWordBoundary.link(this);
602
603 failures.append(wordBoundary);
604 } else {
605 // looking for word boundaries, so link successes here.
606 wordBoundary.link(this);
607
608 failures.append(notWordBoundary);
609 }
610}
611
612void Generator::generateBackreference(JumpList& failures, unsigned subpatternId)
613{
614 push(index);
615 push(repeatCount);
616
617 // get the start pos of the backref into repeatCount (multipurpose!)
618 load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount);
619
620 Jump skipIncrement = jump();
621 Label topOfLoop(this);
622
623 add32(Imm32(1), index);
624 add32(Imm32(1), repeatCount);
625 skipIncrement.link(this);
626
627 // check if we're at the end of backref (if we are, success!)
628 Jump endOfBackRef = je32(Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount);
629
630 load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character);
631
632 // check if we've run out of input (this would be a can o'fail)
633 Jump endOfInput = je32(length, index);
634
635 je16(character, BaseIndex(input, index, TimesTwo), topOfLoop);
636
637 endOfInput.link(this);
638
639 // Failure
640 pop(repeatCount);
641 pop(index);
642 failures.append(jump());
643
644 // Success
645 endOfBackRef.link(this);
646 pop(repeatCount);
647 pop();
648}
649
650void Generator::terminateAlternative(JumpList& successes, JumpList& failures)
651{
652 successes.append(jump());
653
654 failures.link(this);
655 peek(index);
656}
657
658void Generator::terminateDisjunction(JumpList& successes)
659{
660 successes.link(this);
661}
662
663} } // namespace JSC::WREC
664
665#endif // ENABLE(WREC)