]> git.saurik.com Git - apple/javascriptcore.git/blobdiff - yarr/YarrPattern.h
JavaScriptCore-903.tar.gz
[apple/javascriptcore.git] / yarr / YarrPattern.h
diff --git a/yarr/YarrPattern.h b/yarr/YarrPattern.h
new file mode 100644 (file)
index 0000000..97c3870
--- /dev/null
@@ -0,0 +1,419 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ * Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */
+
+#ifndef YarrPattern_h
+#define YarrPattern_h
+
+#include <runtime/UString.h>
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+namespace JSC { namespace Yarr {
+
+struct PatternDisjunction;
+
+struct CharacterRange {
+    UChar begin;
+    UChar end;
+
+    CharacterRange(UChar begin, UChar end)
+        : begin(begin)
+        , end(end)
+    {
+    }
+};
+
+struct CharacterClassTable : RefCounted<CharacterClassTable> {
+    const char* m_table;
+    bool m_inverted;
+    static PassRefPtr<CharacterClassTable> create(const char* table, bool inverted)
+    {
+        return adoptRef(new CharacterClassTable(table, inverted));
+    }
+
+private:
+    CharacterClassTable(const char* table, bool inverted)
+        : m_table(table)
+        , m_inverted(inverted)
+    {
+    }
+};
+
+struct CharacterClass {
+    WTF_MAKE_FAST_ALLOCATED;
+public:
+    // All CharacterClass instances have to have the full set of matches and ranges,
+    // they may have an optional table for faster lookups (which must match the
+    // specified matches and ranges)
+    CharacterClass(PassRefPtr<CharacterClassTable> table)
+        : m_table(table)
+    {
+    }
+    Vector<UChar> m_matches;
+    Vector<CharacterRange> m_ranges;
+    Vector<UChar> m_matchesUnicode;
+    Vector<CharacterRange> m_rangesUnicode;
+    RefPtr<CharacterClassTable> m_table;
+};
+
+enum QuantifierType {
+    QuantifierFixedCount,
+    QuantifierGreedy,
+    QuantifierNonGreedy,
+};
+
+struct PatternTerm {
+    enum Type {
+        TypeAssertionBOL,
+        TypeAssertionEOL,
+        TypeAssertionWordBoundary,
+        TypePatternCharacter,
+        TypeCharacterClass,
+        TypeBackReference,
+        TypeForwardReference,
+        TypeParenthesesSubpattern,
+        TypeParentheticalAssertion,
+        TypeDotStarEnclosure,
+    } type;
+    bool m_capture :1;
+    bool m_invert :1;
+    union {
+        UChar patternCharacter;
+        CharacterClass* characterClass;
+        unsigned backReferenceSubpatternId;
+        struct {
+            PatternDisjunction* disjunction;
+            unsigned subpatternId;
+            unsigned lastSubpatternId;
+            bool isCopy;
+            bool isTerminal;
+        } parentheses;
+        struct {
+            bool bolAnchor : 1;
+            bool eolAnchor : 1;
+        } anchors;
+    };
+    QuantifierType quantityType;
+    unsigned quantityCount;
+    int inputPosition;
+    unsigned frameLocation;
+
+    PatternTerm(UChar ch)
+        : type(PatternTerm::TypePatternCharacter)
+        , m_capture(false)
+        , m_invert(false)
+    {
+        patternCharacter = ch;
+        quantityType = QuantifierFixedCount;
+        quantityCount = 1;
+    }
+
+    PatternTerm(CharacterClass* charClass, bool invert)
+        : type(PatternTerm::TypeCharacterClass)
+        , m_capture(false)
+        , m_invert(invert)
+    {
+        characterClass = charClass;
+        quantityType = QuantifierFixedCount;
+        quantityCount = 1;
+    }
+
+    PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool capture = false, bool invert = false)
+        : type(type)
+        , m_capture(capture)
+        , m_invert(invert)
+    {
+        parentheses.disjunction = disjunction;
+        parentheses.subpatternId = subpatternId;
+        parentheses.isCopy = false;
+        parentheses.isTerminal = false;
+        quantityType = QuantifierFixedCount;
+        quantityCount = 1;
+    }
+    
+    PatternTerm(Type type, bool invert = false)
+        : type(type)
+        , m_capture(false)
+        , m_invert(invert)
+    {
+        quantityType = QuantifierFixedCount;
+        quantityCount = 1;
+    }
+
+    PatternTerm(unsigned spatternId)
+        : type(TypeBackReference)
+        , m_capture(false)
+        , m_invert(false)
+    {
+        backReferenceSubpatternId = spatternId;
+        quantityType = QuantifierFixedCount;
+        quantityCount = 1;
+    }
+
+    PatternTerm(bool bolAnchor, bool eolAnchor)
+        : type(TypeDotStarEnclosure)
+        , m_capture(false)
+        , m_invert(false)
+    {
+        anchors.bolAnchor = bolAnchor;
+        anchors.eolAnchor = eolAnchor;
+        quantityType = QuantifierFixedCount;
+        quantityCount = 1;
+    }
+    
+    static PatternTerm ForwardReference()
+    {
+        return PatternTerm(TypeForwardReference);
+    }
+
+    static PatternTerm BOL()
+    {
+        return PatternTerm(TypeAssertionBOL);
+    }
+
+    static PatternTerm EOL()
+    {
+        return PatternTerm(TypeAssertionEOL);
+    }
+
+    static PatternTerm WordBoundary(bool invert)
+    {
+        return PatternTerm(TypeAssertionWordBoundary, invert);
+    }
+    
+    bool invert()
+    {
+        return m_invert;
+    }
+
+    bool capture()
+    {
+        return m_capture;
+    }
+    
+    void quantify(unsigned count, QuantifierType type)
+    {
+        quantityCount = count;
+        quantityType = type;
+    }
+};
+
+struct PatternAlternative {
+    WTF_MAKE_FAST_ALLOCATED;
+public:
+    PatternAlternative(PatternDisjunction* disjunction)
+        : m_parent(disjunction)
+        , m_onceThrough(false)
+        , m_hasFixedSize(false)
+        , m_startsWithBOL(false)
+        , m_containsBOL(false)
+    {
+    }
+
+    PatternTerm& lastTerm()
+    {
+        ASSERT(m_terms.size());
+        return m_terms[m_terms.size() - 1];
+    }
+    
+    void removeLastTerm()
+    {
+        ASSERT(m_terms.size());
+        m_terms.shrink(m_terms.size() - 1);
+    }
+    
+    void setOnceThrough()
+    {
+        m_onceThrough = true;
+    }
+    
+    bool onceThrough()
+    {
+        return m_onceThrough;
+    }
+
+    Vector<PatternTerm> m_terms;
+    PatternDisjunction* m_parent;
+    unsigned m_minimumSize;
+    bool m_onceThrough : 1;
+    bool m_hasFixedSize : 1;
+    bool m_startsWithBOL : 1;
+    bool m_containsBOL : 1;
+};
+
+struct PatternDisjunction {
+    WTF_MAKE_FAST_ALLOCATED;
+public:
+    PatternDisjunction(PatternAlternative* parent = 0)
+        : m_parent(parent)
+        , m_hasFixedSize(false)
+    {
+    }
+    
+    ~PatternDisjunction()
+    {
+        deleteAllValues(m_alternatives);
+    }
+
+    PatternAlternative* addNewAlternative()
+    {
+        PatternAlternative* alternative = new PatternAlternative(this);
+        m_alternatives.append(alternative);
+        return alternative;
+    }
+
+    Vector<PatternAlternative*> m_alternatives;
+    PatternAlternative* m_parent;
+    unsigned m_minimumSize;
+    unsigned m_callFrameSize;
+    bool m_hasFixedSize;
+};
+
+// You probably don't want to be calling these functions directly
+// (please to be calling newlineCharacterClass() et al on your
+// friendly neighborhood YarrPattern instance to get nicely
+// cached copies).
+CharacterClass* newlineCreate();
+CharacterClass* digitsCreate();
+CharacterClass* spacesCreate();
+CharacterClass* wordcharCreate();
+CharacterClass* nondigitsCreate();
+CharacterClass* nonspacesCreate();
+CharacterClass* nonwordcharCreate();
+
+struct TermChain {
+    TermChain(PatternTerm term)
+        : term(term)
+    {}
+
+    PatternTerm term;
+    Vector<TermChain> hotTerms;
+};
+
+struct YarrPattern {
+    YarrPattern(const UString& pattern, bool ignoreCase, bool multiline, const char** error);
+
+    ~YarrPattern()
+    {
+        deleteAllValues(m_disjunctions);
+        deleteAllValues(m_userCharacterClasses);
+    }
+
+    void reset()
+    {
+        m_numSubpatterns = 0;
+        m_maxBackReference = 0;
+
+        m_containsBackreferences = false;
+        m_containsBOL = false;
+
+        newlineCached = 0;
+        digitsCached = 0;
+        spacesCached = 0;
+        wordcharCached = 0;
+        nondigitsCached = 0;
+        nonspacesCached = 0;
+        nonwordcharCached = 0;
+
+        deleteAllValues(m_disjunctions);
+        m_disjunctions.clear();
+        deleteAllValues(m_userCharacterClasses);
+        m_userCharacterClasses.clear();
+    }
+
+    bool containsIllegalBackReference()
+    {
+        return m_maxBackReference > m_numSubpatterns;
+    }
+
+    CharacterClass* newlineCharacterClass()
+    {
+        if (!newlineCached)
+            m_userCharacterClasses.append(newlineCached = newlineCreate());
+        return newlineCached;
+    }
+    CharacterClass* digitsCharacterClass()
+    {
+        if (!digitsCached)
+            m_userCharacterClasses.append(digitsCached = digitsCreate());
+        return digitsCached;
+    }
+    CharacterClass* spacesCharacterClass()
+    {
+        if (!spacesCached)
+            m_userCharacterClasses.append(spacesCached = spacesCreate());
+        return spacesCached;
+    }
+    CharacterClass* wordcharCharacterClass()
+    {
+        if (!wordcharCached)
+            m_userCharacterClasses.append(wordcharCached = wordcharCreate());
+        return wordcharCached;
+    }
+    CharacterClass* nondigitsCharacterClass()
+    {
+        if (!nondigitsCached)
+            m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
+        return nondigitsCached;
+    }
+    CharacterClass* nonspacesCharacterClass()
+    {
+        if (!nonspacesCached)
+            m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
+        return nonspacesCached;
+    }
+    CharacterClass* nonwordcharCharacterClass()
+    {
+        if (!nonwordcharCached)
+            m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
+        return nonwordcharCached;
+    }
+
+    bool m_ignoreCase : 1;
+    bool m_multiline : 1;
+    bool m_containsBackreferences : 1;
+    bool m_containsBOL : 1;
+    unsigned m_numSubpatterns;
+    unsigned m_maxBackReference;
+    PatternDisjunction* m_body;
+    Vector<PatternDisjunction*, 4> m_disjunctions;
+    Vector<CharacterClass*> m_userCharacterClasses;
+
+private:
+    const char* compile(const UString& patternString);
+
+    CharacterClass* newlineCached;
+    CharacterClass* digitsCached;
+    CharacterClass* spacesCached;
+    CharacterClass* wordcharCached;
+    CharacterClass* nondigitsCached;
+    CharacterClass* nonspacesCached;
+    CharacterClass* nonwordcharCached;
+};
+
+} } // namespace JSC::Yarr
+
+#endif // YarrPattern_h