]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegularExpression.cpp
JavaScriptCore-7600.1.4.13.1.tar.gz
[apple/javascriptcore.git] / yarr / RegularExpression.cpp
1 /*
2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28 #include "config.h"
29 #include "RegularExpression.h"
30
31 #include "Yarr.h"
32 #include <wtf/Assertions.h>
33 #include <wtf/BumpPointerAllocator.h>
34
35 namespace JSC { namespace Yarr {
36
37 class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
38 public:
39 static PassRefPtr<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
40 {
41 return adoptRef(new Private(pattern, caseSensitivity, multilineMode));
42 }
43
44 int lastMatchLength;
45
46 unsigned m_numSubpatterns;
47 OwnPtr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
48
49 private:
50 Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
51 : lastMatchLength(-1)
52 , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode))
53 , m_constructionError(nullptr)
54 {
55 }
56
57 PassOwnPtr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
58 {
59 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError);
60 if (m_constructionError) {
61 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
62 return nullptr;
63 }
64
65 m_numSubpatterns = pattern.m_numSubpatterns;
66
67 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
68 }
69
70 BumpPointerAllocator m_regexAllocator;
71 const char* m_constructionError;
72 };
73
74 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
75 : d(Private::create(pattern, caseSensitivity, multilineMode))
76 {
77 }
78
79 RegularExpression::RegularExpression(const RegularExpression& re)
80 : d(re.d)
81 {
82 }
83
84 RegularExpression::~RegularExpression()
85 {
86 }
87
88 RegularExpression& RegularExpression::operator=(const RegularExpression& re)
89 {
90 d = re.d;
91 return *this;
92 }
93
94 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
95 {
96 if (!d->m_regExpByteCode)
97 return -1;
98
99 if (str.isNull())
100 return -1;
101
102 int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
103 unsigned* offsetVector;
104 Vector<unsigned, 32> nonReturnedOvector;
105
106 nonReturnedOvector.resize(offsetVectorSize);
107 offsetVector = nonReturnedOvector.data();
108
109 ASSERT(offsetVector);
110 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
111 offsetVector[j] = JSC::Yarr::offsetNoMatch;
112
113 unsigned result;
114 if (str.length() <= INT_MAX)
115 result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
116 else {
117 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
118 // can be represented as ints.
119 result = JSC::Yarr::offsetNoMatch;
120 }
121
122 if (result == JSC::Yarr::offsetNoMatch) {
123 d->lastMatchLength = -1;
124 return -1;
125 }
126
127 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
128 d->lastMatchLength = offsetVector[1] - offsetVector[0];
129 if (matchLength)
130 *matchLength = d->lastMatchLength;
131 return offsetVector[0];
132 }
133
134 int RegularExpression::searchRev(const String& str) const
135 {
136 // FIXME: This could be faster if it actually searched backwards.
137 // Instead, it just searches forwards, multiple times until it finds the last match.
138
139 int start = 0;
140 int pos;
141 int lastPos = -1;
142 int lastMatchLength = -1;
143 do {
144 int matchLength;
145 pos = match(str, start, &matchLength);
146 if (pos >= 0) {
147 if (pos + matchLength > lastPos + lastMatchLength) {
148 // replace last match if this one is later and not a subset of the last match
149 lastPos = pos;
150 lastMatchLength = matchLength;
151 }
152 start = pos + 1;
153 }
154 } while (pos != -1);
155 d->lastMatchLength = lastMatchLength;
156 return lastPos;
157 }
158
159 int RegularExpression::matchedLength() const
160 {
161 return d->lastMatchLength;
162 }
163
164 void replace(String& string, const RegularExpression& target, const String& replacement)
165 {
166 int index = 0;
167 while (index < static_cast<int>(string.length())) {
168 int matchLength;
169 index = target.match(string, index, &matchLength);
170 if (index < 0)
171 break;
172 string.replace(index, matchLength, replacement);
173 index += replacement.length();
174 if (!matchLength)
175 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
176 }
177 }
178
179 bool RegularExpression::isValid() const
180 {
181 return d->m_regExpByteCode;
182 }
183
184 } } // namespace JSC::Yarr