]> git.saurik.com Git - apple/javascriptcore.git/blob - yarr/RegularExpression.cpp
JavaScriptCore-7601.1.46.3.tar.gz
[apple/javascriptcore.git] / yarr / RegularExpression.cpp
1
2 /*
3 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2008 Collabora Ltd.
5 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 #include "config.h"
30 #include "RegularExpression.h"
31
32 #include "Yarr.h"
33 #include <wtf/Assertions.h>
34 #include <wtf/BumpPointerAllocator.h>
35
36 namespace JSC { namespace Yarr {
37
38 class RegularExpression::Private : public RefCounted<RegularExpression::Private> {
39 public:
40 static Ref<Private> create(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
41 {
42 return adoptRef(*new Private(pattern, caseSensitivity, multilineMode));
43 }
44
45 int lastMatchLength;
46
47 unsigned m_numSubpatterns;
48 std::unique_ptr<JSC::Yarr::BytecodePattern> m_regExpByteCode;
49
50 private:
51 Private(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
52 : lastMatchLength(-1)
53 , m_regExpByteCode(compile(pattern, caseSensitivity, multilineMode))
54 , m_constructionError(nullptr)
55 {
56 }
57
58 std::unique_ptr<JSC::Yarr::BytecodePattern> compile(const String& patternString, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
59 {
60 JSC::Yarr::YarrPattern pattern(patternString, (caseSensitivity == TextCaseInsensitive), (multilineMode == MultilineEnabled), &m_constructionError);
61 if (m_constructionError) {
62 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError);
63 return nullptr;
64 }
65
66 m_numSubpatterns = pattern.m_numSubpatterns;
67
68 return JSC::Yarr::byteCompile(pattern, &m_regexAllocator);
69 }
70
71 BumpPointerAllocator m_regexAllocator;
72 const char* m_constructionError;
73 };
74
75 RegularExpression::RegularExpression(const String& pattern, TextCaseSensitivity caseSensitivity, MultilineMode multilineMode)
76 : d(Private::create(pattern, caseSensitivity, multilineMode))
77 {
78 }
79
80 RegularExpression::RegularExpression(const RegularExpression& re)
81 : d(re.d)
82 {
83 }
84
85 RegularExpression::~RegularExpression()
86 {
87 }
88
89 RegularExpression& RegularExpression::operator=(const RegularExpression& re)
90 {
91 d = re.d;
92 return *this;
93 }
94
95 int RegularExpression::match(const String& str, int startFrom, int* matchLength) const
96 {
97 if (!d->m_regExpByteCode)
98 return -1;
99
100 if (str.isNull())
101 return -1;
102
103 int offsetVectorSize = (d->m_numSubpatterns + 1) * 2;
104 unsigned* offsetVector;
105 Vector<unsigned, 32> nonReturnedOvector;
106
107 nonReturnedOvector.resize(offsetVectorSize);
108 offsetVector = nonReturnedOvector.data();
109
110 ASSERT(offsetVector);
111 for (unsigned j = 0, i = 0; i < d->m_numSubpatterns + 1; j += 2, i++)
112 offsetVector[j] = JSC::Yarr::offsetNoMatch;
113
114 unsigned result;
115 if (str.length() <= INT_MAX)
116 result = JSC::Yarr::interpret(d->m_regExpByteCode.get(), str, startFrom, offsetVector);
117 else {
118 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
119 // can be represented as ints.
120 result = JSC::Yarr::offsetNoMatch;
121 }
122
123 if (result == JSC::Yarr::offsetNoMatch) {
124 d->lastMatchLength = -1;
125 return -1;
126 }
127
128 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
129 d->lastMatchLength = offsetVector[1] - offsetVector[0];
130 if (matchLength)
131 *matchLength = d->lastMatchLength;
132 return offsetVector[0];
133 }
134
135 int RegularExpression::searchRev(const String& str) const
136 {
137 // FIXME: This could be faster if it actually searched backwards.
138 // Instead, it just searches forwards, multiple times until it finds the last match.
139
140 int start = 0;
141 int pos;
142 int lastPos = -1;
143 int lastMatchLength = -1;
144 do {
145 int matchLength;
146 pos = match(str, start, &matchLength);
147 if (pos >= 0) {
148 if (pos + matchLength > lastPos + lastMatchLength) {
149 // replace last match if this one is later and not a subset of the last match
150 lastPos = pos;
151 lastMatchLength = matchLength;
152 }
153 start = pos + 1;
154 }
155 } while (pos != -1);
156 d->lastMatchLength = lastMatchLength;
157 return lastPos;
158 }
159
160 int RegularExpression::matchedLength() const
161 {
162 return d->lastMatchLength;
163 }
164
165 void replace(String& string, const RegularExpression& target, const String& replacement)
166 {
167 int index = 0;
168 while (index < static_cast<int>(string.length())) {
169 int matchLength;
170 index = target.match(string, index, &matchLength);
171 if (index < 0)
172 break;
173 string.replace(index, matchLength, replacement);
174 index += replacement.length();
175 if (!matchLength)
176 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
177 }
178 }
179
180 bool RegularExpression::isValid() const
181 {
182 return d->m_regExpByteCode.get();
183 }
184
185 } } // namespace JSC::Yarr