3 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2008 Collabora Ltd.
5 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
20 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include "RegularExpression.h"
33 #include <wtf/Assertions.h>
34 #include <wtf/BumpPointerAllocator.h>
36 namespace JSC
{ namespace Yarr
{
38 class RegularExpression::Private
: public RefCounted
<RegularExpression::Private
> {
40 static Ref
<Private
> create(const String
& pattern
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
42 return adoptRef(*new Private(pattern
, caseSensitivity
, multilineMode
));
47 unsigned m_numSubpatterns
;
48 std::unique_ptr
<JSC::Yarr::BytecodePattern
> m_regExpByteCode
;
51 Private(const String
& pattern
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
53 , m_regExpByteCode(compile(pattern
, caseSensitivity
, multilineMode
))
54 , m_constructionError(nullptr)
58 std::unique_ptr
<JSC::Yarr::BytecodePattern
> compile(const String
& patternString
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
60 JSC::Yarr::YarrPattern
pattern(patternString
, (caseSensitivity
== TextCaseInsensitive
), (multilineMode
== MultilineEnabled
), &m_constructionError
);
61 if (m_constructionError
) {
62 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError
);
66 m_numSubpatterns
= pattern
.m_numSubpatterns
;
68 return JSC::Yarr::byteCompile(pattern
, &m_regexAllocator
);
71 BumpPointerAllocator m_regexAllocator
;
72 const char* m_constructionError
;
75 RegularExpression::RegularExpression(const String
& pattern
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
76 : d(Private::create(pattern
, caseSensitivity
, multilineMode
))
80 RegularExpression::RegularExpression(const RegularExpression
& re
)
85 RegularExpression::~RegularExpression()
89 RegularExpression
& RegularExpression::operator=(const RegularExpression
& re
)
95 int RegularExpression::match(const String
& str
, int startFrom
, int* matchLength
) const
97 if (!d
->m_regExpByteCode
)
103 int offsetVectorSize
= (d
->m_numSubpatterns
+ 1) * 2;
104 unsigned* offsetVector
;
105 Vector
<unsigned, 32> nonReturnedOvector
;
107 nonReturnedOvector
.resize(offsetVectorSize
);
108 offsetVector
= nonReturnedOvector
.data();
110 ASSERT(offsetVector
);
111 for (unsigned j
= 0, i
= 0; i
< d
->m_numSubpatterns
+ 1; j
+= 2, i
++)
112 offsetVector
[j
] = JSC::Yarr::offsetNoMatch
;
115 if (str
.length() <= INT_MAX
)
116 result
= JSC::Yarr::interpret(d
->m_regExpByteCode
.get(), str
, startFrom
, offsetVector
);
118 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
119 // can be represented as ints.
120 result
= JSC::Yarr::offsetNoMatch
;
123 if (result
== JSC::Yarr::offsetNoMatch
) {
124 d
->lastMatchLength
= -1;
128 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
129 d
->lastMatchLength
= offsetVector
[1] - offsetVector
[0];
131 *matchLength
= d
->lastMatchLength
;
132 return offsetVector
[0];
135 int RegularExpression::searchRev(const String
& str
) const
137 // FIXME: This could be faster if it actually searched backwards.
138 // Instead, it just searches forwards, multiple times until it finds the last match.
143 int lastMatchLength
= -1;
146 pos
= match(str
, start
, &matchLength
);
148 if (pos
+ matchLength
> lastPos
+ lastMatchLength
) {
149 // replace last match if this one is later and not a subset of the last match
151 lastMatchLength
= matchLength
;
156 d
->lastMatchLength
= lastMatchLength
;
160 int RegularExpression::matchedLength() const
162 return d
->lastMatchLength
;
165 void replace(String
& string
, const RegularExpression
& target
, const String
& replacement
)
168 while (index
< static_cast<int>(string
.length())) {
170 index
= target
.match(string
, index
, &matchLength
);
173 string
.replace(index
, matchLength
, replacement
);
174 index
+= replacement
.length();
176 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
180 bool RegularExpression::isValid() const
182 return d
->m_regExpByteCode
.get();
185 } } // namespace JSC::Yarr