2 * Copyright (C) 2004, 2008, 2009 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Collabora Ltd.
4 * Copyright (C) 2011 Peter Varga (pvarga@webkit.org), University of Szeged
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 #include "RegularExpression.h"
32 #include <wtf/Assertions.h>
33 #include <wtf/BumpPointerAllocator.h>
35 namespace JSC
{ namespace Yarr
{
// Reference-counted implementation object behind RegularExpression. It owns
// the YARR bytecode produced for a pattern plus the bookkeeping that match()
// reads through the `d` pointer (m_numSubpatterns, m_regExpByteCode).
//
// NOTE(review): m_regexAllocator and m_constructionError are declared after
// m_regExpByteCode, yet compile() — which runs while m_regExpByteCode is being
// initialized — takes the address of both. C++ initializes members in
// declaration order, so the later m_constructionError(nullptr) initializer
// would run after compile() has stored its result there. Confirm against the
// full class definition whether this ordering is intended.
37 class RegularExpression::Private
: public RefCounted
<RegularExpression::Private
> {
// Factory: allocates a Private for the given pattern and options with `new`
// and passes it through adoptRef() so the caller receives a PassRefPtr.
39 static PassRefPtr
<Private
> create(const String
& pattern
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
41 return adoptRef(new Private(pattern
, caseSensitivity
, multilineMode
));
// Copied from the parsed pattern in compile(); match() uses it to size the
// offset vector.
46 unsigned m_numSubpatterns
;
// Result of compile(); match() and isValid() test it for null.
47 OwnPtr
<JSC::Yarr::BytecodePattern
> m_regExpByteCode
;
// Constructor: compiles the pattern as part of member initialization.
50 Private(const String
& pattern
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
52 , m_regExpByteCode(compile(pattern
, caseSensitivity
, multilineMode
))
53 , m_constructionError(nullptr)
// Parses patternString into a YarrPattern and byte-compiles it.
// The two boolean arguments are derived by comparing caseSensitivity with
// TextCaseInsensitive and multilineMode with MultilineEnabled; the pattern
// constructor is also given &m_constructionError to report failures.
57 PassOwnPtr
<JSC::Yarr::BytecodePattern
> compile(const String
& patternString
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
59 JSC::Yarr::YarrPattern
pattern(patternString
, (caseSensitivity
== TextCaseInsensitive
), (multilineMode
== MultilineEnabled
), &m_constructionError
);
// A non-null m_constructionError after construction is logged as a compile
// failure.
60 if (m_constructionError
) {
61 LOG_ERROR("RegularExpression: YARR compile failed with '%s'", m_constructionError
);
// Remember how many subpatterns the parsed pattern has.
65 m_numSubpatterns
= pattern
.m_numSubpatterns
;
// Byte-compile the parsed pattern, supplying this object's allocator.
67 return JSC::Yarr::byteCompile(pattern
, &m_regexAllocator
);
// Allocator whose address is passed to byteCompile().
70 BumpPointerAllocator m_regexAllocator
;
// Error slot written through the pointer given to the YarrPattern constructor.
71 const char* m_constructionError
;
// Builds a regular expression from a pattern string and matching options.
// All work is delegated to Private::create(), whose result is stored in `d`.
74 RegularExpression::RegularExpression(const String
& pattern
, TextCaseSensitivity caseSensitivity
, MultilineMode multilineMode
)
75 : d(Private::create(pattern
, caseSensitivity
, multilineMode
))
// Copy constructor taking another RegularExpression by const reference.
// NOTE(review): presumably shares the other object's `d` — confirm against the body.
79 RegularExpression::RegularExpression(const RegularExpression
& re
)
// Destructor, defined out of line in this file.
84 RegularExpression::~RegularExpression()
// Copy assignment from another RegularExpression; returns a RegularExpression&.
// NOTE(review): presumably rebinds `d` to re's implementation — confirm against the body.
88 RegularExpression
& RegularExpression::operator=(const RegularExpression
& re
)
// Runs the compiled pattern against str, starting at startFrom.
//
// On a match the function returns offsetVector[0] as filled in by the YARR
// interpreter, stores offsetVector[1] - offsetVector[0] in d->lastMatchLength
// and writes that same length to *matchLength.
// When the interpreter result equals offsetNoMatch, d->lastMatchLength is
// reset to -1.
// NOTE(review): confirm the value returned on the no-bytecode and no-match
// paths, and whether matchLength may be null.
94 int RegularExpression::match(const String
& str
, int startFrom
, int* matchLength
) const
// Nothing can be executed when there is no compiled bytecode.
96 if (!d
->m_regExpByteCode
)
// Two slots for each of the (m_numSubpatterns + 1) entries.
102 int offsetVectorSize
= (d
->m_numSubpatterns
+ 1) * 2;
103 unsigned* offsetVector
;
// Local Vector that provides the storage offsetVector points into.
104 Vector
<unsigned, 32> nonReturnedOvector
;
106 nonReturnedOvector
.resize(offsetVectorSize
);
107 offsetVector
= nonReturnedOvector
.data();
109 ASSERT(offsetVector
);
// Pre-fill every even-indexed slot with offsetNoMatch before interpreting.
110 for (unsigned j
= 0, i
= 0; i
< d
->m_numSubpatterns
+ 1; j
+= 2, i
++)
111 offsetVector
[j
] = JSC::Yarr::offsetNoMatch
;
// Only strings whose length fits in an int are handed to the interpreter.
114 if (str
.length() <= INT_MAX
)
115 result
= JSC::Yarr::interpret(d
->m_regExpByteCode
.get(), str
, startFrom
, offsetVector
);
117 // This code can't handle unsigned offsets. Limit our processing to strings with offsets that
118 // can be represented as ints.
119 result
= JSC::Yarr::offsetNoMatch
;
// No match: record that there is no last match length.
122 if (result
== JSC::Yarr::offsetNoMatch
) {
123 d
->lastMatchLength
= -1;
127 // 1 means 1 match; 0 means more than one match. First match is recorded in offsetVector.
128 d
->lastMatchLength
= offsetVector
[1] - offsetVector
[0];
// Report the length to the caller and return the first recorded offset.
130 *matchLength
= d
->lastMatchLength
;
131 return offsetVector
[0];
// Looks for the last match of the pattern in str by calling match() and
// keeping the candidate whose end (pos + matchLength) lies furthest into the
// string. The kept candidate's length is stored in d->lastMatchLength.
// NOTE(review): confirm the return value and loop termination against the full body.
134 int RegularExpression::searchRev(const String
& str
) const
136 // FIXME: This could be faster if it actually searched backwards.
137 // Instead, it just searches forwards, multiple times until it finds the last match.
// -1 marks "no match kept yet".
142 int lastMatchLength
= -1;
145 pos
= match(str
, start
, &matchLength
);
// Compare where this match ends with where the currently kept match ends.
147 if (pos
+ matchLength
> lastPos
+ lastMatchLength
) {
148 // replace last match if this one is later and not a subset of the last match
150 lastMatchLength
= matchLength
;
// match() overwrote d->lastMatchLength on each call; store the kept length.
155 d
->lastMatchLength
= lastMatchLength
;
// Returns d->lastMatchLength, the value most recently stored there by match()
// or searchRev() (-1 after a failed match()).
159 int RegularExpression::matchedLength() const
161 return d
->lastMatchLength
;
// Replaces occurrences of target in string with replacement, modifying string
// in place. Scanning continues while index is below the string's length.
// NOTE(review): confirm how a failed match() result is handled before the
// string.replace() call.
164 void replace(String
& string
, const RegularExpression
& target
, const String
& replacement
)
167 while (index
< static_cast<int>(string
.length())) {
// Find the next match at or after the current index.
169 index
= target
.match(string
, index
, &matchLength
);
// Substitute the matched range, then resume just past the inserted text.
172 string
.replace(index
, matchLength
, replacement
);
173 index
+= replacement
.length();
175 break; // Avoid infinite loop on 0-length matches, e.g. [a-z]*
// Reports whether the pattern was compiled: the result is d->m_regExpByteCode
// converted to bool, so it is false when no bytecode is held.
179 bool RegularExpression::isValid() const
181 return d
->m_regExpByteCode
;
184 } } // namespace JSC::Yarr