/*
 *  Copyright (C) 1999-2001, 2004 Harri Porten (porten@kde.org)
 *  Copyright (c) 2007, 2008 Apple Inc. All rights reserved.
 *  Copyright (C) 2009 Torch Mobile, Inc.
 *  Copyright (C) 2010 Peter Varga (pvarga@inf.u-szeged.hu), University of Szeged
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License as published by the Free Software Foundation; either
 *  version 2 of the License, or (at your option) any later version.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 */
27 #include "RegExpCache.h"
28 #include "yarr/Yarr.h"
29 #include "yarr/YarrJIT.h"
33 #include <wtf/Assertions.h>
34 #include <wtf/OwnArrayPtr.h>
38 const ClassInfo
RegExp::s_info
= { "RegExp", 0, 0, 0 };
40 RegExpFlags
regExpFlags(const UString
& string
)
42 RegExpFlags flags
= NoFlags
;
44 for (unsigned i
= 0; i
< string
.length(); ++i
) {
45 switch (string
.characters()[i
]) {
47 if (flags
& FlagGlobal
)
49 flags
= static_cast<RegExpFlags
>(flags
| FlagGlobal
);
53 if (flags
& FlagIgnoreCase
)
55 flags
= static_cast<RegExpFlags
>(flags
| FlagIgnoreCase
);
59 if (flags
& FlagMultiline
)
61 flags
= static_cast<RegExpFlags
>(flags
| FlagMultiline
);
72 struct RegExpRepresentation
{
74 Yarr::YarrCodeBlock m_regExpJITCode
;
76 OwnPtr
<Yarr::BytecodePattern
> m_regExpBytecode
;
79 RegExp::RegExp(JSGlobalData
* globalData
, const UString
& patternString
, RegExpFlags flags
)
80 : JSCell(*globalData
, globalData
->regExpStructure
.get())
81 , m_state(NotCompiled
)
82 , m_patternString(patternString
)
84 , m_constructionError(0)
86 #if ENABLE(REGEXP_TRACING)
87 , m_rtMatchCallCount(0)
88 , m_rtMatchFoundCount(0)
91 Yarr::YarrPattern
pattern(m_patternString
, ignoreCase(), multiline(), &m_constructionError
);
92 if (m_constructionError
)
95 m_numSubpatterns
= pattern
.m_numSubpatterns
;
102 RegExp
* RegExp::create(JSGlobalData
* globalData
, const UString
& patternString
, RegExpFlags flags
)
104 return globalData
->regExpCache()->lookupOrCreate(patternString
, flags
);
107 void RegExp::compile(JSGlobalData
* globalData
)
109 ASSERT(m_state
== NotCompiled
);
110 m_representation
= adoptPtr(new RegExpRepresentation
);
112 Yarr::YarrPattern
pattern(m_patternString
, ignoreCase(), multiline(), &m_constructionError
);
113 if (m_constructionError
) {
114 ASSERT_NOT_REACHED();
115 m_state
= ParseError
;
119 globalData
->regExpCache()->addToStrongCache(this);
121 ASSERT(m_numSubpatterns
== pattern
.m_numSubpatterns
);
124 if (!pattern
.m_containsBackreferences
&& globalData
->canUseJIT()) {
125 Yarr::jitCompile(pattern
, globalData
, m_representation
->m_regExpJITCode
);
126 #if ENABLE(YARR_JIT_DEBUG)
127 if (!m_representation
->m_regExpJITCode
.isFallBack())
132 if (!m_representation
->m_regExpJITCode
.isFallBack()) {
140 m_representation
->m_regExpBytecode
= Yarr::byteCompile(pattern
, &globalData
->m_regExpAllocator
);
145 int RegExp::match(JSGlobalData
& globalData
, const UString
& s
, int startOffset
, Vector
<int, 32>* ovector
)
150 #if ENABLE(REGEXP_TRACING)
151 m_rtMatchCallCount
++;
154 if (static_cast<unsigned>(startOffset
) > s
.length() || s
.isNull())
157 if (m_state
!= ParseError
) {
158 compileIfNecessary(globalData
);
160 int offsetVectorSize
= (m_numSubpatterns
+ 1) * 2;
162 Vector
<int, 32> nonReturnedOvector
;
164 ovector
->resize(offsetVectorSize
);
165 offsetVector
= ovector
->data();
167 nonReturnedOvector
.resize(offsetVectorSize
);
168 offsetVector
= nonReturnedOvector
.data();
171 ASSERT(offsetVector
);
172 // Initialize offsetVector with the return value (index 0) and the
173 // first subpattern start indicies (even index values) set to -1.
174 // No need to init the subpattern end indicies.
175 for (unsigned j
= 0, i
= 0; i
< m_numSubpatterns
+ 1; j
+= 2, i
++)
176 offsetVector
[j
] = -1;
180 if (m_state
== JITCode
) {
181 result
= Yarr::execute(m_representation
->m_regExpJITCode
, s
.characters(), startOffset
, s
.length(), offsetVector
);
182 #if ENABLE(YARR_JIT_DEBUG)
183 matchCompareWithInterpreter(s
, startOffset
, offsetVector
, result
);
187 result
= Yarr::interpret(m_representation
->m_regExpBytecode
.get(), s
.characters(), startOffset
, s
.length(), offsetVector
);
188 ASSERT(result
>= -1);
190 #if ENABLE(REGEXP_TRACING)
192 m_rtMatchFoundCount
++;
201 void RegExp::invalidateCode()
203 if (!m_representation
|| m_state
== Compiling
)
205 m_state
= NotCompiled
;
206 m_representation
.clear();
209 #if ENABLE(YARR_JIT_DEBUG)
210 void RegExp::matchCompareWithInterpreter(const UString
& s
, int startOffset
, int* offsetVector
, int jitResult
)
212 int offsetVectorSize
= (m_numSubpatterns
+ 1) * 2;
213 Vector
<int, 32> interpreterOvector
;
214 interpreterOvector
.resize(offsetVectorSize
);
215 int* interpreterOffsetVector
= interpreterOvector
.data();
216 int interpreterResult
= 0;
219 // Initialize interpreterOffsetVector with the return value (index 0) and the
220 // first subpattern start indicies (even index values) set to -1.
221 // No need to init the subpattern end indicies.
222 for (unsigned j
= 0, i
= 0; i
< m_numSubpatterns
+ 1; j
+= 2, i
++)
223 interpreterOffsetVector
[j
] = -1;
225 interpreterResult
= Yarr::interpret(m_representation
->m_regExpBytecode
.get(), s
.characters(), startOffset
, s
.length(), interpreterOffsetVector
);
227 if (jitResult
!= interpreterResult
)
230 for (unsigned j
= 2, i
= 0; i
< m_numSubpatterns
; j
+=2, i
++)
231 if ((offsetVector
[j
] != interpreterOffsetVector
[j
])
232 || ((offsetVector
[j
] >= 0) && (offsetVector
[j
+1] != interpreterOffsetVector
[j
+1])))
236 fprintf(stderr
, "RegExp Discrepency for /%s/\n string input ", pattern().utf8().data());
237 unsigned segmentLen
= s
.length() - static_cast<unsigned>(startOffset
);
239 fprintf(stderr
, (segmentLen
< 150) ? "\"%s\"\n" : "\"%148s...\"\n", s
.utf8().data() + startOffset
);
241 if (jitResult
!= interpreterResult
) {
242 fprintf(stderr
, " JIT result = %d, blah interpreted result = %d\n", jitResult
, interpreterResult
);
245 fprintf(stderr
, " Correct result = %d\n", jitResult
);
249 for (unsigned j
= 2, i
= 0; i
< m_numSubpatterns
; j
+=2, i
++) {
250 if (offsetVector
[j
] != interpreterOffsetVector
[j
])
251 fprintf(stderr
, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j
, offsetVector
[j
], j
, interpreterOffsetVector
[j
]);
252 if ((offsetVector
[j
] >= 0) && (offsetVector
[j
+1] != interpreterOffsetVector
[j
+1]))
253 fprintf(stderr
, " JIT offset[%d] = %d, interpreted offset[%d] = %d\n", j
+1, offsetVector
[j
+1], j
+1, interpreterOffsetVector
[j
+1]);
260 #if ENABLE(REGEXP_TRACING)
261 void RegExp::printTraceData()
263 char formattedPattern
[41];
266 strncpy(rawPattern
, pattern().utf8().data(), 40);
267 rawPattern
[40]= '\0';
269 int pattLen
= strlen(rawPattern
);
271 snprintf(formattedPattern
, 41, (pattLen
<= 38) ? "/%.38s/" : "/%.36s...", rawPattern
);
274 Yarr::YarrCodeBlock
& codeBlock
= m_representation
->m_regExpJITCode
;
276 const size_t jitAddrSize
= 20;
277 char jitAddr
[jitAddrSize
];
278 if (m_state
== JITCode
)
279 snprintf(jitAddr
, jitAddrSize
, "fallback");
281 snprintf(jitAddr
, jitAddrSize
, "0x%014lx", reinterpret_cast<unsigned long int>(codeBlock
.getAddr()));
283 const char* jitAddr
= "JIT Off";
286 printf("%-40.40s %16.16s %10d %10d\n", formattedPattern
, jitAddr
, m_rtMatchCallCount
, m_rtMatchFoundCount
);