2 * Copyright (C) 2011 Google Inc. All rights reserved.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above
12 * copyright notice, this list of conditions and the following disclaimer
13 * in the documentation and/or other materials provided with the
16 * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC.
20 * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 #include "ContentSearchUtilities.h"
32 #include "InspectorValues.h"
33 #include "RegularExpression.h"
35 #include <wtf/BumpPointerAllocator.h>
36 #include <wtf/StdLibExtras.h>
37 #include <wtf/text/StringBuilder.h>
39 using namespace JSC::Yarr
;
42 namespace ContentSearchUtilities
{
// Set of ASCII characters that createSearchRegexSource() tests each input
// character against (via strchr) while converting literal search text into a
// regular-expression source string.
44 static const char regexSpecialCharacters
[] = "[](){}+-*.,?\\^$|";
// Converts plain search text into a regular-expression source string.
//
// Walks |text| one UChar at a time, appends to |result|, and returns
// result.toString().
//
// Parameter text: the literal query to convert.
// Returns: the accumulated string.
//
// NOTE(review): the declaration of |result| and the statement governed by the
// isASCII/strchr test are not visible in this view. Presumably a backslash is
// appended before any character found in regexSpecialCharacters and the
// character itself is then appended unconditionally - confirm against the
// complete file.
46 static String
createSearchRegexSource(const String
& text
)
50 for (unsigned i
= 0; i
< text
.length(); i
++) {
51 UChar character
= text
[i
];
// The isASCII check comes first, so strchr only ever sees ASCII values.
52 if (isASCII(character
) && strchr(regexSpecialCharacters
, character
))
54 result
.append(character
);
57 return result
.toString();
// Key-extractor callback handed to approximateBinarySearch() by
// textPositionFromOffset().
//
// Parameter value: pointer to a size_t element of the searched vector.
//
// NOTE(review): the body is not visible in this view; presumably it returns
// the pointed-to value - confirm.
60 static inline size_t sizetExtractor(const size_t* value
)
// Translates a character offset into a zero-based (line, column) TextPosition.
//
// Parameter offset: character offset to translate.
// Parameter lineEndings: per-line boundary offsets. It is searched with
//   approximateBinarySearch, so it is presumably sorted ascending (as the
//   vector built by lineEndings() in this file would be) - TODO confirm that
//   every caller passes such a vector.
// Returns: TextPosition built from OrdinalNumber::fromZeroBasedInt() of the
//   computed line index and column.
//
// NOTE(review): lineEndings.at(0) is evaluated unconditionally, so an empty
// vector does not look supported - confirm callers never pass one.
65 TextPosition
textPositionFromOffset(size_t offset
, const Vector
<size_t>& lineEndings
)
67 const size_t* foundNextStart
= approximateBinarySearch
<size_t, size_t>(lineEndings
, lineEndings
.size(), offset
, sizetExtractor
);
// Pointer distance from the first element is the index of the found entry.
68 size_t lineIndex
= foundNextStart
- &lineEndings
.at(0);
// NOTE(review): the statement governed by this test is not visible in this
// view; presumably it advances lineIndex when offset has already reached the
// found boundary - confirm.
69 if (offset
>= *foundNextStart
)
// A line begins where the previous entry ends; the first line begins at 0.
71 size_t lineStartOffset
= lineIndex
> 0 ? lineEndings
.at(lineIndex
- 1) : 0;
72 size_t column
= offset
- lineStartOffset
;
73 return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex
), OrdinalNumber::fromZeroBasedInt(column
));
// Splits |text| into lines using lineEndings() and collects every line on
// which |regex| matches.
//
// Parameter regex: expression tested against each line, searching from
//   index 0 of that line.
// Parameter text: content to scan.
// Result entries: (lineNumber, line) pairs appended to |result|, where
//   lineNumber is the zero-based index into the endings vector and line is
//   the substring from |start| up to that entry.
//
// NOTE(review): |start| and |matchLength| are used below, but their
// declarations, the update of |start| between iterations, and the final
// return are not visible in this view. Presumably |start| begins at 0, is set
// to nextStart after each line, and |result| is returned - confirm against
// the complete file.
76 static Vector
<std::pair
<size_t, String
>> getRegularExpressionMatchesByLines(const JSC::Yarr::RegularExpression
& regex
, const String
& text
)
78 Vector
<std::pair
<size_t, String
>> result
;
82 std::unique_ptr
<Vector
<size_t>> endings(lineEndings(text
));
83 size_t size
= endings
->size();
86 for (size_t lineNumber
= 0; lineNumber
< size
; ++lineNumber
) {
87 size_t nextStart
= endings
->at(lineNumber
);
88 String line
= text
.substring(start
, nextStart
- start
);
// match() returning -1 means no match; any other value records the line.
91 if (regex
.match(line
, 0, &matchLength
) != -1)
92 result
.append(std::pair
<size_t, String
>(lineNumber
, line
));
// Computes the line-boundary offsets of |text|.
//
// Repeatedly asks text.findNextLineStart(start) for the next line start and
// appends each answer to a newly allocated Vector<size_t>. text.length() is
// appended after the loop as the closing entry.
//
// Parameter text: content whose line boundaries are wanted.
// Return type: std::unique_ptr owning the vector created with make_unique.
//
// NOTE(review): the declaration and update of |start|, the way the notFound
// branch leaves the loop, and the return statement are not visible in this
// view. If the notFound branch simply exits the loop, text.length() is stored
// twice (once in the branch, once after the loop) - confirm that this is
// intended.
100 std::unique_ptr
<Vector
<size_t>> lineEndings(const String
& text
)
102 auto result
= std::make_unique
<Vector
<size_t>>();
105 while (start
< text
.length()) {
106 size_t nextStart
= text
.findNextLineStart(start
);
// No further line start: record the end of the text instead.
107 if (nextStart
== notFound
) {
108 result
->append(text
.length());
112 result
->append(nextStart
);
116 result
->append(text
.length());
// Builds a protocol SearchMatch object for one matching line.
//
// Starts from the builder returned by SearchMatch::create() and sets the line
// number and the line content on it.
//
// Parameter lineNumber: value passed to setLineNumber().
// Parameter lineContent: value passed to setLineContent().
//
// NOTE(review): the end of the builder chain and its terminating semicolon
// are not visible in this view - confirm how the builder is finalized into
// the returned Ref.
121 static Ref
<Inspector::Protocol::GenericTypes::SearchMatch
> buildObjectForSearchMatch(size_t lineNumber
, const String
& lineContent
)
123 return Inspector::Protocol::GenericTypes::SearchMatch::create()
124 .setLineNumber(lineNumber
)
125 .setLineContent(lineContent
)
// Creates the RegularExpression used by the search helpers in this file.
//
// Parameter query: used verbatim as the regex source when |isRegex| is true;
//   otherwise it is first passed through createSearchRegexSource().
// Parameter caseSensitive: selects TextCaseSensitive when true and
//   TextCaseInsensitive when false.
// Parameter isRegex: whether |query| is already regular-expression source.
// Returns: a JSC::Yarr::RegularExpression constructed from the chosen source
//   and case-sensitivity flag.
129 JSC::Yarr::RegularExpression
createSearchRegex(const String
& query
, bool caseSensitive
, bool isRegex
)
131 String regexSource
= isRegex
? query
: createSearchRegexSource(query
);
132 return JSC::Yarr::RegularExpression(regexSource
, caseSensitive
? TextCaseSensitive
: TextCaseInsensitive
);
// Counts occurrences of |regex| in |content| by calling regex.match()
// repeatedly from an advancing |start| until it returns -1.
//
// Parameter regex: expression to search for.
// Parameter content: text to search; the empty case is tested first.
//
// After each hit the next search begins at position + 1, that is, one
// character past where the previous match started.
//
// NOTE(review): the declarations of |position|, |start| and |matchLength|,
// the statements governed by the two if-tests, the counter itself and the
// return are not visible in this view. Presumably empty content yields 0, the
// loop stops once |start| reaches content.length(), and only matches with a
// positive matchLength are counted - confirm against the complete file.
135 int countRegularExpressionMatches(const JSC::Yarr::RegularExpression
& regex
, const String
& content
)
137 if (content
.isEmpty())
144 while ((position
= regex
.match(content
, start
, &matchLength
)) != -1) {
145 if (start
>= content
.length())
149 start
= position
+ 1;
// Searches |text| line by line and packages every matching line as a protocol
// SearchMatch object.
//
// Parameter text: content to search.
// Parameter query: search string, interpreted according to |isRegex|.
// Parameter caseSensitive: forwarded to createSearchRegex().
// Parameter isRegex: forwarded to createSearchRegex().
//
// Items are added to the protocol array in the order in which
// getRegularExpressionMatchesByLines() produced them.
//
// NOTE(review): the return statement is not visible in this view; presumably
// |result| is returned - confirm.
154 Ref
<Inspector::Protocol::Array
<Inspector::Protocol::GenericTypes::SearchMatch
>> searchInTextByLines(const String
& text
, const String
& query
, const bool caseSensitive
, const bool isRegex
)
156 Ref
<Inspector::Protocol::Array
<Inspector::Protocol::GenericTypes::SearchMatch
>> result
= Inspector::Protocol::Array
<Inspector::Protocol::GenericTypes::SearchMatch
>::create();
158 JSC::Yarr::RegularExpression regex
= ContentSearchUtilities::createSearchRegex(query
, caseSensitive
, isRegex
);
159 Vector
<std::pair
<size_t, String
>> matches
= getRegularExpressionMatchesByLines(regex
, text
);
// match.first is the line number, match.second the line text.
161 for (const auto& match
: matches
) {
162 Ref
<Inspector::Protocol::GenericTypes::SearchMatch
> matchObject
= buildObjectForSearchMatch(match
.first
, match
.second
);
// The freshly built object is moved into the array rather than copied.
163 result
->addItem(WTF::move(matchObject
));
// Returns the regex source used to locate a script magic comment for |name|.
//
// The pattern is: two slashes, then a hash or at-sign, one space or tab, the
// name, an equals sign, optional spaces or tabs, a capture group holding a
// run of characters that are neither whitespace nor quote characters,
// optional trailing spaces or tabs, and a dollar anchor.
// \040 is the octal escape for the space character.
//
// Parameter name: directive name spliced into the pattern as-is.
//   NOTE(review): name is not regex-escaped here; the visible callers pass
//   fixed alphabetic literals.
169 static String
scriptCommentPattern(const String
& name
)
171 // "//# <name>=<value>" and deprecated "//@"
172 return "//[#@][\040\t]" + name
+ "=[\040\t]*([^\\s\'\"]*)[\040\t]*$";
// Returns the regex source used to locate a stylesheet magic comment for
// |name|.
//
// The pattern is: a block-comment opener, then a hash or at-sign, one space
// or tab, the name, an equals sign, optional spaces or tabs, a capture group
// holding a run of characters that are neither whitespace nor quote
// characters, optional trailing spaces or tabs, and a block-comment closer.
// \040 is the octal escape for the space character.
//
// Parameter name: directive name spliced into the pattern as-is.
//   NOTE(review): name is not regex-escaped here; the visible caller passes a
//   fixed alphabetic literal.
175 static String
stylesheetCommentPattern(const String
& name
)
177 // "/*# <name>=<value> */" and deprecated "/*@"
178 return "/\\*[#@][\040\t]" + name
+ "=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/";
// Compiles |patternString| with Yarr, runs it once over |content| starting at
// offset 0, and returns the substring described by output slots 2 and 3.
//
// Parameter content: text to search; asserted to be non-null.
// Parameter patternString: regex source; asserted to contain exactly one
//   subpattern.
// Returns on a match: content.substring(matches[2], matches[3] - matches[2]),
//   so slots 2 and 3 are treated as a start and an end offset.
//
// NOTE(review): the two boolean YarrPattern arguments are presumably
// ignoreCase = false and multiline = true - confirm against the YarrPattern
// constructor.
// NOTE(review): the allocator argument on the byteCompile line appears to be
// a mis-encoded ampersand followed by regexAllocator; it should be the
// address of the allocator declared just above it.
// NOTE(review): no sizing of |matches| is visible before matches.data() is
// handed to interpret() and slots 2 and 3 are read; the vector must hold four
// elements at that point - confirm it is resized in the complete file.
// NOTE(review): the statement taken when interpret() reports offsetNoMatch is
// not visible; presumably a null String is returned - confirm.
181 static String
findMagicComment(const String
& content
, const String
& patternString
)
183 ASSERT(!content
.isNull());
184 const char* error
= nullptr;
185 JSC::Yarr::YarrPattern
pattern(patternString
, false, true, &error
);
187 BumpPointerAllocator regexAllocator
;
188 auto bytecodePattern
= JSC::Yarr::byteCompile(pattern
, ®exAllocator
);
189 ASSERT(bytecodePattern
);
191 ASSERT(pattern
.m_numSubpatterns
== 1);
192 Vector
<int, 4> matches
;
// The int storage is reinterpreted as unsigned for Yarr's output vector.
194 unsigned result
= JSC::Yarr::interpret(bytecodePattern
.get(), content
, 0, reinterpret_cast<unsigned*>(matches
.data()));
195 if (result
== JSC::Yarr::offsetNoMatch
)
// Both offsets are asserted strictly positive before being used.
198 ASSERT(matches
[2] > 0 && matches
[3] > 0);
199 return content
.substring(matches
[2], matches
[3] - matches
[2]);
// Looks for a sourceURL magic comment in script |content|.
//
// Parameter content: script text to search.
// Returns: whatever findMagicComment() yields for the script comment pattern
//   built with the name sourceURL.
202 String
findScriptSourceURL(const String
& content
)
204 return findMagicComment(content
, scriptCommentPattern(ASCIILiteral("sourceURL")));
// Looks for a sourceMappingURL magic comment in script |content|.
//
// Parameter content: script text to search.
// Returns: whatever findMagicComment() yields for the script comment pattern
//   built with the name sourceMappingURL.
207 String
findScriptSourceMapURL(const String
& content
)
209 return findMagicComment(content
, scriptCommentPattern(ASCIILiteral("sourceMappingURL")));
// Looks for a sourceMappingURL magic comment in stylesheet |content|.
//
// Parameter content: stylesheet text to search.
// Returns: whatever findMagicComment() yields for the stylesheet comment
//   pattern built with the name sourceMappingURL.
212 String
findStylesheetSourceMapURL(const String
& content
)
214 return findMagicComment(content
, stylesheetCommentPattern(ASCIILiteral("sourceMappingURL")));
217 } // namespace ContentSearchUtilities
218 } // namespace Inspector