]>
Commit | Line | Data |
---|---|---|
81345200 A |
1 | /* |
2 | * Copyright (C) 2011 Google Inc. All rights reserved. | |
3 | * | |
4 | * Redistribution and use in source and binary forms, with or without | |
5 | * modification, are permitted provided that the following conditions are | |
6 | * met: | |
7 | * | |
8 | * 1. Redistributions of source code must retain the above copyright | |
9 | * notice, this list of conditions and the following disclaimer. | |
10 | * | |
11 | * 2. Redistributions in binary form must reproduce the above | |
12 | * copyright notice, this list of conditions and the following disclaimer | |
13 | * in the documentation and/or other materials provided with the | |
14 | * distribution. | |
15 | * | |
16 | * THIS SOFTWARE IS PROVIDED BY GOOGLE INC. AND ITS CONTRIBUTORS | |
17 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GOOGLE INC. | |
20 | * OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | */ | |
28 | ||
29 | #include "config.h" | |
30 | #include "ContentSearchUtilities.h" | |
31 | ||
81345200 A |
32 | #include "InspectorValues.h" |
33 | #include "RegularExpression.h" | |
34 | #include "Yarr.h" | |
35 | #include <wtf/BumpPointerAllocator.h> | |
36 | #include <wtf/StdLibExtras.h> | |
37 | #include <wtf/text/StringBuilder.h> | |
38 | ||
39 | using namespace JSC::Yarr; | |
40 | ||
41 | namespace Inspector { | |
42 | namespace ContentSearchUtilities { | |
43 | ||
44 | static const char regexSpecialCharacters[] = "[](){}+-*.,?\\^$|"; | |
45 | ||
46 | static String createSearchRegexSource(const String& text) | |
47 | { | |
48 | StringBuilder result; | |
49 | ||
50 | for (unsigned i = 0; i < text.length(); i++) { | |
51 | UChar character = text[i]; | |
52 | if (isASCII(character) && strchr(regexSpecialCharacters, character)) | |
53 | result.append('\\'); | |
54 | result.append(character); | |
55 | } | |
56 | ||
57 | return result.toString(); | |
58 | } | |
59 | ||
// Key extractor handed to approximateBinarySearch: yields the size_t that |value| points at.
static inline size_t sizetExtractor(const size_t* value)
{
    return value[0];
}
64 | ||
65 | TextPosition textPositionFromOffset(size_t offset, const Vector<size_t>& lineEndings) | |
66 | { | |
67 | const size_t* foundNextStart = approximateBinarySearch<size_t, size_t>(lineEndings, lineEndings.size(), offset, sizetExtractor); | |
68 | size_t lineIndex = foundNextStart - &lineEndings.at(0); | |
69 | if (offset >= *foundNextStart) | |
70 | ++lineIndex; | |
71 | size_t lineStartOffset = lineIndex > 0 ? lineEndings.at(lineIndex - 1) : 0; | |
72 | size_t column = offset - lineStartOffset; | |
73 | return TextPosition(OrdinalNumber::fromZeroBasedInt(lineIndex), OrdinalNumber::fromZeroBasedInt(column)); | |
74 | } | |
75 | ||
76 | static Vector<std::pair<size_t, String>> getRegularExpressionMatchesByLines(const JSC::Yarr::RegularExpression& regex, const String& text) | |
77 | { | |
78 | Vector<std::pair<size_t, String>> result; | |
79 | if (text.isEmpty()) | |
80 | return result; | |
81 | ||
82 | std::unique_ptr<Vector<size_t>> endings(lineEndings(text)); | |
83 | size_t size = endings->size(); | |
84 | size_t start = 0; | |
85 | ||
86 | for (size_t lineNumber = 0; lineNumber < size; ++lineNumber) { | |
87 | size_t nextStart = endings->at(lineNumber); | |
88 | String line = text.substring(start, nextStart - start); | |
89 | ||
90 | int matchLength; | |
91 | if (regex.match(line, 0, &matchLength) != -1) | |
92 | result.append(std::pair<size_t, String>(lineNumber, line)); | |
93 | ||
94 | start = nextStart; | |
95 | } | |
96 | ||
97 | return result; | |
98 | } | |
99 | ||
100 | std::unique_ptr<Vector<size_t>> lineEndings(const String& text) | |
101 | { | |
102 | auto result = std::make_unique<Vector<size_t>>(); | |
103 | ||
104 | size_t start = 0; | |
105 | while (start < text.length()) { | |
106 | size_t nextStart = text.findNextLineStart(start); | |
107 | if (nextStart == notFound) { | |
108 | result->append(text.length()); | |
109 | break; | |
110 | } | |
111 | ||
112 | result->append(nextStart); | |
113 | start = nextStart; | |
114 | } | |
115 | ||
116 | result->append(text.length()); | |
117 | ||
118 | return result; | |
119 | } | |
120 | ||
ed1e77d3 | 121 | static Ref<Inspector::Protocol::GenericTypes::SearchMatch> buildObjectForSearchMatch(size_t lineNumber, const String& lineContent) |
81345200 | 122 | { |
ed1e77d3 | 123 | return Inspector::Protocol::GenericTypes::SearchMatch::create() |
81345200 A |
124 | .setLineNumber(lineNumber) |
125 | .setLineContent(lineContent) | |
126 | .release(); | |
127 | } | |
128 | ||
129 | JSC::Yarr::RegularExpression createSearchRegex(const String& query, bool caseSensitive, bool isRegex) | |
130 | { | |
131 | String regexSource = isRegex ? query : createSearchRegexSource(query); | |
132 | return JSC::Yarr::RegularExpression(regexSource, caseSensitive ? TextCaseSensitive : TextCaseInsensitive); | |
133 | } | |
134 | ||
135 | int countRegularExpressionMatches(const JSC::Yarr::RegularExpression& regex, const String& content) | |
136 | { | |
137 | if (content.isEmpty()) | |
138 | return 0; | |
139 | ||
140 | int result = 0; | |
141 | int position; | |
142 | unsigned start = 0; | |
143 | int matchLength; | |
144 | while ((position = regex.match(content, start, &matchLength)) != -1) { | |
145 | if (start >= content.length()) | |
146 | break; | |
147 | if (matchLength > 0) | |
148 | ++result; | |
149 | start = position + 1; | |
150 | } | |
151 | return result; | |
152 | } | |
153 | ||
ed1e77d3 | 154 | Ref<Inspector::Protocol::Array<Inspector::Protocol::GenericTypes::SearchMatch>> searchInTextByLines(const String& text, const String& query, const bool caseSensitive, const bool isRegex) |
81345200 | 155 | { |
ed1e77d3 | 156 | Ref<Inspector::Protocol::Array<Inspector::Protocol::GenericTypes::SearchMatch>> result = Inspector::Protocol::Array<Inspector::Protocol::GenericTypes::SearchMatch>::create(); |
81345200 A |
157 | |
158 | JSC::Yarr::RegularExpression regex = ContentSearchUtilities::createSearchRegex(query, caseSensitive, isRegex); | |
159 | Vector<std::pair<size_t, String>> matches = getRegularExpressionMatchesByLines(regex, text); | |
160 | ||
ed1e77d3 A |
161 | for (const auto& match : matches) { |
162 | Ref<Inspector::Protocol::GenericTypes::SearchMatch> matchObject = buildObjectForSearchMatch(match.first, match.second); | |
163 | result->addItem(WTF::move(matchObject)); | |
164 | } | |
81345200 A |
165 | |
166 | return result; | |
167 | } | |
168 | ||
169 | static String scriptCommentPattern(const String& name) | |
170 | { | |
171 | // "//# <name>=<value>" and deprecated "//@" | |
172 | return "//[#@][\040\t]" + name + "=[\040\t]*([^\\s\'\"]*)[\040\t]*$"; | |
173 | } | |
174 | ||
175 | static String stylesheetCommentPattern(const String& name) | |
176 | { | |
177 | // "/*# <name>=<value> */" and deprecated "/*@" | |
178 | return "/\\*[#@][\040\t]" + name + "=[\040\t]*([^\\s\'\"]*)[\040\t]*\\*/"; | |
179 | } | |
180 | ||
181 | static String findMagicComment(const String& content, const String& patternString) | |
182 | { | |
ed1e77d3 | 183 | ASSERT(!content.isNull()); |
81345200 A |
184 | const char* error = nullptr; |
185 | JSC::Yarr::YarrPattern pattern(patternString, false, true, &error); | |
186 | ASSERT(!error); | |
187 | BumpPointerAllocator regexAllocator; | |
ed1e77d3 | 188 | auto bytecodePattern = JSC::Yarr::byteCompile(pattern, ®exAllocator); |
81345200 A |
189 | ASSERT(bytecodePattern); |
190 | ||
191 | ASSERT(pattern.m_numSubpatterns == 1); | |
192 | Vector<int, 4> matches; | |
193 | matches.resize(4); | |
194 | unsigned result = JSC::Yarr::interpret(bytecodePattern.get(), content, 0, reinterpret_cast<unsigned*>(matches.data())); | |
195 | if (result == JSC::Yarr::offsetNoMatch) | |
196 | return String(); | |
197 | ||
198 | ASSERT(matches[2] > 0 && matches[3] > 0); | |
199 | return content.substring(matches[2], matches[3] - matches[2]); | |
200 | } | |
201 | ||
202 | String findScriptSourceURL(const String& content) | |
203 | { | |
204 | return findMagicComment(content, scriptCommentPattern(ASCIILiteral("sourceURL"))); | |
205 | } | |
206 | ||
207 | String findScriptSourceMapURL(const String& content) | |
208 | { | |
209 | return findMagicComment(content, scriptCommentPattern(ASCIILiteral("sourceMappingURL"))); | |
210 | } | |
211 | ||
212 | String findStylesheetSourceMapURL(const String& content) | |
213 | { | |
214 | return findMagicComment(content, stylesheetCommentPattern(ASCIILiteral("sourceMappingURL"))); | |
215 | } | |
216 | ||
217 | } // namespace ContentSearchUtilities | |
218 | } // namespace Inspector |