2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include "ParserArena.h"
28 #include "ParserTokens.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/SegmentedVector.h>
32 #include <wtf/Vector.h>
// Reports whether `ident` has an entry in the keyword table: the result of
// m_keywordTable.entry(ident) is returned through the bool return type.
38 bool isKeyword(const Identifier
& ident
) const
40 return m_keywordTable
.entry(ident
);
// Looks `ident` up in the keyword table and returns the table entry
// (m_keywordTable.entry(ident)) as a const HashTableValue*.
// NOTE(review): presumably null when `ident` is not a keyword - confirm
// against HashTable::entry.
43 const HashTableValue
* getKeyword(const Identifier
& ident
) const
45 return m_keywordTable
.entry(ident
);
// Constructor taking a VM reference; declared here, defined elsewhere.
// `explicit` rules out implicit conversion from VM& to Keywords.
48 explicit Keywords(VM
&);
// Tears the keyword table down via deleteTable().
// NOTE(review): the header of the enclosing function is not visible in this
// excerpt - presumably the destructor; confirm.
52 m_keywordTable
.deleteTable();
// Lookup table consulted by isKeyword() and getKeyword().
59 const HashTable m_keywordTable
;
// Lexer option flags. The values are distinct powers of two (1, 2, 4), and
// lexExpectIdentifier tests them against its `lexerFlags` argument with `&`.
// NOTE(review): the enclosing enum header is not visible in this excerpt.
63 LexerFlagsIgnoreReservedWords
= 1,
64 LexerFlagsDontBuildStrings
= 2,
// NOTE(review): "Lexex" looks like a typo for "Lexer". The name is used under
// this spelling by lexExpectIdentifier, so a rename must update every user.
65 LexexFlagsDontBuildKeywords
= 4
// Forward declaration; used as the return type of parseUnicodeEscape().
68 struct ParsedUnicodeEscapeValue
;
// WTF class macros applied to Lexer.
// NOTE(review): judging by their names they disable copying and select WTF's
// fast allocator - confirm against the WTF headers.
72 WTF_MAKE_NONCOPYABLE(Lexer
);
73 WTF_MAKE_FAST_ALLOCATED
;
// Constructor taking a VM pointer and a JSParserBuiltinMode; defined elsewhere.
76 Lexer(VM
*, JSParserBuiltinMode
);
79 // Character manipulation functions.
// Static predicates over the lexer's character type T; explicit
// specializations for LChar and UChar appear later in this header.
80 static bool isWhiteSpace(T character
);
81 static bool isLineTerminator(T character
);
// Combines two hex-digit characters into one byte; c1 is the high nibble.
82 static unsigned char convertHex(int c1
, int c2
);
// Combines four hex-digit characters into one UChar; c1/c2 form the high
// byte and c3/c4 the low byte.
83 static UChar
convertUnicode(int c1
, int c2
, int c3
, int c4
);
85 // Functions to set up parsing.
// Declared here, defined elsewhere. Takes the SourceCode to lex and a
// ParserArena pointer.
86 void setCode(const SourceCode
&, ParserArena
*);
// Sets m_isReparsing to true; this setter offers no way to clear it again.
87 void setIsReparsing() { m_isReparsing
= true; }
// Returns the current value of m_isReparsing.
88 bool isReparsing() const { return m_isReparsing
; }
// setTokenPosition is only declared when ES6 arrow-function syntax is enabled.
// NOTE(review): the matching #endif is not visible in this excerpt.
90 #if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
91 void setTokenPosition(JSToken
* tokenRecord
);
// General tokenizing entry point; declared here, defined elsewhere.
// lexExpectIdentifier forwards its `lexerFlags` bitmask in the unnamed
// unsigned parameter position.
93 JSTokenType
lex(JSToken
*, unsigned, bool strictMode
);
// Declared here, defined elsewhere.
94 bool nextTokenIsColon();
// Returns the line number stored in m_lineNumber.
95 int lineNumber() const { return m_lineNumber
; }
// Offset of the read cursor (m_code), measured from m_codeStart.
96 ALWAYS_INLINE
int currentOffset() const { return offsetFromSourcePtr(m_code
); }
// Offset of the current line's start (m_lineStart), measured from m_codeStart.
97 ALWAYS_INLINE
int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart
); }
// Bundles the line number, the current offset and the line-start offset into
// a JSTextPosition.
98 ALWAYS_INLINE JSTextPosition
currentPosition() const
100 return JSTextPosition(m_lineNumber
, currentOffset(), currentLineStartOffset());
// Plain accessors for lexer state; each returns or stores one member.
102 JSTextPosition
positionBeforeLastNewline() const { return m_positionBeforeLastNewline
; }
// NOTE(review): the backing member is spelled m_lastTockenLocation ("Tocken"),
// an apparent typo. It is left alone because the member is declared under
// that name.
103 JSTokenLocation
lastTokenLocation() const { return m_lastTockenLocation
; }
104 void setLastLineNumber(int lastLineNumber
) { m_lastLineNumber
= lastLineNumber
; }
105 int lastLineNumber() const { return m_lastLineNumber
; }
// Returns m_terminator, the flag written by setTerminator().
// NOTE(review): presumably "a line terminator preceded the current token" -
// confirm where m_terminator is set.
106 bool prevTerminator() const { return m_terminator
; }
// Declared here, defined elsewhere. `pattern` and `flags` are non-const
// references to pointers, so they are presumably output parameters - confirm
// against the definition. `patternPrefix` defaults to 0.
107 bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
= 0);
// Template-literal support, compiled only when ES6 template literal syntax is
// enabled. NOTE(review): the matching #endif is not visible in this excerpt.
108 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
// Selector passed to the template-literal scanning functions.
109 enum class RawStringsBuildMode
{ BuildRawStrings
, DontBuildRawStrings
};
110 JSTokenType
scanTrailingTemplateString(JSToken
*, RawStringsBuildMode
);
114 // Functions for use after parsing.
// Returns the m_error flag.
115 bool sawError() const { return m_error
; }
// Returns m_lexErrorMessage by value. setOffset() resets that member to an
// empty String.
116 String
getErrorMessage() const { return m_lexErrorMessage
; }
// Repositions the lexer at `offset`, with the current line starting at
// `lineStartOffset`; both are offsets relative to m_codeStart.
// NOTE(review): several statements of this function are missing from this
// excerpt; the comments below describe only the visible ones.
118 void setOffset(int offset
, int lineStartOffset
)
// Drop any previously recorded error message.
121 m_lexErrorMessage
= String();
// Turn both offsets back into pointers into the source.
123 m_code
= sourcePtrFromOffset(offset
);
124 m_lineStart
= sourcePtrFromOffset(lineStartOffset
);
// The read cursor must not sit before the start of its own line.
125 ASSERT(currentOffset() >= currentLineStartOffset());
// Empty the 16-bit character buffer.
128 m_buffer16
.resize(0);
// Branch taken when the cursor is still before m_codeEnd (marked LIKELY).
// The guarded statements are not visible in this excerpt.
129 if (LIKELY(m_code
< m_codeEnd
))
// Takes a line number; the body is not visible in this excerpt.
134 void setLineNumber(int line
)
// Stores `terminator` in m_terminator, the value reported by prevTerminator().
138 void setTerminator(bool terminator
)
140 m_terminator
= terminator
;
// Returns m_source->provider(). m_source is dereferenced without a check, so
// it must be non-null when this is called.
143 SourceProvider
* sourceProvider() const { return m_source
->provider(); }
// Declaration of the identifier fast path; the definition appears later in
// this header.
145 JSTokenType
lexExpectIdentifier(JSToken
*, unsigned, bool strictMode
);
// Buffer helpers. append8, recordUnicodeCodePoint and the LChar overload of
// append16 are only declared here.
149 void append8(const T
*, size_t);
152 void recordUnicodeCodePoint(UChar32
);
153 void append16(const LChar
*, size_t);
// UChar overload, defined inline: appends `length` characters starting at
// `characters` to m_buffer16.
154 void append16(const UChar
* characters
, size_t length
) { m_buffer16
.append(characters
, length
); }
// Low-level scanning helpers; all are declared here and defined elsewhere.
156 ALWAYS_INLINE
void shift();
157 ALWAYS_INLINE
bool atEnd() const;
158 ALWAYS_INLINE T
peek(int offset
) const;
// Returns the forward-declared ParsedUnicodeEscapeValue type.
160 ParsedUnicodeEscapeValue
parseUnicodeEscape();
161 void shiftLineTerminator();
// Converts a pointer into the source to an offset from m_codeStart. The
// pointer difference is returned through an int return type.
163 ALWAYS_INLINE
int offsetFromSourcePtr(const T
* ptr
) const { return ptr
- m_codeStart
; }
// Inverse of offsetFromSourcePtr: the pointer `offset` characters past
// m_codeStart. No bounds check is performed here.
164 ALWAYS_INLINE
const T
* sourcePtrFromOffset(int offset
) const { return m_codeStart
+ offset
; }
// Declared here, defined elsewhere.
166 String
invalidCharacterMessage() const;
167 ALWAYS_INLINE
const T
* currentSourcePtr() const;
// Convenience wrapper: converts `sourcePtr` to an offset and forwards to
// setOffset(). `lineStartOffset` is unsigned here while setOffset() takes an
// int, so the value is converted at the call.
168 ALWAYS_INLINE
void setOffsetFromSourcePtr(const T
* sourcePtr
, unsigned lineStartOffset
) { setOffset(offsetFromSourcePtr(sourcePtr
), lineStartOffset
); }
// Declared here; explicit specializations for LChar and UChar appear later in
// this header.
170 ALWAYS_INLINE
void setCodeStart(const StringImpl
*);
// Identifier factories. Each returns a pointer to an Identifier built from
// `length` characters at `characters`; the definitions later in this header
// all delegate to m_arena.
172 ALWAYS_INLINE
const Identifier
* makeIdentifier(const LChar
* characters
, size_t length
);
173 ALWAYS_INLINE
const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
);
// The UChar overload of makeLCharIdentifier is defined to go through the
// arena's makeIdentifierLCharFromUChar.
174 ALWAYS_INLINE
const Identifier
* makeLCharIdentifier(const LChar
* characters
, size_t length
);
175 ALWAYS_INLINE
const Identifier
* makeLCharIdentifier(const UChar
* characters
, size_t length
);
// `orAllChars` is tested against ~0xff by the UChar specialization to decide
// whether the LChar-from-UChar path can be used.
176 ALWAYS_INLINE
const Identifier
* makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
);
177 ALWAYS_INLINE
const Identifier
* makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
);
178 ALWAYS_INLINE
const Identifier
* makeEmptyIdentifier();
// Declared here, defined elsewhere.
180 ALWAYS_INLINE
bool lastTokenWasRestrKeyword() const;
// Templated scanning helpers, declared here and defined elsewhere. The
// non-type template parameters are compile-time switches.
182 template <int shiftAmount
> void internalShift();
183 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType
parseKeyword(JSTokenData
*);
// parseIdentifier is marked ALWAYS_INLINE while parseIdentifierSlowCase is
// marked NEVER_INLINE. Both take the same arguments.
184 template <bool shouldBuildIdentifiers
> ALWAYS_INLINE JSTokenType
parseIdentifier(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
185 template <bool shouldBuildIdentifiers
> NEVER_INLINE JSTokenType
parseIdentifierSlowCase(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
// Result code returned by the string-parsing helpers below.
// NOTE(review): only the first enumerator is visible in this excerpt; the
// remaining enumerators and the closing brace are missing.
186 enum StringParseResult
{
187 StringParsedSuccessfully
,
// parseString is marked ALWAYS_INLINE while parseStringSlowCase is marked
// NEVER_INLINE. Both take the same arguments.
191 template <bool shouldBuildStrings
> ALWAYS_INLINE StringParseResult
parseString(JSTokenData
*, bool strictMode
);
192 template <bool shouldBuildStrings
> NEVER_INLINE StringParseResult
parseStringSlowCase(JSTokenData
*, bool strictMode
);
// Selector passed to parseComplexEscape: Template or String.
194 enum class EscapeParseMode
{ Template
, String
};
195 template <bool shouldBuildStrings
> ALWAYS_INLINE StringParseResult
parseComplexEscape(EscapeParseMode
, bool strictMode
, T stringQuoteCharacter
);
// Only declared when ES6 template literal syntax is enabled.
// NOTE(review): the matching #endif is not visible in this excerpt.
196 #if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
197 template <bool shouldBuildStrings
> ALWAYS_INLINE StringParseResult
parseTemplateLiteral(JSTokenData
*, RawStringsBuildMode
);
// Numeric-literal and comment scanners; declared here, defined elsewhere.
// The parse* functions that take `double& returnValue` receive a non-const
// reference. parseHex returns void, whereas parseBinary, parseOctal and
// parseDecimal return bool.
199 ALWAYS_INLINE
void parseHex(double& returnValue
);
200 ALWAYS_INLINE
bool parseBinary(double& returnValue
);
201 ALWAYS_INLINE
bool parseOctal(double& returnValue
);
202 ALWAYS_INLINE
bool parseDecimal(double& returnValue
);
203 ALWAYS_INLINE
void parseNumberAfterDecimalPoint();
204 ALWAYS_INLINE
bool parseNumberAfterExponentIndicator();
205 ALWAYS_INLINE
bool parseMultilineComment();
// Lexer data members.
// NOTE(review): this list is incomplete in this excerpt. Members used
// elsewhere in the header (e.g. m_code, m_codeEnd, m_lineNumber, m_vm) have
// no visible declaration here.
207 static const size_t initialReadBufferCapacity
= 32;
// Accessed through setLastLineNumber() and lastLineNumber().
210 int m_lastLineNumber
;
// Character buffers: one of LChar and two of UChar. m_buffer16 is what
// append16() appends to and setOffset() empties.
212 Vector
<LChar
> m_buffer8
;
213 Vector
<UChar
> m_buffer16
;
214 Vector
<UChar
> m_bufferForRawTemplateString16
;
// The source being lexed; sourceProvider() dereferences this pointer.
218 const SourceCode
* m_source
;
219 unsigned m_sourceOffset
;
// Base pointer for every offset calculation (offsetFromSourcePtr and
// sourcePtrFromOffset); set by setCodeStart().
221 const T
* m_codeStart
;
223 const T
* m_codeStartPlusOffset
;
// Start of the current line; read by currentLineStartOffset().
224 const T
* m_lineStart
;
225 JSTextPosition m_positionBeforeLastNewline
;
// NOTE(review): "Tocken" is an apparent typo for "Token"; the name is also
// used by lastTokenLocation().
226 JSTokenLocation m_lastTockenLocation
;
// Returned by getErrorMessage(); cleared by setOffset().
230 String m_lexErrorMessage
;
// Arena that every make*Identifier helper delegates to.
234 IdentifierArena
* m_arena
;
// Read by lexExpectIdentifier when deciding whether to build and vet
// identifiers.
237 bool m_parsingBuiltinFunction
;
// 8-bit whitespace test: true for space, horizontal tab, vertical tab (0xB),
// form feed (0xC) and no-break space (0xA0).
241 ALWAYS_INLINE
bool Lexer
<LChar
>::isWhiteSpace(LChar ch
)
243 return ch
== ' ' || ch
== '\t' || ch
== 0xB || ch
== 0xC || ch
== 0xA0;
// 16-bit whitespace test. Code units below 256 defer to the LChar version.
// Anything else counts as whitespace when u_charType() reports
// U_SPACE_SEPARATOR, or when it is U+180E or U+FEFF.
247 ALWAYS_INLINE
bool Lexer
<UChar
>::isWhiteSpace(UChar ch
)
249 // 0x180E was in the Zs category before Unicode 6.3, and ECMAScript says that we should keep treating it as such.
250 return (ch
< 256) ? Lexer
<LChar
>::isWhiteSpace(static_cast<LChar
>(ch
)) : (u_charType(ch
) == U_SPACE_SEPARATOR
|| ch
== 0x180E || ch
== 0xFEFF);
// 8-bit line-terminator test: carriage return or line feed.
254 ALWAYS_INLINE
bool Lexer
<LChar
>::isLineTerminator(LChar ch
)
256 return ch
== '\r' || ch
== '\n';
// 16-bit line-terminator test: carriage return, line feed, U+2028 or U+2029.
// Masking off the lowest bit lets one comparison cover both 0x2028 and
// 0x2029, which differ only in that bit.
260 ALWAYS_INLINE
bool Lexer
<UChar
>::isLineTerminator(UChar ch
)
262 return ch
== '\r' || ch
== '\n' || (ch
& ~1) == 0x2028;
// Builds one byte from two hex-digit characters: toASCIIHexValue(c1) becomes
// the high nibble and toASCIIHexValue(c2) the low nibble. No validation is
// done here.
// NOTE(review): presumably callers guarantee both are hex digits - confirm.
265 template <typename T
>
266 inline unsigned char Lexer
<T
>::convertHex(int c1
, int c2
)
268 return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
);
// Builds one UChar from four hex-digit characters: convertHex(c1, c2) is
// shifted into the high byte and convertHex(c3, c4) fills the low byte.
271 template <typename T
>
272 inline UChar Lexer
<T
>::convertUnicode(int c1
, int c2
, int c3
, int c4
)
274 return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
);
// Creates an Identifier from `length` LChar characters by delegating to
// m_arena->makeIdentifier and returns the address of the result. m_arena is
// dereferenced without a null check.
// NOTE(review): presumably the arena owns the Identifier - confirm against
// IdentifierArena.
277 template <typename T
>
278 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const LChar
* characters
, size_t length
)
280 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
// UChar overload: creates an Identifier from `length` UChar characters by
// delegating to m_arena->makeIdentifier and returns the address of the
// result. m_arena is dereferenced without a null check.
283 template <typename T
>
284 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const UChar
* characters
, size_t length
)
286 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
// LChar-lexer specialization: the third parameter is unnamed and ignored, and
// the identifier is always built through the arena's
// makeIdentifierLCharFromUChar.
290 ALWAYS_INLINE
const Identifier
* Lexer
<LChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar
)
292 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
// UChar-lexer specialization: picks the identifier representation from
// `orAllChars`.
// NOTE(review): by its name, `orAllChars` is presumably the bitwise OR of
// every character in the identifier - confirm at the call sites.
296 ALWAYS_INLINE
const Identifier
* Lexer
<UChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
)
// No bit above the low byte is set, so take the LChar-from-UChar path.
298 if (!(orAllChars
& ~0xff))
299 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
// Otherwise build the identifier from the UChar characters directly.
301 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
// Returns the address of the arena's empty identifier
// (m_arena->makeEmptyIdentifier).
304 template <typename T
>
305 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeEmptyIdentifier()
307 return &m_arena
->makeEmptyIdentifier(m_vm
);
// LChar-lexer specialization: points m_codeStart at the 8-bit characters of
// `sourceString`. The ASSERT checks that the string is stored as 8-bit.
311 ALWAYS_INLINE
void Lexer
<LChar
>::setCodeStart(const StringImpl
* sourceString
)
313 ASSERT(sourceString
->is8Bit());
314 m_codeStart
= sourceString
->characters8();
// UChar-lexer specialization: points m_codeStart at the 16-bit characters of
// `sourceString`. The ASSERT checks that the string is not stored as 8-bit.
318 ALWAYS_INLINE
void Lexer
<UChar
>::setCodeStart(const StringImpl
* sourceString
)
320 ASSERT(!sourceString
->is8Bit());
321 m_codeStart
= sourceString
->characters16();
// Thin wrapper: forwards to m_arena->makeIdentifierLCharFromUChar and returns
// the address of the resulting Identifier.
324 template <typename T
>
325 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
)
327 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
// LChar input: the characters are already 8-bit, so this forwards straight to
// m_arena->makeIdentifier.
330 template <typename T
>
331 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const LChar
* characters
, size_t length
)
333 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
// UChar input: forwards to m_arena->makeIdentifierLCharFromUChar.
// NOTE(review): presumably every character must fit in 8 bits.
// lexExpectIdentifier calls this after scanning only ASCII alphanumerics;
// confirm for other callers.
336 template <typename T
>
337 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const UChar
* characters
, size_t length
)
339 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
// Two forms of the same predicate, consulted by lexExpectIdentifier while
// m_parsingBuiltinFunction is set.
// NOTE(review): presumably a preprocessor conditional, not visible in this
// excerpt, selects between them - confirm.
// Inline stub: ignores both arguments and accepts every identifier.
343 ALWAYS_INLINE
bool isSafeBuiltinIdentifier(VM
&, const Identifier
*) { return true; }
// Out-of-line form: declaration only; the definition lives elsewhere.
345 bool isSafeBuiltinIdentifier(VM
&, const Identifier
*);
// Identifier fast path: scans a run of ASCII letters and digits starting at
// the cursor, fills in `tokenRecord`, and otherwise defers to lex().
// NOTE(review): many statements of this function (pointer advances, loop
// headers, branch targets, the success return) are missing from this excerpt.
// The comments below describe only the visible lines.
348 template <typename T
>
349 ALWAYS_INLINE JSTokenType Lexer
<T
>::lexExpectIdentifier(JSToken
* tokenRecord
, unsigned lexerFlags
, bool strictMode
)
// Shorthand pointers to the two parts of the token that get filled in.
351 JSTokenData
* tokenData
= &tokenRecord
->m_data
;
352 JSTokenLocation
* tokenLocation
= &tokenRecord
->m_location
;
// Callers must pass LexerFlagsIgnoreReservedWords.
353 ASSERT((lexerFlags
& LexerFlagsIgnoreReservedWords
));
// Scan window: `start` stays at the cursor and `end` is the end of the
// source. NOTE(review): the statements that advance `ptr` are not visible in
// this excerpt.
354 const T
* start
= m_code
;
355 const T
* ptr
= start
;
356 const T
* end
= m_codeEnd
;
// Remember where the token begins before anything moves.
357 JSTextPosition startPosition
= currentPosition();
// Test for a leading character that is not an ASCII letter; the consequence
// is not visible in this excerpt.
362 if (!WTF::isASCIIAlpha(*ptr
))
// Test for a character that is not an ASCII letter or digit; the consequence
// is not visible in this excerpt.
366 if (!WTF::isASCIIAlphanumeric(*ptr
))
// Test for a non-ASCII character, a backslash, '_' or '$'; the consequence is
// not visible in this excerpt.
// NOTE(review): these are presumably characters that could still continue an
// identifier and so force the general lexer - confirm.
373 if ((!WTF::isASCII(*ptr
)) || (*ptr
== '\\') || (*ptr
== '_') || (*ptr
== '$'))
// The cursor must not sit before the start of its own line.
380 ASSERT(currentOffset() >= currentLineStartOffset());
382 // Create the identifier if needed
// With the don't-build flag set and no builtin function being parsed, the
// identifier is left null; the other visible assignment builds it from the
// characters in [start, ptr).
// NOTE(review): the rest of this condition and the `else` separating the two
// assignments are not visible in this excerpt.
383 if (lexerFlags
& LexexFlagsDontBuildKeywords
385 && !m_parsingBuiltinFunction
388 tokenData
->ident
= 0;
390 tokenData
->ident
= makeLCharIdentifier(start
, ptr
- start
);
// Record where the token sits: line, line-start offset, and start/end
// offsets relative to the start of the code.
392 tokenLocation
->line
= m_lineNumber
;
393 tokenLocation
->lineStartOffset
= currentLineStartOffset();
394 tokenLocation
->startOffset
= offsetFromSourcePtr(start
);
395 tokenLocation
->endOffset
= currentOffset();
// A token cannot start before the line it is on.
396 ASSERT(tokenLocation
->startOffset
>= tokenLocation
->lineStartOffset
);
397 tokenRecord
->m_startPosition
= startPosition
;
398 tokenRecord
->m_endPosition
= currentPosition();
// While parsing a builtin function the identifier is vetted with
// isSafeBuiltinIdentifier; what happens on rejection is not visible in this
// excerpt.
400 if (m_parsingBuiltinFunction
) {
401 if (!isSafeBuiltinIdentifier(*m_vm
, tokenData
->ident
))
// Defer to the general lexer with the same arguments.
// NOTE(review): presumably reached only from the failed checks above via a
// label that is not visible in this excerpt - confirm.
410 return lex(tokenRecord
, lexerFlags
, strictMode
);