2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include "ParserArena.h"
28 #include "ParserTokens.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/SegmentedVector.h>
32 #include <wtf/Vector.h>
33 #include <wtf/unicode/Unicode.h>
39 bool isKeyword(const Identifier
& ident
) const
41 return m_keywordTable
.entry(m_vm
, ident
);
44 const HashEntry
* getKeyword(const Identifier
& ident
) const
46 return m_keywordTable
.entry(m_vm
, ident
);
51 m_keywordTable
.deleteTable();
60 const HashTable m_keywordTable
;
// Bit-flag values (1, 2, 4) that are tested with bitwise AND against the
// lexerFlags argument of the lexing entry points.
64 LexerFlagsIgnoreReservedWords
= 1,
65 LexerFlagsDontBuildStrings
= 2,
// NOTE(review): "Lexex" looks like a misspelling of "Lexer". The same
// spelling is used where the flag is tested in lexExpectIdentifier, so any
// rename has to change both places together.
66 LexexFlagsDontBuildKeywords
= 4
71 WTF_MAKE_NONCOPYABLE(Lexer
);
72 WTF_MAKE_FAST_ALLOCATED
;
78 // Character manipulation functions.
79 static bool isWhiteSpace(T character
);
80 static bool isLineTerminator(T character
);
81 static unsigned char convertHex(int c1
, int c2
);
82 static UChar
convertUnicode(int c1
, int c2
, int c3
, int c4
);
84 // Functions to set up parsing.
85 void setCode(const SourceCode
&, ParserArena
*);
86 void setIsReparsing() { m_isReparsing
= true; }
87 bool isReparsing() const { return m_isReparsing
; }
89 JSTokenType
lex(JSTokenData
*, JSTokenLocation
*, unsigned, bool strictMode
);
90 bool nextTokenIsColon();
91 int lineNumber() const { return m_lineNumber
; }
92 ALWAYS_INLINE
int currentOffset() const { return offsetFromSourcePtr(m_code
); }
93 ALWAYS_INLINE
int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart
); }
94 void setLastLineNumber(int lastLineNumber
) { m_lastLineNumber
= lastLineNumber
; }
95 int lastLineNumber() const { return m_lastLineNumber
; }
96 bool prevTerminator() const { return m_terminator
; }
97 SourceCode
sourceCode(int openBrace
, int closeBrace
, int firstLine
, unsigned startColumn
);
98 bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
= 0);
101 // Functions for use after parsing.
102 bool sawError() const { return m_error
; }
103 String
getErrorMessage() const { return m_lexErrorMessage
; }
// Repositions the lexer: m_code and m_lineStart are recomputed from the two
// offsets via sourcePtrFromOffset().
// NOTE(review): this view of the function is incomplete -- the embedded
// numbering skips lines (braces and some statements), and the body of the
// final `if` is not visible. Confirm the full body against the upstream file.
105 void setOffset(int offset
, int lineStartOffset
)
// Reset the stored error message to an empty String.
108 m_lexErrorMessage
= String();
// Translate both offsets into pointers into the source buffer.
110 m_code
= sourcePtrFromOffset(offset
);
111 m_lineStart
= sourcePtrFromOffset(lineStartOffset
);
// The read position must not lie before the start of its line.
112 ASSERT(currentOffset() >= currentLineStartOffset());
// Empty the 16-bit scratch buffer.
115 m_buffer16
.resize(0);
// Guard for the case where the new position is still before m_codeEnd.
116 if (LIKELY(m_code
< m_codeEnd
))
121 void setLineNumber(int line
)
126 SourceProvider
* sourceProvider() const { return m_source
->provider(); }
128 JSTokenType
lexExpectIdentifier(JSTokenData
*, JSTokenLocation
*, unsigned, bool strictMode
);
132 void append8(const T
*, size_t);
135 void append16(const LChar
*, size_t);
136 void append16(const UChar
* characters
, size_t length
) { m_buffer16
.append(characters
, length
); }
138 ALWAYS_INLINE
void shift();
139 ALWAYS_INLINE
bool atEnd() const;
140 ALWAYS_INLINE T
peek(int offset
) const;
// Holds an int (m_value) together with a ValueType classification.
// From the visible code: the ValueType constructor stores -2 for
// IncompleteHex and -1 for any other type, and isValid() is m_value >= 0.
// NOTE(review): several lines of this struct (constructor bodies, the
// accessor around the final ASSERT, the closing brace) are missing from this
// view; confirm against the upstream file.
141 struct UnicodeHexValue
{
143 enum ValueType
{ ValidHex
, IncompleteHex
, InvalidHex
};
// Constructs from an int value; the initializer and body are not visible here.
145 explicit UnicodeHexValue(int value
)
// Constructs an error value: -2 encodes IncompleteHex, -1 anything else.
149 explicit UnicodeHexValue(ValueType type
)
150 : m_value(type
== IncompleteHex
? -2 : -1)
// Maps m_value back to a ValueType; the visible return statement
// distinguishes -2 (IncompleteHex) from the remaining case (InvalidHex).
154 ValueType
valueType() const
158 return m_value
== -2 ? IncompleteHex
: InvalidHex
;
// True when m_value is non-negative.
160 bool isValid() const { return m_value
>= 0; }
// Precondition check belonging to a member whose header is not visible.
163 ASSERT(m_value
>= 0);
170 UnicodeHexValue
parseFourDigitUnicodeHex();
171 void shiftLineTerminator();
173 ALWAYS_INLINE
int offsetFromSourcePtr(const T
* ptr
) const { return ptr
- m_codeStart
; }
174 ALWAYS_INLINE
const T
* sourcePtrFromOffset(int offset
) const { return m_codeStart
+ offset
; }
176 String
invalidCharacterMessage() const;
177 ALWAYS_INLINE
const T
* currentSourcePtr() const;
178 ALWAYS_INLINE
void setOffsetFromSourcePtr(const T
* sourcePtr
, unsigned lineStartOffset
) { setOffset(offsetFromSourcePtr(sourcePtr
), lineStartOffset
); }
180 ALWAYS_INLINE
void setCodeStart(const StringImpl
*);
182 ALWAYS_INLINE
const Identifier
* makeIdentifier(const LChar
* characters
, size_t length
);
183 ALWAYS_INLINE
const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
);
184 ALWAYS_INLINE
const Identifier
* makeLCharIdentifier(const LChar
* characters
, size_t length
);
185 ALWAYS_INLINE
const Identifier
* makeLCharIdentifier(const UChar
* characters
, size_t length
);
186 ALWAYS_INLINE
const Identifier
* makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
);
187 ALWAYS_INLINE
const Identifier
* makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
);
189 ALWAYS_INLINE
bool lastTokenWasRestrKeyword() const;
191 template <int shiftAmount
> void internalShift();
192 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType
parseKeyword(JSTokenData
*);
193 template <bool shouldBuildIdentifiers
> ALWAYS_INLINE JSTokenType
parseIdentifier(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
194 template <bool shouldBuildIdentifiers
> NEVER_INLINE JSTokenType
parseIdentifierSlowCase(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
195 enum StringParseResult
{
196 StringParsedSuccessfully
,
200 template <bool shouldBuildStrings
> ALWAYS_INLINE StringParseResult
parseString(JSTokenData
*, bool strictMode
);
201 template <bool shouldBuildStrings
> NEVER_INLINE StringParseResult
parseStringSlowCase(JSTokenData
*, bool strictMode
);
202 ALWAYS_INLINE
void parseHex(double& returnValue
);
203 ALWAYS_INLINE
bool parseOctal(double& returnValue
);
204 ALWAYS_INLINE
bool parseDecimal(double& returnValue
);
205 ALWAYS_INLINE
void parseNumberAfterDecimalPoint();
206 ALWAYS_INLINE
bool parseNumberAfterExponentIndicator();
207 ALWAYS_INLINE
bool parseMultilineComment();
209 static const size_t initialReadBufferCapacity
= 32;
212 int m_lastLineNumber
;
214 Vector
<LChar
> m_buffer8
;
215 Vector
<UChar
> m_buffer16
;
219 const SourceCode
* m_source
;
220 unsigned m_sourceOffset
;
222 const T
* m_codeStart
;
224 const T
* m_codeStartPlusOffset
;
225 const T
* m_lineStart
;
229 String m_lexErrorMessage
;
233 IdentifierArena
* m_arena
;
239 ALWAYS_INLINE
bool Lexer
<LChar
>::isWhiteSpace(LChar ch
)
241 return ch
== ' ' || ch
== '\t' || ch
== 0xB || ch
== 0xC || ch
== 0xA0;
245 ALWAYS_INLINE
bool Lexer
<UChar
>::isWhiteSpace(UChar ch
)
247 return (ch
< 256) ? Lexer
<LChar
>::isWhiteSpace(static_cast<LChar
>(ch
)) : (WTF::Unicode::isSeparatorSpace(ch
) || ch
== 0xFEFF);
251 ALWAYS_INLINE
bool Lexer
<LChar
>::isLineTerminator(LChar ch
)
253 return ch
== '\r' || ch
== '\n';
257 ALWAYS_INLINE
bool Lexer
<UChar
>::isLineTerminator(UChar ch
)
259 return ch
== '\r' || ch
== '\n' || (ch
& ~1) == 0x2028;
262 template <typename T
>
263 inline unsigned char Lexer
<T
>::convertHex(int c1
, int c2
)
265 return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
);
268 template <typename T
>
269 inline UChar Lexer
<T
>::convertUnicode(int c1
, int c2
, int c3
, int c4
)
271 return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
);
274 template <typename T
>
275 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const LChar
* characters
, size_t length
)
277 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
280 template <typename T
>
281 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const UChar
* characters
, size_t length
)
283 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
287 ALWAYS_INLINE
const Identifier
* Lexer
<LChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar
)
289 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
293 ALWAYS_INLINE
const Identifier
* Lexer
<UChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
)
295 if (!(orAllChars
& ~0xff))
296 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
298 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
302 ALWAYS_INLINE
void Lexer
<LChar
>::setCodeStart(const StringImpl
* sourceString
)
304 ASSERT(sourceString
->is8Bit());
305 m_codeStart
= sourceString
->characters8();
309 ALWAYS_INLINE
void Lexer
<UChar
>::setCodeStart(const StringImpl
* sourceString
)
311 ASSERT(!sourceString
->is8Bit());
312 m_codeStart
= sourceString
->characters16();
315 template <typename T
>
316 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
)
318 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
321 template <typename T
>
322 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const LChar
* characters
, size_t length
)
324 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
327 template <typename T
>
328 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const UChar
* characters
, size_t length
)
330 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
// Lexing entry point for callers that expect an identifier next. It scans
// from m_code using ASCII character tests, fills in |tokenData| and
// |tokenLocation|, and contains a call to the general lex() with the same
// arguments.
// NOTE(review): many lines of this function are missing from this view (the
// embedded numbering skips, e.g. 340-343, 345-347, 349-354, 374-377), so the
// loop structure, branch targets and the `else` pairing of the two
// tokenData->ident assignments cannot be confirmed here. Check the upstream
// file before changing any logic.
333 template <typename T
>
334 ALWAYS_INLINE JSTokenType Lexer
<T
>::lexExpectIdentifier(JSTokenData
* tokenData
, JSTokenLocation
* tokenLocation
, unsigned lexerFlags
, bool strictMode
)
// Callers must pass LexerFlagsIgnoreReservedWords.
336 ASSERT((lexerFlags
& LexerFlagsIgnoreReservedWords
));
// start marks the token's first character, ptr is the scan cursor, and end
// is the end of the source buffer.
337 const T
* start
= m_code
;
338 const T
* ptr
= start
;
339 const T
* end
= m_codeEnd
;
// Test: the character at ptr is not an ASCII letter (branch body not visible).
344 if (!WTF::isASCIIAlpha(*ptr
))
// Test: the character at ptr is not ASCII alphanumeric (branch body not visible).
348 if (!WTF::isASCIIAlphanumeric(*ptr
))
// Test: the character at ptr is non-ASCII, a backslash, '_' or '$'
// (branch body not visible).
355 if ((!WTF::isASCII(*ptr
)) || (*ptr
== '\\') || (*ptr
== '_') || (*ptr
== '$'))
// The read position must not lie before the start of its line.
362 ASSERT(currentOffset() >= currentLineStartOffset());
364 // Create the identifier if needed
365 if (lexerFlags
& LexexFlagsDontBuildKeywords
)
366 tokenData
->ident
= 0;
// Build the identifier from the scanned range [start, ptr).
368 tokenData
->ident
= makeLCharIdentifier(start
, ptr
- start
);
// Record the token's line, line start offset, and start/end offsets.
369 tokenLocation
->line
= m_lineNumber
;
370 tokenLocation
->lineStartOffset
= currentLineStartOffset();
371 tokenLocation
->startOffset
= offsetFromSourcePtr(start
);
372 tokenLocation
->endOffset
= currentOffset();
373 ASSERT(tokenLocation
->startOffset
>= tokenLocation
->lineStartOffset
);
// Hand the token over to the general lexer with the caller's arguments.
378 return lex(tokenData
, tokenLocation
, lexerFlags
, strictMode
);