2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include "ParserArena.h"
28 #include "ParserTokens.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/AlwaysInline.h>
32 #include <wtf/SegmentedVector.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
40 bool isKeyword(const Identifier
& ident
) const
42 return m_keywordTable
.entry(m_globalData
, ident
);
45 const HashEntry
* getKeyword(const Identifier
& ident
) const
47 return m_keywordTable
.entry(m_globalData
, ident
);
52 m_keywordTable
.deleteTable();
56 friend class JSGlobalData
;
58 Keywords(JSGlobalData
*);
60 JSGlobalData
* m_globalData
;
61 const HashTable m_keywordTable
;
65 LexerFlagsIgnoreReservedWords
= 1,
66 LexerFlagsDontBuildStrings
= 2,
67 LexexFlagsDontBuildKeywords
= 4
72 WTF_MAKE_NONCOPYABLE(Lexer
);
73 WTF_MAKE_FAST_ALLOCATED
;
79 // Character manipulation functions.
80 static bool isWhiteSpace(T character
);
81 static bool isLineTerminator(T character
);
82 static unsigned char convertHex(int c1
, int c2
);
83 static UChar
convertUnicode(int c1
, int c2
, int c3
, int c4
);
85 // Functions to set up parsing.
86 void setCode(const SourceCode
&, ParserArena
*);
87 void setIsReparsing() { m_isReparsing
= true; }
88 bool isReparsing() const { return m_isReparsing
; }
90 JSTokenType
lex(JSTokenData
*, JSTokenInfo
*, unsigned, bool strictMode
);
91 bool nextTokenIsColon();
92 int lineNumber() const { return m_lineNumber
; }
93 void setLastLineNumber(int lastLineNumber
) { m_lastLineNumber
= lastLineNumber
; }
94 int lastLineNumber() const { return m_lastLineNumber
; }
95 bool prevTerminator() const { return m_terminator
; }
96 SourceCode
sourceCode(int openBrace
, int closeBrace
, int firstLine
);
97 bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
= 0);
100 // Functions for use after parsing.
101 bool sawError() const { return m_error
; }
102 UString
getErrorMessage() const { return m_lexErrorMessage
; }
// Repositions the lexer `offset` characters past the start of the source
// (m_codeStart).
// NOTE(review): some statements of this body are absent from this copy of
// the file; the comments below describe only the statements that are visible.
104 void setOffset(int offset
)
// Replace any stored lexer error message with an empty string.
107 m_lexErrorMessage
= UString();
// Move the read position relative to the start of the source.
108 m_code
= m_codeStart
+ offset
;
// Empty the 16-bit scratch buffer.
110 m_buffer16
.resize(0);
// Expected case: the new position lies before the end of the source.
// NOTE(review): the statement guarded by this test is not visible here.
111 if (LIKELY(m_code
< m_codeEnd
))
116 void setLineNumber(int line
)
121 SourceProvider
* sourceProvider() const { return m_source
->provider(); }
123 JSTokenType
lexExpectIdentifier(JSTokenData
*, JSTokenInfo
*, unsigned, bool strictMode
);
127 void append8(const T
*, size_t);
130 void append16(const LChar
*, size_t);
131 void append16(const UChar
* characters
, size_t length
) { m_buffer16
.append(characters
, length
); }
133 ALWAYS_INLINE
void shift();
134 ALWAYS_INLINE
bool atEnd() const;
135 ALWAYS_INLINE T
peek(int offset
) const;
136 int parseFourDigitUnicodeHex();
137 void shiftLineTerminator();
139 UString
invalidCharacterMessage() const;
140 ALWAYS_INLINE
const T
* currentCharacter() const;
141 ALWAYS_INLINE
int currentOffset() const { return m_code
- m_codeStart
; }
142 ALWAYS_INLINE
void setOffsetFromCharOffset(const T
* charOffset
) { setOffset(charOffset
- m_codeStart
); }
144 ALWAYS_INLINE
void setCodeStart(const StringImpl
*);
146 ALWAYS_INLINE
const Identifier
* makeIdentifier(const LChar
* characters
, size_t length
);
147 ALWAYS_INLINE
const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
);
148 ALWAYS_INLINE
const Identifier
* makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
);
150 ALWAYS_INLINE
bool lastTokenWasRestrKeyword() const;
152 template <int shiftAmount
> void internalShift();
153 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType
parseKeyword(JSTokenData
*);
154 template <bool shouldBuildIdentifiers
> ALWAYS_INLINE JSTokenType
parseIdentifier(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
155 template <bool shouldBuildIdentifiers
> NEVER_INLINE JSTokenType
parseIdentifierSlowCase(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
156 template <bool shouldBuildStrings
> ALWAYS_INLINE
bool parseString(JSTokenData
*, bool strictMode
);
157 template <bool shouldBuildStrings
> NEVER_INLINE
bool parseStringSlowCase(JSTokenData
*, bool strictMode
);
158 ALWAYS_INLINE
void parseHex(double& returnValue
);
159 ALWAYS_INLINE
bool parseOctal(double& returnValue
);
160 ALWAYS_INLINE
bool parseDecimal(double& returnValue
);
161 ALWAYS_INLINE
void parseNumberAfterDecimalPoint();
162 ALWAYS_INLINE
bool parseNumberAfterExponentIndicator();
163 ALWAYS_INLINE
bool parseMultilineComment();
165 static const size_t initialReadBufferCapacity
= 32;
168 int m_lastLineNumber
;
170 Vector
<LChar
> m_buffer8
;
171 Vector
<UChar
> m_buffer16
;
175 const SourceCode
* m_source
;
177 const T
* m_codeStart
;
182 UString m_lexErrorMessage
;
186 IdentifierArena
* m_arena
;
188 JSGlobalData
* m_globalData
;
192 ALWAYS_INLINE
bool Lexer
<LChar
>::isWhiteSpace(LChar ch
)
194 return ch
== ' ' || ch
== '\t' || ch
== 0xB || ch
== 0xC || ch
== 0xA0;
198 ALWAYS_INLINE
bool Lexer
<UChar
>::isWhiteSpace(UChar ch
)
200 return (ch
< 256) ? Lexer
<LChar
>::isWhiteSpace(static_cast<LChar
>(ch
)) : (WTF::Unicode::isSeparatorSpace(ch
) || ch
== 0xFEFF);
204 ALWAYS_INLINE
bool Lexer
<LChar
>::isLineTerminator(LChar ch
)
206 return ch
== '\r' || ch
== '\n';
210 ALWAYS_INLINE
bool Lexer
<UChar
>::isLineTerminator(UChar ch
)
212 return ch
== '\r' || ch
== '\n' || (ch
& ~1) == 0x2028;
215 template <typename T
>
216 inline unsigned char Lexer
<T
>::convertHex(int c1
, int c2
)
218 return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
);
221 template <typename T
>
222 inline UChar Lexer
<T
>::convertUnicode(int c1
, int c2
, int c3
, int c4
)
224 return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
);
227 template <typename T
>
228 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const LChar
* characters
, size_t length
)
230 return &m_arena
->makeIdentifier(m_globalData
, characters
, length
);
233 template <typename T
>
234 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const UChar
* characters
, size_t length
)
236 return &m_arena
->makeIdentifier(m_globalData
, characters
, length
);
240 ALWAYS_INLINE
void Lexer
<LChar
>::setCodeStart(const StringImpl
* sourceString
)
242 ASSERT(sourceString
->is8Bit());
243 m_codeStart
= sourceString
->characters8();
247 ALWAYS_INLINE
void Lexer
<UChar
>::setCodeStart(const StringImpl
* sourceString
)
249 ASSERT(!sourceString
->is8Bit());
250 m_codeStart
= sourceString
->characters16();
253 template <typename T
>
254 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
)
256 return &m_arena
->makeIdentifierLCharFromUChar(m_globalData
, characters
, length
);
// Lexes the next token on the expectation that it is a plain ASCII
// identifier, filling in `tokenData` and `tokenInfo`. Callers must pass
// LexerFlagsIgnoreReservedWords in `lexerFlags` (asserted below).
// NOTE(review): several statements of this function (loop heads, branch
// targets and the fast-path return) are absent from this copy of the file;
// the comments describe only the statements that are visible.
259 template <typename T
>
260 ALWAYS_INLINE JSTokenType Lexer
<T
>::lexExpectIdentifier(JSTokenData
* tokenData
, JSTokenInfo
* tokenInfo
, unsigned lexerFlags
, bool strictMode
)
262 ASSERT((lexerFlags
& LexerFlagsIgnoreReservedWords
));
// `start` remembers where the token begins, `ptr` starts out at the same
// position, and `end` is the end of the source.
263 const T
* start
= m_code
;
264 const T
* ptr
= start
;
265 const T
* end
= m_codeEnd
;
// The character at `ptr` is tested for being an ASCII letter.
270 if (!WTF::isASCIIAlpha(*ptr
))
// The character at `ptr` is tested for being an ASCII letter or digit.
274 if (!WTF::isASCIIAlphanumeric(*ptr
))
// The character at `ptr` is tested for being non-ASCII, a backslash,
// '_' or '$'.
// NOTE(review): presumably such input is handed to the general lex() path —
// the branch body is not visible here.
281 if ((!WTF::isASCII(*ptr
)) || (*ptr
== '\\') || (*ptr
== '_') || (*ptr
== '$'))
289 // Create the identifier if needed
// When LexexFlagsDontBuildKeywords is set, the token's identifier is left
// null. ("Lexex" is the spelling this file uses for the flag.)
290 if (lexerFlags
& LexexFlagsDontBuildKeywords
)
291 tokenData
->ident
= 0;
293 tokenData
->ident
= makeIdentifier(start
, ptr
- start
);
// Record the token's line and its start/end offsets relative to m_codeStart.
294 tokenInfo
->line
= m_lineNumber
;
295 tokenInfo
->startOffset
= start
- m_codeStart
;
296 tokenInfo
->endOffset
= currentOffset();
// Hand the same arguments to the general-purpose lex().
301 return lex(tokenData
, tokenInfo
, lexerFlags
, strictMode
);