2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
27 #include "ParserArena.h"
28 #include "ParserTokens.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/SegmentedVector.h>
32 #include <wtf/Vector.h>
38 bool isKeyword(const Identifier
& ident
) const
40 return m_keywordTable
.entry(m_vm
, ident
);
43 const HashTableValue
* getKeyword(const Identifier
& ident
) const
45 return m_keywordTable
.entry(m_vm
, ident
);
50 m_keywordTable
.deleteTable();
56 explicit Keywords(VM
&);
59 const HashTable m_keywordTable
;
63 LexerFlagsIgnoreReservedWords
= 1,
64 LexerFlagsDontBuildStrings
= 2,
65 LexexFlagsDontBuildKeywords
= 4
70 WTF_MAKE_NONCOPYABLE(Lexer
);
71 WTF_MAKE_FAST_ALLOCATED
;
74 Lexer(VM
*, JSParserStrictness
);
77 // Character manipulation functions.
78 static bool isWhiteSpace(T character
);
79 static bool isLineTerminator(T character
);
80 static unsigned char convertHex(int c1
, int c2
);
81 static UChar
convertUnicode(int c1
, int c2
, int c3
, int c4
);
83 // Functions to set up parsing.
84 void setCode(const SourceCode
&, ParserArena
*);
85 void setIsReparsing() { m_isReparsing
= true; }
86 bool isReparsing() const { return m_isReparsing
; }
88 JSTokenType
lex(JSToken
*, unsigned, bool strictMode
);
89 bool nextTokenIsColon();
90 int lineNumber() const { return m_lineNumber
; }
91 ALWAYS_INLINE
int currentOffset() const { return offsetFromSourcePtr(m_code
); }
92 ALWAYS_INLINE
int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart
); }
93 ALWAYS_INLINE JSTextPosition
currentPosition() const
95 return JSTextPosition(m_lineNumber
, currentOffset(), currentLineStartOffset());
97 JSTextPosition
positionBeforeLastNewline() const { return m_positionBeforeLastNewline
; }
98 void setLastLineNumber(int lastLineNumber
) { m_lastLineNumber
= lastLineNumber
; }
99 int lastLineNumber() const { return m_lastLineNumber
; }
100 bool prevTerminator() const { return m_terminator
; }
101 bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
= 0);
104 // Functions for use after parsing.
105 bool sawError() const { return m_error
; }
106 String
getErrorMessage() const { return m_lexErrorMessage
; }
108 void setOffset(int offset
, int lineStartOffset
)
111 m_lexErrorMessage
= String();
113 m_code
= sourcePtrFromOffset(offset
);
114 m_lineStart
= sourcePtrFromOffset(lineStartOffset
);
115 ASSERT(currentOffset() >= currentLineStartOffset());
118 m_buffer16
.resize(0);
119 if (LIKELY(m_code
< m_codeEnd
))
124 void setLineNumber(int line
)
129 SourceProvider
* sourceProvider() const { return m_source
->provider(); }
131 JSTokenType
lexExpectIdentifier(JSToken
*, unsigned, bool strictMode
);
135 void append8(const T
*, size_t);
138 void append16(const LChar
*, size_t);
139 void append16(const UChar
* characters
, size_t length
) { m_buffer16
.append(characters
, length
); }
141 ALWAYS_INLINE
void shift();
142 ALWAYS_INLINE
bool atEnd() const;
143 ALWAYS_INLINE T
peek(int offset
) const;
144 struct UnicodeHexValue
{
146 enum ValueType
{ ValidHex
, IncompleteHex
, InvalidHex
};
148 explicit UnicodeHexValue(int value
)
152 explicit UnicodeHexValue(ValueType type
)
153 : m_value(type
== IncompleteHex
? -2 : -1)
157 ValueType
valueType() const
161 return m_value
== -2 ? IncompleteHex
: InvalidHex
;
163 bool isValid() const { return m_value
>= 0; }
166 ASSERT(m_value
>= 0);
173 UnicodeHexValue
parseFourDigitUnicodeHex();
174 void shiftLineTerminator();
176 ALWAYS_INLINE
int offsetFromSourcePtr(const T
* ptr
) const { return ptr
- m_codeStart
; }
177 ALWAYS_INLINE
const T
* sourcePtrFromOffset(int offset
) const { return m_codeStart
+ offset
; }
179 String
invalidCharacterMessage() const;
180 ALWAYS_INLINE
const T
* currentSourcePtr() const;
181 ALWAYS_INLINE
void setOffsetFromSourcePtr(const T
* sourcePtr
, unsigned lineStartOffset
) { setOffset(offsetFromSourcePtr(sourcePtr
), lineStartOffset
); }
183 ALWAYS_INLINE
void setCodeStart(const StringImpl
*);
185 ALWAYS_INLINE
const Identifier
* makeIdentifier(const LChar
* characters
, size_t length
);
186 ALWAYS_INLINE
const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
);
187 ALWAYS_INLINE
const Identifier
* makeLCharIdentifier(const LChar
* characters
, size_t length
);
188 ALWAYS_INLINE
const Identifier
* makeLCharIdentifier(const UChar
* characters
, size_t length
);
189 ALWAYS_INLINE
const Identifier
* makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
);
190 ALWAYS_INLINE
const Identifier
* makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
);
192 ALWAYS_INLINE
bool lastTokenWasRestrKeyword() const;
194 template <int shiftAmount
> void internalShift();
195 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType
parseKeyword(JSTokenData
*);
196 template <bool shouldBuildIdentifiers
> ALWAYS_INLINE JSTokenType
parseIdentifier(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
197 template <bool shouldBuildIdentifiers
> NEVER_INLINE JSTokenType
parseIdentifierSlowCase(JSTokenData
*, unsigned lexerFlags
, bool strictMode
);
198 enum StringParseResult
{
199 StringParsedSuccessfully
,
203 template <bool shouldBuildStrings
> ALWAYS_INLINE StringParseResult
parseString(JSTokenData
*, bool strictMode
);
204 template <bool shouldBuildStrings
> NEVER_INLINE StringParseResult
parseStringSlowCase(JSTokenData
*, bool strictMode
);
205 ALWAYS_INLINE
void parseHex(double& returnValue
);
206 ALWAYS_INLINE
bool parseOctal(double& returnValue
);
207 ALWAYS_INLINE
bool parseDecimal(double& returnValue
);
208 ALWAYS_INLINE
void parseNumberAfterDecimalPoint();
209 ALWAYS_INLINE
bool parseNumberAfterExponentIndicator();
210 ALWAYS_INLINE
bool parseMultilineComment();
212 static const size_t initialReadBufferCapacity
= 32;
215 int m_lastLineNumber
;
217 Vector
<LChar
> m_buffer8
;
218 Vector
<UChar
> m_buffer16
;
222 const SourceCode
* m_source
;
223 unsigned m_sourceOffset
;
225 const T
* m_codeStart
;
227 const T
* m_codeStartPlusOffset
;
228 const T
* m_lineStart
;
229 JSTextPosition m_positionBeforeLastNewline
;
233 String m_lexErrorMessage
;
237 IdentifierArena
* m_arena
;
240 bool m_parsingBuiltinFunction
;
244 ALWAYS_INLINE
bool Lexer
<LChar
>::isWhiteSpace(LChar ch
)
246 return ch
== ' ' || ch
== '\t' || ch
== 0xB || ch
== 0xC || ch
== 0xA0;
250 ALWAYS_INLINE
bool Lexer
<UChar
>::isWhiteSpace(UChar ch
)
252 // 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such.
253 return (ch
< 256) ? Lexer
<LChar
>::isWhiteSpace(static_cast<LChar
>(ch
)) : (u_charType(ch
) == U_SPACE_SEPARATOR
|| ch
== 0x180E || ch
== 0xFEFF);
257 ALWAYS_INLINE
bool Lexer
<LChar
>::isLineTerminator(LChar ch
)
259 return ch
== '\r' || ch
== '\n';
263 ALWAYS_INLINE
bool Lexer
<UChar
>::isLineTerminator(UChar ch
)
265 return ch
== '\r' || ch
== '\n' || (ch
& ~1) == 0x2028;
268 template <typename T
>
269 inline unsigned char Lexer
<T
>::convertHex(int c1
, int c2
)
271 return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
);
274 template <typename T
>
275 inline UChar Lexer
<T
>::convertUnicode(int c1
, int c2
, int c3
, int c4
)
277 return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
);
280 template <typename T
>
281 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const LChar
* characters
, size_t length
)
283 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
286 template <typename T
>
287 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifier(const UChar
* characters
, size_t length
)
289 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
293 ALWAYS_INLINE
const Identifier
* Lexer
<LChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar
)
295 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
299 ALWAYS_INLINE
const Identifier
* Lexer
<UChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
)
301 if (!(orAllChars
& ~0xff))
302 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
304 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
308 ALWAYS_INLINE
void Lexer
<LChar
>::setCodeStart(const StringImpl
* sourceString
)
310 ASSERT(sourceString
->is8Bit());
311 m_codeStart
= sourceString
->characters8();
315 ALWAYS_INLINE
void Lexer
<UChar
>::setCodeStart(const StringImpl
* sourceString
)
317 ASSERT(!sourceString
->is8Bit());
318 m_codeStart
= sourceString
->characters16();
321 template <typename T
>
322 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
)
324 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
327 template <typename T
>
328 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const LChar
* characters
, size_t length
)
330 return &m_arena
->makeIdentifier(m_vm
, characters
, length
);
333 template <typename T
>
334 ALWAYS_INLINE
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const UChar
* characters
, size_t length
)
336 return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
);
340 ALWAYS_INLINE
bool isSafeBuiltinIdentifier(VM
&, const Identifier
*) { return true; }
342 bool isSafeBuiltinIdentifier(VM
&, const Identifier
*);
345 template <typename T
>
346 ALWAYS_INLINE JSTokenType Lexer
<T
>::lexExpectIdentifier(JSToken
* tokenRecord
, unsigned lexerFlags
, bool strictMode
)
348 JSTokenData
* tokenData
= &tokenRecord
->m_data
;
349 JSTokenLocation
* tokenLocation
= &tokenRecord
->m_location
;
350 ASSERT((lexerFlags
& LexerFlagsIgnoreReservedWords
));
351 const T
* start
= m_code
;
352 const T
* ptr
= start
;
353 const T
* end
= m_codeEnd
;
354 JSTextPosition startPosition
= currentPosition();
359 if (!WTF::isASCIIAlpha(*ptr
))
363 if (!WTF::isASCIIAlphanumeric(*ptr
))
370 if ((!WTF::isASCII(*ptr
)) || (*ptr
== '\\') || (*ptr
== '_') || (*ptr
== '$'))
377 ASSERT(currentOffset() >= currentLineStartOffset());
379 // Create the identifier if needed
380 if (lexerFlags
& LexexFlagsDontBuildKeywords
382 && !m_parsingBuiltinFunction
385 tokenData
->ident
= 0;
387 tokenData
->ident
= makeLCharIdentifier(start
, ptr
- start
);
389 tokenLocation
->line
= m_lineNumber
;
390 tokenLocation
->lineStartOffset
= currentLineStartOffset();
391 tokenLocation
->startOffset
= offsetFromSourcePtr(start
);
392 tokenLocation
->endOffset
= currentOffset();
393 ASSERT(tokenLocation
->startOffset
>= tokenLocation
->lineStartOffset
);
394 tokenRecord
->m_startPosition
= startPosition
;
395 tokenRecord
->m_endPosition
= currentPosition();
397 if (m_parsingBuiltinFunction
) {
398 if (!isSafeBuiltinIdentifier(*m_vm
, tokenData
->ident
))
407 return lex(tokenRecord
, lexerFlags
, strictMode
);