2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
28 #include "ParserArena.h"
29 #include "SourceCode.h"
30 #include <wtf/ASCIICType.h>
31 #include <wtf/AlwaysInline.h>
32 #include <wtf/SegmentedVector.h>
33 #include <wtf/Vector.h>
34 #include <wtf/unicode/Unicode.h>
41 WTF_MAKE_NONCOPYABLE(Lexer
); WTF_MAKE_FAST_ALLOCATED
;
43 // Character manipulation functions.
// True for ' ', '\t', 0xB and 0xC. For non-ASCII input, true for 0xFEFF and
// for anything WTF::Unicode::isSeparatorSpace() accepts.
44 static bool isWhiteSpace(int character
);
// True for '\r', '\n', 0x2028 and 0x2029.
45 static bool isLineTerminator(int character
);
// Combines two hex digit characters into one byte: c1 supplies the high four
// bits and c2 the low four, each converted with toASCIIHexValue().
46 static unsigned char convertHex(int c1
, int c2
);
// Combines four hex digit characters into one UChar: convertHex(c1, c2) is the
// high byte and convertHex(c3, c4) the low byte.
47 static UChar
convertUnicode(int c1
, int c2
, int c3
, int c4
);
49 // Functions to set up parsing.
// Declaration only; the definition is not part of this header excerpt.
// NOTE(review): presumably points the lexer at the given source and arena -
// confirm against the implementation.
50 void setCode(const SourceCode
&, ParserArena
&);
// Sets the m_isReparsing flag to true.
51 void setIsReparsing() { m_isReparsing
= true; }
// Returns the current value of m_isReparsing.
52 bool isReparsing() const { return m_isReparsing
; }
54 // Functions for the parser itself.
56 IgnoreReservedWords
= 1,
// Declaration only. lexExpectIdentifier() calls this with its own arguments,
// passing its lexType value for the unnamed unsigned parameter.
60 JSTokenType
lex(JSTokenData
*, JSTokenInfo
*, unsigned, bool strictMode
);
// Declaration only.
// NOTE(review): presumably a look-ahead test for ':' - confirm.
61 bool nextTokenIsColon();
// Returns m_lineNumber.
62 int lineNumber() const { return m_lineNumber
; }
// Stores lastLineNumber in m_lastLineNumber.
63 void setLastLineNumber(int lastLineNumber
) { m_lastLineNumber
= lastLineNumber
; }
// Returns m_lastLineNumber.
64 int lastLineNumber() const { return m_lastLineNumber
; }
// Returns m_terminator.
// NOTE(review): presumably "a line terminator preceded the current token" -
// confirm against the code that sets m_terminator.
65 bool prevTerminator() const { return m_terminator
; }
// Declaration only; returns a SourceCode by value.
// NOTE(review): openBrace / closeBrace look like source offsets - confirm.
66 SourceCode
sourceCode(int openBrace
, int closeBrace
, int firstLine
);
// Declaration only. pattern and flags are references to pointers, so the
// callee can repoint them; patternPrefix defaults to 0.
67 bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
= 0);
70 // Functions for use after parsing.
// Returns m_error converted to bool.
71 bool sawError() const { return m_error
; }
// Distance of m_code from m_codeStart (pointer difference), returned as int.
73 int currentOffset() { return m_code
- m_codeStart
; }
74 void setOffset(int offset
)
77 m_code
= m_codeStart
+ offset
;
80 // Faster than an if-else sequence
82 if (LIKELY(m_code
< m_codeEnd
))
85 void setLineNumber(int line
)
// Returns the provider of the SourceCode that m_source points to. m_source is
// dereferenced without a null check.
90 SourceProvider
* sourceProvider() const { return m_source
->provider(); }
// Defined inline after the class. Asserts that the unsigned argument has the
// IgnoreReservedWords bit set.
92 JSTokenType
lexExpectIdentifier(JSTokenData
*, JSTokenInfo
*, unsigned, bool strictMode
);
// Declaration only.
// NOTE(review): presumably looks the identifier up in m_keywordTable - confirm.
94 bool isKeyword(const Identifier
&);
97 friend class JSGlobalData
;
// Internal helpers. Of the members below only makeIdentifier() has its
// definition in this header excerpt; the others are declarations whose
// behaviour has to be checked against their definitions.
104 void record16(UChar
);
106 void copyCodeWithoutBOMs();
108 ALWAYS_INLINE
void shift();
109 ALWAYS_INLINE
int peek(int offset
);
110 int getUnicodeCharacter();
111 void shiftLineTerminator();
113 ALWAYS_INLINE
const UChar
* currentCharacter() const;
// Const overload of currentOffset().
// NOTE(review): a non-const currentOffset() returning m_code - m_codeStart is
// also defined inline in this class - confirm that both are needed.
114 ALWAYS_INLINE
int currentOffset() const;
// Returns the address of the Identifier produced by
// m_arena->makeIdentifier(m_globalData, characters, length).
116 ALWAYS_INLINE
const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
);
118 ALWAYS_INLINE
bool lastTokenWasRestrKeyword() const;
// Second template argument of internalShift().
// NOTE(review): presumably selects whether the shift checks against the end
// of the input - confirm.
120 enum ShiftType
{ DoBoundsCheck
, DoNotBoundsCheck
};
121 template <int shiftAmount
, ShiftType shouldBoundsCheck
> void internalShift();
// Token parsers parameterised at compile time on a bool.
// NOTE(review): the bool presumably controls whether identifier / string
// objects are actually built - confirm.
122 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType
parseKeyword(JSTokenData
*);
123 template <bool shouldBuildIdentifiers
> ALWAYS_INLINE JSTokenType
parseIdentifier(JSTokenData
*, unsigned);
124 template <bool shouldBuildStrings
> ALWAYS_INLINE
bool parseString(JSTokenData
*, bool strictMode
);
// Numeric literal helpers.
// NOTE(review): returnValue is presumably an out-parameter and the bool
// results presumably report success - confirm.
125 ALWAYS_INLINE
void parseHex(double& returnValue
);
126 ALWAYS_INLINE
bool parseOctal(double& returnValue
);
127 ALWAYS_INLINE
bool parseDecimal(double& returnValue
);
128 ALWAYS_INLINE
void parseNumberAfterDecimalPoint();
129 ALWAYS_INLINE
bool parseNumberAfterExponentIndicator();
130 ALWAYS_INLINE
bool parseMultilineComment();
// NOTE(review): presumably the initial capacity reserved for m_buffer8 /
// m_buffer16 - confirm where it is used.
132 static const size_t initialReadBufferCapacity
= 32;
// Read and written through lastLineNumber() / setLastLineNumber().
135 int m_lastLineNumber
;
// Scratch buffers of 8-bit and 16-bit characters.
137 Vector
<char> m_buffer8
;
138 Vector
<UChar
> m_buffer16
;
140 bool m_delimited
; // encountered delimiter like "'" and "}" on last run
// Source being lexed; sourceProvider() returns m_source->provider().
143 const SourceCode
* m_source
;
// Base pointer for offsets: currentOffset() and tokenInfo->startOffset are
// computed relative to it.
145 const UChar
* m_codeStart
;
// Upper bound of the input; lexExpectIdentifier() reads it as its end pointer.
146 const UChar
* m_codeEnd
;
// NOTE(review): the member declarations the next comment describes are not
// present in this excerpt.
151 // Current and following Unicode characters (int so that -1 can serve as the end-of-file marker)
// Arena and global data handed to makeIdentifier().
154 IdentifierArena
* m_arena
;
156 JSGlobalData
* m_globalData
;
158 const HashTable m_keywordTable
;
161 ALWAYS_INLINE
bool Lexer::isWhiteSpace(int ch
)
163 return isASCII(ch
) ? (ch
== ' ' || ch
== '\t' || ch
== 0xB || ch
== 0xC) : (WTF::Unicode::isSeparatorSpace(ch
) || ch
== 0xFEFF);
166 ALWAYS_INLINE
bool Lexer::isLineTerminator(int ch
)
168 return ch
== '\r' || ch
== '\n' || (ch
& ~1) == 0x2028;
171 inline unsigned char Lexer::convertHex(int c1
, int c2
)
173 return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
);
176 inline UChar
Lexer::convertUnicode(int c1
, int c2
, int c3
, int c4
)
178 return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
);
181 ALWAYS_INLINE
const Identifier
* Lexer::makeIdentifier(const UChar
* characters
, size_t length
)
183 return &m_arena
->makeIdentifier(m_globalData
, characters
, length
);
// Identifier-oriented lexing path. Scans forward from m_code and, on the path
// that produces a token here, fills tokenData->ident and the line /
// startOffset / endOffset fields of tokenInfo. The body also contains a call
// that hands all four arguments unchanged to lex().
// NOTE(review): several statements of this body (braces, loop and branch
// lines) are not present in this excerpt, so the control flow connecting the
// visible checks cannot be confirmed from here.
186 ALWAYS_INLINE JSTokenType
Lexer::lexExpectIdentifier(JSTokenData
* tokenData
, JSTokenInfo
* tokenInfo
, unsigned lexType
, bool strictMode
)
// Callers must pass the IgnoreReservedWords bit in lexType.
188 ASSERT((lexType
& IgnoreReservedWords
));
// start marks where the candidate identifier begins, ptr is the scan cursor
// and end is the limit of the input.
189 const UChar
* start
= m_code
;
190 const UChar
* ptr
= start
;
191 const UChar
* end
= m_codeEnd
;
// Test of the character under ptr for being an ASCII letter.
196 if (!WTF::isASCIIAlpha(*ptr
))
// Test of the character under ptr for being an ASCII letter or digit.
200 if (!WTF::isASCIIAlphanumeric(*ptr
))
// A non-ASCII character, a backslash, '_' or '$' under ptr is singled out.
// NOTE(review): presumably these divert to the general lex() path - confirm.
207 if ((!WTF::isASCII(*ptr
)) || (*ptr
== '\\') || (*ptr
== '_') || (*ptr
== '$'))
215 // Create the identifier if needed
// With DontBuildKeywords set, ident is set to 0 and no Identifier is made.
216 if (lexType
& DontBuildKeywords
)
217 tokenData
->ident
= 0;
// The identifier spans the characters from start up to, not including, ptr.
219 tokenData
->ident
= makeIdentifier(start
, ptr
- start
);
// Record the token's line and its start / end offsets relative to m_codeStart.
220 tokenInfo
->line
= m_lineNumber
;
221 tokenInfo
->startOffset
= start
- m_codeStart
;
222 tokenInfo
->endOffset
= currentOffset();
// NOTE(review): presumably the fallback for input the checks above reject -
// confirm.
227 return lex(tokenData
, tokenInfo
, lexType
, strictMode
);