git.saurik.com Git - apple/javascriptcore.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
	3	* Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012 Apple Inc. All Rights Reserved.
	4	* Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
	5	* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
	6	* Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be)
	7	*
	8	* This library is free software; you can redistribute it and/or
	9	* modify it under the terms of the GNU Library General Public
	10	* License as published by the Free Software Foundation; either
	11	* version 2 of the License, or (at your option) any later version.
	12	*
	13	* This library is distributed in the hope that it will be useful,
	14	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	15	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	16	* Library General Public License for more details.
	17	*
	18	* You should have received a copy of the GNU Library General Public License
	19	* along with this library; see the file COPYING.LIB. If not, write to
	20	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	21	* Boston, MA 02110-1301, USA.
	22	*
	23	*/
	24
	25	#include "config.h"
	26	#include "Lexer.h"
	27
	28	#include "JSFunction.h"
	29
	30	#include "JSGlobalObjectFunctions.h"
	31	#include "Identifier.h"
	32	#include "NodeInfo.h"
	33	#include "Nodes.h"
	34	#include <wtf/dtoa.h>
	35	#include <ctype.h>
	36	#include <limits.h>
	37	#include <string.h>
	38	#include <wtf/Assertions.h>
	39
	40	using namespace WTF;
	41	using namespace Unicode;
	42
	43	#include "KeywordLookup.h"
	44	#include "Lexer.lut.h"
	45	#include "Parser.h"
	46
	47	namespace JSC {
	48
	49	Keywords::Keywords(JSGlobalData* globalData)
	50	: m_globalData(globalData)
	51	, m_keywordTable(JSC::mainTable)
	52	{
	53	}
	54
	55	enum CharacterType {
	56	// Types for the main switch
	57
	58	// The first three types are fixed, and also used for identifying
	59	// ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart).
	60	CharacterIdentifierStart,
	61	CharacterZero,
	62	CharacterNumber,
	63
	64	CharacterInvalid,
	65	CharacterLineTerminator,
	66	CharacterExclamationMark,
	67	CharacterOpenParen,
	68	CharacterCloseParen,
	69	CharacterOpenBracket,
	70	CharacterCloseBracket,
	71	CharacterComma,
	72	CharacterColon,
	73	CharacterQuestion,
	74	CharacterTilde,
	75	CharacterQuote,
	76	CharacterDot,
	77	CharacterSlash,
	78	CharacterBackSlash,
	79	CharacterSemicolon,
	80	CharacterOpenBrace,
	81	CharacterCloseBrace,
	82
	83	CharacterAdd,
	84	CharacterSub,
	85	CharacterMultiply,
	86	CharacterModulo,
	87	CharacterAnd,
	88	CharacterXor,
	89	CharacterOr,
	90	CharacterLess,
	91	CharacterGreater,
	92	CharacterEqual,
	93
	94	// Other types (only one so far)
	95	CharacterWhiteSpace,
	96	};
	97
	98	// 256 Latin-1 codes
	99	static const unsigned short typesOfLatin1Characters[256] = {
	100	/* 0 - Null */ CharacterInvalid,
	101	/* 1 - Start of Heading */ CharacterInvalid,
	102	/* 2 - Start of Text */ CharacterInvalid,
	103	/* 3 - End of Text */ CharacterInvalid,
	104	/* 4 - End of Transm. */ CharacterInvalid,
	105	/* 5 - Enquiry */ CharacterInvalid,
	106	/* 6 - Acknowledgment */ CharacterInvalid,
	107	/* 7 - Bell */ CharacterInvalid,
	108	/* 8 - Back Space */ CharacterInvalid,
	109	/* 9 - Horizontal Tab */ CharacterWhiteSpace,
	110	/* 10 - Line Feed */ CharacterLineTerminator,
	111	/* 11 - Vertical Tab */ CharacterWhiteSpace,
	112	/* 12 - Form Feed */ CharacterWhiteSpace,
	113	/* 13 - Carriage Return */ CharacterLineTerminator,
	114	/* 14 - Shift Out */ CharacterInvalid,
	115	/* 15 - Shift In */ CharacterInvalid,
	116	/* 16 - Data Line Escape */ CharacterInvalid,
	117	/* 17 - Device Control 1 */ CharacterInvalid,
	118	/* 18 - Device Control 2 */ CharacterInvalid,
	119	/* 19 - Device Control 3 */ CharacterInvalid,
	120	/* 20 - Device Control 4 */ CharacterInvalid,
	121	/* 21 - Negative Ack. */ CharacterInvalid,
	122	/* 22 - Synchronous Idle */ CharacterInvalid,
	123	/* 23 - End of Transmit */ CharacterInvalid,
	124	/* 24 - Cancel */ CharacterInvalid,
	125	/* 25 - End of Medium */ CharacterInvalid,
	126	/* 26 - Substitute */ CharacterInvalid,
	127	/* 27 - Escape */ CharacterInvalid,
	128	/* 28 - File Separator */ CharacterInvalid,
	129	/* 29 - Group Separator */ CharacterInvalid,
	130	/* 30 - Record Separator */ CharacterInvalid,
	131	/* 31 - Unit Separator */ CharacterInvalid,
	132	/* 32 - Space */ CharacterWhiteSpace,
	133	/* 33 - ! */ CharacterExclamationMark,
	134	/* 34 - " */ CharacterQuote,
	135	/* 35 - # */ CharacterInvalid,
	136	/* 36 - $ */ CharacterIdentifierStart,
	137	/* 37 - % */ CharacterModulo,
	138	/* 38 - & */ CharacterAnd,
	139	/* 39 - ' */ CharacterQuote,
	140	/* 40 - ( */ CharacterOpenParen,
	141	/* 41 - ) */ CharacterCloseParen,
	142	/* 42 - * */ CharacterMultiply,
	143	/* 43 - + */ CharacterAdd,
	144	/* 44 - , */ CharacterComma,
	145	/* 45 - - */ CharacterSub,
	146	/* 46 - . */ CharacterDot,
	147	/* 47 - / */ CharacterSlash,
	148	/* 48 - 0 */ CharacterZero,
	149	/* 49 - 1 */ CharacterNumber,
	150	/* 50 - 2 */ CharacterNumber,
	151	/* 51 - 3 */ CharacterNumber,
	152	/* 52 - 4 */ CharacterNumber,
	153	/* 53 - 5 */ CharacterNumber,
	154	/* 54 - 6 */ CharacterNumber,
	155	/* 55 - 7 */ CharacterNumber,
	156	/* 56 - 8 */ CharacterNumber,
	157	/* 57 - 9 */ CharacterNumber,
	158	/* 58 - : */ CharacterColon,
	159	/* 59 - ; */ CharacterSemicolon,
	160	/* 60 - < */ CharacterLess,
	161	/* 61 - = */ CharacterEqual,
	162	/* 62 - > */ CharacterGreater,
	163	/* 63 - ? */ CharacterQuestion,
	164	/* 64 - @ */ CharacterInvalid,
	165	/* 65 - A */ CharacterIdentifierStart,
	166	/* 66 - B */ CharacterIdentifierStart,
	167	/* 67 - C */ CharacterIdentifierStart,
	168	/* 68 - D */ CharacterIdentifierStart,
	169	/* 69 - E */ CharacterIdentifierStart,
	170	/* 70 - F */ CharacterIdentifierStart,
	171	/* 71 - G */ CharacterIdentifierStart,
	172	/* 72 - H */ CharacterIdentifierStart,
	173	/* 73 - I */ CharacterIdentifierStart,
	174	/* 74 - J */ CharacterIdentifierStart,
	175	/* 75 - K */ CharacterIdentifierStart,
	176	/* 76 - L */ CharacterIdentifierStart,
	177	/* 77 - M */ CharacterIdentifierStart,
	178	/* 78 - N */ CharacterIdentifierStart,
	179	/* 79 - O */ CharacterIdentifierStart,
	180	/* 80 - P */ CharacterIdentifierStart,
	181	/* 81 - Q */ CharacterIdentifierStart,
	182	/* 82 - R */ CharacterIdentifierStart,
	183	/* 83 - S */ CharacterIdentifierStart,
	184	/* 84 - T */ CharacterIdentifierStart,
	185	/* 85 - U */ CharacterIdentifierStart,
	186	/* 86 - V */ CharacterIdentifierStart,
	187	/* 87 - W */ CharacterIdentifierStart,
	188	/* 88 - X */ CharacterIdentifierStart,
	189	/* 89 - Y */ CharacterIdentifierStart,
	190	/* 90 - Z */ CharacterIdentifierStart,
	191	/* 91 - [ */ CharacterOpenBracket,
	192	/* 92 - \ */ CharacterBackSlash,
	193	/* 93 - ] */ CharacterCloseBracket,
	194	/* 94 - ^ */ CharacterXor,
	195	/* 95 - _ */ CharacterIdentifierStart,
	196	/* 96 - ` */ CharacterInvalid,
	197	/* 97 - a */ CharacterIdentifierStart,
	198	/* 98 - b */ CharacterIdentifierStart,
	199	/* 99 - c */ CharacterIdentifierStart,
	200	/* 100 - d */ CharacterIdentifierStart,
	201	/* 101 - e */ CharacterIdentifierStart,
	202	/* 102 - f */ CharacterIdentifierStart,
	203	/* 103 - g */ CharacterIdentifierStart,
	204	/* 104 - h */ CharacterIdentifierStart,
	205	/* 105 - i */ CharacterIdentifierStart,
	206	/* 106 - j */ CharacterIdentifierStart,
	207	/* 107 - k */ CharacterIdentifierStart,
	208	/* 108 - l */ CharacterIdentifierStart,
	209	/* 109 - m */ CharacterIdentifierStart,
	210	/* 110 - n */ CharacterIdentifierStart,
	211	/* 111 - o */ CharacterIdentifierStart,
	212	/* 112 - p */ CharacterIdentifierStart,
	213	/* 113 - q */ CharacterIdentifierStart,
	214	/* 114 - r */ CharacterIdentifierStart,
	215	/* 115 - s */ CharacterIdentifierStart,
	216	/* 116 - t */ CharacterIdentifierStart,
	217	/* 117 - u */ CharacterIdentifierStart,
	218	/* 118 - v */ CharacterIdentifierStart,
	219	/* 119 - w */ CharacterIdentifierStart,
	220	/* 120 - x */ CharacterIdentifierStart,
	221	/* 121 - y */ CharacterIdentifierStart,
	222	/* 122 - z */ CharacterIdentifierStart,
	223	/* 123 - { */ CharacterOpenBrace,
	224	/* 124 - \| */ CharacterOr,
	225	/* 125 - } */ CharacterCloseBrace,
	226	/* 126 - ~ */ CharacterTilde,
	227	/* 127 - Delete */ CharacterInvalid,
	228	/* 128 - Cc category */ CharacterInvalid,
	229	/* 129 - Cc category */ CharacterInvalid,
	230	/* 130 - Cc category */ CharacterInvalid,
	231	/* 131 - Cc category */ CharacterInvalid,
	232	/* 132 - Cc category */ CharacterInvalid,
	233	/* 133 - Cc category */ CharacterInvalid,
	234	/* 134 - Cc category */ CharacterInvalid,
	235	/* 135 - Cc category */ CharacterInvalid,
	236	/* 136 - Cc category */ CharacterInvalid,
	237	/* 137 - Cc category */ CharacterInvalid,
	238	/* 138 - Cc category */ CharacterInvalid,
	239	/* 139 - Cc category */ CharacterInvalid,
	240	/* 140 - Cc category */ CharacterInvalid,
	241	/* 141 - Cc category */ CharacterInvalid,
	242	/* 142 - Cc category */ CharacterInvalid,
	243	/* 143 - Cc category */ CharacterInvalid,
	244	/* 144 - Cc category */ CharacterInvalid,
	245	/* 145 - Cc category */ CharacterInvalid,
	246	/* 146 - Cc category */ CharacterInvalid,
	247	/* 147 - Cc category */ CharacterInvalid,
	248	/* 148 - Cc category */ CharacterInvalid,
	249	/* 149 - Cc category */ CharacterInvalid,
	250	/* 150 - Cc category */ CharacterInvalid,
	251	/* 151 - Cc category */ CharacterInvalid,
	252	/* 152 - Cc category */ CharacterInvalid,
	253	/* 153 - Cc category */ CharacterInvalid,
	254	/* 154 - Cc category */ CharacterInvalid,
	255	/* 155 - Cc category */ CharacterInvalid,
	256	/* 156 - Cc category */ CharacterInvalid,
	257	/* 157 - Cc category */ CharacterInvalid,
	258	/* 158 - Cc category */ CharacterInvalid,
	259	/* 159 - Cc category */ CharacterInvalid,
	260	/* 160 - Zs category (nbsp) */ CharacterWhiteSpace,
	261	/* 161 - Po category */ CharacterInvalid,
	262	/* 162 - Sc category */ CharacterInvalid,
	263	/* 163 - Sc category */ CharacterInvalid,
	264	/* 164 - Sc category */ CharacterInvalid,
	265	/* 165 - Sc category */ CharacterInvalid,
	266	/* 166 - So category */ CharacterInvalid,
	267	/* 167 - So category */ CharacterInvalid,
	268	/* 168 - Sk category */ CharacterInvalid,
	269	/* 169 - So category */ CharacterInvalid,
	270	/* 170 - Ll category */ CharacterIdentifierStart,
	271	/* 171 - Pi category */ CharacterInvalid,
	272	/* 172 - Sm category */ CharacterInvalid,
	273	/* 173 - Cf category */ CharacterInvalid,
	274	/* 174 - So category */ CharacterInvalid,
	275	/* 175 - Sk category */ CharacterInvalid,
	276	/* 176 - So category */ CharacterInvalid,
	277	/* 177 - Sm category */ CharacterInvalid,
	278	/* 178 - No category */ CharacterInvalid,
	279	/* 179 - No category */ CharacterInvalid,
	280	/* 180 - Sk category */ CharacterInvalid,
	281	/* 181 - Ll category */ CharacterIdentifierStart,
	282	/* 182 - So category */ CharacterInvalid,
	283	/* 183 - Po category */ CharacterInvalid,
	284	/* 184 - Sk category */ CharacterInvalid,
	285	/* 185 - No category */ CharacterInvalid,
	286	/* 186 - Ll category */ CharacterIdentifierStart,
	287	/* 187 - Pf category */ CharacterInvalid,
	288	/* 188 - No category */ CharacterInvalid,
	289	/* 189 - No category */ CharacterInvalid,
	290	/* 190 - No category */ CharacterInvalid,
	291	/* 191 - Po category */ CharacterInvalid,
	292	/* 192 - Lu category */ CharacterIdentifierStart,
	293	/* 193 - Lu category */ CharacterIdentifierStart,
	294	/* 194 - Lu category */ CharacterIdentifierStart,
	295	/* 195 - Lu category */ CharacterIdentifierStart,
	296	/* 196 - Lu category */ CharacterIdentifierStart,
	297	/* 197 - Lu category */ CharacterIdentifierStart,
	298	/* 198 - Lu category */ CharacterIdentifierStart,
	299	/* 199 - Lu category */ CharacterIdentifierStart,
	300	/* 200 - Lu category */ CharacterIdentifierStart,
	301	/* 201 - Lu category */ CharacterIdentifierStart,
	302	/* 202 - Lu category */ CharacterIdentifierStart,
	303	/* 203 - Lu category */ CharacterIdentifierStart,
	304	/* 204 - Lu category */ CharacterIdentifierStart,
	305	/* 205 - Lu category */ CharacterIdentifierStart,
	306	/* 206 - Lu category */ CharacterIdentifierStart,
	307	/* 207 - Lu category */ CharacterIdentifierStart,
	308	/* 208 - Lu category */ CharacterIdentifierStart,
	309	/* 209 - Lu category */ CharacterIdentifierStart,
	310	/* 210 - Lu category */ CharacterIdentifierStart,
	311	/* 211 - Lu category */ CharacterIdentifierStart,
	312	/* 212 - Lu category */ CharacterIdentifierStart,
	313	/* 213 - Lu category */ CharacterIdentifierStart,
	314	/* 214 - Lu category */ CharacterIdentifierStart,
	315	/* 215 - Sm category */ CharacterInvalid,
	316	/* 216 - Lu category */ CharacterIdentifierStart,
	317	/* 217 - Lu category */ CharacterIdentifierStart,
	318	/* 218 - Lu category */ CharacterIdentifierStart,
	319	/* 219 - Lu category */ CharacterIdentifierStart,
	320	/* 220 - Lu category */ CharacterIdentifierStart,
	321	/* 221 - Lu category */ CharacterIdentifierStart,
	322	/* 222 - Lu category */ CharacterIdentifierStart,
	323	/* 223 - Ll category */ CharacterIdentifierStart,
	324	/* 224 - Ll category */ CharacterIdentifierStart,
	325	/* 225 - Ll category */ CharacterIdentifierStart,
	326	/* 226 - Ll category */ CharacterIdentifierStart,
	327	/* 227 - Ll category */ CharacterIdentifierStart,
	328	/* 228 - Ll category */ CharacterIdentifierStart,
	329	/* 229 - Ll category */ CharacterIdentifierStart,
	330	/* 230 - Ll category */ CharacterIdentifierStart,
	331	/* 231 - Ll category */ CharacterIdentifierStart,
	332	/* 232 - Ll category */ CharacterIdentifierStart,
	333	/* 233 - Ll category */ CharacterIdentifierStart,
	334	/* 234 - Ll category */ CharacterIdentifierStart,
	335	/* 235 - Ll category */ CharacterIdentifierStart,
	336	/* 236 - Ll category */ CharacterIdentifierStart,
	337	/* 237 - Ll category */ CharacterIdentifierStart,
	338	/* 238 - Ll category */ CharacterIdentifierStart,
	339	/* 239 - Ll category */ CharacterIdentifierStart,
	340	/* 240 - Ll category */ CharacterIdentifierStart,
	341	/* 241 - Ll category */ CharacterIdentifierStart,
	342	/* 242 - Ll category */ CharacterIdentifierStart,
	343	/* 243 - Ll category */ CharacterIdentifierStart,
	344	/* 244 - Ll category */ CharacterIdentifierStart,
	345	/* 245 - Ll category */ CharacterIdentifierStart,
	346	/* 246 - Ll category */ CharacterIdentifierStart,
	347	/* 247 - Sm category */ CharacterInvalid,
	348	/* 248 - Ll category */ CharacterIdentifierStart,
	349	/* 249 - Ll category */ CharacterIdentifierStart,
	350	/* 250 - Ll category */ CharacterIdentifierStart,
	351	/* 251 - Ll category */ CharacterIdentifierStart,
	352	/* 252 - Ll category */ CharacterIdentifierStart,
	353	/* 253 - Ll category */ CharacterIdentifierStart,
	354	/* 254 - Ll category */ CharacterIdentifierStart,
	355	/* 255 - Ll category */ CharacterIdentifierStart
	356	};
	357
	358	template <typename T>
	359	Lexer<T>::Lexer(JSGlobalData* globalData)
	360	: m_isReparsing(false)
	361	, m_globalData(globalData)
	362	{
	363	}
	364
	365	template <typename T>
	366	Lexer<T>::~Lexer()
	367	{
	368	}
	369
	370	template <typename T>
	371	UString Lexer<T>::invalidCharacterMessage() const
	372	{
	373	switch (m_current) {
	374	case 0:
	375	return "Invalid character: '\\0'";
	376	case 10:
	377	return "Invalid character: '\\n'";
	378	case 11:
	379	return "Invalid character: '\\v'";
	380	case 13:
	381	return "Invalid character: '\\r'";
	382	case 35:
	383	return "Invalid character: '#'";
	384	case 64:
	385	return "Invalid character: '@'";
	386	case 96:
	387	return "Invalid character: '`'";
	388	default:
	389	return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current)).impl();
	390	}
	391	}
	392
	393	template <typename T>
	394	ALWAYS_INLINE const T* Lexer<T>::currentCharacter() const
	395	{
	396	ASSERT(m_code <= m_codeEnd);
	397	return m_code;
	398	}
	399
	400	template <typename T>
	401	void Lexer<T>::setCode(const SourceCode& source, ParserArena* arena)
	402	{
	403	m_arena = &arena->identifierArena();
	404
	405	m_lineNumber = source.firstLine();
	406	m_lastToken = -1;
	407
	408	const StringImpl* sourceString = source.provider()->data();
	409
	410	if (sourceString)
	411	setCodeStart(sourceString);
	412	else
	413	m_codeStart = 0;
	414
	415	m_source = &source;
	416	m_code = m_codeStart + source.startOffset();
	417	m_codeEnd = m_codeStart + source.endOffset();
	418	m_error = false;
	419	m_atLineStart = true;
	420	m_lexErrorMessage = UString();
	421
	422	m_buffer8.reserveInitialCapacity(initialReadBufferCapacity);
	423	m_buffer16.reserveInitialCapacity((m_codeEnd - m_code) / 2);
	424
	425	if (LIKELY(m_code < m_codeEnd))
	426	m_current = *m_code;
	427	else
	428	m_current = 0;
	429	ASSERT(currentOffset() == source.startOffset());
	430	}
	431
	432	template <typename T>
	433	template <int shiftAmount> ALWAYS_INLINE void Lexer<T>::internalShift()
	434	{
	435	m_code += shiftAmount;
	436	m_current = *m_code;
	437	}
	438
	439	template <typename T>
	440	ALWAYS_INLINE void Lexer<T>::shift()
	441	{
	442	// At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence.
	443	m_current = 0;
	444	++m_code;
	445	if (LIKELY(m_code < m_codeEnd))
	446	m_current = *m_code;
	447	}
	448
	449	template <typename T>
	450	ALWAYS_INLINE bool Lexer<T>::atEnd() const
	451	{
	452	ASSERT(!m_current \|\| m_code < m_codeEnd);
	453	return UNLIKELY(UNLIKELY(!m_current) && m_code == m_codeEnd);
	454	}
	455
	456	template <typename T>
	457	ALWAYS_INLINE T Lexer<T>::peek(int offset) const
	458	{
	459	ASSERT(offset > 0 && offset < 5);
	460	const T* code = m_code + offset;
	461	return (code < m_codeEnd) ? *code : 0;
	462	}
	463
	464	template <typename T>
	465	int Lexer<T>::parseFourDigitUnicodeHex()
	466	{
	467	T char1 = peek(1);
	468	T char2 = peek(2);
	469	T char3 = peek(3);
	470
	471	if (UNLIKELY(!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(char1) \|\| !isASCIIHexDigit(char2) \|\| !isASCIIHexDigit(char3)))
	472	return -1;
	473
	474	int result = convertUnicode(m_current, char1, char2, char3);
	475	shift();
	476	shift();
	477	shift();
	478	shift();
	479	return result;
	480	}
	481
	482	template <typename T>
	483	void Lexer<T>::shiftLineTerminator()
	484	{
	485	ASSERT(isLineTerminator(m_current));
	486
	487	T prev = m_current;
	488	shift();
	489
	490	// Allow both CRLF and LFCR.
	491	if (prev + m_current == '\n' + '\r')
	492	shift();
	493
	494	++m_lineNumber;
	495	}
	496
	497	template <typename T>
	498	ALWAYS_INLINE bool Lexer<T>::lastTokenWasRestrKeyword() const
	499	{
	500	return m_lastToken == CONTINUE \|\| m_lastToken == BREAK \|\| m_lastToken == RETURN \|\| m_lastToken == THROW;
	501	}
	502
	503	static NEVER_INLINE bool isNonLatin1IdentStart(int c)
	504	{
	505	return category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other);
	506	}
	507
	508	static ALWAYS_INLINE bool isLatin1(LChar)
	509	{
	510	return true;
	511	}
	512
	513	static ALWAYS_INLINE bool isLatin1(UChar c)
	514	{
	515	return c < 256;
	516	}
	517
	518	static inline bool isIdentStart(LChar c)
	519	{
	520	return typesOfLatin1Characters[c] == CharacterIdentifierStart;
	521	}
	522
	523	static inline bool isIdentStart(UChar c)
	524	{
	525	return isLatin1(c) ? isIdentStart(static_cast<LChar>(c)) : isNonLatin1IdentStart(c);
	526	}
	527
	528	static NEVER_INLINE bool isNonLatin1IdentPart(int c)
	529	{
	530	return (category(c) & (Letter_Uppercase \| Letter_Lowercase \| Letter_Titlecase \| Letter_Modifier \| Letter_Other
	531	\| Mark_NonSpacing \| Mark_SpacingCombining \| Number_DecimalDigit \| Punctuation_Connector)) \|\| c == 0x200C \|\| c == 0x200D;
	532	}
	533
	534	static ALWAYS_INLINE bool isIdentPart(LChar c)
	535	{
	536	// Character types are divided into two groups depending on whether they can be part of an
	537	// identifier or not. Those whose type value is less or equal than CharacterNumber can be
	538	// part of an identifier. (See the CharacterType definition for more details.)
	539	return typesOfLatin1Characters[c] <= CharacterNumber;
	540	}
	541
	542	static ALWAYS_INLINE bool isIdentPart(UChar c)
	543	{
	544	return isLatin1(c) ? isIdentPart(static_cast<LChar>(c)) : isNonLatin1IdentPart(c);
	545	}
	546
	547	static inline int singleEscape(int c)
	548	{
	549	switch (c) {
	550	case 'b':
	551	return 0x08;
	552	case 't':
	553	return 0x09;
	554	case 'n':
	555	return 0x0A;
	556	case 'v':
	557	return 0x0B;
	558	case 'f':
	559	return 0x0C;
	560	case 'r':
	561	return 0x0D;
	562	case '\\':
	563	return '\\';
	564	case '\'':
	565	return '\'';
	566	case '"':
	567	return '"';
	568	default:
	569	return 0;
	570	}
	571	}
	572
	573	template <typename T>
	574	inline void Lexer<T>::record8(int c)
	575	{
	576	ASSERT(c >= 0);
	577	ASSERT(c <= 0xFF);
	578	m_buffer8.append(static_cast<LChar>(c));
	579	}
	580
	581	template <typename T>
	582	inline void assertCharIsIn8BitRange(T c)
	583	{
	584	UNUSED_PARAM(c);
	585	ASSERT(c >= 0);
	586	ASSERT(c <= 0xFF);
	587	}
	588
	589	template <>
	590	inline void assertCharIsIn8BitRange(UChar c)
	591	{
	592	UNUSED_PARAM(c);
	593	ASSERT(c <= 0xFF);
	594	}
	595
	596	template <>
	597	inline void assertCharIsIn8BitRange(LChar)
	598	{
	599	}
	600
	601	template <typename T>
	602	inline void Lexer<T>::append8(const T* p, size_t length)
	603	{
	604	size_t currentSize = m_buffer8.size();
	605	m_buffer8.grow(currentSize + length);
	606	LChar* rawBuffer = m_buffer8.data() + currentSize;
	607
	608	for (size_t i = 0; i < length; i++) {
	609	T c = p[i];
	610	assertCharIsIn8BitRange(c);
	611	rawBuffer[i] = c;
	612	}
	613	}
	614
	615	template <typename T>
	616	inline void Lexer<T>::append16(const LChar* p, size_t length)
	617	{
	618	size_t currentSize = m_buffer16.size();
	619	m_buffer16.grow(currentSize + length);
	620	UChar* rawBuffer = m_buffer16.data() + currentSize;
	621
	622	for (size_t i = 0; i < length; i++)
	623	rawBuffer[i] = p[i];
	624	}
	625
	626	template <typename T>
	627	inline void Lexer<T>::record16(T c)
	628	{
	629	m_buffer16.append(c);
	630	}
	631
	632	template <typename T>
	633	inline void Lexer<T>::record16(int c)
	634	{
	635	ASSERT(c >= 0);
	636	ASSERT(c <= static_cast<int>(USHRT_MAX));
	637	m_buffer16.append(static_cast<UChar>(c));
	638	}
	639
	640	template <>
	641	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<LChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
	642	{
	643	const ptrdiff_t remaining = m_codeEnd - m_code;
	644	if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
	645	JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
	646	if (keyword != IDENT) {
	647	ASSERT((!shouldCreateIdentifier) \|\| tokenData->ident);
	648	return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
	649	}
	650	}
	651
	652	const LChar* identifierStart = currentCharacter();
	653
	654	while (isIdentPart(m_current))
	655	shift();
	656
	657	if (UNLIKELY(m_current == '\\')) {
	658	setOffsetFromCharOffset(identifierStart);
	659	return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
	660	}
	661
	662	const Identifier* ident = 0;
	663
	664	if (shouldCreateIdentifier) {
	665	int identifierLength = currentCharacter() - identifierStart;
	666	ident = makeIdentifier(identifierStart, identifierLength);
	667
	668	tokenData->ident = ident;
	669	} else
	670	tokenData->ident = 0;
	671
	672	if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
	673	ASSERT(shouldCreateIdentifier);
	674	if (remaining < maxTokenLength) {
	675	const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
	676	ASSERT((remaining < maxTokenLength) \|\| !entry);
	677	if (!entry)
	678	return IDENT;
	679	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
	680	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
	681	}
	682	return IDENT;
	683	}
	684
	685	return IDENT;
	686	}
	687
	688	template <>
	689	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer<UChar>::parseIdentifier(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
	690	{
	691	const ptrdiff_t remaining = m_codeEnd - m_code;
	692	if ((remaining >= maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords)) {
	693	JSTokenType keyword = parseKeyword<shouldCreateIdentifier>(tokenData);
	694	if (keyword != IDENT) {
	695	ASSERT((!shouldCreateIdentifier) \|\| tokenData->ident);
	696	return keyword == RESERVED_IF_STRICT && !strictMode ? IDENT : keyword;
	697	}
	698	}
	699
	700	const UChar* identifierStart = currentCharacter();
	701
	702	UChar orAllChars = 0;
	703
	704	while (isIdentPart(m_current)) {
	705	orAllChars \|= m_current;
	706	shift();
	707	}
	708
	709	if (UNLIKELY(m_current == '\\')) {
	710	setOffsetFromCharOffset(identifierStart);
	711	return parseIdentifierSlowCase<shouldCreateIdentifier>(tokenData, lexerFlags, strictMode);
	712	}
	713
	714	bool isAll8Bit = false;
	715
	716	if (!(orAllChars & ~0xff))
	717	isAll8Bit = true;
	718
	719	const Identifier* ident = 0;
	720
	721	if (shouldCreateIdentifier) {
	722	int identifierLength = currentCharacter() - identifierStart;
	723	if (isAll8Bit)
	724	ident = makeIdentifierLCharFromUChar(identifierStart, identifierLength);
	725	else
	726	ident = makeIdentifier(identifierStart, identifierLength);
	727
	728	tokenData->ident = ident;
	729	} else
	730	tokenData->ident = 0;
	731
	732	if (UNLIKELY((remaining < maxTokenLength) && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
	733	ASSERT(shouldCreateIdentifier);
	734	if (remaining < maxTokenLength) {
	735	const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
	736	ASSERT((remaining < maxTokenLength) \|\| !entry);
	737	if (!entry)
	738	return IDENT;
	739	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
	740	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
	741	}
	742	return IDENT;
	743	}
	744
	745	return IDENT;
	746	}
	747
	748	template <typename T>
	749	template <bool shouldCreateIdentifier> JSTokenType Lexer<T>::parseIdentifierSlowCase(JSTokenData* tokenData, unsigned lexerFlags, bool strictMode)
	750	{
	751	const ptrdiff_t remaining = m_codeEnd - m_code;
	752	const T* identifierStart = currentCharacter();
	753	bool bufferRequired = false;
	754
	755	while (true) {
	756	if (LIKELY(isIdentPart(m_current))) {
	757	shift();
	758	continue;
	759	}
	760	if (LIKELY(m_current != '\\'))
	761	break;
	762
	763	// \uXXXX unicode characters.
	764	bufferRequired = true;
	765	if (identifierStart != currentCharacter())
	766	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
	767	shift();
	768	if (UNLIKELY(m_current != 'u'))
	769	return ERRORTOK;
	770	shift();
	771	int character = parseFourDigitUnicodeHex();
	772	if (UNLIKELY(character == -1))
	773	return ERRORTOK;
	774	UChar ucharacter = static_cast<UChar>(character);
	775	if (UNLIKELY(m_buffer16.size() ? !isIdentPart(ucharacter) : !isIdentStart(ucharacter)))
	776	return ERRORTOK;
	777	if (shouldCreateIdentifier)
	778	record16(ucharacter);
	779	identifierStart = currentCharacter();
	780	}
	781
	782	int identifierLength;
	783	const Identifier* ident = 0;
	784	if (shouldCreateIdentifier) {
	785	if (!bufferRequired) {
	786	identifierLength = currentCharacter() - identifierStart;
	787	ident = makeIdentifier(identifierStart, identifierLength);
	788	} else {
	789	if (identifierStart != currentCharacter())
	790	m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
	791	ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
	792	}
	793
	794	tokenData->ident = ident;
	795	} else
	796	tokenData->ident = 0;
	797
	798	if (LIKELY(!bufferRequired && !(lexerFlags & LexerFlagsIgnoreReservedWords))) {
	799	ASSERT(shouldCreateIdentifier);
	800	// Keywords must not be recognized if there was an \uXXXX in the identifier.
	801	if (remaining < maxTokenLength) {
	802	const HashEntry* entry = m_globalData->keywords->getKeyword(*ident);
	803	ASSERT((remaining < maxTokenLength) \|\| !entry);
	804	if (!entry)
	805	return IDENT;
	806	JSTokenType token = static_cast<JSTokenType>(entry->lexerValue());
	807	return (token != RESERVED_IF_STRICT) \|\| strictMode ? token : IDENT;
	808	}
	809	return IDENT;
	810	}
	811
	812	m_buffer16.resize(0);
	813	return IDENT;
	814	}
	815
	816	static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(LChar character)
	817	{
	818	return character < 0xE;
	819	}
	820
	821	static ALWAYS_INLINE bool characterRequiresParseStringSlowCase(UChar character)
	822	{
	823	return character < 0xE \|\| character > 0xFF;
	824	}
	825
	826	template <typename T>
	827	template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer<T>::parseString(JSTokenData* tokenData, bool strictMode)
	828	{
	829	int startingOffset = currentOffset();
	830	int startingLineNumber = lineNumber();
	831	T stringQuoteCharacter = m_current;
	832	shift();
	833
	834	const T* stringStart = currentCharacter();
	835
	836	while (m_current != stringQuoteCharacter) {
	837	if (UNLIKELY(m_current == '\\')) {
	838	if (stringStart != currentCharacter() && shouldBuildStrings)
	839	append8(stringStart, currentCharacter() - stringStart);
	840	shift();
	841
	842	int escape = singleEscape(m_current);
	843
	844	// Most common escape sequences first
	845	if (escape) {
	846	if (shouldBuildStrings)
	847	record8(escape);
	848	shift();
	849	} else if (UNLIKELY(isLineTerminator(m_current)))
	850	shiftLineTerminator();
	851	else if (m_current == 'x') {
	852	shift();
	853	if (!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(peek(1))) {
	854	m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
	855	return false;
	856	}
	857	T prev = m_current;
	858	shift();
	859	if (shouldBuildStrings)
	860	record8(convertHex(prev, m_current));
	861	shift();
	862	} else {
	863	setOffset(startingOffset);
	864	setLineNumber(startingLineNumber);
	865	m_buffer8.resize(0);
	866	return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
	867	}
	868	stringStart = currentCharacter();
	869	continue;
	870	}
	871
	872	if (UNLIKELY(characterRequiresParseStringSlowCase(m_current))) {
	873	setOffset(startingOffset);
	874	setLineNumber(startingLineNumber);
	875	m_buffer8.resize(0);
	876	return parseStringSlowCase<shouldBuildStrings>(tokenData, strictMode);
	877	}
	878
	879	shift();
	880	}
	881
	882	if (currentCharacter() != stringStart && shouldBuildStrings)
	883	append8(stringStart, currentCharacter() - stringStart);
	884	if (shouldBuildStrings) {
	885	tokenData->ident = makeIdentifier(m_buffer8.data(), m_buffer8.size());
	886	m_buffer8.resize(0);
	887	} else
	888	tokenData->ident = 0;
	889
	890	return true;
	891	}
	892
	893	template <typename T>
	894	template <bool shouldBuildStrings> bool Lexer<T>::parseStringSlowCase(JSTokenData* tokenData, bool strictMode)
	895	{
	896	T stringQuoteCharacter = m_current;
	897	shift();
	898
	899	const T* stringStart = currentCharacter();
	900
	901	while (m_current != stringQuoteCharacter) {
	902	if (UNLIKELY(m_current == '\\')) {
	903	if (stringStart != currentCharacter() && shouldBuildStrings)
	904	append16(stringStart, currentCharacter() - stringStart);
	905	shift();
	906
	907	int escape = singleEscape(m_current);
	908
	909	// Most common escape sequences first
	910	if (escape) {
	911	if (shouldBuildStrings)
	912	record16(escape);
	913	shift();
	914	} else if (UNLIKELY(isLineTerminator(m_current)))
	915	shiftLineTerminator();
	916	else if (m_current == 'x') {
	917	shift();
	918	if (!isASCIIHexDigit(m_current) \|\| !isASCIIHexDigit(peek(1))) {
	919	m_lexErrorMessage = "\\x can only be followed by a hex character sequence";
	920	return false;
	921	}
	922	T prev = m_current;
	923	shift();
	924	if (shouldBuildStrings)
	925	record16(convertHex(prev, m_current));
	926	shift();
	927	} else if (m_current == 'u') {
	928	shift();
	929	int character = parseFourDigitUnicodeHex();
	930	if (character != -1) {
	931	if (shouldBuildStrings)
	932	record16(character);
	933	} else if (m_current == stringQuoteCharacter) {
	934	if (shouldBuildStrings)
	935	record16('u');
	936	} else {
	937	m_lexErrorMessage = "\\u can only be followed by a Unicode character sequence";
	938	return false;
	939	}
	940	} else if (strictMode && isASCIIDigit(m_current)) {
	941	// The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
	942	int character1 = m_current;
	943	shift();
	944	if (character1 != '0' \|\| isASCIIDigit(m_current)) {
	945	m_lexErrorMessage = "The only valid numeric escape in strict mode is '\\0'";
	946	return false;
	947	}
	948	if (shouldBuildStrings)
	949	record16(0);
	950	} else if (!strictMode && isASCIIOctalDigit(m_current)) {
	951	// Octal character sequences
	952	T character1 = m_current;
	953	shift();
	954	if (isASCIIOctalDigit(m_current)) {
	955	// Two octal characters
	956	T character2 = m_current;
	957	shift();
	958	if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
	959	if (shouldBuildStrings)
	960	record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
	961	shift();
	962	} else {
	963	if (shouldBuildStrings)
	964	record16((character1 - '0') * 8 + character2 - '0');
	965	}
	966	} else {
	967	if (shouldBuildStrings)
	968	record16(character1 - '0');
	969	}
	970	} else if (!atEnd()) {
	971	if (shouldBuildStrings)
	972	record16(m_current);
	973	shift();
	974	} else {
	975	m_lexErrorMessage = "Unterminated string constant";
	976	return false;
	977	}
	978
	979	stringStart = currentCharacter();
	980	continue;
	981	}
	982	// Fast check for characters that require special handling.
	983	// Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently
	984	// as possible, and lets through all common ASCII characters.
	985	if (UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
	986	// New-line or end of input is not allowed
	987	if (atEnd() \|\| isLineTerminator(m_current)) {
	988	m_lexErrorMessage = "Unexpected EOF";
	989	return false;
	990	}
	991	// Anything else is just a normal character
	992	}
	993	shift();
	994	}
	995
	996	if (currentCharacter() != stringStart && shouldBuildStrings)
	997	append16(stringStart, currentCharacter() - stringStart);
	998	if (shouldBuildStrings)
	999	tokenData->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
	1000	else
	1001	tokenData->ident = 0;
	1002
	1003	m_buffer16.resize(0);
	1004	return true;
	1005	}
	1006
	1007	template <typename T>
	1008	ALWAYS_INLINE void Lexer<T>::parseHex(double& returnValue)
	1009	{
	1010	// Optimization: most hexadecimal values fit into 4 bytes.
	1011	uint32_t hexValue = 0;
	1012	int maximumDigits = 7;
	1013
	1014	// Shift out the 'x' prefix.
	1015	shift();
	1016
	1017	do {
	1018	hexValue = (hexValue << 4) + toASCIIHexValue(m_current);
	1019	shift();
	1020	--maximumDigits;
	1021	} while (isASCIIHexDigit(m_current) && maximumDigits >= 0);
	1022
	1023	if (maximumDigits >= 0) {
	1024	returnValue = hexValue;
	1025	return;
	1026	}
	1027
	1028	// No more place in the hexValue buffer.
	1029	// The values are shifted out and placed into the m_buffer8 vector.
	1030	for (int i = 0; i < 8; ++i) {
	1031	int digit = hexValue >> 28;
	1032	if (digit < 10)
	1033	record8(digit + '0');
	1034	else
	1035	record8(digit - 10 + 'a');
	1036	hexValue <<= 4;
	1037	}
	1038
	1039	while (isASCIIHexDigit(m_current)) {
	1040	record8(m_current);
	1041	shift();
	1042	}
	1043
	1044	returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 16);
	1045	}
	1046
	1047	template <typename T>
	1048	ALWAYS_INLINE bool Lexer<T>::parseOctal(double& returnValue)
	1049	{
	1050	// Optimization: most octal values fit into 4 bytes.
	1051	uint32_t octalValue = 0;
	1052	int maximumDigits = 9;
	1053	// Temporary buffer for the digits. Makes easier
	1054	// to reconstruct the input characters when needed.
	1055	LChar digits[10];
	1056
	1057	do {
	1058	octalValue = octalValue * 8 + (m_current - '0');
	1059	digits[maximumDigits] = m_current;
	1060	shift();
	1061	--maximumDigits;
	1062	} while (isASCIIOctalDigit(m_current) && maximumDigits >= 0);
	1063
	1064	if (!isASCIIDigit(m_current) && maximumDigits >= 0) {
	1065	returnValue = octalValue;
	1066	return true;
	1067	}
	1068
	1069	for (int i = 9; i > maximumDigits; --i)
	1070	record8(digits[i]);
	1071
	1072	while (isASCIIOctalDigit(m_current)) {
	1073	record8(m_current);
	1074	shift();
	1075	}
	1076
	1077	if (isASCIIDigit(m_current))
	1078	return false;
	1079
	1080	returnValue = parseIntOverflow(m_buffer8.data(), m_buffer8.size(), 8);
	1081	return true;
	1082	}
	1083
	1084	template <typename T>
	1085	ALWAYS_INLINE bool Lexer<T>::parseDecimal(double& returnValue)
	1086	{
	1087	// Optimization: most decimal values fit into 4 bytes.
	1088	uint32_t decimalValue = 0;
	1089
	1090	// Since parseOctal may be executed before parseDecimal,
	1091	// the m_buffer8 may hold ascii digits.
	1092	if (!m_buffer8.size()) {
	1093	int maximumDigits = 9;
	1094	// Temporary buffer for the digits. Makes easier
	1095	// to reconstruct the input characters when needed.
	1096	LChar digits[10];
	1097
	1098	do {
	1099	decimalValue = decimalValue * 10 + (m_current - '0');
	1100	digits[maximumDigits] = m_current;
	1101	shift();
	1102	--maximumDigits;
	1103	} while (isASCIIDigit(m_current) && maximumDigits >= 0);
	1104
	1105	if (maximumDigits >= 0 && m_current != '.' && (m_current \| 0x20) != 'e') {
	1106	returnValue = decimalValue;
	1107	return true;
	1108	}
	1109
	1110	for (int i = 9; i > maximumDigits; --i)
	1111	record8(digits[i]);
	1112	}
	1113
	1114	while (isASCIIDigit(m_current)) {
	1115	record8(m_current);
	1116	shift();
	1117	}
	1118
	1119	return false;
	1120	}
	1121
	1122	template <typename T>
	1123	ALWAYS_INLINE void Lexer<T>::parseNumberAfterDecimalPoint()
	1124	{
	1125	record8('.');
	1126	while (isASCIIDigit(m_current)) {
	1127	record8(m_current);
	1128	shift();
	1129	}
	1130	}
	1131
	1132	template <typename T>
	1133	ALWAYS_INLINE bool Lexer<T>::parseNumberAfterExponentIndicator()
	1134	{
	1135	record8('e');
	1136	shift();
	1137	if (m_current == '+' \|\| m_current == '-') {
	1138	record8(m_current);
	1139	shift();
	1140	}
	1141
	1142	if (!isASCIIDigit(m_current))
	1143	return false;
	1144
	1145	do {
	1146	record8(m_current);
	1147	shift();
	1148	} while (isASCIIDigit(m_current));
	1149	return true;
	1150	}
	1151
	1152	template <typename T>
	1153	ALWAYS_INLINE bool Lexer<T>::parseMultilineComment()
	1154	{
	1155	while (true) {
	1156	while (UNLIKELY(m_current == '*')) {
	1157	shift();
	1158	if (m_current == '/') {
	1159	shift();
	1160	return true;
	1161	}
	1162	}
	1163
	1164	if (atEnd())
	1165	return false;
	1166
	1167	if (isLineTerminator(m_current)) {
	1168	shiftLineTerminator();
	1169	m_terminator = true;
	1170	} else
	1171	shift();
	1172	}
	1173	}
	1174
	1175	template <typename T>
	1176	bool Lexer<T>::nextTokenIsColon()
	1177	{
	1178	const T* code = m_code;
	1179	while (code < m_codeEnd && (isWhiteSpace(code) \|\| isLineTerminator(code)))
	1180	code++;
	1181
	1182	return code < m_codeEnd && *code == ':';
	1183	}
	1184
	1185	template <typename T>
	1186	JSTokenType Lexer<T>::lex(JSTokenData* tokenData, JSTokenInfo* tokenInfo, unsigned lexerFlags, bool strictMode)
	1187	{
	1188	ASSERT(!m_error);
	1189	ASSERT(m_buffer8.isEmpty());
	1190	ASSERT(m_buffer16.isEmpty());
	1191
	1192	JSTokenType token = ERRORTOK;
	1193	m_terminator = false;
	1194
	1195	start:
	1196	while (isWhiteSpace(m_current))
	1197	shift();
	1198
	1199	if (atEnd())
	1200	return EOFTOK;
	1201
	1202	tokenInfo->startOffset = currentOffset();
	1203
	1204	CharacterType type;
	1205	if (LIKELY(isLatin1(m_current)))
	1206	type = static_cast<CharacterType>(typesOfLatin1Characters[m_current]);
	1207	else if (isNonLatin1IdentStart(m_current))
	1208	type = CharacterIdentifierStart;
	1209	else if (isLineTerminator(m_current))
	1210	type = CharacterLineTerminator;
	1211	else
	1212	type = CharacterInvalid;
	1213
	1214	switch (type) {
	1215	case CharacterGreater:
	1216	shift();
	1217	if (m_current == '>') {
	1218	shift();
	1219	if (m_current == '>') {
	1220	shift();
	1221	if (m_current == '=') {
	1222	shift();
	1223	token = URSHIFTEQUAL;
	1224	break;
	1225	}
	1226	token = URSHIFT;
	1227	break;
	1228	}
	1229	if (m_current == '=') {
	1230	shift();
	1231	token = RSHIFTEQUAL;
	1232	break;
	1233	}
	1234	token = RSHIFT;
	1235	break;
	1236	}
	1237	if (m_current == '=') {
	1238	shift();
	1239	token = GE;
	1240	break;
	1241	}
	1242	token = GT;
	1243	break;
	1244	case CharacterEqual:
	1245	shift();
	1246	if (m_current == '=') {
	1247	shift();
	1248	if (m_current == '=') {
	1249	shift();
	1250	token = STREQ;
	1251	break;
	1252	}
	1253	token = EQEQ;
	1254	break;
	1255	}
	1256	token = EQUAL;
	1257	break;
	1258	case CharacterLess:
	1259	shift();
	1260	if (m_current == '!' && peek(1) == '-' && peek(2) == '-') {
	1261	// <!-- marks the beginning of a line comment (for www usage)
	1262	goto inSingleLineComment;
	1263	}
	1264	if (m_current == '<') {
	1265	shift();
	1266	if (m_current == '=') {
	1267	shift();
	1268	token = LSHIFTEQUAL;
	1269	break;
	1270	}
	1271	token = LSHIFT;
	1272	break;
	1273	}
	1274	if (m_current == '=') {
	1275	shift();
	1276	token = LE;
	1277	break;
	1278	}
	1279	token = LT;
	1280	break;
	1281	case CharacterExclamationMark:
	1282	shift();
	1283	if (m_current == '=') {
	1284	shift();
	1285	if (m_current == '=') {
	1286	shift();
	1287	token = STRNEQ;
	1288	break;
	1289	}
	1290	token = NE;
	1291	break;
	1292	}
	1293	token = EXCLAMATION;
	1294	break;
	1295	case CharacterAdd:
	1296	shift();
	1297	if (m_current == '+') {
	1298	shift();
	1299	token = (!m_terminator) ? PLUSPLUS : AUTOPLUSPLUS;
	1300	break;
	1301	}
	1302	if (m_current == '=') {
	1303	shift();
	1304	token = PLUSEQUAL;
	1305	break;
	1306	}
	1307	token = PLUS;
	1308	break;
	1309	case CharacterSub:
	1310	shift();
	1311	if (m_current == '-') {
	1312	shift();
	1313	if (m_atLineStart && m_current == '>') {
	1314	shift();
	1315	goto inSingleLineComment;
	1316	}
	1317	token = (!m_terminator) ? MINUSMINUS : AUTOMINUSMINUS;
	1318	break;
	1319	}
	1320	if (m_current == '=') {
	1321	shift();
	1322	token = MINUSEQUAL;
	1323	break;
	1324	}
	1325	token = MINUS;
	1326	break;
	1327	case CharacterMultiply:
	1328	shift();
	1329	if (m_current == '=') {
	1330	shift();
	1331	token = MULTEQUAL;
	1332	break;
	1333	}
	1334	token = TIMES;
	1335	break;
	1336	case CharacterSlash:
	1337	shift();
	1338	if (m_current == '/') {
	1339	shift();
	1340	goto inSingleLineComment;
	1341	}
	1342	if (m_current == '*') {
	1343	shift();
	1344	if (parseMultilineComment())
	1345	goto start;
	1346	m_lexErrorMessage = "Multiline comment was not closed properly";
	1347	goto returnError;
	1348	}
	1349	if (m_current == '=') {
	1350	shift();
	1351	token = DIVEQUAL;
	1352	break;
	1353	}
	1354	token = DIVIDE;
	1355	break;
	1356	case CharacterAnd:
	1357	shift();
	1358	if (m_current == '&') {
	1359	shift();
	1360	token = AND;
	1361	break;
	1362	}
	1363	if (m_current == '=') {
	1364	shift();
	1365	token = ANDEQUAL;
	1366	break;
	1367	}
	1368	token = BITAND;
	1369	break;
	1370	case CharacterXor:
	1371	shift();
	1372	if (m_current == '=') {
	1373	shift();
	1374	token = XOREQUAL;
	1375	break;
	1376	}
	1377	token = BITXOR;
	1378	break;
	1379	case CharacterModulo:
	1380	shift();
	1381	if (m_current == '=') {
	1382	shift();
	1383	token = MODEQUAL;
	1384	break;
	1385	}
	1386	token = MOD;
	1387	break;
	1388	case CharacterOr:
	1389	shift();
	1390	if (m_current == '=') {
	1391	shift();
	1392	token = OREQUAL;
	1393	break;
	1394	}
	1395	if (m_current == '\|') {
	1396	shift();
	1397	token = OR;
	1398	break;
	1399	}
	1400	token = BITOR;
	1401	break;
	1402	case CharacterOpenParen:
	1403	token = OPENPAREN;
	1404	shift();
	1405	break;
	1406	case CharacterCloseParen:
	1407	token = CLOSEPAREN;
	1408	shift();
	1409	break;
	1410	case CharacterOpenBracket:
	1411	token = OPENBRACKET;
	1412	shift();
	1413	break;
	1414	case CharacterCloseBracket:
	1415	token = CLOSEBRACKET;
	1416	shift();
	1417	break;
	1418	case CharacterComma:
	1419	token = COMMA;
	1420	shift();
	1421	break;
	1422	case CharacterColon:
	1423	token = COLON;
	1424	shift();
	1425	break;
	1426	case CharacterQuestion:
	1427	token = QUESTION;
	1428	shift();
	1429	break;
	1430	case CharacterTilde:
	1431	token = TILDE;
	1432	shift();
	1433	break;
	1434	case CharacterSemicolon:
	1435	shift();
	1436	token = SEMICOLON;
	1437	break;
	1438	case CharacterOpenBrace:
	1439	tokenData->intValue = currentOffset();
	1440	shift();
	1441	token = OPENBRACE;
	1442	break;
	1443	case CharacterCloseBrace:
	1444	tokenData->intValue = currentOffset();
	1445	shift();
	1446	token = CLOSEBRACE;
	1447	break;
	1448	case CharacterDot:
	1449	shift();
	1450	if (!isASCIIDigit(m_current)) {
	1451	token = DOT;
	1452	break;
	1453	}
	1454	goto inNumberAfterDecimalPoint;
	1455	case CharacterZero:
	1456	shift();
	1457	if ((m_current \| 0x20) == 'x' && isASCIIHexDigit(peek(1))) {
	1458	parseHex(tokenData->doubleValue);
	1459	token = NUMBER;
	1460	} else {
	1461	record8('0');
	1462	if (isASCIIOctalDigit(m_current)) {
	1463	if (parseOctal(tokenData->doubleValue)) {
	1464	if (strictMode) {
	1465	m_lexErrorMessage = "Octal escapes are forbidden in strict mode";
	1466	goto returnError;
	1467	}
	1468	token = NUMBER;
	1469	}
	1470	}
	1471	}
	1472	// Fall through into CharacterNumber
	1473	case CharacterNumber:
	1474	if (LIKELY(token != NUMBER)) {
	1475	if (!parseDecimal(tokenData->doubleValue)) {
	1476	if (m_current == '.') {
	1477	shift();
	1478	inNumberAfterDecimalPoint:
	1479	parseNumberAfterDecimalPoint();
	1480	}
	1481	if ((m_current \| 0x20) == 'e') {
	1482	if (!parseNumberAfterExponentIndicator()) {
	1483	m_lexErrorMessage = "Non-number found after exponent indicator";
	1484	goto returnError;
	1485	}
	1486	}
	1487	size_t parsedLength;
	1488	tokenData->doubleValue = parseDouble(m_buffer8.data(), m_buffer8.size(), parsedLength);
	1489	}
	1490	token = NUMBER;
	1491	}
	1492
	1493	// No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
	1494	if (UNLIKELY(isIdentStart(m_current))) {
	1495	m_lexErrorMessage = "At least one digit must occur after a decimal point";
	1496	goto returnError;
	1497	}
	1498	m_buffer8.resize(0);
	1499	break;
	1500	case CharacterQuote:
	1501	if (lexerFlags & LexerFlagsDontBuildStrings) {
	1502	if (UNLIKELY(!parseString<false>(tokenData, strictMode)))
	1503	goto returnError;
	1504	} else {
	1505	if (UNLIKELY(!parseString<true>(tokenData, strictMode)))
	1506	goto returnError;
	1507	}
	1508	shift();
	1509	token = STRING;
	1510	break;
	1511	case CharacterIdentifierStart:
	1512	ASSERT(isIdentStart(m_current));
	1513	// Fall through into CharacterBackSlash.
	1514	case CharacterBackSlash:
	1515	if (lexerFlags & LexexFlagsDontBuildKeywords)
	1516	token = parseIdentifier<false>(tokenData, lexerFlags, strictMode);
	1517	else
	1518	token = parseIdentifier<true>(tokenData, lexerFlags, strictMode);
	1519	break;
	1520	case CharacterLineTerminator:
	1521	ASSERT(isLineTerminator(m_current));
	1522	shiftLineTerminator();
	1523	m_atLineStart = true;
	1524	m_terminator = true;
	1525	goto start;
	1526	case CharacterInvalid:
	1527	m_lexErrorMessage = invalidCharacterMessage();
	1528	goto returnError;
	1529	default:
	1530	ASSERT_NOT_REACHED();
	1531	m_lexErrorMessage = "Internal Error";
	1532	goto returnError;
	1533	}
	1534
	1535	m_atLineStart = false;
	1536	goto returnToken;
	1537
	1538	inSingleLineComment:
	1539	while (!isLineTerminator(m_current)) {
	1540	if (atEnd())
	1541	return EOFTOK;
	1542	shift();
	1543	}
	1544	shiftLineTerminator();
	1545	m_atLineStart = true;
	1546	m_terminator = true;
	1547	if (!lastTokenWasRestrKeyword())
	1548	goto start;
	1549
	1550	token = SEMICOLON;
	1551	// Fall through into returnToken.
	1552
	1553	returnToken:
	1554	tokenInfo->line = m_lineNumber;
	1555	tokenInfo->endOffset = currentOffset();
	1556	m_lastToken = token;
	1557	return token;
	1558
	1559	returnError:
	1560	m_error = true;
	1561	tokenInfo->line = m_lineNumber;
	1562	tokenInfo->endOffset = currentOffset();
	1563	return ERRORTOK;
	1564	}
	1565
	1566	template <typename T>
	1567	bool Lexer<T>::scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix)
	1568	{
	1569	ASSERT(m_buffer16.isEmpty());
	1570
	1571	bool lastWasEscape = false;
	1572	bool inBrackets = false;
	1573
	1574	if (patternPrefix) {
	1575	ASSERT(!isLineTerminator(patternPrefix));
	1576	ASSERT(patternPrefix != '/');
	1577	ASSERT(patternPrefix != '[');
	1578	record16(patternPrefix);
	1579	}
	1580
	1581	while (true) {
	1582	if (isLineTerminator(m_current) \|\| atEnd()) {
	1583	m_buffer16.resize(0);
	1584	return false;
	1585	}
	1586
	1587	T prev = m_current;
	1588
	1589	shift();
	1590
	1591	if (prev == '/' && !lastWasEscape && !inBrackets)
	1592	break;
	1593
	1594	record16(prev);
	1595
	1596	if (lastWasEscape) {
	1597	lastWasEscape = false;
	1598	continue;
	1599	}
	1600
	1601	switch (prev) {
	1602	case '[':
	1603	inBrackets = true;
	1604	break;
	1605	case ']':
	1606	inBrackets = false;
	1607	break;
	1608	case '\\':
	1609	lastWasEscape = true;
	1610	break;
	1611	}
	1612	}
	1613
	1614	pattern = makeIdentifier(m_buffer16.data(), m_buffer16.size());
	1615	m_buffer16.resize(0);
	1616
	1617	while (isIdentPart(m_current)) {
	1618	record16(m_current);
	1619	shift();
	1620	}
	1621
	1622	flags = makeIdentifier(m_buffer16.data(), m_buffer16.size());
	1623	m_buffer16.resize(0);
	1624
	1625	return true;
	1626	}
	1627
	1628	template <typename T>
	1629	bool Lexer<T>::skipRegExp()
	1630	{
	1631	bool lastWasEscape = false;
	1632	bool inBrackets = false;
	1633
	1634	while (true) {
	1635	if (isLineTerminator(m_current) \|\| atEnd())
	1636	return false;
	1637
	1638	T prev = m_current;
	1639
	1640	shift();
	1641
	1642	if (prev == '/' && !lastWasEscape && !inBrackets)
	1643	break;
	1644
	1645	if (lastWasEscape) {
	1646	lastWasEscape = false;
	1647	continue;
	1648	}
	1649
	1650	switch (prev) {
	1651	case '[':
	1652	inBrackets = true;
	1653	break;
	1654	case ']':
	1655	inBrackets = false;
	1656	break;
	1657	case '\\':
	1658	lastWasEscape = true;
	1659	break;
	1660	}
	1661	}
	1662
	1663	while (isIdentPart(m_current))
	1664	shift();
	1665
	1666	return true;
	1667	}
	1668
	1669	template <typename T>
	1670	void Lexer<T>::clear()
	1671	{
	1672	m_arena = 0;
	1673
	1674	Vector<LChar> newBuffer8;
	1675	m_buffer8.swap(newBuffer8);
	1676
	1677	Vector<UChar> newBuffer16;
	1678	m_buffer16.swap(newBuffer16);
	1679
	1680	m_isReparsing = false;
	1681	}
	1682
	1683	template <typename T>
	1684	SourceCode Lexer<T>::sourceCode(int openBrace, int closeBrace, int firstLine)
	1685	{
	1686	ASSERT((*m_source->provider()->data())[openBrace] == '{');
	1687	ASSERT((*m_source->provider()->data())[closeBrace] == '}');
	1688	return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
	1689	}
	1690
// Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h.
// Explicit instantiation here is what makes the member templates defined in this file
// available to other translation units for the LChar and UChar character types.
template class Lexer<LChar>;
template class Lexer<UChar>;
	1694
	1695	} // namespace JSC