git.saurik.com Git - apple/javascriptcore.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
	3	* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
	4	* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
	5	*
	6	* This library is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Library General Public
	8	* License as published by the Free Software Foundation; either
	9	* version 2 of the License, or (at your option) any later version.
	10	*
	11	* This library is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	* Library General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Library General Public License
	17	* along with this library; see the file COPYING.LIB. If not, write to
	18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	19	* Boston, MA 02110-1301, USA.
	20	*
	21	*/
	22
	23	#ifndef Lexer_h
	24	#define Lexer_h
	25
	26	#include "Lookup.h"
	27	#include "ParserArena.h"
	28	#include "ParserTokens.h"
	29	#include "SourceCode.h"
	30	#include <wtf/ASCIICType.h>
	31	#include <wtf/SegmentedVector.h>
	32	#include <wtf/Vector.h>
	33
	34	namespace JSC {
	35
	36	class Keywords {
	37	public:
	38	bool isKeyword(const Identifier& ident) const
	39	{
	40	return m_keywordTable.entry(ident);
	41	}
	42
	43	const HashTableValue* getKeyword(const Identifier& ident) const
	44	{
	45	return m_keywordTable.entry(ident);
	46	}
	47
	48	explicit Keywords(VM&);
	49
	50	~Keywords()
	51	{
	52	m_keywordTable.deleteTable();
	53	}
	54
	55	private:
	56	friend class VM;
	57
	58	VM& m_vm;
	59	const HashTable m_keywordTable;
	60	};
	61
	// Bit flags passed to the lexer as an unsigned mask (see the lexerFlags
	// parameters). The "Lexex" spelling of the last enumerator is kept as-is
	// because code outside this enum refers to it by that name.
	enum LexerFlags {
	    LexerFlagsIgnoreReservedWords = 1 << 0,
	    LexerFlagsDontBuildStrings = 1 << 1,
	    LexexFlagsDontBuildKeywords = 1 << 2
	};
	67
	68	struct ParsedUnicodeEscapeValue;
	69
	70	template <typename T>
	71	class Lexer {
	72	WTF_MAKE_NONCOPYABLE(Lexer);
	73	WTF_MAKE_FAST_ALLOCATED;
	74
	75	public:
	76	Lexer(VM*, JSParserBuiltinMode);
	77	~Lexer();
	78
	79	// Character manipulation functions.
	80	static bool isWhiteSpace(T character);
	81	static bool isLineTerminator(T character);
	82	static unsigned char convertHex(int c1, int c2);
	83	static UChar convertUnicode(int c1, int c2, int c3, int c4);
	84
	85	// Functions to set up parsing.
	86	void setCode(const SourceCode&, ParserArena*);
	87	void setIsReparsing() { m_isReparsing = true; }
	88	bool isReparsing() const { return m_isReparsing; }
	89
	90	#if ENABLE(ES6_ARROWFUNCTION_SYNTAX)
	91	void setTokenPosition(JSToken* tokenRecord);
	92	#endif
	93	JSTokenType lex(JSToken*, unsigned, bool strictMode);
	94	bool nextTokenIsColon();
	95	int lineNumber() const { return m_lineNumber; }
	96	ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); }
	97	ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); }
	98	ALWAYS_INLINE JSTextPosition currentPosition() const
	99	{
	100	return JSTextPosition(m_lineNumber, currentOffset(), currentLineStartOffset());
	101	}
	102	JSTextPosition positionBeforeLastNewline() const { return m_positionBeforeLastNewline; }
	103	JSTokenLocation lastTokenLocation() const { return m_lastTockenLocation; }
	104	void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
	105	int lastLineNumber() const { return m_lastLineNumber; }
	106	bool prevTerminator() const { return m_terminator; }
	107	bool scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix = 0);
	108	#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
	109	enum class RawStringsBuildMode { BuildRawStrings, DontBuildRawStrings };
	110	JSTokenType scanTrailingTemplateString(JSToken*, RawStringsBuildMode);
	111	#endif
	112	bool skipRegExp();
	113
	114	// Functions for use after parsing.
	115	bool sawError() const { return m_error; }
	116	String getErrorMessage() const { return m_lexErrorMessage; }
	117	void clear();
	118	void setOffset(int offset, int lineStartOffset)
	119	{
	120	m_error = 0;
	121	m_lexErrorMessage = String();
	122
	123	m_code = sourcePtrFromOffset(offset);
	124	m_lineStart = sourcePtrFromOffset(lineStartOffset);
	125	ASSERT(currentOffset() >= currentLineStartOffset());
	126
	127	m_buffer8.resize(0);
	128	m_buffer16.resize(0);
	129	if (LIKELY(m_code < m_codeEnd))
	130	m_current = *m_code;
	131	else
	132	m_current = 0;
	133	}
	134	void setLineNumber(int line)
	135	{
	136	m_lineNumber = line;
	137	}
	138	void setTerminator(bool terminator)
	139	{
	140	m_terminator = terminator;
	141	}
	142
	143	SourceProvider* sourceProvider() const { return m_source->provider(); }
	144
	145	JSTokenType lexExpectIdentifier(JSToken*, unsigned, bool strictMode);
	146
	147	private:
	148	void record8(int);
	149	void append8(const T*, size_t);
	150	void record16(int);
	151	void record16(T);
	152	void recordUnicodeCodePoint(UChar32);
	153	void append16(const LChar*, size_t);
	154	void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
	155
	156	ALWAYS_INLINE void shift();
	157	ALWAYS_INLINE bool atEnd() const;
	158	ALWAYS_INLINE T peek(int offset) const;
	159
	160	ParsedUnicodeEscapeValue parseUnicodeEscape();
	161	void shiftLineTerminator();
	162
	163	ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
	164	ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; }
	165
	166	String invalidCharacterMessage() const;
	167	ALWAYS_INLINE const T* currentSourcePtr() const;
	168	ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }
	169
	170	ALWAYS_INLINE void setCodeStart(const StringImpl*);
	171
	172	ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
	173	ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
	174	ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
	175	ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
	176	ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
	177	ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
	178	ALWAYS_INLINE const Identifier* makeEmptyIdentifier();
	179
	180	ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
	181
	182	template <int shiftAmount> void internalShift();
	183	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
	184	template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
	185	template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
	186	enum StringParseResult {
	187	StringParsedSuccessfully,
	188	StringUnterminated,
	189	StringCannotBeParsed
	190	};
	191	template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
	192	template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
	193
	194	enum class EscapeParseMode { Template, String };
	195	template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseComplexEscape(EscapeParseMode, bool strictMode, T stringQuoteCharacter);
	196	#if ENABLE(ES6_TEMPLATE_LITERAL_SYNTAX)
	197	template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseTemplateLiteral(JSTokenData*, RawStringsBuildMode);
	198	#endif
	199	ALWAYS_INLINE void parseHex(double& returnValue);
	200	ALWAYS_INLINE bool parseBinary(double& returnValue);
	201	ALWAYS_INLINE bool parseOctal(double& returnValue);
	202	ALWAYS_INLINE bool parseDecimal(double& returnValue);
	203	ALWAYS_INLINE void parseNumberAfterDecimalPoint();
	204	ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
	205	ALWAYS_INLINE bool parseMultilineComment();
	206
	207	static const size_t initialReadBufferCapacity = 32;
	208
	209	int m_lineNumber;
	210	int m_lastLineNumber;
	211
	212	Vector<LChar> m_buffer8;
	213	Vector<UChar> m_buffer16;
	214	Vector<UChar> m_bufferForRawTemplateString16;
	215	bool m_terminator;
	216	int m_lastToken;
	217
	218	const SourceCode* m_source;
	219	unsigned m_sourceOffset;
	220	const T* m_code;
	221	const T* m_codeStart;
	222	const T* m_codeEnd;
	223	const T* m_codeStartPlusOffset;
	224	const T* m_lineStart;
	225	JSTextPosition m_positionBeforeLastNewline;
	226	JSTokenLocation m_lastTockenLocation;
	227	bool m_isReparsing;
	228	bool m_atLineStart;
	229	bool m_error;
	230	String m_lexErrorMessage;
	231
	232	T m_current;
	233
	234	IdentifierArena* m_arena;
	235
	236	VM* m_vm;
	237	bool m_parsingBuiltinFunction;
	238	};
	239
	240	template <>
	241	ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
	242	{
	243	return ch == ' ' \|\| ch == '\t' \|\| ch == 0xB \|\| ch == 0xC \|\| ch == 0xA0;
	244	}
	245
	246	template <>
	247	ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
	248	{
	249	// 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such.
	250	return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR \|\| ch == 0x180E \|\| ch == 0xFEFF);
	251	}
	252
	253	template <>
	254	ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
	255	{
	256	return ch == '\r' \|\| ch == '\n';
	257	}
	258
	259	template <>
	260	ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
	261	{
	262	return ch == '\r' \|\| ch == '\n' \|\| (ch & ~1) == 0x2028;
	263	}
	264
	265	template <typename T>
	266	inline unsigned char Lexer<T>::convertHex(int c1, int c2)
	267	{
	268	return (toASCIIHexValue(c1) << 4) \| toASCIIHexValue(c2);
	269	}
	270
	271	template <typename T>
	272	inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
	273	{
	274	return (convertHex(c1, c2) << 8) \| convertHex(c3, c4);
	275	}
	276
	277	template <typename T>
	278	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
	279	{
	280	return &m_arena->makeIdentifier(m_vm, characters, length);
	281	}
	282
	283	template <typename T>
	284	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
	285	{
	286	return &m_arena->makeIdentifier(m_vm, characters, length);
	287	}
	288
	289	template <>
	290	ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
	291	{
	292	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	293	}
	294
	295	template <>
	296	ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
	297	{
	298	if (!(orAllChars & ~0xff))
	299	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	300
	301	return &m_arena->makeIdentifier(m_vm, characters, length);
	302	}
	303
	304	template <typename T>
	305	ALWAYS_INLINE const Identifier* Lexer<T>::makeEmptyIdentifier()
	306	{
	307	return &m_arena->makeEmptyIdentifier(m_vm);
	308	}
	309
	310	template <>
	311	ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
	312	{
	313	ASSERT(sourceString->is8Bit());
	314	m_codeStart = sourceString->characters8();
	315	}
	316
	317	template <>
	318	ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
	319	{
	320	ASSERT(!sourceString->is8Bit());
	321	m_codeStart = sourceString->characters16();
	322	}
	323
	324	template <typename T>
	325	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
	326	{
	327	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	328	}
	329
	330	template <typename T>
	331	ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
	332	{
	333	return &m_arena->makeIdentifier(m_vm, characters, length);
	334	}
	335
	336	template <typename T>
	337	ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
	338	{
	339	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	340	}
	341
	342	#if ASSERT_DISABLED
	343	ALWAYS_INLINE bool isSafeBuiltinIdentifier(VM&, const Identifier*) { return true; }
	344	#else
	345	bool isSafeBuiltinIdentifier(VM&, const Identifier*);
	346	#endif
	347
	348	template <typename T>
	349	ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
	350	{
	351	JSTokenData* tokenData = &tokenRecord->m_data;
	352	JSTokenLocation* tokenLocation = &tokenRecord->m_location;
	353	ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
	354	const T* start = m_code;
	355	const T* ptr = start;
	356	const T* end = m_codeEnd;
	357	JSTextPosition startPosition = currentPosition();
	358	if (ptr >= end) {
	359	ASSERT(ptr == end);
	360	goto slowCase;
	361	}
	362	if (!WTF::isASCIIAlpha(*ptr))
	363	goto slowCase;
	364	++ptr;
	365	while (ptr < end) {
	366	if (!WTF::isASCIIAlphanumeric(*ptr))
	367	break;
	368	++ptr;
	369	}
	370
	371	// Here's the shift
	372	if (ptr < end) {
	373	if ((!WTF::isASCII(ptr)) \|\| (ptr == '\\') \|\| (ptr == '_') \|\| (ptr == '$'))
	374	goto slowCase;
	375	m_current = *ptr;
	376	} else
	377	m_current = 0;
	378
	379	m_code = ptr;
	380	ASSERT(currentOffset() >= currentLineStartOffset());
	381
	382	// Create the identifier if needed
	383	if (lexerFlags & LexexFlagsDontBuildKeywords
	384	#if !ASSERT_DISABLED
	385	&& !m_parsingBuiltinFunction
	386	#endif
	387	)
	388	tokenData->ident = 0;
	389	else
	390	tokenData->ident = makeLCharIdentifier(start, ptr - start);
	391
	392	tokenLocation->line = m_lineNumber;
	393	tokenLocation->lineStartOffset = currentLineStartOffset();
	394	tokenLocation->startOffset = offsetFromSourcePtr(start);
	395	tokenLocation->endOffset = currentOffset();
	396	ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
	397	tokenRecord->m_startPosition = startPosition;
	398	tokenRecord->m_endPosition = currentPosition();
	399	#if !ASSERT_DISABLED
	400	if (m_parsingBuiltinFunction) {
	401	if (!isSafeBuiltinIdentifier(*m_vm, tokenData->ident))
	402	return ERRORTOK;
	403	}
	404	#endif
	405
	406	m_lastToken = IDENT;
	407	return IDENT;
	408
	409	slowCase:
	410	return lex(tokenRecord, lexerFlags, strictMode);
	411	}
	412
	413	} // namespace JSC
	414
	415	#endif // Lexer_h