git.saurik.com Git - apple/javascriptcore.git/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
	3	* Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved.
	4	* Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu)
	5	*
	6	* This library is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU Library General Public
	8	* License as published by the Free Software Foundation; either
	9	* version 2 of the License, or (at your option) any later version.
	10	*
	11	* This library is distributed in the hope that it will be useful,
	12	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	* Library General Public License for more details.
	15	*
	16	* You should have received a copy of the GNU Library General Public License
	17	* along with this library; see the file COPYING.LIB. If not, write to
	18	* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
	19	* Boston, MA 02110-1301, USA.
	20	*
	21	*/
	22
	23	#ifndef Lexer_h
	24	#define Lexer_h
	25
	26	#include "Lookup.h"
	27	#include "ParserArena.h"
	28	#include "ParserTokens.h"
	29	#include "SourceCode.h"
	30	#include <wtf/ASCIICType.h>
	31	#include <wtf/SegmentedVector.h>
	32	#include <wtf/Vector.h>
	33
	34	namespace JSC {
	35
	36	class Keywords {
	37	public:
	38	bool isKeyword(const Identifier& ident) const
	39	{
	40	return m_keywordTable.entry(m_vm, ident);
	41	}
	42
	43	const HashTableValue* getKeyword(const Identifier& ident) const
	44	{
	45	return m_keywordTable.entry(m_vm, ident);
	46	}
	47
	48	~Keywords()
	49	{
	50	m_keywordTable.deleteTable();
	51	}
	52
	53	private:
	54	friend class VM;
	55
	56	explicit Keywords(VM&);
	57
	58	VM& m_vm;
	59	const HashTable m_keywordTable;
	60	};
	61
	// Bit flags passed to the lexer as an unsigned mask (see the |lexerFlags|
	// parameters of Lexer); each enumerator occupies its own bit.
	enum LexerFlags {
	    LexerFlagsIgnoreReservedWords = 1 << 0,
	    LexerFlagsDontBuildStrings = 1 << 1,
	    // Historical misspelling ("Lexex"); kept so existing callers keep compiling.
	    LexexFlagsDontBuildKeywords = 1 << 2,
	    // Correctly spelled alias with the same value; prefer this in new code.
	    LexerFlagsDontBuildKeywords = LexexFlagsDontBuildKeywords
	};
	67
	68	template <typename T>
	69	class Lexer {
	70	WTF_MAKE_NONCOPYABLE(Lexer);
	71	WTF_MAKE_FAST_ALLOCATED;
	72
	73	public:
	74	Lexer(VM*, JSParserStrictness);
	75	~Lexer();
	76
	77	// Character manipulation functions.
	78	static bool isWhiteSpace(T character);
	79	static bool isLineTerminator(T character);
	80	static unsigned char convertHex(int c1, int c2);
	81	static UChar convertUnicode(int c1, int c2, int c3, int c4);
	82
	83	// Functions to set up parsing.
	84	void setCode(const SourceCode&, ParserArena*);
	85	void setIsReparsing() { m_isReparsing = true; }
	86	bool isReparsing() const { return m_isReparsing; }
	87
	88	JSTokenType lex(JSToken*, unsigned, bool strictMode);
	89	bool nextTokenIsColon();
	90	int lineNumber() const { return m_lineNumber; }
	91	ALWAYS_INLINE int currentOffset() const { return offsetFromSourcePtr(m_code); }
	92	ALWAYS_INLINE int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart); }
	93	ALWAYS_INLINE JSTextPosition currentPosition() const
	94	{
	95	return JSTextPosition(m_lineNumber, currentOffset(), currentLineStartOffset());
	96	}
	97	JSTextPosition positionBeforeLastNewline() const { return m_positionBeforeLastNewline; }
	98	void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
	99	int lastLineNumber() const { return m_lastLineNumber; }
	100	bool prevTerminator() const { return m_terminator; }
	101	bool scanRegExp(const Identifier& pattern, const Identifier& flags, UChar patternPrefix = 0);
	102	bool skipRegExp();
	103
	104	// Functions for use after parsing.
	105	bool sawError() const { return m_error; }
	106	String getErrorMessage() const { return m_lexErrorMessage; }
	107	void clear();
	108	void setOffset(int offset, int lineStartOffset)
	109	{
	110	m_error = 0;
	111	m_lexErrorMessage = String();
	112
	113	m_code = sourcePtrFromOffset(offset);
	114	m_lineStart = sourcePtrFromOffset(lineStartOffset);
	115	ASSERT(currentOffset() >= currentLineStartOffset());
	116
	117	m_buffer8.resize(0);
	118	m_buffer16.resize(0);
	119	if (LIKELY(m_code < m_codeEnd))
	120	m_current = *m_code;
	121	else
	122	m_current = 0;
	123	}
	124	void setLineNumber(int line)
	125	{
	126	m_lineNumber = line;
	127	}
	128
	129	SourceProvider* sourceProvider() const { return m_source->provider(); }
	130
	131	JSTokenType lexExpectIdentifier(JSToken*, unsigned, bool strictMode);
	132
	133	private:
	134	void record8(int);
	135	void append8(const T*, size_t);
	136	void record16(int);
	137	void record16(T);
	138	void append16(const LChar*, size_t);
	139	void append16(const UChar* characters, size_t length) { m_buffer16.append(characters, length); }
	140
	141	ALWAYS_INLINE void shift();
	142	ALWAYS_INLINE bool atEnd() const;
	143	ALWAYS_INLINE T peek(int offset) const;
	144	struct UnicodeHexValue {
	145
	146	enum ValueType { ValidHex, IncompleteHex, InvalidHex };
	147
	148	explicit UnicodeHexValue(int value)
	149	: m_value(value)
	150	{
	151	}
	152	explicit UnicodeHexValue(ValueType type)
	153	: m_value(type == IncompleteHex ? -2 : -1)
	154	{
	155	}
	156
	157	ValueType valueType() const
	158	{
	159	if (m_value >= 0)
	160	return ValidHex;
	161	return m_value == -2 ? IncompleteHex : InvalidHex;
	162	}
	163	bool isValid() const { return m_value >= 0; }
	164	int value() const
	165	{
	166	ASSERT(m_value >= 0);
	167	return m_value;
	168	}
	169
	170	private:
	171	int m_value;
	172	};
	173	UnicodeHexValue parseFourDigitUnicodeHex();
	174	void shiftLineTerminator();
	175
	176	ALWAYS_INLINE int offsetFromSourcePtr(const T* ptr) const { return ptr - m_codeStart; }
	177	ALWAYS_INLINE const T* sourcePtrFromOffset(int offset) const { return m_codeStart + offset; }
	178
	179	String invalidCharacterMessage() const;
	180	ALWAYS_INLINE const T* currentSourcePtr() const;
	181	ALWAYS_INLINE void setOffsetFromSourcePtr(const T* sourcePtr, unsigned lineStartOffset) { setOffset(offsetFromSourcePtr(sourcePtr), lineStartOffset); }
	182
	183	ALWAYS_INLINE void setCodeStart(const StringImpl*);
	184
	185	ALWAYS_INLINE const Identifier* makeIdentifier(const LChar* characters, size_t length);
	186	ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
	187	ALWAYS_INLINE const Identifier* makeLCharIdentifier(const LChar* characters, size_t length);
	188	ALWAYS_INLINE const Identifier* makeLCharIdentifier(const UChar* characters, size_t length);
	189	ALWAYS_INLINE const Identifier* makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars);
	190	ALWAYS_INLINE const Identifier* makeIdentifierLCharFromUChar(const UChar* characters, size_t length);
	191
	192	ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
	193
	194	template <int shiftAmount> void internalShift();
	195	template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType parseKeyword(JSTokenData*);
	196	template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned lexerFlags, bool strictMode);
	197	template <bool shouldBuildIdentifiers> NEVER_INLINE JSTokenType parseIdentifierSlowCase(JSTokenData*, unsigned lexerFlags, bool strictMode);
	198	enum StringParseResult {
	199	StringParsedSuccessfully,
	200	StringUnterminated,
	201	StringCannotBeParsed
	202	};
	203	template <bool shouldBuildStrings> ALWAYS_INLINE StringParseResult parseString(JSTokenData*, bool strictMode);
	204	template <bool shouldBuildStrings> NEVER_INLINE StringParseResult parseStringSlowCase(JSTokenData*, bool strictMode);
	205	ALWAYS_INLINE void parseHex(double& returnValue);
	206	ALWAYS_INLINE bool parseOctal(double& returnValue);
	207	ALWAYS_INLINE bool parseDecimal(double& returnValue);
	208	ALWAYS_INLINE void parseNumberAfterDecimalPoint();
	209	ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
	210	ALWAYS_INLINE bool parseMultilineComment();
	211
	212	static const size_t initialReadBufferCapacity = 32;
	213
	214	int m_lineNumber;
	215	int m_lastLineNumber;
	216
	217	Vector<LChar> m_buffer8;
	218	Vector<UChar> m_buffer16;
	219	bool m_terminator;
	220	int m_lastToken;
	221
	222	const SourceCode* m_source;
	223	unsigned m_sourceOffset;
	224	const T* m_code;
	225	const T* m_codeStart;
	226	const T* m_codeEnd;
	227	const T* m_codeStartPlusOffset;
	228	const T* m_lineStart;
	229	JSTextPosition m_positionBeforeLastNewline;
	230	bool m_isReparsing;
	231	bool m_atLineStart;
	232	bool m_error;
	233	String m_lexErrorMessage;
	234
	235	T m_current;
	236
	237	IdentifierArena* m_arena;
	238
	239	VM* m_vm;
	240	bool m_parsingBuiltinFunction;
	241	};
	242
	243	template <>
	244	ALWAYS_INLINE bool Lexer<LChar>::isWhiteSpace(LChar ch)
	245	{
	246	return ch == ' ' \|\| ch == '\t' \|\| ch == 0xB \|\| ch == 0xC \|\| ch == 0xA0;
	247	}
	248
	249	template <>
	250	ALWAYS_INLINE bool Lexer<UChar>::isWhiteSpace(UChar ch)
	251	{
	252	// 0x180E used to be in Zs category before Unicode 6.3, and EcmaScript says that we should keep treating it as such.
	253	return (ch < 256) ? Lexer<LChar>::isWhiteSpace(static_cast<LChar>(ch)) : (u_charType(ch) == U_SPACE_SEPARATOR \|\| ch == 0x180E \|\| ch == 0xFEFF);
	254	}
	255
	256	template <>
	257	ALWAYS_INLINE bool Lexer<LChar>::isLineTerminator(LChar ch)
	258	{
	259	return ch == '\r' \|\| ch == '\n';
	260	}
	261
	262	template <>
	263	ALWAYS_INLINE bool Lexer<UChar>::isLineTerminator(UChar ch)
	264	{
	265	return ch == '\r' \|\| ch == '\n' \|\| (ch & ~1) == 0x2028;
	266	}
	267
	268	template <typename T>
	269	inline unsigned char Lexer<T>::convertHex(int c1, int c2)
	270	{
	271	return (toASCIIHexValue(c1) << 4) \| toASCIIHexValue(c2);
	272	}
	273
	274	template <typename T>
	275	inline UChar Lexer<T>::convertUnicode(int c1, int c2, int c3, int c4)
	276	{
	277	return (convertHex(c1, c2) << 8) \| convertHex(c3, c4);
	278	}
	279
	280	template <typename T>
	281	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const LChar* characters, size_t length)
	282	{
	283	return &m_arena->makeIdentifier(m_vm, characters, length);
	284	}
	285
	286	template <typename T>
	287	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifier(const UChar* characters, size_t length)
	288	{
	289	return &m_arena->makeIdentifier(m_vm, characters, length);
	290	}
	291
	292	template <>
	293	ALWAYS_INLINE const Identifier* Lexer<LChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar)
	294	{
	295	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	296	}
	297
	298	template <>
	299	ALWAYS_INLINE const Identifier* Lexer<UChar>::makeRightSizedIdentifier(const UChar* characters, size_t length, UChar orAllChars)
	300	{
	301	if (!(orAllChars & ~0xff))
	302	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	303
	304	return &m_arena->makeIdentifier(m_vm, characters, length);
	305	}
	306
	307	template <>
	308	ALWAYS_INLINE void Lexer<LChar>::setCodeStart(const StringImpl* sourceString)
	309	{
	310	ASSERT(sourceString->is8Bit());
	311	m_codeStart = sourceString->characters8();
	312	}
	313
	314	template <>
	315	ALWAYS_INLINE void Lexer<UChar>::setCodeStart(const StringImpl* sourceString)
	316	{
	317	ASSERT(!sourceString->is8Bit());
	318	m_codeStart = sourceString->characters16();
	319	}
	320
	321	template <typename T>
	322	ALWAYS_INLINE const Identifier* Lexer<T>::makeIdentifierLCharFromUChar(const UChar* characters, size_t length)
	323	{
	324	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	325	}
	326
	327	template <typename T>
	328	ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const LChar* characters, size_t length)
	329	{
	330	return &m_arena->makeIdentifier(m_vm, characters, length);
	331	}
	332
	333	template <typename T>
	334	ALWAYS_INLINE const Identifier* Lexer<T>::makeLCharIdentifier(const UChar* characters, size_t length)
	335	{
	336	return &m_arena->makeIdentifierLCharFromUChar(m_vm, characters, length);
	337	}
	338
	339	#if ASSERT_DISABLED
	340	ALWAYS_INLINE bool isSafeBuiltinIdentifier(VM&, const Identifier*) { return true; }
	341	#else
	342	bool isSafeBuiltinIdentifier(VM&, const Identifier*);
	343	#endif
	344
	345	template <typename T>
	346	ALWAYS_INLINE JSTokenType Lexer<T>::lexExpectIdentifier(JSToken* tokenRecord, unsigned lexerFlags, bool strictMode)
	347	{
	348	JSTokenData* tokenData = &tokenRecord->m_data;
	349	JSTokenLocation* tokenLocation = &tokenRecord->m_location;
	350	ASSERT((lexerFlags & LexerFlagsIgnoreReservedWords));
	351	const T* start = m_code;
	352	const T* ptr = start;
	353	const T* end = m_codeEnd;
	354	JSTextPosition startPosition = currentPosition();
	355	if (ptr >= end) {
	356	ASSERT(ptr == end);
	357	goto slowCase;
	358	}
	359	if (!WTF::isASCIIAlpha(*ptr))
	360	goto slowCase;
	361	++ptr;
	362	while (ptr < end) {
	363	if (!WTF::isASCIIAlphanumeric(*ptr))
	364	break;
	365	++ptr;
	366	}
	367
	368	// Here's the shift
	369	if (ptr < end) {
	370	if ((!WTF::isASCII(ptr)) \|\| (ptr == '\\') \|\| (ptr == '_') \|\| (ptr == '$'))
	371	goto slowCase;
	372	m_current = *ptr;
	373	} else
	374	m_current = 0;
	375
	376	m_code = ptr;
	377	ASSERT(currentOffset() >= currentLineStartOffset());
	378
	379	// Create the identifier if needed
	380	if (lexerFlags & LexexFlagsDontBuildKeywords
	381	#if !ASSERT_DISABLED
	382	&& !m_parsingBuiltinFunction
	383	#endif
	384	)
	385	tokenData->ident = 0;
	386	else
	387	tokenData->ident = makeLCharIdentifier(start, ptr - start);
	388
	389	tokenLocation->line = m_lineNumber;
	390	tokenLocation->lineStartOffset = currentLineStartOffset();
	391	tokenLocation->startOffset = offsetFromSourcePtr(start);
	392	tokenLocation->endOffset = currentOffset();
	393	ASSERT(tokenLocation->startOffset >= tokenLocation->lineStartOffset);
	394	tokenRecord->m_startPosition = startPosition;
	395	tokenRecord->m_endPosition = currentPosition();
	396	#if !ASSERT_DISABLED
	397	if (m_parsingBuiltinFunction) {
	398	if (!isSafeBuiltinIdentifier(*m_vm, tokenData->ident))
	399	return ERRORTOK;
	400	}
	401	#endif
	402
	403	m_lastToken = IDENT;
	404	return IDENT;
	405
	406	slowCase:
	407	return lex(tokenRecord, lexerFlags, strictMode);
	408	}
	409
	410	} // namespace JSC
	411
	412	#endif // Lexer_h