2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 
   3  *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All rights reserved. 
   4  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu) 
   6  *  This library is free software; you can redistribute it and/or 
   7  *  modify it under the terms of the GNU Library General Public 
   8  *  License as published by the Free Software Foundation; either 
   9  *  version 2 of the License, or (at your option) any later version. 
  11  *  This library is distributed in the hope that it will be useful, 
  12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
  14  *  Library General Public License for more details. 
  16  *  You should have received a copy of the GNU Library General Public License 
  17  *  along with this library; see the file COPYING.LIB.  If not, write to 
  18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 
  19  *  Boston, MA 02110-1301, USA. 
  27 #include "ParserArena.h" 
  28 #include "ParserTokens.h" 
  29 #include "SourceCode.h" 
  30 #include <wtf/ASCIICType.h> 
  31 #include <wtf/SegmentedVector.h> 
  32 #include <wtf/Vector.h> 
  33 #include <wtf/unicode/Unicode.h> 
  39     bool isKeyword(const Identifier
& ident
) const 
  41         return m_keywordTable
.entry(m_vm
, ident
); 
  44     const HashEntry
* getKeyword(const Identifier
& ident
) const 
  46         return m_keywordTable
.entry(m_vm
, ident
); 
  51         m_keywordTable
.deleteTable(); 
  60     const HashTable m_keywordTable
; 
  64     LexerFlagsIgnoreReservedWords 
= 1,  
  65     LexerFlagsDontBuildStrings 
= 2, 
  66     LexexFlagsDontBuildKeywords 
= 4 
  71     WTF_MAKE_NONCOPYABLE(Lexer
); 
  72     WTF_MAKE_FAST_ALLOCATED
; 
  78     // Character manipulation functions. 
  79     static bool isWhiteSpace(T character
); 
  80     static bool isLineTerminator(T character
); 
  81     static unsigned char convertHex(int c1
, int c2
); 
  82     static UChar 
convertUnicode(int c1
, int c2
, int c3
, int c4
); 
  84     // Functions to set up parsing. 
  85     void setCode(const SourceCode
&, ParserArena
*); 
  86     void setIsReparsing() { m_isReparsing 
= true; } 
  87     bool isReparsing() const { return m_isReparsing
; } 
  89     JSTokenType 
lex(JSTokenData
*, JSTokenLocation
*, unsigned, bool strictMode
); 
  90     bool nextTokenIsColon(); 
  91     int lineNumber() const { return m_lineNumber
; } 
  92     ALWAYS_INLINE 
int currentOffset() const { return offsetFromSourcePtr(m_code
); } 
  93     ALWAYS_INLINE 
int currentLineStartOffset() const { return offsetFromSourcePtr(m_lineStart
); } 
  94     void setLastLineNumber(int lastLineNumber
) { m_lastLineNumber 
= lastLineNumber
; } 
  95     int lastLineNumber() const { return m_lastLineNumber
; } 
  96     bool prevTerminator() const { return m_terminator
; } 
  97     SourceCode 
sourceCode(int openBrace
, int closeBrace
, int firstLine
, unsigned startColumn
); 
  98     bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix 
= 0); 
 101     // Functions for use after parsing. 
 102     bool sawError() const { return m_error
; } 
 103     String 
getErrorMessage() const { return m_lexErrorMessage
; } 
 // Repositions the lexer: |offset| becomes the new m_code position and
 // |lineStartOffset| the new m_lineStart position, both translated through
 // sourcePtrFromOffset().
 // NOTE(review): several statements of this method are not visible in this
 // view; only the visible ones are described.
 105     void setOffset(int offset
, int lineStartOffset
) 
 // Reset the stored error message to an empty String.
 108         m_lexErrorMessage 
= String(); 
 // Convert both offsets back into pointers into the source buffer.
 110         m_code 
= sourcePtrFromOffset(offset
); 
 111         m_lineStart 
= sourcePtrFromOffset(lineStartOffset
); 
 // The read position must not lie before the start of its line.
 112         ASSERT(currentOffset() >= currentLineStartOffset()); 
 // Empty the 16-bit scratch buffer.
 115         m_buffer16
.resize(0); 
 // Expected case: the new position lies before m_codeEnd.
 // NOTE(review): the statement guarded by this check is not visible here.
 116         if (LIKELY(m_code 
< m_codeEnd
)) 
 121     void setLineNumber(int line
) 
 126     SourceProvider
* sourceProvider() const { return m_source
->provider(); } 
 128     JSTokenType 
lexExpectIdentifier(JSTokenData
*, JSTokenLocation
*, unsigned, bool strictMode
); 
 132     void append8(const T
*, size_t); 
 135     void append16(const LChar
*, size_t); 
 136     void append16(const UChar
* characters
, size_t length
) { m_buffer16
.append(characters
, length
); } 
 138     ALWAYS_INLINE 
void shift(); 
 139     ALWAYS_INLINE 
bool atEnd() const; 
 140     ALWAYS_INLINE T 
peek(int offset
) const; 
 // Result wrapper that stores either a value or a failure kind in one int.
 // Encoding visible here: m_value >= 0 counts as valid, -2 stands for
 // IncompleteHex and -1 for InvalidHex.
 // NOTE(review): some member bodies and the end of the struct are not visible
 // in this view.
 141     struct UnicodeHexValue 
{ 
 143         enum ValueType 
{ ValidHex
, IncompleteHex
, InvalidHex 
}; 
 // Constructor taking an int value (body not visible here).
 145         explicit UnicodeHexValue(int value
) 
 // Constructor taking a failure kind: IncompleteHex maps to -2, any other
 // ValueType to -1.
 149         explicit UnicodeHexValue(ValueType type
) 
 150             : m_value(type 
== IncompleteHex 
? -2 : -1) 
 // Maps the stored int back to a ValueType.
 // NOTE(review): only the -2 / otherwise distinction is visible here.
 154         ValueType 
valueType() const 
 158             return m_value 
== -2 ? IncompleteHex 
: InvalidHex
; 
 // Any non-negative m_value counts as valid.
 160         bool isValid() const { return m_value 
>= 0; } 
 // NOTE(review): the accessor this assertion belongs to is not visible here.
 163             ASSERT(m_value 
>= 0); 
 170     UnicodeHexValue 
parseFourDigitUnicodeHex(); 
 171     void shiftLineTerminator(); 
 173     ALWAYS_INLINE 
int offsetFromSourcePtr(const T
* ptr
) const { return ptr 
- m_codeStart
; } 
 174     ALWAYS_INLINE 
const T
* sourcePtrFromOffset(int offset
) const { return m_codeStart 
+ offset
; } 
 176     String 
invalidCharacterMessage() const; 
 177     ALWAYS_INLINE 
const T
* currentSourcePtr() const; 
 178     ALWAYS_INLINE 
void setOffsetFromSourcePtr(const T
* sourcePtr
, unsigned lineStartOffset
) { setOffset(offsetFromSourcePtr(sourcePtr
), lineStartOffset
); } 
 180     ALWAYS_INLINE 
void setCodeStart(const StringImpl
*); 
 182     ALWAYS_INLINE 
const Identifier
* makeIdentifier(const LChar
* characters
, size_t length
); 
 183     ALWAYS_INLINE 
const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
); 
 184     ALWAYS_INLINE 
const Identifier
* makeLCharIdentifier(const LChar
* characters
, size_t length
); 
 185     ALWAYS_INLINE 
const Identifier
* makeLCharIdentifier(const UChar
* characters
, size_t length
); 
 186     ALWAYS_INLINE 
const Identifier
* makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
); 
 187     ALWAYS_INLINE 
const Identifier
* makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
); 
 189     ALWAYS_INLINE 
bool lastTokenWasRestrKeyword() const; 
 191     template <int shiftAmount
> void internalShift(); 
 192     template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType 
parseKeyword(JSTokenData
*); 
 193     template <bool shouldBuildIdentifiers
> ALWAYS_INLINE JSTokenType 
parseIdentifier(JSTokenData
*, unsigned lexerFlags
, bool strictMode
); 
 194     template <bool shouldBuildIdentifiers
> NEVER_INLINE JSTokenType 
parseIdentifierSlowCase(JSTokenData
*, unsigned lexerFlags
, bool strictMode
); 
 195     enum StringParseResult 
{ 
 196         StringParsedSuccessfully
, 
 200     template <bool shouldBuildStrings
> ALWAYS_INLINE StringParseResult 
parseString(JSTokenData
*, bool strictMode
); 
 201     template <bool shouldBuildStrings
> NEVER_INLINE StringParseResult 
parseStringSlowCase(JSTokenData
*, bool strictMode
); 
 202     ALWAYS_INLINE 
void parseHex(double& returnValue
); 
 203     ALWAYS_INLINE 
bool parseOctal(double& returnValue
); 
 204     ALWAYS_INLINE 
bool parseDecimal(double& returnValue
); 
 205     ALWAYS_INLINE 
void parseNumberAfterDecimalPoint(); 
 206     ALWAYS_INLINE 
bool parseNumberAfterExponentIndicator(); 
 207     ALWAYS_INLINE 
bool parseMultilineComment(); 
 209     static const size_t initialReadBufferCapacity 
= 32; 
 212     int m_lastLineNumber
; 
 214     Vector
<LChar
> m_buffer8
; 
 215     Vector
<UChar
> m_buffer16
; 
 219     const SourceCode
* m_source
; 
 220     unsigned m_sourceOffset
; 
 222     const T
* m_codeStart
; 
 224     const T
* m_codeStartPlusOffset
; 
 225     const T
* m_lineStart
; 
 229     String m_lexErrorMessage
; 
 233     IdentifierArena
* m_arena
; 
 239 ALWAYS_INLINE 
bool Lexer
<LChar
>::isWhiteSpace(LChar ch
) 
 241     return ch 
== ' ' || ch 
== '\t' || ch 
== 0xB || ch 
== 0xC || ch 
== 0xA0; 
 245 ALWAYS_INLINE 
bool Lexer
<UChar
>::isWhiteSpace(UChar ch
) 
 247     return (ch 
< 256) ? Lexer
<LChar
>::isWhiteSpace(static_cast<LChar
>(ch
)) : (WTF::Unicode::isSeparatorSpace(ch
) || ch 
== 0xFEFF); 
 251 ALWAYS_INLINE 
bool Lexer
<LChar
>::isLineTerminator(LChar ch
) 
 253     return ch 
== '\r' || ch 
== '\n'; 
 257 ALWAYS_INLINE 
bool Lexer
<UChar
>::isLineTerminator(UChar ch
) 
 259     return ch 
== '\r' || ch 
== '\n' || (ch 
& ~1) == 0x2028; 
 262 template <typename T
> 
 263 inline unsigned char Lexer
<T
>::convertHex(int c1
, int c2
) 
 265     return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
); 
 268 template <typename T
> 
 269 inline UChar Lexer
<T
>::convertUnicode(int c1
, int c2
, int c3
, int c4
) 
 271     return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
); 
 274 template <typename T
> 
 275 ALWAYS_INLINE 
const Identifier
* Lexer
<T
>::makeIdentifier(const LChar
* characters
, size_t length
) 
 277     return &m_arena
->makeIdentifier(m_vm
, characters
, length
); 
 280 template <typename T
> 
 281 ALWAYS_INLINE 
const Identifier
* Lexer
<T
>::makeIdentifier(const UChar
* characters
, size_t length
) 
 283     return &m_arena
->makeIdentifier(m_vm
, characters
, length
); 
 287 ALWAYS_INLINE 
const Identifier
* Lexer
<LChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar
) 
 289     return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
); 
 293 ALWAYS_INLINE 
const Identifier
* Lexer
<UChar
>::makeRightSizedIdentifier(const UChar
* characters
, size_t length
, UChar orAllChars
) 
 295     if (!(orAllChars 
& ~0xff)) 
 296         return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
); 
 298     return &m_arena
->makeIdentifier(m_vm
, characters
, length
); 
 302 ALWAYS_INLINE 
void Lexer
<LChar
>::setCodeStart(const StringImpl
* sourceString
) 
 304     ASSERT(sourceString
->is8Bit()); 
 305     m_codeStart 
= sourceString
->characters8(); 
 309 ALWAYS_INLINE 
void Lexer
<UChar
>::setCodeStart(const StringImpl
* sourceString
) 
 311     ASSERT(!sourceString
->is8Bit()); 
 312     m_codeStart 
= sourceString
->characters16(); 
 315 template <typename T
> 
 316 ALWAYS_INLINE 
const Identifier
* Lexer
<T
>::makeIdentifierLCharFromUChar(const UChar
* characters
, size_t length
) 
 318     return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
); 
 321 template <typename T
> 
 322 ALWAYS_INLINE 
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const LChar
* characters
, size_t length
) 
 324     return &m_arena
->makeIdentifier(m_vm
, characters
, length
); 
 327 template <typename T
> 
 328 ALWAYS_INLINE 
const Identifier
* Lexer
<T
>::makeLCharIdentifier(const UChar
* characters
, size_t length
) 
 330     return &m_arena
->makeIdentifierLCharFromUChar(m_vm
, characters
, length
); 
 // Lexing entry point that inspects the characters at m_code with ASCII
 // alphabetic / alphanumeric checks, fills |tokenData| and |tokenLocation|
 // for the scanned range, and otherwise ends in a call to the general lex().
 // Callers must pass LexerFlagsIgnoreReservedWords in |lexerFlags| (asserted).
 // NOTE(review): the loop, the branch targets and the returns of the scanning
 // path are not visible in this view; comments cover only what is visible.
 333 template <typename T
> 
 334 ALWAYS_INLINE JSTokenType Lexer
<T
>::lexExpectIdentifier(JSTokenData
* tokenData
, JSTokenLocation
* tokenLocation
, unsigned lexerFlags
, bool strictMode
) 
 336     ASSERT((lexerFlags 
& LexerFlagsIgnoreReservedWords
)); 
 // start: first character of the candidate; ptr: scan cursor; end: m_codeEnd.
 337     const T
* start 
= m_code
; 
 338     const T
* ptr 
= start
; 
 339     const T
* end 
= m_codeEnd
; 
 // Check applied to the first character: it must be an ASCII letter.
 344     if (!WTF::isASCIIAlpha(*ptr
)) 
 // Check applied while scanning: stop at the first non-alphanumeric character.
 348         if (!WTF::isASCIIAlphanumeric(*ptr
)) 
 // Check applied to the character after the scanned run: non-ASCII, a
 // backslash, '_' or '$'.
 // NOTE(review): the action taken when this matches is not visible here.
 355         if ((!WTF::isASCII(*ptr
)) || (*ptr 
== '\\') || (*ptr 
== '_') || (*ptr 
== '$')) 
 362     ASSERT(currentOffset() >= currentLineStartOffset()); 
 364     // Create the identifier if needed 
 // With LexexFlagsDontBuildKeywords set, ident is stored as 0.
 365     if (lexerFlags 
& LexexFlagsDontBuildKeywords
) 
 366         tokenData
->ident 
= 0; 
 // NOTE(review): presumably the alternative arm of the check above (the
 // separating line is not visible): build the identifier from [start, ptr).
 368         tokenData
->ident 
= makeLCharIdentifier(start
, ptr 
- start
); 
 // Record where the token sits: line, line start offset, start and end offsets.
 369     tokenLocation
->line 
= m_lineNumber
; 
 370     tokenLocation
->lineStartOffset 
= currentLineStartOffset(); 
 371     tokenLocation
->startOffset 
= offsetFromSourcePtr(start
); 
 372     tokenLocation
->endOffset 
= currentOffset(); 
 373     ASSERT(tokenLocation
->startOffset 
>= tokenLocation
->lineStartOffset
); 
 // Hand over to the general lex() with the same arguments.
 // NOTE(review): presumably reached only from the rejected-fast-path branches;
 // the label or return separating it from the code above is not visible.
 378     return lex(tokenData
, tokenLocation
, lexerFlags
, strictMode
);