2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB. If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
26 #include "ParserArena.h"
27 #include "SourceCode.h"
28 #include <wtf/ASCIICType.h>
29 #include <wtf/SegmentedVector.h>
30 #include <wtf/Vector.h>
31 #include <wtf/unicode/Unicode.h>
// Lexer declares static character helpers, functions to set up and drive
// scanning, and the buffers and source pointers it stores. It derives from
// Noncopyable.
// NOTE(review): this excerpt shows no access specifiers and no declarations for
// m_isReparsing, m_lineNumber, m_terminator or m_error, which the inline
// accessors below use; presumably they sit on lines not visible here -- confirm
// against the full header.
37 class Lexer
: public Noncopyable
{
39 // Character manipulation functions.
// All four are static and take their characters as int; inline definitions
// follow the class.
40 static bool isWhiteSpace(int character
);
41 static bool isLineTerminator(int character
);
42 static unsigned char convertHex(int c1
, int c2
);
43 static UChar
convertUnicode(int c1
, int c2
, int c3
, int c4
);
45 // Functions to set up parsing.
46 void setCode(const SourceCode
&, ParserArena
&);
// Sets m_isReparsing to true; no way to clear it is declared in this excerpt.
47 void setIsReparsing() { m_isReparsing
= true; }
49 // Functions for the parser itself.
// lvalp and llocp are untyped pointers; jscyylex() forwards its first two
// arguments to this function unchanged.
50 int lex(void* lvalp
, void* llocp
);
// Inline accessor returning m_lineNumber.
51 int lineNumber() const { return m_lineNumber
; }
// Inline accessor returning m_terminator.
52 bool prevTerminator() const { return m_terminator
; }
53 SourceCode
sourceCode(int openBrace
, int closeBrace
, int firstLine
);
// patternPrefix defaults to 0. pattern and flags are references to pointers,
// presumably out-parameters filled on success -- TODO confirm in Lexer.cpp.
54 bool scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
= 0);
57 // Functions for use after parsing.
// Inline accessor returning m_error.
58 bool sawError() const { return m_error
; }
// JSGlobalData is granted friend access to this class.
62 friend class JSGlobalData
;
71 void shiftLineTerminator();
77 void copyCodeWithoutBOMs();
79 int currentOffset() const;
80 const UChar
* currentCharacter() const;
82 const Identifier
* makeIdentifier(const UChar
* characters
, size_t length
);
84 bool lastTokenWasRestrKeyword() const;
// Named constant with value 32; presumably the starting capacity of the read
// buffers declared below -- TODO confirm in the implementation.
86 static const size_t initialReadBufferCapacity
= 32;
// Two separate buffers, one of char elements and one of UChar elements.
90 Vector
<char> m_buffer8
;
91 Vector
<UChar
> m_buffer16
;
93 bool m_delimited
; // encountered delimiter like "'" and "}" on last run
96 const SourceCode
* m_source
;
// Pointers into UChar data; presumably the bounds of the code being scanned
// -- TODO confirm against setCode().
98 const UChar
* m_codeStart
;
99 const UChar
* m_codeEnd
;
104 // current and following unicode characters (int to allow for -1 for end-of-file marker)
110 IdentifierArena
* m_arena
;
112 JSGlobalData
* m_globalData
;
114 const HashTable m_keywordTable
;
// UChar storage; presumably filled by copyCodeWithoutBOMs() -- TODO confirm.
116 Vector
<UChar
> m_codeWithoutBOMs
;
119 inline bool Lexer::isWhiteSpace(int ch
)
121 return isASCII(ch
) ? (ch
== ' ' || ch
== '\t' || ch
== 0xB || ch
== 0xC) : WTF::Unicode::isSeparatorSpace(ch
);
124 inline bool Lexer::isLineTerminator(int ch
)
126 return ch
== '\r' || ch
== '\n' || (ch
& ~1) == 0x2028;
129 inline unsigned char Lexer::convertHex(int c1
, int c2
)
131 return (toASCIIHexValue(c1
) << 4) | toASCIIHexValue(c2
);
134 inline UChar
Lexer::convertUnicode(int c1
, int c2
, int c3
, int c4
)
136 return (convertHex(c1
, c2
) << 8) | convertHex(c3
, c4
);
139 // A bridge for yacc from the C world to the C++ world.
140 inline int jscyylex(void* lvalp
, void* llocp
, void* globalData
)
142 return static_cast<JSGlobalData
*>(globalData
)->lexer
->lex(lvalp
, llocp
);