2  *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 
   3  *  Copyright (C) 2006, 2007, 2008, 2009, 2011, 2012, 2013 Apple Inc. All Rights Reserved. 
   4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca) 
   5  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg@inf.u-szeged.hu) 
   6  *  Copyright (C) 2012 Mathias Bynens (mathias@qiwi.be) 
   8  *  This library is free software; you can redistribute it and/or 
   9  *  modify it under the terms of the GNU Library General Public 
  10  *  License as published by the Free Software Foundation; either 
  11  *  version 2 of the License, or (at your option) any later version. 
  13  *  This library is distributed in the hope that it will be useful, 
  14  *  but WITHOUT ANY WARRANTY; without even the implied warranty of 
  15  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
  16  *  Library General Public License for more details. 
  18  *  You should have received a copy of the GNU Library General Public License 
  19  *  along with this library; see the file COPYING.LIB.  If not, write to 
  20  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 
  21  *  Boston, MA 02110-1301, USA. 
  28 #include "JSFunction.h" 
  30 #include "JSGlobalObjectFunctions.h" 
  31 #include "Identifier.h" 
  38 #include <wtf/Assertions.h> 
  41 using namespace Unicode
; 
  43 #include "KeywordLookup.h" 
  44 #include "Lexer.lut.h" 
  // Keywords constructor; takes a VM pointer.
  // The one member initializer visible here sets m_keywordTable from JSC::mainTable.
  // NOTE(review): the other initializers and the constructor body are not present
  // in this excerpt.
  49 Keywords::Keywords(VM
* vm
) 
  51     , m_keywordTable(JSC::mainTable
) 
  // Enumerators used as the entries of typesOfLatin1Characters.
  // NOTE(review): the enum header and most enumerators are missing from this
  // excerpt. Enumerator order is significant: isIdentStart tests for equality with
  // CharacterIdentifierStart and isIdentPart tests `<= CharacterNumber`.
  56     // Types for the main switch 
  58     // The first three types are fixed, and also used for identifying 
  59     // ASCII alpha and alphanumeric characters (see isIdentStart and isIdentPart). 
  60     CharacterIdentifierStart
, 
  65     CharacterLineTerminator
, 
  66     CharacterExclamationMark
, 
  70     CharacterCloseBracket
, 
  94     // Other types (only one so far) 
  // Classification table for the 256 Latin-1 character codes: entry i holds the
  // Character* type for character code i. isIdentStart(LChar) and isIdentPart(LChar)
  // index this table directly with the character value.
  // The per-entry comments give the code, then its glyph or name (0-127) or its
  // Unicode general category (128-255).
  99 static const unsigned short typesOfLatin1Characters
[256] = { 
 100 /*   0 - Null               */ CharacterInvalid
, 
 101 /*   1 - Start of Heading   */ CharacterInvalid
, 
 102 /*   2 - Start of Text      */ CharacterInvalid
, 
 103 /*   3 - End of Text        */ CharacterInvalid
, 
 104 /*   4 - End of Transm.     */ CharacterInvalid
, 
 105 /*   5 - Enquiry            */ CharacterInvalid
, 
 106 /*   6 - Acknowledgment     */ CharacterInvalid
, 
 107 /*   7 - Bell               */ CharacterInvalid
, 
 108 /*   8 - Back Space         */ CharacterInvalid
, 
 109 /*   9 - Horizontal Tab     */ CharacterWhiteSpace
, 
 110 /*  10 - Line Feed          */ CharacterLineTerminator
, 
 111 /*  11 - Vertical Tab       */ CharacterWhiteSpace
, 
 112 /*  12 - Form Feed          */ CharacterWhiteSpace
, 
 113 /*  13 - Carriage Return    */ CharacterLineTerminator
, 
 114 /*  14 - Shift Out          */ CharacterInvalid
, 
 115 /*  15 - Shift In           */ CharacterInvalid
, 
 116 /*  16 - Data Link Escape   */ CharacterInvalid
, 
 117 /*  17 - Device Control 1   */ CharacterInvalid
, 
 118 /*  18 - Device Control 2   */ CharacterInvalid
, 
 119 /*  19 - Device Control 3   */ CharacterInvalid
, 
 120 /*  20 - Device Control 4   */ CharacterInvalid
, 
 121 /*  21 - Negative Ack.      */ CharacterInvalid
, 
 122 /*  22 - Synchronous Idle   */ CharacterInvalid
, 
 123 /*  23 - End of Trans. Block */ CharacterInvalid
, 
 124 /*  24 - Cancel             */ CharacterInvalid
, 
 125 /*  25 - End of Medium      */ CharacterInvalid
, 
 126 /*  26 - Substitute         */ CharacterInvalid
, 
 127 /*  27 - Escape             */ CharacterInvalid
, 
 128 /*  28 - File Separator     */ CharacterInvalid
, 
 129 /*  29 - Group Separator    */ CharacterInvalid
, 
 130 /*  30 - Record Separator   */ CharacterInvalid
, 
 131 /*  31 - Unit Separator     */ CharacterInvalid
, 
 132 /*  32 - Space              */ CharacterWhiteSpace
, 
 133 /*  33 - !                  */ CharacterExclamationMark
, 
 134 /*  34 - "                  */ CharacterQuote
, 
 135 /*  35 - #                  */ CharacterInvalid
, 
 136 /*  36 - $                  */ CharacterIdentifierStart
, 
 137 /*  37 - %                  */ CharacterModulo
, 
 138 /*  38 - &                  */ CharacterAnd
, 
 139 /*  39 - '                  */ CharacterQuote
, 
 140 /*  40 - (                  */ CharacterOpenParen
, 
 141 /*  41 - )                  */ CharacterCloseParen
, 
 142 /*  42 - *                  */ CharacterMultiply
, 
 143 /*  43 - +                  */ CharacterAdd
, 
 144 /*  44 - ,                  */ CharacterComma
, 
 145 /*  45 - -                  */ CharacterSub
, 
 146 /*  46 - .                  */ CharacterDot
, 
 147 /*  47 - /                  */ CharacterSlash
, 
 148 /*  48 - 0                  */ CharacterZero
, 
 149 /*  49 - 1                  */ CharacterNumber
, 
 150 /*  50 - 2                  */ CharacterNumber
, 
 151 /*  51 - 3                  */ CharacterNumber
, 
 152 /*  52 - 4                  */ CharacterNumber
, 
 153 /*  53 - 5                  */ CharacterNumber
, 
 154 /*  54 - 6                  */ CharacterNumber
, 
 155 /*  55 - 7                  */ CharacterNumber
, 
 156 /*  56 - 8                  */ CharacterNumber
, 
 157 /*  57 - 9                  */ CharacterNumber
, 
 158 /*  58 - :                  */ CharacterColon
, 
 159 /*  59 - ;                  */ CharacterSemicolon
, 
 160 /*  60 - <                  */ CharacterLess
, 
 161 /*  61 - =                  */ CharacterEqual
, 
 162 /*  62 - >                  */ CharacterGreater
, 
 163 /*  63 - ?                  */ CharacterQuestion
, 
 164 /*  64 - @                  */ CharacterInvalid
, 
 165 /*  65 - A                  */ CharacterIdentifierStart
, 
 166 /*  66 - B                  */ CharacterIdentifierStart
, 
 167 /*  67 - C                  */ CharacterIdentifierStart
, 
 168 /*  68 - D                  */ CharacterIdentifierStart
, 
 169 /*  69 - E                  */ CharacterIdentifierStart
, 
 170 /*  70 - F                  */ CharacterIdentifierStart
, 
 171 /*  71 - G                  */ CharacterIdentifierStart
, 
 172 /*  72 - H                  */ CharacterIdentifierStart
, 
 173 /*  73 - I                  */ CharacterIdentifierStart
, 
 174 /*  74 - J                  */ CharacterIdentifierStart
, 
 175 /*  75 - K                  */ CharacterIdentifierStart
, 
 176 /*  76 - L                  */ CharacterIdentifierStart
, 
 177 /*  77 - M                  */ CharacterIdentifierStart
, 
 178 /*  78 - N                  */ CharacterIdentifierStart
, 
 179 /*  79 - O                  */ CharacterIdentifierStart
, 
 180 /*  80 - P                  */ CharacterIdentifierStart
, 
 181 /*  81 - Q                  */ CharacterIdentifierStart
, 
 182 /*  82 - R                  */ CharacterIdentifierStart
, 
 183 /*  83 - S                  */ CharacterIdentifierStart
, 
 184 /*  84 - T                  */ CharacterIdentifierStart
, 
 185 /*  85 - U                  */ CharacterIdentifierStart
, 
 186 /*  86 - V                  */ CharacterIdentifierStart
, 
 187 /*  87 - W                  */ CharacterIdentifierStart
, 
 188 /*  88 - X                  */ CharacterIdentifierStart
, 
 189 /*  89 - Y                  */ CharacterIdentifierStart
, 
 190 /*  90 - Z                  */ CharacterIdentifierStart
, 
 191 /*  91 - [                  */ CharacterOpenBracket
, 
 192 /*  92 - \                  */ CharacterBackSlash
, 
 193 /*  93 - ]                  */ CharacterCloseBracket
, 
 194 /*  94 - ^                  */ CharacterXor
, 
 195 /*  95 - _                  */ CharacterIdentifierStart
, 
 196 /*  96 - `                  */ CharacterInvalid
, 
 197 /*  97 - a                  */ CharacterIdentifierStart
, 
 198 /*  98 - b                  */ CharacterIdentifierStart
, 
 199 /*  99 - c                  */ CharacterIdentifierStart
, 
 200 /* 100 - d                  */ CharacterIdentifierStart
, 
 201 /* 101 - e                  */ CharacterIdentifierStart
, 
 202 /* 102 - f                  */ CharacterIdentifierStart
, 
 203 /* 103 - g                  */ CharacterIdentifierStart
, 
 204 /* 104 - h                  */ CharacterIdentifierStart
, 
 205 /* 105 - i                  */ CharacterIdentifierStart
, 
 206 /* 106 - j                  */ CharacterIdentifierStart
, 
 207 /* 107 - k                  */ CharacterIdentifierStart
, 
 208 /* 108 - l                  */ CharacterIdentifierStart
, 
 209 /* 109 - m                  */ CharacterIdentifierStart
, 
 210 /* 110 - n                  */ CharacterIdentifierStart
, 
 211 /* 111 - o                  */ CharacterIdentifierStart
, 
 212 /* 112 - p                  */ CharacterIdentifierStart
, 
 213 /* 113 - q                  */ CharacterIdentifierStart
, 
 214 /* 114 - r                  */ CharacterIdentifierStart
, 
 215 /* 115 - s                  */ CharacterIdentifierStart
, 
 216 /* 116 - t                  */ CharacterIdentifierStart
, 
 217 /* 117 - u                  */ CharacterIdentifierStart
, 
 218 /* 118 - v                  */ CharacterIdentifierStart
, 
 219 /* 119 - w                  */ CharacterIdentifierStart
, 
 220 /* 120 - x                  */ CharacterIdentifierStart
, 
 221 /* 121 - y                  */ CharacterIdentifierStart
, 
 222 /* 122 - z                  */ CharacterIdentifierStart
, 
 223 /* 123 - {                  */ CharacterOpenBrace
, 
 224 /* 124 - |                  */ CharacterOr
, 
 225 /* 125 - }                  */ CharacterCloseBrace
, 
 226 /* 126 - ~                  */ CharacterTilde
, 
 227 /* 127 - Delete             */ CharacterInvalid
, 
 228 /* 128 - Cc category        */ CharacterInvalid
, 
 229 /* 129 - Cc category        */ CharacterInvalid
, 
 230 /* 130 - Cc category        */ CharacterInvalid
, 
 231 /* 131 - Cc category        */ CharacterInvalid
, 
 232 /* 132 - Cc category        */ CharacterInvalid
, 
 233 /* 133 - Cc category        */ CharacterInvalid
, 
 234 /* 134 - Cc category        */ CharacterInvalid
, 
 235 /* 135 - Cc category        */ CharacterInvalid
, 
 236 /* 136 - Cc category        */ CharacterInvalid
, 
 237 /* 137 - Cc category        */ CharacterInvalid
, 
 238 /* 138 - Cc category        */ CharacterInvalid
, 
 239 /* 139 - Cc category        */ CharacterInvalid
, 
 240 /* 140 - Cc category        */ CharacterInvalid
, 
 241 /* 141 - Cc category        */ CharacterInvalid
, 
 242 /* 142 - Cc category        */ CharacterInvalid
, 
 243 /* 143 - Cc category        */ CharacterInvalid
, 
 244 /* 144 - Cc category        */ CharacterInvalid
, 
 245 /* 145 - Cc category        */ CharacterInvalid
, 
 246 /* 146 - Cc category        */ CharacterInvalid
, 
 247 /* 147 - Cc category        */ CharacterInvalid
, 
 248 /* 148 - Cc category        */ CharacterInvalid
, 
 249 /* 149 - Cc category        */ CharacterInvalid
, 
 250 /* 150 - Cc category        */ CharacterInvalid
, 
 251 /* 151 - Cc category        */ CharacterInvalid
, 
 252 /* 152 - Cc category        */ CharacterInvalid
, 
 253 /* 153 - Cc category        */ CharacterInvalid
, 
 254 /* 154 - Cc category        */ CharacterInvalid
, 
 255 /* 155 - Cc category        */ CharacterInvalid
, 
 256 /* 156 - Cc category        */ CharacterInvalid
, 
 257 /* 157 - Cc category        */ CharacterInvalid
, 
 258 /* 158 - Cc category        */ CharacterInvalid
, 
 259 /* 159 - Cc category        */ CharacterInvalid
, 
 260 /* 160 - Zs category (nbsp) */ CharacterWhiteSpace
, 
 261 /* 161 - Po category        */ CharacterInvalid
, 
 262 /* 162 - Sc category        */ CharacterInvalid
, 
 263 /* 163 - Sc category        */ CharacterInvalid
, 
 264 /* 164 - Sc category        */ CharacterInvalid
, 
 265 /* 165 - Sc category        */ CharacterInvalid
, 
 266 /* 166 - So category        */ CharacterInvalid
, 
 267 /* 167 - So category        */ CharacterInvalid
, 
 268 /* 168 - Sk category        */ CharacterInvalid
, 
 269 /* 169 - So category        */ CharacterInvalid
, 
 270 /* 170 - Ll category        */ CharacterIdentifierStart
, 
 271 /* 171 - Pi category        */ CharacterInvalid
, 
 272 /* 172 - Sm category        */ CharacterInvalid
, 
 273 /* 173 - Cf category        */ CharacterInvalid
, 
 274 /* 174 - So category        */ CharacterInvalid
, 
 275 /* 175 - Sk category        */ CharacterInvalid
, 
 276 /* 176 - So category        */ CharacterInvalid
, 
 277 /* 177 - Sm category        */ CharacterInvalid
, 
 278 /* 178 - No category        */ CharacterInvalid
, 
 279 /* 179 - No category        */ CharacterInvalid
, 
 280 /* 180 - Sk category        */ CharacterInvalid
, 
 281 /* 181 - Ll category        */ CharacterIdentifierStart
, 
 282 /* 182 - So category        */ CharacterInvalid
, 
 283 /* 183 - Po category        */ CharacterInvalid
, 
 284 /* 184 - Sk category        */ CharacterInvalid
, 
 285 /* 185 - No category        */ CharacterInvalid
, 
 286 /* 186 - Ll category        */ CharacterIdentifierStart
, 
 287 /* 187 - Pf category        */ CharacterInvalid
, 
 288 /* 188 - No category        */ CharacterInvalid
, 
 289 /* 189 - No category        */ CharacterInvalid
, 
 290 /* 190 - No category        */ CharacterInvalid
, 
 291 /* 191 - Po category        */ CharacterInvalid
, 
 292 /* 192 - Lu category        */ CharacterIdentifierStart
, 
 293 /* 193 - Lu category        */ CharacterIdentifierStart
, 
 294 /* 194 - Lu category        */ CharacterIdentifierStart
, 
 295 /* 195 - Lu category        */ CharacterIdentifierStart
, 
 296 /* 196 - Lu category        */ CharacterIdentifierStart
, 
 297 /* 197 - Lu category        */ CharacterIdentifierStart
, 
 298 /* 198 - Lu category        */ CharacterIdentifierStart
, 
 299 /* 199 - Lu category        */ CharacterIdentifierStart
, 
 300 /* 200 - Lu category        */ CharacterIdentifierStart
, 
 301 /* 201 - Lu category        */ CharacterIdentifierStart
, 
 302 /* 202 - Lu category        */ CharacterIdentifierStart
, 
 303 /* 203 - Lu category        */ CharacterIdentifierStart
, 
 304 /* 204 - Lu category        */ CharacterIdentifierStart
, 
 305 /* 205 - Lu category        */ CharacterIdentifierStart
, 
 306 /* 206 - Lu category        */ CharacterIdentifierStart
, 
 307 /* 207 - Lu category        */ CharacterIdentifierStart
, 
 308 /* 208 - Lu category        */ CharacterIdentifierStart
, 
 309 /* 209 - Lu category        */ CharacterIdentifierStart
, 
 310 /* 210 - Lu category        */ CharacterIdentifierStart
, 
 311 /* 211 - Lu category        */ CharacterIdentifierStart
, 
 312 /* 212 - Lu category        */ CharacterIdentifierStart
, 
 313 /* 213 - Lu category        */ CharacterIdentifierStart
, 
 314 /* 214 - Lu category        */ CharacterIdentifierStart
, 
 315 /* 215 - Sm category        */ CharacterInvalid
, 
 316 /* 216 - Lu category        */ CharacterIdentifierStart
, 
 317 /* 217 - Lu category        */ CharacterIdentifierStart
, 
 318 /* 218 - Lu category        */ CharacterIdentifierStart
, 
 319 /* 219 - Lu category        */ CharacterIdentifierStart
, 
 320 /* 220 - Lu category        */ CharacterIdentifierStart
, 
 321 /* 221 - Lu category        */ CharacterIdentifierStart
, 
 322 /* 222 - Lu category        */ CharacterIdentifierStart
, 
 323 /* 223 - Ll category        */ CharacterIdentifierStart
, 
 324 /* 224 - Ll category        */ CharacterIdentifierStart
, 
 325 /* 225 - Ll category        */ CharacterIdentifierStart
, 
 326 /* 226 - Ll category        */ CharacterIdentifierStart
, 
 327 /* 227 - Ll category        */ CharacterIdentifierStart
, 
 328 /* 228 - Ll category        */ CharacterIdentifierStart
, 
 329 /* 229 - Ll category        */ CharacterIdentifierStart
, 
 330 /* 230 - Ll category        */ CharacterIdentifierStart
, 
 331 /* 231 - Ll category        */ CharacterIdentifierStart
, 
 332 /* 232 - Ll category        */ CharacterIdentifierStart
, 
 333 /* 233 - Ll category        */ CharacterIdentifierStart
, 
 334 /* 234 - Ll category        */ CharacterIdentifierStart
, 
 335 /* 235 - Ll category        */ CharacterIdentifierStart
, 
 336 /* 236 - Ll category        */ CharacterIdentifierStart
, 
 337 /* 237 - Ll category        */ CharacterIdentifierStart
, 
 338 /* 238 - Ll category        */ CharacterIdentifierStart
, 
 339 /* 239 - Ll category        */ CharacterIdentifierStart
, 
 340 /* 240 - Ll category        */ CharacterIdentifierStart
, 
 341 /* 241 - Ll category        */ CharacterIdentifierStart
, 
 342 /* 242 - Ll category        */ CharacterIdentifierStart
, 
 343 /* 243 - Ll category        */ CharacterIdentifierStart
, 
 344 /* 244 - Ll category        */ CharacterIdentifierStart
, 
 345 /* 245 - Ll category        */ CharacterIdentifierStart
, 
 346 /* 246 - Ll category        */ CharacterIdentifierStart
, 
 347 /* 247 - Sm category        */ CharacterInvalid
, 
 348 /* 248 - Ll category        */ CharacterIdentifierStart
, 
 349 /* 249 - Ll category        */ CharacterIdentifierStart
, 
 350 /* 250 - Ll category        */ CharacterIdentifierStart
, 
 351 /* 251 - Ll category        */ CharacterIdentifierStart
, 
 352 /* 252 - Ll category        */ CharacterIdentifierStart
, 
 353 /* 253 - Ll category        */ CharacterIdentifierStart
, 
 354 /* 254 - Ll category        */ CharacterIdentifierStart
, 
 355 /* 255 - Ll category        */ CharacterIdentifierStart
 
 358 // This table provides the character that results from \X, where the character code of X is used 
 359 // directly as the table index (entries are labelled from code 0 upward; singleEscape() indexes it with c). 
 // A table value of 0 means that more processing needs to be done.
 // NOTE(review): the array is declared with 128 entries, but only some of entries
 // 1-32 are present in this excerpt; the remaining initializers are not shown.
 360 static const LChar singleCharacterEscapeValuesForASCII
[128] = { 
 362 /*   1 - Start of Heading   */ 0, 
 363 /*   2 - Start of Text      */ 0, 
 364 /*   3 - End of Text        */ 0, 
 365 /*   4 - End of Transm.     */ 0, 
 367 /*   6 - Acknowledgment     */ 0, 
 369 /*   8 - Back Space         */ 0, 
 370 /*   9 - Horizontal Tab     */ 0, 
 371 /*  10 - Line Feed          */ 0, 
 372 /*  11 - Vertical Tab       */ 0, 
 373 /*  12 - Form Feed          */ 0, 
 374 /*  13 - Carriage Return    */ 0, 
 375 /*  14 - Shift Out          */ 0, 
 376 /*  15 - Shift In           */ 0, 
 377 /*  16 - Data Link Escape   */ 0, 
 378 /*  17 - Device Control 1   */ 0, 
 379 /*  18 - Device Control 2   */ 0, 
 380 /*  19 - Device Control 3   */ 0, 
 381 /*  20 - Device Control 4   */ 0, 
 382 /*  21 - Negative Ack.      */ 0, 
 383 /*  22 - Synchronous Idle   */ 0, 
 384 /*  23 - End of Trans. Block */ 0, 
 386 /*  25 - End of Medium      */ 0, 
 387 /*  26 - Substitute         */ 0, 
 389 /*  28 - File Separator     */ 0, 
 390 /*  29 - Group Separator    */ 0, 
 391 /*  30 - Record Separator   */ 0, 
 392 /*  31 - Unit Separator     */ 0, 
 393 /*  32 - Space              */ ' ', 
 // Lexer<T> constructor; takes a VM pointer.
 // The only initializer visible here sets m_isReparsing to false.
 // NOTE(review): the rest of the initializer list and the body are not present in
 // this excerpt.
 491 template <typename T
> 
 492 Lexer
<T
>::Lexer(VM
* vm
) 
 493     : m_isReparsing(false) 
 498 template <typename T
> 
 // Returns the error text for an invalid character.
 // Fixed messages exist for NUL, \n, \v, \r, '#', '@' and '`'; anything else is
 // formatted from the numeric value of m_current.
 // NOTE(review): the lines that choose between the fixed messages (presumably a
 // switch on m_current) are not present in this excerpt.
 503 template <typename T
> 
 504 String Lexer
<T
>::invalidCharacterMessage() const 
 508         return "Invalid character: '\\0'"; 
 510         return "Invalid character: '\\n'"; 
 512         return "Invalid character: '\\v'"; 
 514         return "Invalid character: '\\r'"; 
 516         return "Invalid character: '#'"; 
 518         return "Invalid character: '@'"; 
 520         return "Invalid character: '`'"; 
         // NOTE(review): "%04u" prints the code in decimal after a "\u" prefix, which
         // normally introduces hexadecimal digits, and this message has no ':' unlike
         // the ones above. Confirm whether "%04x" and a colon were intended.
 522         return String::format("Invalid character '\\u%04u'", static_cast<unsigned>(m_current
)).impl(); 
 // Returns a const T* for the current source position, after asserting that
 // m_code has not moved past m_codeEnd.
 // NOTE(review): the return statement is not present in this excerpt (presumably
 // it returns m_code; confirm).
 526 template <typename T
> 
 527 ALWAYS_INLINE 
const T
* Lexer
<T
>::currentSourcePtr() const 
 529     ASSERT(m_code 
<= m_codeEnd
); 
 // Points the lexer at `source` and resets its per-source state.
 //   source - supplies the text (via provider()->source()), the first line
 //            number, and the start/end offsets of the range to lex.
 //   arena  - parser arena whose identifierArena() is stored in m_arena.
 // NOTE(review): several original lines are missing from this excerpt (e.g. the
 // body of the final `if`), so this documents only the visible statements.
 533 template <typename T
> 
 534 void Lexer
<T
>::setCode(const SourceCode
& source
, ParserArena
* arena
) 
 536     m_arena 
= &arena
->identifierArena(); 
 538     m_lineNumber 
= source
.firstLine(); 
 541     const String
& sourceString 
= source
.provider()->source(); 
         // Only install the code start when the provider returned a non-null string.
 543     if (!sourceString
.isNull()) 
 544         setCodeStart(sourceString
.impl()); 
         // Cursor and end pointer are derived from m_codeStart plus the source's offsets.
 549     m_sourceOffset 
= source
.startOffset(); 
 550     m_codeStartPlusOffset 
= m_codeStart 
+ source
.startOffset(); 
 551     m_code 
= m_codeStartPlusOffset
; 
 552     m_codeEnd 
= m_codeStart 
+ source
.endOffset(); 
         // Lexing begins at the start of a line, with no pending error message.
 554     m_atLineStart 
= true; 
 555     m_lineStart 
= m_code
; 
 556     m_lexErrorMessage 
= String(); 
         // Pre-size the scratch buffers; the 16-bit one gets half the remaining length.
 558     m_buffer8
.reserveInitialCapacity(initialReadBufferCapacity
); 
 559     m_buffer16
.reserveInitialCapacity((m_codeEnd 
- m_code
) / 2); 
 561     if (LIKELY(m_code 
< m_codeEnd
)) 
 565     ASSERT(currentOffset() == source
.startOffset()); 
 // Advances m_code by the compile-time constant shiftAmount, then asserts that
 // the current offset is not before the current line's start offset.
 // NOTE(review): a following statement (original line 573) is missing from this
 // excerpt.
 568 template <typename T
> 
 569 template <int shiftAmount
> ALWAYS_INLINE 
void Lexer
<T
>::internalShift() 
 571     m_code 
+= shiftAmount
; 
 572     ASSERT(currentOffset() >= currentLineStartOffset()); 
 // Moves the lexer forward in the source.
 // NOTE(review): only the explanatory comment and the end-of-input guard are
 // present in this excerpt; the statements that update m_current and m_code are
 // missing.
 576 template <typename T
> 
 577 ALWAYS_INLINE 
void Lexer
<T
>::shift() 
 579     // At one point timing showed that setting m_current to 0 unconditionally was faster than an if-else sequence. 
 582     if (LIKELY(m_code 
< m_codeEnd
)) 
 // Returns true when m_current is zero and m_code equals m_codeEnd.
 // The assertion documents the invariant that a non-zero m_current implies the
 // cursor is still before m_codeEnd.
 586 template <typename T
> 
 587 ALWAYS_INLINE 
bool Lexer
<T
>::atEnd() const 
 589     ASSERT(!m_current 
|| m_code 
< m_codeEnd
); 
 590     return UNLIKELY(UNLIKELY(!m_current
) && m_code 
== m_codeEnd
); 
 // Returns the character `offset` positions after m_code without moving the
 // cursor, or 0 when that position is at or beyond m_codeEnd.
 //   offset - lookahead distance; asserted to be in the range 1..4.
 593 template <typename T
> 
 594 ALWAYS_INLINE T Lexer
<T
>::peek(int offset
) const 
 596     ASSERT(offset 
> 0 && offset 
< 5); 
 597     const T
* code 
= m_code 
+ offset
; 
 598     return (code 
< m_codeEnd
) ? *code 
: 0; 
 // Decodes four hex digits, starting at m_current, into a UnicodeHexValue.
 // If any of the four characters is not an ASCII hex digit, the result is
 // IncompleteHex when m_code + 4 reaches or passes m_codeEnd, else InvalidHex.
 // NOTE(review): the declarations of char1..char3 and the statements after the
 // conversion (original lines 612-615) are missing from this excerpt.
 601 template <typename T
> 
 602 typename Lexer
<T
>::UnicodeHexValue Lexer
<T
>::parseFourDigitUnicodeHex() 
 608     if (UNLIKELY(!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(char1
) || !isASCIIHexDigit(char2
) || !isASCIIHexDigit(char3
))) 
 609         return UnicodeHexValue((m_code 
+ 4) >= m_codeEnd 
? UnicodeHexValue::IncompleteHex 
: UnicodeHexValue::InvalidHex
); 
 611     int result 
= convertUnicode(m_current
, char1
, char2
, char3
); 
 616     return UnicodeHexValue(result
); 
 // Consumes a line terminator; m_current is asserted to be one on entry.
 // NOTE(review): the declaration of `prev`, the shift statements and the line
 // bookkeeping are missing from this excerpt.
 619 template <typename T
> 
 620 void Lexer
<T
>::shiftLineTerminator() 
 622     ASSERT(isLineTerminator(m_current
)); 
 627     // Allow both CRLF and LFCR. 
         // Comparing the sum of the two characters matches either ordering of the pair.
 628     if (prev 
+ m_current 
== '\n' + '\r') 
 // Returns true when m_lastToken is CONTINUE, BREAK, RETURN or THROW.
 634 template <typename T
> 
 635 ALWAYS_INLINE 
bool Lexer
<T
>::lastTokenWasRestrKeyword() const 
 637     return m_lastToken 
== CONTINUE 
|| m_lastToken 
== BREAK 
|| m_lastToken 
== RETURN 
|| m_lastToken 
== THROW
; 
 // Returns true when category(c) is one of the letter categories: uppercase,
 // lowercase, titlecase, modifier or other.
 // isIdentStart(UChar) calls this for characters that fail isLatin1().
 640 static NEVER_INLINE 
bool isNonLatin1IdentStart(int c
) 
 642     return category(c
) & (Letter_Uppercase 
| Letter_Lowercase 
| Letter_Titlecase 
| Letter_Modifier 
| Letter_Other
); 
 // isLatin1 overloads for LChar and UChar. The LChar parameter is unnamed.
 // The isIdentStart/isIdentPart UChar overloads use the UChar version to choose
 // between the table lookup and the non-Latin-1 category check.
 // NOTE(review): both function bodies are missing from this excerpt.
 645 static ALWAYS_INLINE 
bool isLatin1(LChar
) 
 650 static ALWAYS_INLINE 
bool isLatin1(UChar c
) 
 // Returns true when the table entry for c in typesOfLatin1Characters is
 // CharacterIdentifierStart.
 655 static inline bool isIdentStart(LChar c
) 
 657     return typesOfLatin1Characters
[c
] == CharacterIdentifierStart
; 
 // UChar overload: uses the LChar table lookup when isLatin1(c) holds, otherwise
 // falls back to isNonLatin1IdentStart(c).
 660 static inline bool isIdentStart(UChar c
) 
 662     return isLatin1(c
) ? isIdentStart(static_cast<LChar
>(c
)) : isNonLatin1IdentStart(c
); 
 // Returns true when category(c) is a letter category, a non-spacing or
 // spacing-combining mark, a decimal digit or connector punctuation, or when c
 // is 0x200C or 0x200D.
 // isIdentPart(UChar) calls this for characters that fail isLatin1().
 665 static NEVER_INLINE 
bool isNonLatin1IdentPart(int c
) 
 667     return (category(c
) & (Letter_Uppercase 
| Letter_Lowercase 
| Letter_Titlecase 
| Letter_Modifier 
| Letter_Other
 
 668         | Mark_NonSpacing 
| Mark_SpacingCombining 
| Number_DecimalDigit 
| Punctuation_Connector
)) || c 
== 0x200C || c 
== 0x200D; 
 // Returns true when the table entry for c in typesOfLatin1Characters is less
 // than or equal to CharacterNumber.
 671 static ALWAYS_INLINE 
bool isIdentPart(LChar c
) 
 673     // Character types are divided into two groups depending on whether they can be part of an 
 674     // identifier or not. Those whose type value is less than or equal to CharacterNumber can be 
 675     // part of an identifier. (See the CharacterType definition for more details.) 
 676     return typesOfLatin1Characters
[c
] <= CharacterNumber
; 
 // UChar overload: uses the LChar table lookup when isLatin1(c) holds, otherwise
 // falls back to isNonLatin1IdentPart(c).
 679 static ALWAYS_INLINE 
bool isIdentPart(UChar c
) 
 681     return isLatin1(c
) ? isIdentPart(static_cast<LChar
>(c
)) : isNonLatin1IdentPart(c
); 
 // Looks up c in singleCharacterEscapeValuesForASCII, asserting first that c is
 // within the array bounds.
 // NOTE(review): the guard around the lookup and the fallback return are missing
 // from this excerpt (presumably a range check on c; confirm).
 684 static inline LChar 
singleEscape(int c
) 
 687         ASSERT(static_cast<size_t>(c
) < ARRAY_SIZE(singleCharacterEscapeValuesForASCII
)); 
 688         return singleCharacterEscapeValuesForASCII
[c
]; 
 // Appends c, narrowed to LChar, to m_buffer8.
 // NOTE(review): original lines 696-697 (before the append) are missing from this
 // excerpt; they may hold range assertions on c.
 693 template <typename T
> 
 694 inline void Lexer
<T
>::record8(int c
) 
 698     m_buffer8
.append(static_cast<LChar
>(c
)); 
 // assertCharIsIn8BitRange: a generic template plus UChar and LChar overloads
 // (the LChar parameter is unnamed). append8() calls it on each character.
 // NOTE(review): all three bodies are missing from this excerpt.
 701 template <typename T
> 
 702 inline void assertCharIsIn8BitRange(T c
) 
 710 inline void assertCharIsIn8BitRange(UChar c
) 
 717 inline void assertCharIsIn8BitRange(LChar
) 
 // Grows m_buffer8 by `length` elements and fills the new tail from p.
 //   p      - source characters of the lexer's character type T.
 //   length - number of characters to append.
 // NOTE(review): inside the loop, the statement that reads `c` and the one that
 // stores into rawBuffer are missing from this excerpt.
 721 template <typename T
> 
 722 inline void Lexer
<T
>::append8(const T
* p
, size_t length
) 
 724     size_t currentSize 
= m_buffer8
.size(); 
 725     m_buffer8
.grow(currentSize 
+ length
); 
         // rawBuffer points at the first newly added slot.
 726     LChar
* rawBuffer 
= m_buffer8
.data() + currentSize
; 
 728     for (size_t i 
= 0; i 
< length
; i
++) { 
 730         assertCharIsIn8BitRange(c
); 
 // Grows m_buffer16 by `length` elements; the new tail is to be filled from the
 // 8-bit characters at p.
 //   p      - source LChar characters.
 //   length - number of characters to append.
 // NOTE(review): the loop body that stores into rawBuffer is missing from this
 // excerpt.
 735 template <typename T
> 
 736 inline void Lexer
<T
>::append16(const LChar
* p
, size_t length
) 
 738     size_t currentSize 
= m_buffer16
.size(); 
 739     m_buffer16
.grow(currentSize 
+ length
); 
         // rawBuffer points at the first newly added slot.
 740     UChar
* rawBuffer 
= m_buffer16
.data() + currentSize
; 
 742     for (size_t i 
= 0; i 
< length
; i
++) 
 // Appends one character of the lexer's character type T to m_buffer16.
 746 template <typename T
> 
 747 inline void Lexer
<T
>::record16(T c
) 
 749     m_buffer16
.append(c
); 
 // int overload: asserts c does not exceed USHRT_MAX, then appends it to
 // m_buffer16 narrowed to UChar.
 // NOTE(review): original line 755 is missing from this excerpt (possibly a
 // lower-bound assertion on c).
 752 template <typename T
> 
 753 inline void Lexer
<T
>::record16(int c
) 
 756     ASSERT(c 
<= static_cast<int>(USHRT_MAX
)); 
 757     m_buffer16
.append(static_cast<UChar
>(c
)); 
 // LChar specialization: lexes an identifier or keyword at the current position.
 //   tokenData  - receives the identifier in tokenData->ident (0 when
 //                shouldCreateIdentifier is false).
 //   lexerFlags - LexerFlagsIgnoreReservedWords disables keyword recognition.
 //   strictMode - when false, RESERVED_IF_STRICT is reported as IDENT.
 // Returns the keyword token or IDENT on the visible paths.
 // NOTE(review): many original lines (closing braces, loop body, else branches,
 // the final return) are missing from this excerpt.
 761 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<LChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
) 
 763     const ptrdiff_t remaining 
= m_codeEnd 
- m_code
; 
         // Fast path: parseKeyword is tried only when at least maxTokenLength
         // characters remain.
 764     if ((remaining 
>= maxTokenLength
) && !(lexerFlags 
& LexerFlagsIgnoreReservedWords
)) { 
 765         JSTokenType keyword 
= parseKeyword
<shouldCreateIdentifier
>(tokenData
); 
 766         if (keyword 
!= IDENT
) { 
 767             ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
); 
 768             return keyword 
== RESERVED_IF_STRICT 
&& !strictMode 
? IDENT 
: keyword
; 
         // NOTE(review): the UChar specialization declares identifierLineStart as int.
 772     const LChar
* identifierStart 
= currentSourcePtr(); 
 773     unsigned identifierLineStart 
= currentLineStartOffset(); 
 775     while (isIdentPart(m_current
)) 
         // A backslash means an escape sequence: rewind to the identifier start and
         // defer to the slow path.
 778     if (UNLIKELY(m_current 
== '\\')) { 
 779         setOffsetFromSourcePtr(identifierStart
, identifierLineStart
); 
 780         return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
); 
 783     const Identifier
* ident 
= 0; 
 785     if (shouldCreateIdentifier
) { 
 786         int identifierLength 
= currentSourcePtr() - identifierStart
; 
 787         ident 
= makeIdentifier(identifierStart
, identifierLength
); 
 789         tokenData
->ident 
= ident
; 
 791         tokenData
->ident 
= 0; 
         // Near the end of input the fast path was skipped, so look the identifier
         // up in the keyword table.
 793     if (UNLIKELY((remaining 
< maxTokenLength
) && !(lexerFlags 
& LexerFlagsIgnoreReservedWords
))) { 
 794         ASSERT(shouldCreateIdentifier
); 
         // NOTE(review): this inner test repeats the enclosing condition, and the
         // first disjunct of the ASSERT below is therefore always true here.
 795         if (remaining 
< maxTokenLength
) { 
 796             const HashEntry
* entry 
= m_vm
->keywords
->getKeyword(*ident
); 
 797             ASSERT((remaining 
< maxTokenLength
) || !entry
); 
             // NOTE(review): entry is dereferenced below; a null check is presumably
             // on the lines missing from this excerpt (confirm).
 800             JSTokenType token 
= static_cast<JSTokenType
>(entry
->lexerValue()); 
 801             return (token 
!= RESERVED_IF_STRICT
) || strictMode 
? token 
: IDENT
; 
 // UChar specialization: lexes an identifier or keyword at the current position.
 // It ORs every consumed character into orAllChars so it can tell whether the
 // whole identifier fits in 8 bits, and picks the identifier constructor to match.
 //   tokenData  - receives the identifier in tokenData->ident (0 when
 //                shouldCreateIdentifier is false).
 //   lexerFlags - LexerFlagsIgnoreReservedWords disables keyword recognition.
 //   strictMode - when false, RESERVED_IF_STRICT is reported as IDENT.
 // NOTE(review): many original lines (closing braces, the shift in the loop, else
 // branches, the final return) are missing from this excerpt.
 810 template <bool shouldCreateIdentifier
> ALWAYS_INLINE JSTokenType Lexer
<UChar
>::parseIdentifier(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
) 
 812     const ptrdiff_t remaining 
= m_codeEnd 
- m_code
; 
         // Fast path: parseKeyword is tried only when at least maxTokenLength
         // characters remain.
 813     if ((remaining 
>= maxTokenLength
) && !(lexerFlags 
& LexerFlagsIgnoreReservedWords
)) { 
 814         JSTokenType keyword 
= parseKeyword
<shouldCreateIdentifier
>(tokenData
); 
 815         if (keyword 
!= IDENT
) { 
 816             ASSERT((!shouldCreateIdentifier
) || tokenData
->ident
); 
 817             return keyword 
== RESERVED_IF_STRICT 
&& !strictMode 
? IDENT 
: keyword
; 
 821     const UChar
* identifierStart 
= currentSourcePtr(); 
 822     int identifierLineStart 
= currentLineStartOffset(); 
 824     UChar orAllChars 
= 0; 
 826     while (isIdentPart(m_current
)) { 
 827         orAllChars 
|= m_current
; 
         // A backslash means an escape sequence: rewind to the identifier start and
         // defer to the slow path.
 831     if (UNLIKELY(m_current 
== '\\')) { 
 832         setOffsetFromSourcePtr(identifierStart
, identifierLineStart
); 
 833         return parseIdentifierSlowCase
<shouldCreateIdentifier
>(tokenData
, lexerFlags
, strictMode
); 
 836     bool isAll8Bit 
= false; 
         // No bits above 0xff were seen in any consumed character.
 838     if (!(orAllChars 
& ~0xff)) 
 841     const Identifier
* ident 
= 0; 
 843     if (shouldCreateIdentifier
) { 
 844         int identifierLength 
= currentSourcePtr() - identifierStart
; 
 846             ident 
= makeIdentifierLCharFromUChar(identifierStart
, identifierLength
); 
 848             ident 
= makeIdentifier(identifierStart
, identifierLength
); 
 850         tokenData
->ident 
= ident
; 
 852         tokenData
->ident 
= 0; 
         // Near the end of input the fast path was skipped, so look the identifier
         // up in the keyword table.
 854     if (UNLIKELY((remaining 
< maxTokenLength
) && !(lexerFlags 
& LexerFlagsIgnoreReservedWords
))) { 
 855         ASSERT(shouldCreateIdentifier
); 
         // NOTE(review): this inner test repeats the enclosing condition, and the
         // first disjunct of the ASSERT below is therefore always true here.
 856         if (remaining 
< maxTokenLength
) { 
 857             const HashEntry
* entry 
= m_vm
->keywords
->getKeyword(*ident
); 
 858             ASSERT((remaining 
< maxTokenLength
) || !entry
); 
             // NOTE(review): entry is dereferenced below; a null check is presumably
             // on the lines missing from this excerpt (confirm).
 861             JSTokenType token 
= static_cast<JSTokenType
>(entry
->lexerValue()); 
 862             return (token 
!= RESERVED_IF_STRICT
) || strictMode 
? token 
: IDENT
; 
 // Slow path for identifiers that may contain \uXXXX escapes.
 // Plain runs are copied into m_buffer16 once an escape is seen, and decoded
 // escape characters are recorded there when shouldCreateIdentifier is set.
 //   tokenData  - receives the identifier in tokenData->ident (0 when
 //                shouldCreateIdentifier is false).
 //   lexerFlags - LexerFlagsIgnoreReservedWords disables keyword recognition.
 //   strictMode - when false, RESERVED_IF_STRICT is reported as IDENT.
 // Returns an *_ERRORTOK value for a malformed or unterminated escape.
 // NOTE(review): the loop header, shifts, closing braces, else branches and the
 // final return are missing from this excerpt.
 870 template <typename T
> 
 871 template <bool shouldCreateIdentifier
> JSTokenType Lexer
<T
>::parseIdentifierSlowCase(JSTokenData
* tokenData
, unsigned lexerFlags
, bool strictMode
) 
 873     const ptrdiff_t remaining 
= m_codeEnd 
- m_code
; 
 874     const T
* identifierStart 
= currentSourcePtr(); 
 875     bool bufferRequired 
= false; 
 878         if (LIKELY(isIdentPart(m_current
))) { 
 882         if (LIKELY(m_current 
!= '\\')) 
 885         // \uXXXX unicode characters. 
 886         bufferRequired 
= true; 
         // Flush the plain run that precedes the escape into the 16-bit buffer.
 887         if (identifierStart 
!= currentSourcePtr()) 
 888             m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
); 
 890         if (UNLIKELY(m_current 
!= 'u')) 
 891             return atEnd() ? UNTERMINATED_IDENTIFIER_ESCAPE_ERRORTOK 
: INVALID_IDENTIFIER_ESCAPE_ERRORTOK
; 
 893         UnicodeHexValue character 
= parseFourDigitUnicodeHex(); 
 894         if (UNLIKELY(!character
.isValid())) 
 895             return character
.valueType() == UnicodeHexValue::IncompleteHex 
? UNTERMINATED_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK 
: INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
; 
 896         UChar ucharacter 
= static_cast<UChar
>(character
.value()); 
         // With an empty buffer the escaped character must pass isIdentStart;
         // otherwise it must pass isIdentPart.
 897         if (UNLIKELY(m_buffer16
.size() ? !isIdentPart(ucharacter
) : !isIdentStart(ucharacter
))) 
 898             return INVALID_IDENTIFIER_UNICODE_ESCAPE_ERRORTOK
; 
 899         if (shouldCreateIdentifier
) 
 900             record16(ucharacter
); 
 901         identifierStart 
= currentSourcePtr(); 
 904     int identifierLength
; 
 905     const Identifier
* ident 
= 0; 
 906     if (shouldCreateIdentifier
) { 
         // Without escapes the identifier is built straight from the source range;
         // otherwise from the accumulated m_buffer16 contents.
 907         if (!bufferRequired
) { 
 908             identifierLength 
= currentSourcePtr() - identifierStart
; 
 909             ident 
= makeIdentifier(identifierStart
, identifierLength
); 
 911             if (identifierStart 
!= currentSourcePtr()) 
 912                 m_buffer16
.append(identifierStart
, currentSourcePtr() - identifierStart
); 
 913             ident 
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size()); 
 916         tokenData
->ident 
= ident
; 
 918         tokenData
->ident 
= 0; 
 920     if (LIKELY(!bufferRequired 
&& !(lexerFlags 
& LexerFlagsIgnoreReservedWords
))) { 
 921         ASSERT(shouldCreateIdentifier
); 
 922         // Keywords must not be recognized if there was an \uXXXX in the identifier. 
 923         if (remaining 
< maxTokenLength
) { 
 924             const HashEntry
* entry 
= m_vm
->keywords
->getKeyword(*ident
); 
 925             ASSERT((remaining 
< maxTokenLength
) || !entry
); 
             // NOTE(review): entry is dereferenced below; a null check is presumably
             // on the lines missing from this excerpt (confirm).
 928             JSTokenType token 
= static_cast<JSTokenType
>(entry
->lexerValue()); 
 929             return (token 
!= RESERVED_IF_STRICT
) || strictMode 
? token 
: IDENT
; 
         // Empty the scratch buffer.
 934     m_buffer16
.resize(0); 
 // LChar overload: returns true for characters below 0xE.
 938 static ALWAYS_INLINE 
bool characterRequiresParseStringSlowCase(LChar character
) 
 940     return character 
< 0xE; 
 943 static ALWAYS_INLINE 
bool characterRequiresParseStringSlowCase(UChar character
) 
 945     return character 
< 0xE || character 
> 0xFF; 
 // Fast-path scanner for a string literal.
 //
 // The quote character that opened the literal is taken from m_current and the
 // scan runs until that same character is seen again. Literal runs are copied
 // with append8() and decoded escapes with record8(), both only when
 // shouldBuildStrings is true. Whenever the fast path cannot continue, the
 // saved start position is restored and the whole literal is re-scanned by
 // parseStringSlowCase().
 //
 // tokenData:  on success, ident receives the identifier built from m_buffer8,
 //             or 0 when shouldBuildStrings is false.
 // strictMode: only forwarded to parseStringSlowCase() in the visible code.
 // Returns StringParsedSuccessfully, or StringUnterminated /
 // StringCannotBeParsed for a malformed \x escape (m_lexErrorMessage is set).
 948 template <typename T
> 
 949 template <bool shouldBuildStrings
> ALWAYS_INLINE typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseString(JSTokenData
* tokenData
, bool strictMode
) 
 // Snapshot of the position, used below to rewind before switching to the slow path.
 951     int startingOffset 
= currentOffset(); 
 952     int startingLineStartOffset 
= currentLineStartOffset(); 
 953     int startingLineNumber 
= lineNumber(); 
 954     T stringQuoteCharacter 
= m_current
; 
 957     const T
* stringStart 
= currentSourcePtr(); 
 959     while (m_current 
!= stringQuoteCharacter
) { 
 // Escape sequence: flush the literal run seen so far, then decode the escape.
 960         if (UNLIKELY(m_current 
== '\\')) { 
 961             if (stringStart 
!= currentSourcePtr() && shouldBuildStrings
) 
 962                 append8(stringStart
, currentSourcePtr() - stringStart
); 
 965             LChar escape 
= singleEscape(m_current
); 
 967             // Most common escape sequences first 
 969                 if (shouldBuildStrings
) 
 972             } else if (UNLIKELY(isLineTerminator(m_current
))) 
 973                 shiftLineTerminator(); 
 974             else if (m_current 
== 'x') { 
 976                 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) { 
 977                     m_lexErrorMessage 
= "\\x can only be followed by a hex character sequence"; 
 // Input that ends inside the escape is reported as unterminated; any other failure as unparseable.
 978                     return (atEnd() || (isASCIIHexDigit(m_current
) && (m_code 
+ 1 == m_codeEnd
))) ? StringUnterminated 
: StringCannotBeParsed
; 
 982                 if (shouldBuildStrings
) 
 983                     record8(convertHex(prev
, m_current
)); 
 // Rewind to where the literal started and redo it on the slow path.
 // NOTE(review): presumably the fallback for escapes not handled above; the branch header is not visible here.
 986                 setOffset(startingOffset
, startingLineStartOffset
); 
 987                 setLineNumber(startingLineNumber
); 
 989                 return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
); 
 // A new literal run starts after the escape.
 991             stringStart 
= currentSourcePtr(); 
 // Characters rejected by characterRequiresParseStringSlowCase() also force a restart on the slow path.
 995         if (UNLIKELY(characterRequiresParseStringSlowCase(m_current
))) { 
 996             setOffset(startingOffset
, startingLineStartOffset
); 
 997             setLineNumber(startingLineNumber
); 
 999             return parseStringSlowCase
<shouldBuildStrings
>(tokenData
, strictMode
); 
 // Flush the trailing literal run, then publish the result.
1005     if (currentSourcePtr() != stringStart 
&& shouldBuildStrings
) 
1006         append8(stringStart
, currentSourcePtr() - stringStart
); 
1007     if (shouldBuildStrings
) { 
1008         tokenData
->ident 
= makeIdentifier(m_buffer8
.data(), m_buffer8
.size()); 
1009         m_buffer8
.resize(0); 
1011         tokenData
->ident 
= 0; 
1013     return StringParsedSuccessfully
; 
// General string literal scanner; parseString() falls back to it.
//
// Works like parseString() but accumulates into the 16-bit buffer
// (append16 / record16) and understands more escapes: single-character
// escapes via singleEscape(), escaped line terminators, \xHH, \uHHHH,
// the strict-mode numeric escape rule and, outside strict mode, octal
// escapes of up to three digits.
//
// tokenData:  on success, ident receives the identifier built from
//             m_buffer16, or 0 when shouldBuildStrings is false.
// strictMode: selects between the strict numeric-escape check and octal
//             escape decoding.
// Returns StringParsedSuccessfully, StringUnterminated or
// StringCannotBeParsed; the failure paths set m_lexErrorMessage.
1016 template <typename T
> 
1017 template <bool shouldBuildStrings
> typename Lexer
<T
>::StringParseResult Lexer
<T
>::parseStringSlowCase(JSTokenData
* tokenData
, bool strictMode
) 
1019     T stringQuoteCharacter 
= m_current
; 
1022     const T
* stringStart 
= currentSourcePtr(); 
1024     while (m_current 
!= stringQuoteCharacter
) { 
// Escape sequence: flush the literal run seen so far, then decode the escape.
1025         if (UNLIKELY(m_current 
== '\\')) { 
1026             if (stringStart 
!= currentSourcePtr() && shouldBuildStrings
) 
1027                 append16(stringStart
, currentSourcePtr() - stringStart
); 
1030             LChar escape 
= singleEscape(m_current
); 
1032             // Most common escape sequences first 
1034                 if (shouldBuildStrings
) 
1037             } else if (UNLIKELY(isLineTerminator(m_current
))) 
1038                 shiftLineTerminator(); 
1039             else if (m_current 
== 'x') { 
1041                 if (!isASCIIHexDigit(m_current
) || !isASCIIHexDigit(peek(1))) { 
1042                     m_lexErrorMessage 
= "\\x can only be followed by a hex character sequence"; 
// NOTE(review): parseString() reports StringUnterminated when the input ends
// inside a \x escape, whereas this path always reports StringCannotBeParsed.
// Confirm whether the difference is intended.
1043                     return StringCannotBeParsed
; 
1047                 if (shouldBuildStrings
) 
1048                     record16(convertHex(prev
, m_current
)); 
1050             } else if (m_current 
== 'u') { 
1052                 UnicodeHexValue character 
= parseFourDigitUnicodeHex(); 
1053                 if (character
.isValid()) { 
1054                     if (shouldBuildStrings
) 
1055                         record16(character
.value()); 
// An invalid \u immediately followed by the closing quote is tolerated rather than rejected.
1056                 } else if (m_current 
== stringQuoteCharacter
) { 
1057                     if (shouldBuildStrings
) 
1060                     m_lexErrorMessage 
= "\\u can only be followed by a Unicode character sequence"; 
1061                     return character
.valueType() == UnicodeHexValue::IncompleteHex 
? StringUnterminated 
: StringCannotBeParsed
; 
1063             } else if (strictMode 
&& isASCIIDigit(m_current
)) { 
1064                 // The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit. 
1065                 int character1 
= m_current
; 
1067                 if (character1 
!= '0' || isASCIIDigit(m_current
)) { 
1068                     m_lexErrorMessage 
= "The only valid numeric escape in strict mode is '\\0'"; 
1069                     return StringCannotBeParsed
; 
1071                 if (shouldBuildStrings
) 
1073             } else if (!strictMode 
&& isASCIIOctalDigit(m_current
)) { 
1074                 // Octal character sequences 
1075                 T character1 
= m_current
; 
1077                 if (isASCIIOctalDigit(m_current
)) { 
1078                     // Two octal characters 
1079                     T character2 
= m_current
; 
// A third digit is only taken when the first is in '0'..'3'; the largest three-digit value is then 3*64 + 7*8 + 7 = 255.
1081                     if (character1 
>= '0' && character1 
<= '3' && isASCIIOctalDigit(m_current
)) { 
1082                         if (shouldBuildStrings
) 
1083                             record16((character1 
- '0') * 64 + (character2 
- '0') * 8 + m_current 
- '0'); 
1086                         if (shouldBuildStrings
) 
1087                             record16((character1 
- '0') * 8 + character2 
- '0'); 
1090                     if (shouldBuildStrings
) 
1091                         record16(character1 
- '0'); 
// Any other escaped character stands for itself, unless the input ended right after the backslash.
1093             } else if (!atEnd()) { 
1094                 if (shouldBuildStrings
) 
1095                     record16(m_current
); 
1098                 m_lexErrorMessage 
= "Unterminated string constant"; 
1099                 return StringUnterminated
; 
// A new literal run starts after the escape.
1102             stringStart 
= currentSourcePtr(); 
1105         // Fast check for characters that require special handling. 
1106         // Catches 0, \n, \r, 0x2028, and 0x2029 as efficiently 
1107         // as possible, and lets through all common ASCII characters. 
1108         if (UNLIKELY(((static_cast<unsigned>(m_current
) - 0xE) & 0x2000))) { 
1109             // New-line or end of input is not allowed 
1110             if (atEnd() || isLineTerminator(m_current
)) { 
// NOTE(review): the same "Unexpected EOF" message is used when the cause is a
// line terminator rather than the end of input; only the return value differs.
1111                 m_lexErrorMessage 
= "Unexpected EOF"; 
1112                 return atEnd() ? StringUnterminated 
: StringCannotBeParsed
; 
1114             // Anything else is just a normal character 
// Flush the trailing literal run, publish the result and reset the buffer.
1119     if (currentSourcePtr() != stringStart 
&& shouldBuildStrings
) 
1120         append16(stringStart
, currentSourcePtr() - stringStart
); 
1121     if (shouldBuildStrings
) 
1122         tokenData
->ident 
= makeIdentifier(m_buffer16
.data(), m_buffer16
.size()); 
1124         tokenData
->ident 
= 0; 
1126     m_buffer16
.resize(0); 
1127     return StringParsedSuccessfully
; 
// Parses a hexadecimal literal and stores its value in returnValue.
//
// Digits are first accumulated, four bits at a time, into a 32-bit integer
// while the digit budget (maximumDigits) lasts. If the literal ends within
// the budget, the integer is the result. Otherwise the digits gathered so far
// are written back into m_buffer8 as characters, the remaining hex digits are
// consumed, and the value is computed with parseIntOverflow() in base 16.
// NOTE(review): the statements that advance the input and decrement
// maximumDigits are not visible here; presumably one digit is consumed per
// loop iteration.
1130 template <typename T
> 
1131 ALWAYS_INLINE 
void Lexer
<T
>::parseHex(double& returnValue
) 
1133     // Optimization: most hexadecimal values fit into 4 bytes. 
1134     uint32_t hexValue 
= 0; 
1135     int maximumDigits 
= 7; 
1137     // Shift out the 'x' prefix. 
1141         hexValue 
= (hexValue 
<< 4) + toASCIIHexValue(m_current
); 
1144     } while (isASCIIHexDigit(m_current
) && maximumDigits 
>= 0); 
// Budget not exhausted, so the loop stopped at a non-hex character: hexValue holds the whole literal.
1146     if (maximumDigits 
>= 0) { 
1147         returnValue 
= hexValue
; 
1151     // No more place in the hexValue buffer. 
1152     // The values are shifted out and placed into the m_buffer8 vector. 
1153     for (int i 
= 0; i 
< 8; ++i
) { 
// The top four bits are the next digit to emit, most significant first.
1154          int digit 
= hexValue 
>> 28; 
1156              record8(digit 
+ '0'); 
1158              record8(digit 
- 10 + 'a'); 
1162     while (isASCIIHexDigit(m_current
)) { 
1167     returnValue 
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 16); 
// Parses an octal literal into returnValue.
//
// Digits are accumulated into a 32-bit integer and, in parallel, stored in a
// temporary digits array indexed by the remaining budget (maximumDigits,
// starting at 9). When the literal ends within the budget and is not followed
// by another decimal digit, the integer is the result. Otherwise the stored
// digits are replayed, further octal digits are consumed, and the value is
// computed with parseIntOverflow() in base 8.
// NOTE(review): the return statements are not visible here; lex() calls
// parseDecimal() after this function when token is still not NUMBER, so a
// false return presumably means "not an octal literal after all". Confirm.
1170 template <typename T
> 
1171 ALWAYS_INLINE 
bool Lexer
<T
>::parseOctal(double& returnValue
) 
1173     // Optimization: most octal values fit into 4 bytes. 
1174     uint32_t octalValue 
= 0; 
1175     int maximumDigits 
= 9; 
1176     // Temporary buffer for the digits. Makes easier 
1177     // to reconstruct the input characters when needed. 
1181         octalValue 
= octalValue 
* 8 + (m_current 
- '0'); 
1182         digits
[maximumDigits
] = m_current
; 
1185     } while (isASCIIOctalDigit(m_current
) && maximumDigits 
>= 0); 
// Fast exit: the budget was not exhausted and no decimal digit (such as 8 or 9) follows.
1187     if (!isASCIIDigit(m_current
) && maximumDigits 
>= 0) { 
1188         returnValue 
= octalValue
; 
// Replay the stored digits, from index 9 downwards (the order in which they were stored).
1192     for (int i 
= 9; i 
> maximumDigits
; --i
) 
1195     while (isASCIIOctalDigit(m_current
)) { 
1200     if (isASCIIDigit(m_current
)) 
1203     returnValue 
= parseIntOverflow(m_buffer8
.data(), m_buffer8
.size(), 8); 
// Parses the integer part of a decimal literal.
//
// When m_buffer8 is empty, digits are accumulated into a 32-bit integer and
// stored in a temporary digits array indexed by the remaining budget
// (maximumDigits, starting at 9). If the literal ends within the budget and is
// not followed by '.' or an exponent marker, the integer is written to
// returnValue. Otherwise the stored digits are replayed and the remaining
// decimal digits are consumed.
// NOTE(review): the return statements are not visible here; lex() continues
// with the fraction/exponent and parseDouble() when this returns false.
1207 template <typename T
> 
1208 ALWAYS_INLINE 
bool Lexer
<T
>::parseDecimal(double& returnValue
) 
1210     // Optimization: most decimal values fit into 4 bytes. 
1211     uint32_t decimalValue 
= 0; 
1213     // Since parseOctal may be executed before parseDecimal, 
1214     // the m_buffer8 may hold ascii digits. 
1215     if (!m_buffer8
.size()) { 
1216         int maximumDigits 
= 9; 
1217         // Temporary buffer for the digits. Makes easier 
1218         // to reconstruct the input characters when needed. 
1222             decimalValue 
= decimalValue 
* 10 + (m_current 
- '0'); 
1223             digits
[maximumDigits
] = m_current
; 
1226         } while (isASCIIDigit(m_current
) && maximumDigits 
>= 0); 
// Fast exit for plain integers; OR-ing with 0x20 makes the exponent check match both 'e' and 'E'.
1228         if (maximumDigits 
>= 0 && m_current 
!= '.' && (m_current 
| 0x20) != 'e') { 
1229             returnValue 
= decimalValue
; 
// Replay the stored digits, from index 9 downwards (the order in which they were stored).
1233         for (int i 
= 9; i 
> maximumDigits
; --i
) 
1237     while (isASCIIDigit(m_current
)) { 
// Handles the fractional part of a numeric literal: loops for as long as
// m_current is an ASCII digit.
// NOTE(review): the loop body is not visible here; lex() later hands m_buffer8
// to parseDouble(), so the digits are presumably recorded there. Confirm.
1245 template <typename T
> 
1246 ALWAYS_INLINE 
void Lexer
<T
>::parseNumberAfterDecimalPoint() 
1249     while (isASCIIDigit(m_current
)) { 
// Handles the exponent part of a numeric literal: an optional '+' or '-' sign
// followed by a run of ASCII digits. A non-digit right after the optional
// sign is handled by a separate early branch.
// NOTE(review): the return statements are not visible here; lex() treats a
// false result as "Non-number found after exponent indicator".
1255 template <typename T
> 
1256 ALWAYS_INLINE 
bool Lexer
<T
>::parseNumberAfterExponentIndicator() 
1260     if (m_current 
== '+' || m_current 
== '-') { 
1265     if (!isASCIIDigit(m_current
)) 
1271     } while (isASCIIDigit(m_current
)); 
// Scans the body of a multi-line comment, looking for a '/' that follows a
// run of '*' characters. Line terminators inside the comment go through
// shiftLineTerminator() and set m_terminator.
// NOTE(review): the return statements are not visible here; lex() reports
// "Multiline comment was not closed properly" when this returns false.
1275 template <typename T
> 
1276 ALWAYS_INLINE 
bool Lexer
<T
>::parseMultilineComment() 
1279         while (UNLIKELY(m_current 
== '*')) { 
1281             if (m_current 
== '/') { 
1290         if (isLineTerminator(m_current
)) { 
1291             shiftLineTerminator(); 
1292             m_terminator 
= true; 
// Looks ahead without moving the lexer: a local cursor starts at m_code,
// whitespace and line terminators are passed over, and the result is true
// only if a ':' is found before m_codeEnd.
// NOTE(review): the loop body is not visible here; it presumably advances the
// local cursor by one character.
1298 template <typename T
> 
1299 bool Lexer
<T
>::nextTokenIsColon() 
1301     const T
* code 
= m_code
; 
1302     while (code 
< m_codeEnd 
&& (isWhiteSpace(*code
) || isLineTerminator(*code
))) 
1305     return code 
< m_codeEnd 
&& *code 
== ':'; 
// Produces the next token.
//
// Both scratch buffers must be empty on entry. The token starts out as
// ERRORTOK and m_terminator is cleared. After whitespace handling, the start
// offset is stored in tokenLocation, the current character is classified, and
// a switch on that class scans the token, using m_current as one-character
// lookahead for the multi-character operators.
//
// tokenData:     receives the token payload (ident, doubleValue, or the
//                line / offset / lineStartOffset triple for braces).
// tokenLocation: receives startOffset, and on exit line, endOffset and
//                lineStartOffset.
// lexerFlags:    LexerFlagsDontBuildStrings selects parseString<false>;
//                LexexFlagsDontBuildKeywords selects parseIdentifier<false>;
//                the flags are also forwarded to parseIdentifier().
// strictMode:    forwarded to the string and identifier parsers.
// Returns the token type. Two exit sequences are visible at the end; the
// second one requires the token to carry ErrorTokenFlag.
1308 template <typename T
> 
1309 JSTokenType Lexer
<T
>::lex(JSTokenData
* tokenData
, JSTokenLocation
* tokenLocation
, unsigned lexerFlags
, bool strictMode
) 
1312     ASSERT(m_buffer8
.isEmpty()); 
1313     ASSERT(m_buffer16
.isEmpty()); 
1315     JSTokenType token 
= ERRORTOK
; 
1316     m_terminator 
= false; 
1319     while (isWhiteSpace(m_current
)) 
1325     tokenLocation
->startOffset 
= currentOffset(); 
1326     ASSERT(currentOffset() >= currentLineStartOffset()); 
// Classify the current character: Latin-1 goes through the lookup table, anything else is tested case by case.
1329     if (LIKELY(isLatin1(m_current
))) 
1330         type 
= static_cast<CharacterType
>(typesOfLatin1Characters
[m_current
]); 
1331     else if (isNonLatin1IdentStart(m_current
)) 
1332         type 
= CharacterIdentifierStart
; 
1333     else if (isLineTerminator(m_current
)) 
1334         type 
= CharacterLineTerminator
; 
1336         type 
= CharacterInvalid
; 
1339     case CharacterGreater
: 
1341         if (m_current 
== '>') { 
1343             if (m_current 
== '>') { 
1345                 if (m_current 
== '=') { 
1347                     token 
= URSHIFTEQUAL
; 
1353             if (m_current 
== '=') { 
1355                 token 
= RSHIFTEQUAL
; 
1361         if (m_current 
== '=') { 
1368     case CharacterEqual
: 
1370         if (m_current 
== '=') { 
1372             if (m_current 
== '=') { 
1384         if (m_current 
== '!' && peek(1) == '-' && peek(2) == '-') { 
1385             // <!-- marks the beginning of a line comment (for www usage) 
1386             goto inSingleLineComment
; 
1388         if (m_current 
== '<') { 
1390             if (m_current 
== '=') { 
1392                 token 
= LSHIFTEQUAL
; 
1398         if (m_current 
== '=') { 
1405     case CharacterExclamationMark
: 
1407         if (m_current 
== '=') { 
1409             if (m_current 
== '=') { 
1417         token 
= EXCLAMATION
; 
1421         if (m_current 
== '+') { 
// The token differs depending on whether a line terminator preceded it (m_terminator).
1423             token 
= (!m_terminator
) ? PLUSPLUS 
: AUTOPLUSPLUS
; 
1426         if (m_current 
== '=') { 
1435         if (m_current 
== '-') { 
// A '>' at this point while at the start of a line is treated as the start of a single-line comment.
1437             if (m_atLineStart 
&& m_current 
== '>') { 
1439                 goto inSingleLineComment
; 
1441             token 
= (!m_terminator
) ? MINUSMINUS 
: AUTOMINUSMINUS
; 
1444         if (m_current 
== '=') { 
1451     case CharacterMultiply
: 
1453         if (m_current 
== '=') { 
1460     case CharacterSlash
: 
1462         if (m_current 
== '/') { 
1464             goto inSingleLineComment
; 
1466         if (m_current 
== '*') { 
1468             if (parseMultilineComment()) 
1470             m_lexErrorMessage 
= "Multiline comment was not closed properly"; 
1471             token 
= UNTERMINATED_MULTILINE_COMMENT_ERRORTOK
; 
1474         if (m_current 
== '=') { 
1483         if (m_current 
== '&') { 
1488         if (m_current 
== '=') { 
1497         if (m_current 
== '=') { 
1504     case CharacterModulo
: 
1506         if (m_current 
== '=') { 
1515         if (m_current 
== '=') { 
1520         if (m_current 
== '|') { 
1527     case CharacterOpenParen
: 
1531     case CharacterCloseParen
: 
1535     case CharacterOpenBracket
: 
1536         token 
= OPENBRACKET
; 
1539     case CharacterCloseBracket
: 
1540         token 
= CLOSEBRACKET
; 
1543     case CharacterComma
: 
1547     case CharacterColon
: 
1551     case CharacterQuestion
: 
1555     case CharacterTilde
: 
1559     case CharacterSemicolon
: 
1563     case CharacterOpenBrace
: 
// Braces record their own position in tokenData.
1564         tokenData
->line 
= lineNumber(); 
1565         tokenData
->offset 
= currentOffset(); 
1566         tokenData
->lineStartOffset 
= currentLineStartOffset(); 
1567         ASSERT(tokenData
->offset 
>= tokenData
->lineStartOffset
); 
1571     case CharacterCloseBrace
: 
1572         tokenData
->line 
= lineNumber(); 
1573         tokenData
->offset 
= currentOffset(); 
1574         tokenData
->lineStartOffset 
= currentLineStartOffset(); 
1575         ASSERT(tokenData
->offset 
>= tokenData
->lineStartOffset
); 
1581         if (!isASCIIDigit(m_current
)) { 
1585         goto inNumberAfterDecimalPoint
; 
// Hex literal: an 'x' or 'X' (the OR with 0x20 folds case) that is followed by at least one hex digit.
1588         if ((m_current 
| 0x20) == 'x' && isASCIIHexDigit(peek(1))) { 
1589             parseHex(tokenData
->doubleValue
); 
1593             if (isASCIIOctalDigit(m_current
)) { 
1594                 if (parseOctal(tokenData
->doubleValue
)) { 
1596                         m_lexErrorMessage 
= "Octal escapes are forbidden in strict mode"; 
1597                         token 
= INVALID_OCTAL_NUMBER_ERRORTOK
; 
1604         // Fall through into CharacterNumber 
1605     case CharacterNumber
: 
1606         if (LIKELY(token 
!= NUMBER
)) { 
1607             if (!parseDecimal(tokenData
->doubleValue
)) { 
1608                 if (m_current 
== '.') { 
1610 inNumberAfterDecimalPoint
: 
1611                     parseNumberAfterDecimalPoint(); 
1613                 if ((m_current 
| 0x20) == 'e') { 
1614                     if (!parseNumberAfterExponentIndicator()) { 
1615                         m_lexErrorMessage 
= "Non-number found after exponent indicator"; 
1616                         token 
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK 
: INVALID_NUMERIC_LITERAL_ERRORTOK
; 
// The characters collected in m_buffer8 are converted in one go.
1620                 size_t parsedLength
; 
1621                 tokenData
->doubleValue 
= parseDouble(m_buffer8
.data(), m_buffer8
.size(), parsedLength
); 
1626         // No identifiers allowed directly after numeric literal, e.g. "3in" is bad. 
1627         if (UNLIKELY(isIdentStart(m_current
))) { 
// NOTE(review): this message talks about a missing digit after a decimal point,
// while the condition is an identifier character following the literal. Confirm the wording.
1628             m_lexErrorMessage 
= "At least one digit must occur after a decimal point"; 
1629             token 
= atEnd() ? UNTERMINATED_NUMERIC_LITERAL_ERRORTOK 
: INVALID_NUMERIC_LITERAL_ERRORTOK
; 
1632         m_buffer8
.resize(0); 
1634     case CharacterQuote
: 
1635         if (lexerFlags 
& LexerFlagsDontBuildStrings
) { 
1636             StringParseResult result 
= parseString
<false>(tokenData
, strictMode
); 
1637             if (UNLIKELY(result 
!= StringParsedSuccessfully
)) { 
1638                 token 
= result 
== StringUnterminated 
? UNTERMINATED_STRING_LITERAL_ERRORTOK 
: INVALID_STRING_LITERAL_ERRORTOK
; 
1642             StringParseResult result 
= parseString
<true>(tokenData
, strictMode
); 
1643             if (UNLIKELY(result 
!= StringParsedSuccessfully
)) { 
1644                 token 
= result 
== StringUnterminated 
? UNTERMINATED_STRING_LITERAL_ERRORTOK 
: INVALID_STRING_LITERAL_ERRORTOK
; 
1651     case CharacterIdentifierStart
: 
1652         ASSERT(isIdentStart(m_current
)); 
1653         // Fall through into CharacterBackSlash. 
1654     case CharacterBackSlash
: 
1655         if (lexerFlags 
& LexexFlagsDontBuildKeywords
) 
1656             token 
= parseIdentifier
<false>(tokenData
, lexerFlags
, strictMode
); 
1658             token 
= parseIdentifier
<true>(tokenData
, lexerFlags
, strictMode
); 
1660     case CharacterLineTerminator
: 
1661         ASSERT(isLineTerminator(m_current
)); 
1662         shiftLineTerminator(); 
1663         m_atLineStart 
= true; 
1664         m_terminator 
= true; 
1665         m_lineStart 
= m_code
; 
1667     case CharacterInvalid
: 
1668         m_lexErrorMessage 
= invalidCharacterMessage(); 
1672         RELEASE_ASSERT_NOT_REACHED(); 
1673         m_lexErrorMessage 
= "Internal Error"; 
1678     m_atLineStart 
= false; 
// Single-line comment: scan up to the line terminator, then update the line bookkeeping.
1681 inSingleLineComment
: 
1682     while (!isLineTerminator(m_current
)) { 
1687     shiftLineTerminator(); 
1688     m_atLineStart 
= true; 
1689     m_terminator 
= true; 
1690     m_lineStart 
= m_code
; 
1691     if (!lastTokenWasRestrKeyword()) 
1695     // Fall through into returnToken. 
// Exit sequence that also remembers the token in m_lastToken.
1698     tokenLocation
->line 
= m_lineNumber
; 
1699     tokenLocation
->endOffset 
= currentOffset(); 
1700     tokenLocation
->lineStartOffset 
= currentLineStartOffset(); 
1701     ASSERT(tokenLocation
->endOffset 
>= tokenLocation
->lineStartOffset
); 
1702     m_lastToken 
= token
; 
// Exit sequence for errors: the token must carry ErrorTokenFlag.
1707     tokenLocation
->line 
= m_lineNumber
; 
1708     tokenLocation
->endOffset 
= currentOffset(); 
1709     tokenLocation
->lineStartOffset 
= currentLineStartOffset(); 
1710     ASSERT(tokenLocation
->endOffset 
>= tokenLocation
->lineStartOffset
); 
1711     RELEASE_ASSERT(token 
& ErrorTokenFlag
); 
1715 template <typename T
> 
1716 static inline void orCharacter(UChar
&, UChar
); 
1719 inline void orCharacter
<LChar
>(UChar
&, UChar
) { } 
1722 inline void orCharacter
<UChar
>(UChar
& orAccumulator
, UChar character
) 
1724     orAccumulator 
|= character
; 
// Scans a regular expression literal and produces its pattern and flags.
//
// pattern:       receives the identifier built from the pattern characters.
// flags:         receives the identifier built from the identifier-part
//                characters that follow the pattern.
// patternPrefix: when non-zero, recorded as the first pattern character; it
//                must not be a line terminator, '/' or '['.
//
// A '/' ends the pattern only when it is not escaped and not inside
// brackets; a line terminator or the end of input aborts the scan and empties
// m_buffer16. Every character is also folded into charactersOredTogether via
// orCharacter<T>(), and that value is passed to makeRightSizedIdentifier().
// NOTE(review): the return statements and the bracket / escape bookkeeping
// are only partly visible here.
1727 template <typename T
> 
1728 bool Lexer
<T
>::scanRegExp(const Identifier
*& pattern
, const Identifier
*& flags
, UChar patternPrefix
) 
1730     ASSERT(m_buffer16
.isEmpty()); 
1732     bool lastWasEscape 
= false; 
1733     bool inBrackets 
= false; 
1734     UChar charactersOredTogether 
= 0; 
1736     if (patternPrefix
) { 
1737         ASSERT(!isLineTerminator(patternPrefix
)); 
1738         ASSERT(patternPrefix 
!= '/'); 
1739         ASSERT(patternPrefix 
!= '['); 
1740         record16(patternPrefix
); 
// An unterminated literal: drop whatever was collected.
1744         if (isLineTerminator(m_current
) || atEnd()) { 
1745             m_buffer16
.resize(0); 
// Only an unescaped '/' outside brackets terminates the pattern.
1753         if (prev 
== '/' && !lastWasEscape 
&& !inBrackets
) 
1757         orCharacter
<T
>(charactersOredTogether
, prev
); 
1759         if (lastWasEscape
) { 
1760             lastWasEscape 
= false; 
1772             lastWasEscape 
= true; 
1777     pattern 
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
); 
// Reuse the buffer and the accumulator for the flags.
1779     m_buffer16
.resize(0); 
1780     charactersOredTogether 
= 0; 
1782     while (isIdentPart(m_current
)) { 
1783         record16(m_current
); 
1784         orCharacter
<T
>(charactersOredTogether
, m_current
); 
1788     flags 
= makeRightSizedIdentifier(m_buffer16
.data(), m_buffer16
.size(), charactersOredTogether
); 
1789     m_buffer16
.resize(0); 
// Walks over a regular expression literal without building anything.
// The termination rules visible here match scanRegExp(): a line terminator
// or the end of input stops the scan early, and a '/' only ends the pattern
// when it is unescaped and outside brackets. Trailing identifier-part
// characters (the flags) are then consumed.
// NOTE(review): the return statements are not visible here.
1794 template <typename T
> 
1795 bool Lexer
<T
>::skipRegExp() 
1797     bool lastWasEscape 
= false; 
1798     bool inBrackets 
= false; 
1801         if (isLineTerminator(m_current
) || atEnd()) 
1808         if (prev 
== '/' && !lastWasEscape 
&& !inBrackets
) 
1811         if (lastWasEscape
) { 
1812             lastWasEscape 
= false; 
1824             lastWasEscape 
= true; 
1829     while (isIdentPart(m_current
)) 
// Resets the lexer's scratch state: both character buffers are swapped with
// freshly constructed empty vectors and m_isReparsing is cleared.
// NOTE(review): swapping rather than resizing presumably also gives up the
// buffers' storage; confirm against Vector::swap.
1835 template <typename T
> 
1836 void Lexer
<T
>::clear() 
1840     Vector
<LChar
> newBuffer8
; 
1841     m_buffer8
.swap(newBuffer8
); 
1843     Vector
<UChar
> newBuffer16
; 
1844     m_buffer16
.swap(newBuffer16
); 
1846     m_isReparsing 
= false; 
1849 template <typename T
> 
1850 SourceCode Lexer
<T
>::sourceCode(int openBrace
, int closeBrace
, int firstLine
, unsigned startColumn
) 
1852     ASSERT(m_source
->provider()->source()[openBrace
] == '{'); 
1853     ASSERT(m_source
->provider()->source()[closeBrace
] == '}'); 
1854     return SourceCode(m_source
->provider(), openBrace
, closeBrace 
+ 1, firstLine
, startColumn
); 
1857 // Instantiate the two flavors of Lexer we need instead of putting most of this file in Lexer.h 
// These explicit instantiations emit the member definitions above for
// Lexer<LChar> and Lexer<UChar> from this translation unit.
1858 template class Lexer
<LChar
>; 
1859 template class Lexer
<UChar
>;