Originally written by Philip Hazel
Copyright (c) 1997-2006 University of Cambridge
- Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
+ Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
#ifndef DFTABLES
+// Change the following to 1 to dump used regular expressions at process exit time.
+#define REGEXP_HISTOGRAM 0
+
#include "Assertions.h"
#if COMPILER(MSVC)
offsets within the compiled regex. The default is 2, which allows for compiled
patterns up to 64K long. */
-#define LINK_SIZE 2
+#define LINK_SIZE 3
/* Define DEBUG to get debugging output on stdout. */
opcodePtr[1] = value;
}
+static inline void put3ByteValue(unsigned char* opcodePtr, int value)
+{
+ ASSERT(value >= 0 && value <= 0xFFFFFF);
+ opcodePtr[0] = value >> 16;
+ opcodePtr[1] = value >> 8;
+ opcodePtr[2] = value;
+}
+
static inline int get2ByteValue(const unsigned char* opcodePtr)
{
return (opcodePtr[0] << 8) | opcodePtr[1];
}
+static inline int get3ByteValue(const unsigned char* opcodePtr)
+{
+ return (opcodePtr[0] << 16) | (opcodePtr[1] << 8) | opcodePtr[2];
+}
+
static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
{
put2ByteValue(opcodePtr, value);
opcodePtr += 2;
}
+static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
+{
+ put3ByteValue(opcodePtr, value);
+ opcodePtr += 3;
+}
+
static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
{
+#if LINK_SIZE == 3
+ put3ByteValue(opcodePtr, value);
+#elif LINK_SIZE == 2
put2ByteValue(opcodePtr, value);
+#else
+# error LINK_SIZE not supported.
+#endif
}
static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
{
+#if LINK_SIZE == 3
+ return get3ByteValue(opcodePtr);
+#elif LINK_SIZE == 2
return get2ByteValue(opcodePtr);
+#else
+# error LINK_SIZE not supported.
+#endif
}
-#define MAX_PATTERN_SIZE (1 << 16)
+#define MAX_PATTERN_SIZE 1024 * 1024 // Derived by empirical testing of compile time in PCRE and WREC.
+COMPILE_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE)), pcre_max_pattern_fits_in_bytecode);
static inline void putLinkValue(unsigned char* opcodePtr, int value)
{
// FIXME: These are really more of a "compiled regexp state" than "regexp options"
enum RegExpOptions {
- UseFirstByteOptimizationOption = 0x40000000, /* first_byte is set */
- UseRequiredByteOptimizationOption = 0x20000000, /* req_byte is set */
+ UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */
+ UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */
UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */
IsAnchoredOption = 0x02000000, /* can't use partial with this regex */
IgnoreCaseOption = 0x00000001,
MatchAcrossMultipleLinesOption = 0x00000002
};
-/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
+/* Flags added to firstByte or reqByte; a "non-literal" item is either a
variable-length repeat, or a anything other than literal characters. */
#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */
-#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */
+#define REQ_VARY 0x0200 /* reqByte followed non-literal item */
/* Miscellaneous definitions */
\
macro(CIRC) \
macro(DOLL) \
+ macro(BOL) \
+ macro(EOL) \
macro(CHAR) \
macro(CHAR_IGNORING_CASE) \
macro(ASCII_CHAR) \
#define EXTRACT_BASIC_MAX 100
-/* The index of names and the
-code vector run on as long as necessary after the end. We store an explicit
-offset to the name table so that if a regex is compiled on one host, saved, and
-then run on another where the size of pointers is different, all might still
-be well. For the case of compiled-on-4 and run-on-8, we include an extra
-pointer that is always NULL.
-*/
+/* The code vector runs on as long as necessary after the end. */
struct JSRegExp {
unsigned options;
- unsigned short top_bracket;
- unsigned short top_backref;
+ unsigned short topBracket;
+ unsigned short topBackref;
- unsigned short first_byte;
- unsigned short req_byte;
+ unsigned short firstByte;
+ unsigned short reqByte;
+
+#if REGEXP_HISTOGRAM
+ size_t stringOffset;
+ size_t stringLength;
+#endif
};
/* Internal shared data tables. These are tables that are used by more than one
but are not part of the PCRE public API. The data for these tables is in the
pcre_tables.c module. */
-#define kjs_pcre_utf8_table1_size 6
+#define jsc_pcre_utf8_table1_size 6
-extern const int kjs_pcre_utf8_table1[6];
-extern const int kjs_pcre_utf8_table2[6];
-extern const int kjs_pcre_utf8_table3[6];
-extern const unsigned char kjs_pcre_utf8_table4[0x40];
+extern const int jsc_pcre_utf8_table1[6];
+extern const int jsc_pcre_utf8_table2[6];
+extern const int jsc_pcre_utf8_table3[6];
+extern const unsigned char jsc_pcre_utf8_table4[0x40];
-extern const unsigned char kjs_pcre_default_tables[tables_length];
+extern const unsigned char jsc_pcre_default_tables[tables_length];
static inline unsigned char toLowerCase(unsigned char c)
{
- static const unsigned char* lowerCaseChars = kjs_pcre_default_tables + lcc_offset;
+ static const unsigned char* lowerCaseChars = jsc_pcre_default_tables + lcc_offset;
return lowerCaseChars[c];
}
static inline unsigned char flipCase(unsigned char c)
{
- static const unsigned char* flippedCaseChars = kjs_pcre_default_tables + fcc_offset;
+ static const unsigned char* flippedCaseChars = jsc_pcre_default_tables + fcc_offset;
return flippedCaseChars[c];
}
static inline unsigned char classBitmapForChar(unsigned char c)
{
- static const unsigned char* charClassBitmaps = kjs_pcre_default_tables + cbits_offset;
+ static const unsigned char* charClassBitmaps = jsc_pcre_default_tables + cbits_offset;
return charClassBitmaps[c];
}
static inline unsigned char charTypeForChar(unsigned char c)
{
- const unsigned char* charTypeMap = kjs_pcre_default_tables + ctypes_offset;
+ const unsigned char* charTypeMap = jsc_pcre_default_tables + ctypes_offset;
return charTypeMap[c];
}
static inline bool isSpaceChar(UChar c)
{
- return c < 128 && (charTypeForChar(c) & ctype_space);
+ return (c < 128 && (charTypeForChar(c) & ctype_space)) || c == 0x00A0;
}
static inline bool isNewline(UChar nl)
that one of the source files. They have to have external linkage, but
but are not part of the public API and so not exported from the library. */
-extern int kjs_pcre_ucp_othercase(unsigned);
-extern bool kjs_pcre_xclass(int, const unsigned char*);
+extern int jsc_pcre_ucp_othercase(unsigned);
+extern bool jsc_pcre_xclass(int, const unsigned char*);
#endif