]> git.saurik.com Git - apple/javascriptcore.git/blame - create_regex_tables
JavaScriptCore-903.5.tar.gz
[apple/javascriptcore.git] / create_regex_tables
CommitLineData
4e4e5a6f
A
1# Copyright (C) 2010 Apple Inc. All rights reserved.
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions
5# are met:
6# 1. Redistributions of source code must retain the above copyright
7# notice, this list of conditions and the following disclaimer.
8# 2. Redistributions in binary form must reproduce the above copyright
9# notice, this list of conditions and the following disclaimer in the
10# documentation and/or other materials provided with the distribution.
11#
12# THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
13# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
14# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
15# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
16# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
17# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
18# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
19# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
20# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
21# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
22# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23
14957cd0
A
24import sys
25
4e4e5a6f
A
# Character classes to generate C++ for.  Each entry maps a class name to:
#   "UseTable" - whether a 65536-entry lookup table may back the class.
#   "Inverse"  - (optional) name of the class whose table is reused by this
#                one; the create function then passes `true` as the second
#                argument of CharacterClassTable::create
#                (presumably an "inverted" flag -- confirm against the C++).
#   "data"     - members of the class: a single character, a single code
#                point, or an inclusive (first, last) pair of either.
types = {
    "wordchar": {
        "UseTable": True,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z')],
    },
    "nonwordchar": {
        "UseTable": True,
        "Inverse": "wordchar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0xffff),
        ],
    },
    "newline": {
        "UseTable": False,
        "data": ['\n', '\r', 0x2028, 0x2029],
    },
    "spaces": {
        "UseTable": True,
        "data": [
            ' ', ('\t', '\r'), 0xa0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f,
            0x205f, 0x3000, (0x2000, 0x200a), 0xfeff,
        ],
    },
    "nonspaces": {
        "UseTable": True,
        "Inverse": "spaces",
        "data": [
            (0, ord('\t') - 1),
            (ord('\r') + 1, ord(' ') - 1),
            (ord(' ') + 1, 0x009f),
            (0x00a1, 0x167f),
            (0x1681, 0x180d),
            (0x180f, 0x1fff),
            (0x200b, 0x2027),
            (0x202a, 0x202e),
            (0x2030, 0x205e),
            (0x2060, 0x2fff),
            (0x3001, 0xfefe),
            (0xff00, 0xffff),
        ],
    },
    "digits": {
        "UseTable": False,
        "data": [('0', '9')],
    },
    "nondigits": {
        "UseTable": False,
        "Inverse": "digits",
        "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)],
    },
}

# Number of table entries written per line of the generated array.
entriesPerLine = 50

# Number of entries in every generated lookup table (one per 16-bit code unit).
_TABLE_SIZE = 0x10000


def _normalize_ranges(data):
    """Return the sorted list of inclusive (first, last) code point pairs.

    `data` is the "data" list of one entry of `types`.  Characters are
    converted with ord(), single items become one-element ranges, and any
    range that straddles 0x7f is split in two so that every resulting range
    lies wholly at or below 0x7f or wholly at or above 0x80 (the create
    function emits those two groups into different lists).
    """
    ranges = []
    for item in data:
        if isinstance(item, str):
            ranges.append((ord(item), ord(item)))
        elif isinstance(item, int):
            ranges.append((item, item))
        else:
            first, last = item
            if isinstance(first, str):
                first = ord(first)
            if isinstance(last, str):
                last = ord(last)
            if last > 0x7f and first <= 0x7f:
                ranges.append((first, 0x7f))
                first = 0x80
            ranges.append((first, last))
    ranges.sort()
    return ranges


def _build_table(name, ranges):
    """Return the C source of the 65536-entry lookup table for class `name`.

    Entry N is 1 when code point N falls inside one of `ranges`, else 0.
    Entries are comma separated, `entriesPerLine` to a line.

    Raises ValueError if a range reaches outside 0..0xffff.
    """
    flags = ["0"] * _TABLE_SIZE
    for first, last in ranges:
        if first < 0 or last >= _TABLE_SIZE:
            raise ValueError(
                "range (0x%x, 0x%x) of %s is outside the table" % (first, last, name))
        for code in range(first, last + 1):
            flags[code] = "1"

    rows = [
        ",".join(flags[start:start + entriesPerLine])
        for start in range(0, _TABLE_SIZE, entriesPerLine)
    ]
    # Every full row ends with a comma before the line break; the final entry
    # of the table has no trailing comma.
    return ("static const char _%sData[65536] = {\n" % name
            + ",\n".join(rows)
            + "\n};\n\n")


def _build_create_function(name, classes, ranges, emit_tables):
    """Return the C++ source of the `<name>Create()` factory function.

    When tables are emitted and the class uses one, the CharacterClass is
    constructed around the table (its own, or that of its "Inverse" class);
    otherwise it is constructed with 0.  Every range is then appended to the
    matching list: single code points to m_matches / m_matchesUnicode, spans
    to m_ranges / m_rangesUnicode, the Unicode variants being used above 127.
    """
    lines = ["CharacterClass* %sCreate()\n" % name, "{\n"]
    if emit_tables and classes["UseTable"]:
        if "Inverse" in classes:
            lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
        else:
            lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
    else:
        lines.append(" CharacterClass* characterClass = new CharacterClass(0);\n")

    for first, last in ranges:
        if first == last:
            if first > 127:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % first)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % first)
        elif first > 127 or last > 127:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (first, last))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (first, last))

    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


def generate(emit_tables=True):
    """Return `(arrays, functions)`, the two parts of the generated source.

    `arrays` holds the lookup tables (empty when `emit_tables` is false; only
    classes with "UseTable" set and no "Inverse" get a table of their own).
    `functions` holds one create function per entry of `types`.
    """
    arrays = []
    functions = []
    for name, classes in types.items():
        ranges = _normalize_ranges(classes["data"])
        if emit_tables and classes["UseTable"] and "Inverse" not in classes:
            arrays.append(_build_table(name, ranges))
        functions.append(
            _build_create_function(name, classes, ranges, emit_tables))
    return "".join(arrays), "".join(functions)


def main(argv=None):
    """Run the generator: `create_regex_tables [--no-tables] [output-file]`.

    `--no-tables` is only recognised as the first argument.  The last
    remaining argument, if any, names the output file; without one the
    generated source is printed to standard output.
    """
    if argv is None:
        argv = sys.argv
    args = argv[1:]

    emit_tables = not (args and args[0] == "--no-tables")
    if not emit_tables:
        # Drop the flag so that it is never mistaken for the output path when
        # it is the only argument.
        args = args[1:]

    arrays, functions = generate(emit_tables)

    if args:
        # `with` closes the file even if a write fails.
        with open(args[-1], "w") as output:
            output.write(arrays)
            output.write(functions)
    else:
        print(arrays)
        print(functions)


if __name__ == "__main__":
    main()
4e4e5a6f 121