]>
Commit | Line | Data |
---|---|---|
1 | # Copyright (C) 2010 Apple Inc. All rights reserved. | |
2 | # | |
3 | # Redistribution and use in source and binary forms, with or without | |
4 | # modification, are permitted provided that the following conditions | |
5 | # are met: | |
6 | # 1. Redistributions of source code must retain the above copyright | |
7 | # notice, this list of conditions and the following disclaimer. | |
8 | # 2. Redistributions in binary form must reproduce the above copyright | |
9 | # notice, this list of conditions and the following disclaimer in the | |
10 | # documentation and/or other materials provided with the distribution. | |
11 | # | |
12 | # THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY | |
13 | # EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
15 | # PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR | |
16 | # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | |
17 | # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | |
18 | # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |
19 | # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | |
20 | # OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
21 | # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
22 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
23 | ||
24 | import sys | |
25 | ||
# Character classes to generate.  Each entry maps a class name to:
#   "UseTable": whether a 65536-entry lookup table may back the class
#               (only honoured when table emission is enabled).
#   "Inverse":  optional; names the class whose table is reused, flagged as
#               inverted, instead of emitting a second table.
#   "data":     members of the class; each item is a single character, a
#               single integer code point, or an inclusive (first, last)
#               pair whose ends may be characters or integers.
types = {
    "wordchar": {
        "UseTable": True,
        "data": ['_', ('0', '9'), ('A', 'Z'), ('a', 'z')],
    },
    "nonwordchar": {
        "UseTable": True,
        "Inverse": "wordchar",
        "data": [
            '`',
            (0, ord('0') - 1),
            (ord('9') + 1, ord('A') - 1),
            (ord('Z') + 1, ord('_') - 1),
            (ord('z') + 1, 0xffff),
        ],
    },
    "newline": {
        "UseTable": False,
        "data": ['\n', '\r', 0x2028, 0x2029],
    },
    "spaces": {
        "UseTable": True,
        "data": [
            ' ',
            ('\t', '\r'),
            0xa0,
            0x1680,
            0x180e,
            0x2028,
            0x2029,
            0x202f,
            0x205f,
            0x3000,
            (0x2000, 0x200a),
            0xfeff,
        ],
    },
    "nonspaces": {
        "UseTable": True,
        "Inverse": "spaces",
        "data": [
            (0, ord('\t') - 1),
            (ord('\r') + 1, ord(' ') - 1),
            (ord(' ') + 1, 0x009f),
            (0x00a1, 0x167f),
            (0x1681, 0x180d),
            (0x180f, 0x1fff),
            (0x200b, 0x2027),
            (0x202a, 0x202e),
            (0x2030, 0x205e),
            (0x2060, 0x2fff),
            (0x3001, 0xfefe),
            (0xff00, 0xffff),
        ],
    },
    "digits": {
        "UseTable": False,
        "data": [('0', '9')],
    },
    "nondigits": {
        "UseTable": False,
        "Inverse": "digits",
        "data": [(0, ord('0') - 1), (ord('9') + 1, 0xffff)],
    },
}

# Number of table entries written before a line break is inserted.
entriesPerLine = 50

# Accumulators for the generated C++ text: lookup tables first, then the
# factory functions.
arrays = ""
functions = ""

# Tables are emitted unless the first command-line argument is "--no-tables".
emitTables = not (len(sys.argv) >= 2 and sys.argv[1] == "--no-tables")
39 | ||
def _expandRanges(data):
    """Normalise a class's "data" list into sorted (low, high) code-point pairs.

    Each item of `data` may be a single character, a single integer code
    point, or a (first, last) pair whose ends are characters or integers.
    Single values become (value, value).  A pair that starts at or below
    0x7f and ends above it is split at the 0x7f/0x80 boundary, so every
    returned pair lies wholly on one side of that boundary.

    Returns the pairs as a list sorted in ascending order.
    """
    ranges = []
    for entry in data:
        if isinstance(entry, str):
            ranges.append((ord(entry), ord(entry)))
        elif isinstance(entry, int):
            ranges.append((entry, entry))
        else:
            low, high = entry
            if isinstance(low, str):
                low = ord(low)
            if isinstance(high, str):
                high = ord(high)
            if high > 0x7f and low <= 0x7f:
                ranges.append((low, 0x7f))
                low = 0x80
            ranges.append((low, high))
    ranges.sort()
    return ranges


def _buildTable(name, ranges, perLine):
    """Return the C source of the 65536-entry lookup table for class `name`.

    `ranges` must be the sorted inclusive (low, high) pairs of the class.
    Entries inside a range are written as 1 and all others as 0, with a
    line break after every `perLine` entries.  The final entry is written
    without a trailing comma.
    """
    # Pieces are collected in a list and joined once; appending to a string
    # tens of thousands of times can degrade to quadratic time.
    pieces = ["static const char _%sData[65536] = {\n" % name]
    index = 0
    for low, high in ranges:
        # Zero-fill the gap before this range.
        while index < low:
            index += 1
            pieces.append('0,')
            if index % perLine == 0:
                pieces.append('\n')
        # Mark every code point inside the range.
        while index <= high:
            index += 1
            pieces.append('1' if index == 65536 else '1,')
            if index % perLine == 0:
                pieces.append('\n')
    # Zero-fill up to, but not including, the last entry ...
    while index < 0xffff:
        pieces.append('0,')
        index += 1
        if index % perLine == 0:
            pieces.append('\n')
    # ... which is written without a trailing comma.
    if index == 0xffff:
        pieces.append('0')
    pieces.append("\n};\n\n")
    return "".join(pieces)


def _buildCreateFunction(name, classes, ranges, useTables):
    """Return the C++ source of the `<name>Create()` factory function.

    When `useTables` is true and the class has "UseTable" set, the
    CharacterClass is constructed around a lookup table: its own table, or
    the table of the class named by "Inverse" with the inverted flag set.
    Otherwise it is constructed with 0.  Every range is then appended to
    the class: single code points go to m_matches / m_matchesUnicode and
    spans go to m_ranges / m_rangesUnicode, the Unicode variants being used
    for values above 127.
    """
    lines = [
        "CharacterClass* %sCreate()\n" % name,
        "{\n",
    ]
    if useTables and classes["UseTable"]:
        if "Inverse" in classes:
            lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, true));\n" % (classes["Inverse"]))
        else:
            lines.append(" CharacterClass* characterClass = new CharacterClass(CharacterClassTable::create(_%sData, false));\n" % (name))
    else:
        lines.append(" CharacterClass* characterClass = new CharacterClass(0);\n")
    for low, high in ranges:
        if low == high:
            if low > 127:
                lines.append(" characterClass->m_matchesUnicode.append(0x%04x);\n" % low)
            else:
                lines.append(" characterClass->m_matches.append(0x%02x);\n" % low)
        elif low > 127 or high > 127:
            lines.append(" characterClass->m_rangesUnicode.append(CharacterRange(0x%04x, 0x%04x));\n" % (low, high))
        else:
            lines.append(" characterClass->m_ranges.append(CharacterRange(0x%02x, 0x%02x));\n" % (low, high))
    lines.append(" return characterClass;\n")
    lines.append("}\n\n")
    return "".join(lines)


# Generate the lookup table (where applicable) and the create function for
# every character class, accumulating the C++ text for the output stage.
for name, classes in types.items():
    ranges = _expandRanges(classes["data"])

    # Inverse classes reuse the table of the class they invert, so a table is
    # only emitted for classes that own one.
    if emitTables and classes["UseTable"] and "Inverse" not in classes:
        arrays += _buildTable(name, ranges, entriesPerLine)

    functions += _buildCreateFunction(name, classes, ranges, emitTables)
112 | ||
# Write the generated tables followed by the create functions to the file
# named by the last command-line argument, or to stdout when no output path
# was given.
outputArgs = sys.argv[1:]
if outputArgs and outputArgs[0] == "--no-tables":
    # A leading "--no-tables" is the option that disables table emission,
    # not an output path; without this, running with only that flag would
    # create a file literally named "--no-tables".
    outputArgs = outputArgs[1:]

if outputArgs:
    # The context manager closes the file even if a write fails.
    with open(outputArgs[-1], "w") as outputFile:
        outputFile.write(arrays)
        outputFile.write(functions)
else:
    print(arrays)
    print(functions)
121 |