]>
Commit | Line | Data |
---|---|---|
ee6c04ef JF |
1 | #!/usr/bin/python |
2 | ||
3 | # Cycript - Optimizing JavaScript Compiler/Runtime | |
4 | # Copyright (C) 2009-2015 Jay Freeman (saurik) | |
5 | ||
6 | # GNU Affero General Public License, Version 3 {{{ | |
7 | # | |
8 | # This program is free software: you can redistribute it and/or modify | |
9 | # it under the terms of the GNU Affero General Public License as published by | |
10 | # the Free Software Foundation, either version 3 of the License, or | |
11 | # (at your option) any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU Affero General Public License for more details. | |
17 | # | |
18 | # You should have received a copy of the GNU Affero General Public License | |
19 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | # }}} | |
21 | ||
22 | import sys | |
23 | ||
# One byte-trie per UTF-8 sequence length: trees[n - 1] collects every
# n-byte sequence seen on stdin, keyed level by level on the byte value.
trees = [dict(), dict(), dict(), dict()]

for line in sys.stdin:
    # Only the first 14 columns are examined; they hold either a single hex
    # code point or an inclusive "first..last" hex range.
    bounds = [int(end, 16) for end in line[0:14].rstrip(' \n').split('..')]
    if len(bounds) == 1:
        bounds.append(bounds[0])

    for point in range(bounds[0], bounds[1] + 1):
        # http://stackoverflow.com/questions/7105874/
        # Go through the unicode-escape codec rather than unichr()/chr(); the
        # ASCII bytes round-trip keeps this working on Python 2 and Python 3.
        escaped = ('\\U%08x' % point).encode('ascii')
        encoded = bytearray(escaped.decode('unicode-escape').encode('utf-8'))

        # Walk (and extend) the trie for this sequence length, one byte per
        # level; bytearray iteration yields ints on both Python versions.
        tree = trees[len(encoded) - 1]
        for unit in encoded:
            tree = tree.setdefault(unit, dict())

# Patterns produced by build(), in emission order.
items = []
45 | ||
def build(index, tree, units, out=None):
    """Emit byte-level patterns for one node of a UTF-8 byte trie.

    Arguments:
        index: number of trie levels below this node (0 for the last byte).
        tree:  dict mapping a byte value to the sub-trie that follows it.
        units: list of byte values already consumed to reach this node.
        out:   list receiving the generated patterns; defaults to the
               module-level ``items`` list.

    Returns True when this node sits below a prefix and has exactly
    0xc0 - 0x80 usable keys, so the caller can represent it with a trailing
    ``[\\x80-\\xbf]`` instead of a dedicated pattern.  Otherwise returns
    False, after appending one pattern to ``out`` if there was anything to
    match.
    """
    if out is None:
        out = items

    # Visit keys in ascending order so that adjacent bytes always coalesce
    # into ranges and the output does not depend on dict ordering.
    if index == 0:
        keys = sorted(tree)
    else:
        # Only children whose whole subtree is "full" stay in this node's
        # class; the others have already emitted their own patterns.
        keys = []
        for unit in sorted(tree):
            if build(index - 1, tree[unit], units + [unit], out):
                keys.append(unit)

    if not keys:
        return False
    # A top-level node (no prefix) has no parent to fold into, so it must
    # always emit its own pattern, even with exactly 64 keys.
    if units and len(keys) == 0xc0 - 0x80:
        return True

    pieces = ['\\x%02x' % unit for unit in units]
    pieces.append('[')

    # Collapse runs of consecutive byte values; the trailing None flushes the
    # final run.
    first = last = keys[0]
    for unit in keys[1:] + [None]:
        if unit is not None and unit == last + 1:
            last = unit
            continue

        pieces.append('\\x%02x' % first)
        if last != first:
            # A run of exactly two bytes is written without the dash.
            if last != first + 1:
                pieces.append('-')
            pieces.append('\\x%02x' % last)

        first = last = unit

    pieces.append(']')
    # Every level below this one accepts the full continuation range.
    pieces.append('[\\x80-\\xbf]' * index)

    out.append(''.join(pieces))
    return False
102 | ||
# trees[index] holds the (index + 1)-byte sequences, so index is also the
# number of trie levels below each root.
for index, tree in enumerate(trees):
    build(index, tree, [])

if len(sys.argv) < 2:
    sys.exit('usage: %s <name>' % sys.argv[0])
name = sys.argv[1]

# Emit "<name>_<n> <alternation>" lines, each a bit over 1000 characters of
# patterns at most, then a final "<name> ..." line that joins references to
# those chunks ("{<name>_<n>}") with whatever patterns were left over.
parts = []
part = []
length = 0
index = 0
for item in items:
    part.append(item)
    length += len(item) + 1  # + 1 accounts for the '|' separator
    if length > 1000:
        indexed = '%s_%d' % (name, index)
        index += 1
        # sys.stdout.write keeps the exact "a b\n" output of the old print
        # statement while also running on Python 3.
        sys.stdout.write('%s %s\n' % (indexed, '|'.join(part)))
        parts.append('{' + indexed + '}')
        part = []
        length = 0
parts += part
sys.stdout.write('%s %s\n' % (name, '|'.join(parts)))