]>
Commit | Line | Data |
---|---|---|
ee6c04ef JF |
1 | #!/usr/bin/python |
2 | ||
7341eedb JF |
3 | # Cycript - The Truly Universal Scripting Language |
4 | # Copyright (C) 2009-2016 Jay Freeman (saurik) | |
ee6c04ef JF |
5 | |
6 | # GNU Affero General Public License, Version 3 {{{ | |
7 | # | |
8 | # This program is free software: you can redistribute it and/or modify | |
9 | # it under the terms of the GNU Affero General Public License as published by | |
10 | # the Free Software Foundation, either version 3 of the License, or | |
11 | # (at your option) any later version. | |
12 | # | |
13 | # This program is distributed in the hope that it will be useful, | |
14 | # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | # GNU Affero General Public License for more details. | |
17 | # | |
18 | # You should have received a copy of the GNU Affero General Public License | |
19 | # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
20 | # }}} | |
21 | ||
22 | import sys | |
23 | ||
cbd87cbe JF |
# Selects what a "unit" is. When False, units are the bytes of each code
# point's UTF-8 encoding; when True, units are the characters of its
# hexadecimal spelling (with upper-case variants of the letters added).
escape = False

# trees[n - 1] is the root of a trie holding every sequence that is exactly n
# units long; each level maps ord(unit) to the dict for the units after it.
# Six slots are needed: UTF-8 uses at most 4 bytes, but the hexadecimal
# spelling of a code point can be 6 digits long (up to 10ffff), and insert()
# indexes this list by sequence length.
trees = [dict(), dict(), dict(), dict(), dict(), dict()]
27 | ||
def insert(point):
    """Record one sequence of units in the trie chosen by its length.

    point -- iterable of one-character strings (a UTF-8 byte string or a
             string of hexadecimal digits, as supplied by the callers in
             this file).

    The trie root is trees[len(point) - 1]; one nested dict is created (or
    reused) per unit, keyed by the unit's ordinal. Nothing is returned.
    """
    ordinals = [ord(unit) for unit in point]
    node = trees[len(ordinals) - 1]
    for ordinal in ordinals:
        # setdefault descends into an existing branch or starts a new one.
        node = node.setdefault(ordinal, {})
34 | ||
def insertmore(point, prefix=''):
    """Insert ``point`` together with every upper-case variation of it.

    point  -- the characters still to be processed.
    prefix -- the characters already chosen; callers normally omit it.

    Each character is tried as written and, when upper-casing changes it,
    also in its upper-case form, so all mixed-case spellings are passed to
    insert().
    """
    if len(point) == 0:
        # Every position has been decided: store the finished spelling.
        return insert(prefix)

    head, rest = point[0], point[1:]

    choices = [head]
    shouted = head.upper()
    if shouted != head:
        choices.append(shouted)

    for choice in choices:
        insertmore(rest, prefix + choice)
ee6c04ef JF |
46 | |
# Each input line starts with a hexadecimal code point ("XXXX") or an
# inclusive range of them ("XXXX..YYYY"); record every code point covered.
for row in sys.stdin:
    # Only the first 14 columns are examined; the rest of the line is ignored.
    bounds = row[:14].rstrip(' \n').split('..')
    if len(bounds) == 1:
        # A lone code point is a range that starts and ends on itself.
        bounds.append(bounds[0])
    bounds = [int(bound, 16) for bound in bounds]

    for point in range(bounds[0], bounds[1] + 1):
        if escape:
            # Units are the hexadecimal digits, in every letter case.
            insertmore('%x' % point)
            continue

        # Units are the UTF-8 bytes. The code point is turned into a
        # character via an escape sequence, see
        # http://stackoverflow.com/questions/7105874/
        literal = "\\U%08x" % point
        insert(literal.decode('unicode-escape').encode('utf-8'))

# Patterns appended by build(), in the order they were generated.
items = list()
66 | ||
cbd87cbe JF |
def encode(value):
    """Return the pattern text for a single unit given by its ordinal.

    In escape mode ASCII letters and digits are returned as themselves;
    every other value, and every value when escape mode is off, is returned
    as a two-digit backslash-x hexadecimal escape.
    """
    if escape:
        for low, high in ('AZ', 'az', '09'):
            if ord(low) <= value <= ord(high):
                return chr(value)
    return '\\x%02x' % value
72 | ||
def build(index, tree, units, wrap=()):
    """Append to ``items`` the patterns matching everything under ``tree``.

    index -- how many more units follow a key of ``tree``; 0 means the keys
             of ``tree`` are the final unit of each sequence.
    tree  -- dict mapping a unit ordinal to the dict for the units after it.
    units -- list of the unit ordinals consumed on the way to ``tree``;
             empty for the top-level call.
    wrap  -- (prefix, suffix) strings placed around each emitted pattern.
             The default () picks them automatically: nothing is added when
             escape mode is off; in escape mode the pattern is left-padded
             with zeros to 4 units, and patterns longer than 4 units are
             not emitted.

    Returns True when this node has a parent and covers every possible unit
    (all 22 hexadecimal digit spellings in escape mode, otherwise all 64
    values of 0x80-0xbf). Nothing is appended in that case; the caller is
    expected to fold the node into its own character class, followed by the
    generic "any unit" class. Returns False in every other case, whether or
    not a pattern was appended.
    """
    if index == 0:
        keys = sorted(tree.keys())
    else:
        # Keep only the children that are complete; incomplete ones have
        # already emitted their own, more specific, patterns.
        keys = []
        for unit, subtree in sorted(tree.items()):
            if build(index - 1, subtree, units + [unit], wrap):
                keys.append(unit)

    if not keys:
        return False

    if escape:
        complete = 10 + 6 + 6     # 0-9, a-f and A-F
    else:
        complete = 0xc0 - 0x80    # every value in 0x80-0xbf
    # Only defer to the caller when there is one. The top-level call has no
    # parent to absorb the set and its return value is ignored, so returning
    # early there would silently drop the whole set.
    if units and len(keys) == complete:
        return True

    # Collapse the sorted keys into runs of consecutive values.
    runs = []
    for unit in keys:
        if runs and unit == runs[-1][1] + 1:
            runs[-1][1] = unit
        else:
            runs.append([unit, unit])

    pieces = [encode(unit) for unit in units]
    pieces.append('[')
    for first, last in runs:
        pieces.append(encode(first))
        if first != last:
            # A run of exactly two values is written without the dash.
            if last != first + 1:
                pieces.append('-')
            pieces.append(encode(last))
    pieces.append(']')
    item = ''.join(pieces)

    if index != 0:
        # The remaining units may be anything, since only complete children
        # were kept above.
        item += '[0-9A-Fa-f]' if escape else '[\\x80-\\xbf]'
        if index != 1:
            item += '{' + str(index) + '}'

    # Total number of units matched by this pattern.
    count = len(units) + 1 + index
    if wrap == ():
        if not escape:
            wrap = ('', '')
        elif count > 4:
            return False
        else:
            wrap = ('0' * (4 - count), '')

    items.append(wrap[0] + item + wrap[1])
    return False
148 | ||
# Generate the patterns for every trie. In escape mode each trie is walked a
# second time with the patterns wrapped in '\{0*' ... '\}'.
for depth, root in enumerate(trees):
    build(depth, root, [])
    if escape:
        build(depth, root, [], ('\\{0*', '\\}'))

# Write the patterns as named definitions. Whenever the alternatives gathered
# so far exceed 1000 characters they are written out as "<name>_<n>" and
# replaced by a '{<name>_<n>}' reference; the final "<name>" line joins those
# references with whatever alternatives are left over.
name = sys.argv[1]
references = []
pending = []
pending_size = 0
serial = 0
for item in items:
    pending.append(item)
    pending_size += len(item) + 1
    if pending_size <= 1000:
        continue
    label = '%s_%d' % (name, serial)
    serial += 1
    sys.stdout.write('%s %s\n' % (label, '|'.join(pending)))
    references.append('{' + label + '}')
    pending = []
    pending_size = 0
references.extend(pending)
sys.stdout.write('%s %s\n' % (name, '|'.join(references)))