]> git.saurik.com Git - cycript.git/blob - unicode.py
Remove all non-%union fields from semantic values.
[cycript.git] / unicode.py
1 #!/usr/bin/python
2
3 # Cycript - Optimizing JavaScript Compiler/Runtime
4 # Copyright (C) 2009-2015 Jay Freeman (saurik)
5
6 # GNU Affero General Public License, Version 3 {{{
7 #
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU Affero General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
12 #
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Affero General Public License for more details.
17 #
18 # You should have received a copy of the GNU Affero General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
20 # }}}
21
22 import sys
23
# One trie per UTF-8 sequence length: trees[n - 1] collects every code point
# whose UTF-8 encoding is n bytes long.  Each trie is a nest of dicts keyed by
# byte value, lead byte first; the dict reached after the last byte is empty.
trees = [dict(), dict(), dict(), dict()]


def _utf8_units(point):
    """Return the UTF-8 encoding of code point *point* as a sequence of ints."""
    # Build the character from a "\UXXXXXXXX" escape instead of calling
    # unichr()/chr(), so code points above U+FFFF also work on narrow
    # Python 2 builds: http://stackoverflow.com/questions/7105874/
    # The ASCII round-trip yields a byte string on both Python 2 and 3, which
    # is what the 'unicode-escape' codec decodes from.
    escape = ("\\U%08x" % point).encode('ascii')
    encoded = escape.decode('unicode-escape').encode('utf-8')
    # NOTE: on Python 3 a surrogate code point (U+D800..U+DFFF) cannot be
    # encoded as UTF-8 and raises UnicodeEncodeError here.
    # bytearray iterates as integers on Python 2 and Python 3 alike.
    return bytearray(encoded)


for line in sys.stdin:
    # Only the first 14 columns are looked at: they hold either one hex code
    # point or a "first..last" range, padded with spaces.
    field = line[0:14].rstrip(' \n')
    ends = [int(end, 16) for end in field.split('..')]
    if len(ends) == 1:
        # A lone code point is the range from itself to itself.
        ends.append(ends[0])
    for point in range(ends[0], ends[1] + 1):
        units = _utf8_units(point)
        tree = trees[len(units) - 1]
        for unit in units:
            tree = tree.setdefault(unit, dict())
43
# Pattern fragments produced by build(), in the order they were generated.
items = []


def build(index, tree, units):
    """Append to ``items`` the byte patterns for one level of a byte trie.

    index -- how many more bytes follow the bytes that key *tree*
             (0 when the keys of *tree* are the last byte of a sequence).
    tree  -- dict mapping a byte value to the sub-trie for the bytes after it.
    units -- list of the byte values already consumed to reach *tree*;
             empty for a top-level call.

    Returns True, without emitting anything, when this level sits below a
    prefix and accepts all 64 values 0x80..0xbf followed by anything; the
    caller then represents it with a trailing ``[\\x80-\\xbf]``.  Otherwise
    returns False after appending at most one item of the form
    ``<prefix bytes>[<byte class>]`` followed by *index* copies of
    ``[\\x80-\\xbf]``.
    """
    if index == 0:
        # Sorted so that runs of consecutive bytes collapse into ranges no
        # matter in which order the trie was filled.
        keys = sorted(tree)
    else:
        # Keep only the bytes whose whole subtree is "any continuation
        # bytes"; every other subtree has emitted its own items already.
        keys = [unit for unit, subtree in sorted(tree.items())
                if build(index - 1, subtree, units + [unit])]

    if not keys:
        return False
    # The "full range" shortcut is only meaningful below a prefix.  At the top
    # level (no prefix) the keys are lead bytes and nobody consumes the return
    # value, so returning True there would silently drop 64 code points.
    if units and len(keys) == 0xc0 - 0x80:
        return True

    pieces = ['\\x%02x' % unit for unit in units]
    pieces.append('[')

    # Collapse ascending runs: "a-c" for three or more consecutive bytes,
    # "ab" for exactly two, "a" for one.  The trailing None flushes the
    # final run.
    first = last = keys[0]
    for unit in keys[1:] + [None]:
        if unit is not None and unit == last + 1:
            last = unit
            continue

        pieces.append('\\x%02x' % first)
        if first != last:
            if last != first + 1:
                pieces.append('-')
            pieces.append('\\x%02x' % last)

        first = last = unit

    pieces.append(']')
    # Every remaining byte position accepts any continuation byte.
    pieces.append('[\\x80-\\xbf]' * index)

    items.append(''.join(pieces))
    return False
95
# Walk each trie from its root.  The position of a trie in `trees` is handed
# to build() as its index, and the list of already-consumed units starts
# out empty.
for depth, root in enumerate(trees):
    build(depth, root, [])
98
# Print the collected items as lines of the form "NAME PATTERN", where
# PATTERN is a '|'-separated alternation.  To keep each line short, items are
# flushed into numbered sub-definitions NAME_0, NAME_1, ... once a group
# grows past 1000 characters; the final NAME line references them as
# {NAME_0}|{NAME_1}|... followed by whatever items were left over.
if len(sys.argv) < 2:
    sys.exit('usage: %s NAME' % sys.argv[0])
name = sys.argv[1]

parts = []
part = []
length = 0
index = 0
for item in items:
    part.append(item)
    # +1 accounts for the '|' separator that joins the items.
    length += len(item) + 1
    if length > 1000:
        indexed = name + '_' + str(index)
        index += 1
        # A single pre-formatted argument prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (indexed, '|'.join(part)))
        parts.append('{' + indexed + '}')
        part = []
        length = 0
parts += part
print('%s %s' % (name, '|'.join(parts)))