]>
git.saurik.com Git - cycript.git/blob - unicode.py
3 # Cycript - Optimizing JavaScript Compiler/Runtime
4 # Copyright (C) 2009-2015 Jay Freeman (saurik)
6 # GNU Affero General Public License, Version 3 {{{
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU Affero General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Affero General Public License for more details.
18 # You should have received a copy of the GNU Affero General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
24 trees
= [dict(), dict(), dict(), dict()]
26 for line
in sys
.stdin
:
28 line
= line
.rstrip(' \n')
29 line
= line
.split('..')
32 line
= [int(end
, 16) for end
in line
]
33 for point
in range(line
[0], line
[1] + 1):
34 # http://stackoverflow.com/questions/7105874/
35 point
= "\\U%08x" % point
36 point
= point
.decode('unicode-escape')
37 point
= point
.encode('utf-8')
39 tree
= trees
[len(point
) - 1]
42 tree
= tree
.setdefault(unit
, dict())
46 def build(index
, tree
, units
):
51 for unit
, tree
in tree
.iteritems():
52 if build(index
- 1, tree
, units
+ [unit
]):
57 if len(keys
) == 0xc0 - 0x80:
62 item
+= '\\x%02x' % unit
69 for unit
in keys
+ [-1]:
79 item
+= '\\x%02x' % first
83 item
+= '\\x%02x' % last
90 for i
in range(0, index
):
91 item
+= '[\\x80-\\xbf]'
94 item
= item
.replace('[\\x00-\\x7f]', '{U1}')
95 item
= item
.replace('[\\x80-\\xbf]', '{U0}')
96 item
= item
.replace('[\\xc2-\\xdf]', '{U2}')
97 item
= item
.replace('[\\xe0-\\xef]', '{U3}')
98 item
= item
.replace('[\\xf0-\\xf4]', '{U4}')
103 for index
, tree
in enumerate(trees
):
104 build(index
, tree
, [])
113 length
+= len(item
) + 1
115 indexed
= name
+ '_' + str(index
)
117 print indexed
, '|'.join(part
)
118 parts
+= ['{' + indexed + '}']
122 print name
, '|'.join(parts
)