]>
git.saurik.com Git - cycript.git/blob - unicode-l.py
3 # Cycript - The Truly Universal Scripting Language
4 # Copyright (C) 2009-2016 Jay Freeman (saurik)
6 # GNU Affero General Public License, Version 3 {{{
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU Affero General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU Affero General Public License for more details.
18 # You should have received a copy of the GNU Affero General Public License
19 # along with this program. If not, see <http://www.gnu.org/licenses/>.
26 trees
= [dict(), dict(), dict(), dict(), dict()]
30 tree
= trees
[len(point
) - 1]
33 tree
= tree
.setdefault(unit
, dict())
35 def insertmore(point
, prefix
=''):
41 insertmore(point
, prefix
+ next
)
45 insertmore(point
, prefix
+ upper
)
47 for line
in sys
.stdin
:
49 line
= line
.rstrip(' \n')
50 line
= line
.split('..')
53 line
= [int(end
, 16) for end
in line
]
54 for point
in range(line
[0], line
[1] + 1):
56 point
= format(point
, 'x')
59 # http://stackoverflow.com/questions/7105874/
60 point
= "\\U%08x" % point
61 point
= point
.decode('unicode-escape')
62 point
= point
.encode('utf-8')
69 if ord('A') <= value
<= ord('Z') or ord('a') <= value
<= ord('z') or ord('0') <= value
<= ord('9'):
71 return '\\x%02x' % value
73 def build(index
, tree
, units
, wrap
=()):
75 keys
= sorted(tree
.keys())
78 for unit
, tree
in sorted(tree
.items()):
79 if build(index
- 1, tree
, units
+ [unit
], wrap
):
86 if len(keys
) == 10 + 6 + 6:
89 if len(keys
) == 0xc0 - 0x80:
100 assert len(keys
) != 0
101 for unit
in keys
+ [-1]:
111 item
+= encode(first
)
113 if last
!= first
+ 1:
124 item
+= '[0-9A-Fa-f]'
126 item
+= '[\\x80-\\xbf]'
128 item
+= '{' + str(index) + '}'
131 item
= item
.replace('[\\x00-\\x7f]', '{U1}')
132 item
= item
.replace('[\\x80-\\xbf]', '{U0}')
133 item
= item
.replace('[\\xc2-\\xdf]', '{U2}')
134 item
= item
.replace('[\\xe0-\\xef]', '{U3}')
135 item
= item
.replace('[\\xf0-\\xf4]', '{U4}')
137 count
= len(units
) + 1 + index
144 wrap
= ('0' * (4 - count
), '')
146 items
.append(wrap
[0] + item
+ wrap
[1])
149 for index
, tree
in enumerate(trees
):
150 build(index
, tree
, [])
152 build(index
, tree
, [], ('\\{0*', '\\}'))
161 length
+= len(item
) + 1
163 indexed
= name
+ '_' + str(index
)
165 print indexed
, '|'.join(part
)
166 parts
+= ['{' + indexed + '}']
170 print name
, '|'.join(parts
)