X-Git-Url: https://git.saurik.com/cycript.git/blobdiff_plain/14bb86e8dcde2168dffcda5d95b280d7a345804c..cbd87cbe604bd2d734a6a4ce624b2ba0ce5ee940:/unicode.py?ds=inline diff --git a/unicode.py b/unicode.py deleted file mode 100755 index d5a10bc..0000000 --- a/unicode.py +++ /dev/null @@ -1,122 +0,0 @@ -#!/usr/bin/python - -# Cycript - Optimizing JavaScript Compiler/Runtime -# Copyright (C) 2009-2015 Jay Freeman (saurik) - -# GNU Affero General Public License, Version 3 {{{ -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. 
# }}}

# Reads Unicode code point ranges ("XXXX" or "XXXX..YYYY", one per line) from
# stdin and prints lexer definitions whose patterns match exactly the UTF-8
# byte sequences of those code points.  The definition name is argv[1].

import sys

# Number of distinct UTF-8 continuation bytes (0x80..0xbf).  A continuation
# level holding this many keys accepts every possible continuation byte.
CONTINUATIONS = 0xc0 - 0x80

# trees[n - 1] is a byte trie of every n-byte UTF-8 sequence that was loaded.
trees = [dict(), dict(), dict(), dict()]

# Patterns produced by build(), in emission order.
items = []


def encode(point):
    """Return the UTF-8 code units of code point `point` as a list of ints.

    Encoded by hand rather than through the codec machinery so the script
    behaves the same on Python 2 and Python 3: surrogate code points are
    encoded as ordinary three-byte sequences (what the Python 2 codec did)
    instead of raising.

    Raises ValueError when `point` is outside 0..0x10ffff.
    """
    if point < 0 or point > 0x10ffff:
        raise ValueError('code point out of range: %r' % (point,))
    if point < 0x80:
        return [point]
    if point < 0x800:
        return [0xc0 | (point >> 6),
                0x80 | (point & 0x3f)]
    if point < 0x10000:
        return [0xe0 | (point >> 12),
                0x80 | ((point >> 6) & 0x3f),
                0x80 | (point & 0x3f)]
    return [0xf0 | (point >> 18),
            0x80 | ((point >> 12) & 0x3f),
            0x80 | ((point >> 6) & 0x3f),
            0x80 | (point & 0x3f)]


def load(lines):
    """Insert every code point named by `lines` into the global `trees`.

    Only the first 14 columns of each line are examined; they must hold a
    hexadecimal code point or an inclusive "first..last" range.  Blank lines
    and lines starting with '#' are skipped.

    Raises ValueError when a field is not valid hexadecimal.
    """
    for line in lines:
        field = line[0:14].rstrip(' \n')
        if not field or field.startswith('#'):
            continue
        ends = [int(end, 16) for end in field.split('..')]
        if len(ends) == 1:
            ends.append(ends[0])
        for point in range(ends[0], ends[1] + 1):
            units = encode(point)
            tree = trees[len(units) - 1]
            for unit in units:
                tree = tree.setdefault(unit, dict())


def charclass(keys):
    """Render a non-empty ascending list of byte values as a bracket class.

    Consecutive runs collapse to "first-last"; a run of exactly two values
    is written as the two values with no dash between them.
    """
    out = ['[']
    first = last = keys[0]
    # The trailing None is a sentinel that flushes the final run.
    for unit in keys[1:] + [None]:
        if unit is not None and unit == last + 1:
            last = unit
            continue
        out.append('\\x%02x' % first)
        if first != last:
            if last != first + 1:
                out.append('-')
            out.append('\\x%02x' % last)
        first = last = unit
    out.append(']')
    return ''.join(out)


def build(index, tree, units):
    """Append patterns for the trie `tree` to the global `items`.

    index -- number of trie levels remaining below this one
    tree  -- dict mapping a byte value to the sub-trie that follows it
    units -- bytes already consumed on the path from the root to `tree`

    Returns True when `tree` accepts every continuation byte at this level
    and at all levels below it; nothing is emitted in that case so that the
    caller can fold this byte into its own character class.  Returns False
    otherwise, after appending any pattern that was needed.
    """
    # Keys are visited in ascending order: the run merging in charclass()
    # requires it, and dict iteration order gives no such guarantee.
    if index == 0:
        keys = sorted(tree)
    else:
        keys = [unit for unit in sorted(tree)
                if build(index - 1, tree[unit], units + [unit])]

    if not keys:
        return False
    # Only a continuation level (units non-empty) can be "full".  At the root
    # the keys are ASCII or lead bytes, and a root that happens to hold 64 of
    # them must still be emitted: nobody consumes the root's return value.
    if units and len(keys) == CONTINUATIONS:
        return True

    item = ''.join('\\x%02x' % unit for unit in units)
    item += charclass(keys)
    # Every level below a listed key is full, so any continuation byte fits.
    item += '[\\x80-\\xbf]' * index

    items.append(item)
    return False


def render(name, patterns, limit=1000):
    """Return the definition lines for `patterns` under the name `name`.

    Patterns are grouped into numbered helper definitions ("name_0",
    "name_1", ...), each closed as soon as its accumulated length exceeds
    `limit`.  The final line defines `name` as the alternation of references
    to those helpers plus whatever patterns were left over.
    """
    lines = []
    parts = []
    part = []
    length = 0
    index = 0
    for item in patterns:
        part.append(item)
        length += len(item) + 1
        if length > limit:
            indexed = name + '_' + str(index)
            index += 1
            lines.append(indexed + ' ' + '|'.join(part))
            parts.append('{' + indexed + '}')
            part = []
            length = 0
    parts += part
    lines.append(name + ' ' + '|'.join(parts))
    return lines


def main(argv=None, stdin=None, stdout=None):
    """Run the script; returns the process exit status.

    argv, stdin and stdout default to their `sys` counterparts.
    """
    argv = sys.argv if argv is None else argv
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    if len(argv) < 2:
        sys.stderr.write('usage: unicode.py <name> < ranges\n')
        return 2

    load(stdin)
    for index, tree in enumerate(trees):
        build(index, tree, [])
    for line in render(argv[1], items):
        stdout.write(line + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())