-#!/usr/bin/python
-
-# Cycript - Optimizing JavaScript Compiler/Runtime
-# Copyright (C) 2009-2015 Jay Freeman (saurik)
-
-# GNU Affero General Public License, Version 3 {{{
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published by
-# the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-# }}}
-
-import sys
-
# One trie per UTF-8 sequence length: trees[n - 1] collects every n-byte
# sequence read from stdin as nested dicts keyed by integer byte value.
trees = [dict(), dict(), dict(), dict()]

for line in sys.stdin:
    # Only the first 14 columns are parsed. They hold either one hexadecimal
    # code point ("00AA") or an inclusive range ("0041..005A").
    field = line[0:14].rstrip(' \n')
    if not field:
        # A blank line carries no code point; int('', 16) would raise.
        continue

    bounds = field.split('..')
    if len(bounds) == 1:
        # A single code point is treated as a one-element range.
        bounds.append(bounds[0])
    bounds = [int(end, 16) for end in bounds]

    for point in range(bounds[0], bounds[1] + 1):
        # http://stackoverflow.com/questions/7105874/
        # Decoding a \UXXXXXXXX escape builds the character without calling
        # unichr()/chr(), and behaves the same on Python 2 and Python 3.
        escaped = ('\\U%08x' % point).encode('ascii')
        text = escaped.decode('unicode-escape')

        # bytearray iterates as integers on both Python 2 and Python 3, so
        # no ord() call is needed. 'surrogatepass' lets Python 3 encode lone
        # surrogates (as Python 2 does) instead of raising an error.
        encoded = bytearray(text.encode('utf-8', 'surrogatepass'))

        # Walk (and create as needed) one trie level per encoded byte.
        tree = trees[len(encoded) - 1]
        for unit in encoded:
            tree = tree.setdefault(unit, dict())
-
# Regex alternatives emitted by build(), in the order they were generated.
items = []


def _byte_class(keys):
    """Render ascending byte values as a bracket expression of hex escapes.

    A run of three or more consecutive values is written as "first-last";
    a run of exactly two is written as the two escapes back to back.
    *keys* must be a non-empty, sorted list of integers.
    """
    pieces = ['[']
    first = last = keys[0]
    # The trailing None is a sentinel that flushes the final run.
    for unit in keys[1:] + [None]:
        if unit is not None and unit == last + 1:
            last = unit
            continue
        pieces.append('\\x%02x' % first)
        if first != last:
            if last != first + 1:
                pieces.append('-')
            pieces.append('\\x%02x' % last)
        first = last = unit
    pieces.append(']')
    return ''.join(pieces)


def build(index, tree, units):
    """Emit regex alternatives for one node of a byte trie into ``items``.

    Args:
        index: Number of trie levels below this node. The same number of
            "any continuation byte" classes is appended to the emitted
            pattern.
        tree: Dict mapping an integer byte value to its child dict.
        units: List of the byte values on the path from the root to this
            node; they become the literal prefix of the emitted pattern.

    Returns:
        True when this node has a prefix and all 64 continuation bytes
        (0x80-0xbf) below it are completely covered. In that case nothing is
        emitted and the caller folds the node into its own byte class.
        False otherwise, including when a pattern was appended to ``items``.
    """
    if index == 0:
        keys = sorted(tree)
    else:
        # Only children whose whole subtree is covered stay in this node's
        # class; partially covered children emit their own patterns.
        keys = []
        for unit in sorted(tree):
            if build(index - 1, tree[unit], units + [unit]):
                keys.append(unit)

    if not keys:
        return False
    # Sorting above keeps the output deterministic and lets adjacent values
    # merge into ranges regardless of dict iteration order.
    # The "complete" shortcut is only valid below a prefix: a root node has
    # no parent to absorb it, so it must always emit its own pattern.
    if units and len(keys) == 0xc0 - 0x80:
        return True

    prefix = ''.join('\\x%02x' % unit for unit in units)
    items.append(prefix + _byte_class(keys) + '[\\x80-\\xbf]' * index)
    return False
-
# Convert each trie into regex alternatives; build() appends them to items.
# The trie at position `index` has `index` levels below its root.
for index, tree in enumerate(trees):
    build(index, tree, [])

# The name of the definition to generate is the first command-line argument.
name = sys.argv[1]

parts = []   # alternatives of the final "<name>" definition
part = []    # alternatives collected for the current "<name>_<N>" helper
length = 0   # characters in `part`, counting one '|' separator per item
index = 0    # number of the next helper definition
for item in items:
    part.append(item)
    length += len(item) + 1
    if length > 1000:
        # Flush an over-long group as its own helper definition and refer
        # to it from the final definition as {<name>_<N>}.
        indexed = name + '_' + str(index)
        index += 1
        # A single pre-formatted argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('%s %s' % (indexed, '|'.join(part)))
        parts.append('{' + indexed + '}')
        part = []
        length = 0

# Leftover alternatives that never reached the threshold go directly into
# the final definition.
parts += part
print('%s %s' % (name, '|'.join(parts)))