X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..b331163bffd790ced0e88b73f44f86d49ccc48a5:/icuSources/data/mappings/gb18030.ucm diff --git a/icuSources/data/mappings/gb18030.ucm b/icuSources/data/mappings/gb18030.ucm index a6d7cbd7..f8f0cb03 100644 --- a/icuSources/data/mappings/gb18030.ucm +++ b/icuSources/data/mappings/gb18030.ucm @@ -1,4 +1,4 @@ -# Copyright (C) 2000, International Business Machines Corporation and others. +# Copyright (C) 2000-2012, International Business Machines Corporation and others. # All Rights Reserved. # ICU codepage data for GB 18030 @@ -14,29 +14,42 @@ # Note that the entire block for the supplementary Unicode planes is # marked unassigned because they are handled algorithmically. # Similarly, some of the BMP mappings are marked as unassigned for the same reason. +# See http://userguide.icu-project.org/conversion/data#TOC-State-table-syntax-in-.ucm-files +# States 0..2: # Mostly assigned sequences, with branches in the lead bytes - 0-7f, 81:7, 82:8, 83:9, 84:a, 85-fe:4 - 30-39:2, 40-7e, 80-fe - 81-fe:3 +# The second line is commented out (and does not count) +# because the state table is hand-optimized and does not use what would be +# the natural path for the encoding scheme. + 0-7f, 81:6, 82:7, 83:8, 84:9, 85-fe:3 +# 30-39:2, 40-7e, 80-fe + 81-fe:2 30-39 -# All-unassigned 4-byte sequences - 30-39:5, 40-7e, 80-fe - 81-fe:6 +# States 3..5: All-unassigned 4-byte sequences. +# Do not change these states, or else the conversion table will grow significantly. 
+ 30-39:4, 40-7e, 80-fe + 81-fe:5 30-39.u +# States 6..9: # Some unassigned 4-byte sequences, one state for each of the lead bytes 81-84 # Each of these states branch on the second of four bytes; for the third and fourth bytes, -# unassigned sequences continue with state 5, assigned ones with state 2 +# unassigned sequences continue with state 4, assigned ones with state 1 - 30:2, 31-35:5, 36-39:2, 40-7e, 80-fe - 30-35:2, 36-39:5, 40-7e, 80-fe - 30-35:5, 36:2, 37-39:5, 40-7e, 80-fe - 30-31:2, 32-39:5, 40-7e, 80-fe + 30:1, 31-34:4, 35:a, 36-39:1, 40-7e, 80-fe + 30-35:1, 36-39:4, 40-7e, 80-fe + 30-35:4, 36:1, 37-39:4, 40-7e, 80-fe + 30-31:1, 32-39:4, 40-7e, 80-fe + +# State 0xa=10, reached from 81 35: Handle the new mapping U+E7C7 <-> 81 35 F4 37 +# (see changes between revisions 25802 and 29863), +# allow mappings for 81 35 F4 zz, +# but otherwise keep 81 35 xx yy going to "unassigned" states. + 81-fe:5, f4:2 # GB 18030 BMP mappings that are not handled algorithmically are # generated using gbmake4 and gbtoucm tools. Please see charset/source/gb18030/gb18030.html -# or http://oss.software.ibm.com/cvs/icu/~checkout~/charset/source/gb18030/gb18030.html +# or http://source.icu-project.org/repos/icu/data/trunk/charset/source/gb18030/gb18030.html # for more information. CHARMAP @@ -1147,6 +1160,7 @@ CHARMAP \xA7\xF1 |0 \x81\x30\xD2\x39 |0 \xA7\xD7 |0 + \xA8\xBC |0 \xA9\x5C |0 \x81\x36\xA5\x32 |0 \x81\x36\xA5\x33 |0 @@ -30052,7 +30066,7 @@ CHARMAP \xA8\x9E |0 \xA8\x9F |0 \xA8\xA0 |0 - \xA8\xBC |0 + \x81\x35\xF4\x37 |0 \x83\x36\xC8\x30 |0 \xA8\xC1 |0 \xA8\xC2 |0