]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/my_Zawgyi.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / my_Zawgyi.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: my_Zawgyi.txt
5 # Generated from CLDR
6 #
7
8 # This transform converts Unicode Burmese text into Zawgyi font encoded
9 # form. Zawgyi is a popular, non-standard encoding scheme in Myanmar
10 # that uses the same code range as Myanmar Unicode but assigns different
11 # characters or glyphs to some codepoints. In addition to character remapping,
12 # context-based reordering of codepoints is needed to give readable
13 # output when the output is displayed with a Zawgyi font such as
14 # ZawgyiOne.ttf or ZawgyiOne2008.ttf.
15 #
16 # The transform is done in two main stages:
17 # (1) Map all Unicode codepoints to their Zawgyi counterparts (Phase 0 below).
18 # (2) Perform reordering and contextual adjustments (Phases 1-5 below, each introduced by "::Null;").
19 # Character classes used by the rules below: Modern Burmese digits & Unicode code points, plus classes over Zawgyi-side codes (names ending in Z).
20 $nondigits = [^\u1040-\u1049]; # anything except U+1040-U+1049 (MYANMAR DIGIT ZERO..NINE)
21 $consonant = [\u1000-\u1021]; # U+1000 (KA) through U+1021 (LETTER A)
22 $narrowconsonant = [\u1001\u1002\u1004\u1005\u1007\u100b-\u100e\u1012\u1013\u1015-\u1017\u1019\u101d\u1020\u1025\u1026\u108f]; # bases that receive the \u103b/\u107f/\u1081/\u1083 medial-ra outputs in Phase 2
23 $wideconsonant = [\u1000\u1003\u1006\u1009\u100a\u100f\u1010\u1011\u1018\u101c\u101e\u101f\u1021]; # bases that receive the \u107e/\u1080/\u1082/\u1084 medial-ra outputs in Phase 2
24 $widenya = [\u100a\u106b]; # U+100A (NNYA) and \u106b, the code Phase 2 substitutes for \u100A before a low sign
25 $othernya = [\u1009\u106a]; # U+1009 (NYA) and \u106a, the code Phase 2 substitutes for \u1009 before a low sign
26 $vowelsign = [\u102B-\u1030\u1032]; # dependent vowel signs, excluding \u1031
27 $vowelmedial = [\u102B-\u1030\u1032\u103c-\u103F]; # same set as $vowelsign plus \u103c-\u103F
28 $ukinzi = [\u1004\u101b\u105a]\u103A\u1039; # kinzi in Unicode input: one of NGA/RA/\u105a, then ASAT (U+103A), then VIRAMA (U+1039)
29 $medialraZ = [\u103b\u107e-\u1084]; # \u103b (what Phase 0 maps Unicode medial ra to) and the variant codes \u107e-\u1084
30 $lowsignZ = [\u102f\u1030\u1037\u103a\u103c\u103d\u1087-\u108a]; # presumably signs drawn below the base in Zawgyi - confirm against the font
31 $highsignZ = [\u102d\u102e\u1032\u1036\u1039\u103d-\u103e\u1064]; # presumably signs drawn above the base in Zawgyi; note \u103d is also in $lowsignZ
32 $subscriptitem = [\u1060-\u1063\u1064-\u1068\u106c\u106d\u1070-\u107c\u1085\u1093\u1096]; # codes produced by Phase 0 for kinzi (\u1064) and most VIRAMA + consonant pairs
33 $vowelsAndConsonants = [\u1000-\u102a]; # U+1000 through U+102A; used when moving \u1031 in front of its base
34 #### Phase 0: CODEPOINT MAPPING FROM UNICODE TO ZAWGYI. Kinzi rules first; where patterns share a prefix the longer one must be listed first, because the first matching rule wins.
35 $ukinzi ($consonant) \u103B \u102D \u102F > $1 \u103A \u1033 \u108B ; # Kinzi + medial ya + vowel signs I and U
36 $ukinzi ($consonant) \u103B \u102D > $1 \u103A \u108b ; # Kinzi + medial ya + vowel sign I
37 $ukinzi ($consonant) \u103B \u102E \u102F > $1 \u103A \u108C \u1033 ; # Kinzi + medial ya + vowel signs II and U
38 $ukinzi ($consonant) \u103B \u102E > $1 \u103A \u108C ; # Kinzi + medial ya + vowel sign II
39 $ukinzi ($consonant) \u103B \u1036 > $1 \u103A \u108D ; # Kinzi + medial ya + anusvara
40 $ukinzi ($consonant) \u103B > $1 \u103A \u1064 ; # Kinzi + medial ya alone; kept after the five longer \u103B rules, which it would otherwise shadow
41 $ukinzi ($consonant) \u102D \u1036 > $1 \u108e ; # Kinzi + vowel sign I + anusvara
42 $ukinzi ($consonant) \u102D > $1 \u108b ; # Kinzi + vowel sign I
43 $ukinzi ($consonant) \u102E > $1 \u108C ; # Kinzi + vowel sign II
44 $ukinzi ($consonant) \u1036 > $1 \u108D ; # Kinzi + anusvara
45 $ukinzi ($consonant) \u1031 > $1 \u1031 \u1064 ; # Kinzi + vowel sign E: \u1064 is emitted after \u1031
46 $ukinzi ($consonant) \u103c > $1 \u103b \u1064; # Kinzi + medial ra
47 $ukinzi \u102D > \u108B ; # Kinzi followed directly by a sign, with no consonant in between
48 $ukinzi \u102E > \u108C ;
49 $ukinzi \u1036 > \u108D ;
50 $ukinzi ($consonant) > $1 \u1064 ; # Fallback: plain kinzi becomes \u1064 after the consonant
51 \u1025 ($vowelsign) \u1038 > \u106A $1 \u1038 ; # LETTER U + vowel sign + VISARGA: the base is replaced by \u106A
52 \u1025 \u102f \u1036 > \u1025 \u1036 \u1033 ; # LETTER U + vowel sign U + ANUSVARA: signs are swapped and \u102f becomes \u1033
53 \u102D \u1036 > \u108E ; # vowel sign I + ANUSVARA become one code
54 # Some composed lower output: two-sign sequences become one code; listed before the single-code medial rules so the longer match wins
55 \u103d \u103e > \u108a ; # medial WA + medial HA
56 \u103e \u102f > \u1088 ; # medial HA + vowel sign U
57 \u103E \u1030 > \u1089 ; # medial HA + vowel sign UU
58 \u103A > \u1039 ; # ASAT; from here through \u103E each Unicode code moves down by one
59 \u103B > \u103A ; # medial YA
60 \u103C > \u103B ; # medial RA
61 \u103D > \u103C ; # medial WA
62 \u103E > \u103D ; # medial HA
63 \u103F > \u1086 ; # GREAT SA
64 ([\u1019]) \u103e \u1030 > $1 \u103d \u1034; # A special case with signs: MA + medial HA + vowel sign UU; matches at the MA, so it is tried before the \u103E \u1030 rule above can apply
65 \u102B \u103A > \u105A ; # TALL AA + ASAT become one code
66 \u1039 \u1010 \u103d > \u1096 ; # Very special case: VIRAMA + TA + medial WA become one code
67 \u1039 \u1000 > \u1060 ; # from here on: VIRAMA (U+1039) + consonant becomes a single Zawgyi code
68 \u1039 \u1001 > \u1061 ;
69 \u1039 \u1002 > \u1062 ;
70 \u1039 \u1003 > \u1063 ;
71 \u1039 \u1005 > \u1065 ;
72 \u1039 \u1006 > \u1067 ;
73 \u1039 \u1007 > \u1068 ;
74 \u1039 \u1008 > \u1069 ;
75 \u1039 \u100B > \u106C ;
76 \u1039 \u100C > \u106D ;
77 \u1039 \u100D > \u106E ;
78 \u100d \u1039 \u100E > \u106F ; # consumes the preceding \u100d as well; same output as the next rule
79 \u1039 \u100E > \u106F ;
80 \u1039 \u100F > \u1070 ;
81 \u1039 \u1010 > \u1072 ;
82 \u1039 \u1011 > \u1074 ;
83 \u1039 \u1012 > \u1075 ;
84 \u1039 \u1013 > \u1076 ;
85 \u1039 \u1014 > \u1077 ;
86 \u1039 \u1015 > \u1078 ;
87 \u1039 \u1016 > \u1079 ;
88 \u1039 \u1017 > \u107A ;
89 \u1039 \u1018 > \u1093 ;
90 \u1039 \u1019 > \u107C ;
91 \u1039 \u101C > \u1085 ;
92 \u100d\u1039\u100D > \u106E ; # the next four rules replace a whole base + VIRAMA + consonant stack with one code
93 \u100F\u1039\u100D > \u1091 ;
94 \u100B\u1039\u100C > \u1092 ;
95 \u100B\u1039\u100B > \u1097 ;
96 \u104E\u1004\u103A\u1038 > \u104E ; # SYMBOL AFOREMENTIONED + NGA + ASAT + VISARGA is reduced to \u104E alone
97 #### PHASE 1: Everything is now in Zawgyi code points. REORDERING RULES. The ::Null; below starts a new pass over the Phase 0 output.
98 ::Null;
99 # E Vowel + medial ra. Move the e vowel
100 ($consonant) \u103b \u1031 > \u1031 \u103b $1 ; # both \u1031 and \u103b end up in front of the consonant
101 ($consonant) \u103b > \u103b $1 ; # medial ra alone is moved in front of the consonant
102 ($consonant) \u103d \u1031 \u1037 > \u1031 $1 \u1094 \u103D ; # \u1037 is replaced by \u1094 and placed before \u103D
103 ($consonant) (\u108a) \u1031 > \u1031 $1 $2 ;
104 ($consonant) ([\u103a\u103d\u103e]+) \u1031 > \u1031 $1 $2 ; # \u1031 jumps over a run of one or more of these signs
105 # Ra + kinzi
106 ($consonant) \u1064 \u103b > \u103b $1 \u1064 ;
107 # E vowel plus medials (the \u103a and \u103d cases are already taken by the run rule above, so in effect this adds \u103c)
108 ($consonant) ([\u103a\u103c-\u103d]) \u1031 > \u1031 $1 $2 ;
109 # No medials intervening.
110 ($vowelsAndConsonants) \u1031 > \u1031 $1 ;
111 # Handle Na with lower modifiers: \u1014 becomes \u108f when a subscript or low sign follows, and a trailing \u1037 becomes \u1094.
112 \u1014 ($subscriptitem) > \u108f $1 ;
113 \u1014 ($lowsignZ) ($highsignZ) \u1037 > \u108f $1 $2 \u1094;
114 \u1014 ($highsignZ) ($lowsignZ) \u1037 > \u108f $1 $2 \u1094;
115 \u1014 ($highsignZ) \u1037 > \u1014 $1 \u1094; # only a high sign: \u1014 itself is kept
116 # Two medials
117 \u103a \u103c > \u107d \u103c;
118 # a special case. NOTE(review): \u1032 is in $highsignZ, so the "\u1014 ($highsignZ) \u1037" rule above already matches this input with the same output - confirm whether this rule is still needed.
119 \u1014 \u1032 \u1037 > \u1014 \u1032 \u1094;
120 \u1014 \u1037 > \u1014 \u1094;
121 \u1014 \u1032 ($lowsignZ) \u1037 > \u108f $1 \u1032 \u1094; # NOTE(review): appears shadowed by "\u1014 ($highsignZ) ($lowsignZ) \u1037" above, which emits \u1032 before the low sign instead - confirm the intended order
122 \u1014 ($highsignZ) ($lowsignZ) > \u108f $1 $2;
123 \u1014 ($lowsignZ) ($highsignZ) > \u108f $1 $2;
124 \u1014 ($lowsignZ) \u1037 > \u108f $1 \u1094;
125 \u1014 ($lowsignZ) > \u108f $1;
126 # Move 1037 dot to right with other descenders: \u1037 after a low sign (and any high signs) becomes \u1094.
127 ($lowsignZ) ($highsignZ*) \u1037 > $1 $2 \u1094;
128 ($nondigits) \u1040 ([\u102B-\u103F]) > $1 \u101D $2; # \u1040 (digit zero) between a non-digit and a sign in \u102B-\u103F is replaced by \u101D (letter WA)
129 # Handle lack of 104E ၎ MYANMAR SYMBOL AFOREMENTIONED: after a non-digit it is written as \u1044 (digit four)
130 ($nondigits) \u104e > $1 \u1044;
131 \u1031 \u1040 ($nondigits) > \u1031 \u101D $1; # \u1040 between \u1031 and a non-digit is likewise replaced by \u101D
132 \u1009 \u103A > \u1025 \u103A;
133 \u1025 \u102E > \u1026;
134 \u1037 \u103A > \u103A \u1037; # swap so that \u103A precedes \u1037
135 ([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) > $2 $1; # swap so that \u102D/\u102E/\u1032 precede \u102B/\u102C/\u102F/\u1030
136 # Medial plus vowel sign U: after a medial-ra form and consonant, \u102f is replaced by \u1033
137 ($medialraZ) ($consonant) \u102f > $1 $2 \u1033;
138 ## Phase 2: Further adjustments
139 ::Null;
140 # Handle consonant, subscripted consonant, medial ra: the trailing medial ra is dropped and a variant code chosen by base class is placed in front; every captured item is re-emitted
141 ($narrowconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1083 $1 $2 $3 ;
142 ($wideconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1084 $1 $2 $3 ; # $3 (the high sign) is kept, matching the narrow rule above
143 ($narrowconsonant) ($subscriptitem) $medialraZ > \u1081 $1 $2 ;
144 ($wideconsonant) ($subscriptitem) $medialraZ > \u1082 $1 $2 ;
145 \u103c \u1094 > \u103c \u1095 ; # after \u103c, \u1094 is replaced by \u1095
146 # Medial ra variations, context dependent: the leading medial-ra code is replaced according to base class (narrow/wide) and the signs that follow; longer contexts are listed first.
147 $medialraZ ($narrowconsonant) \u102d \u103d \u102f > \u107f $1 \u102d \u1087 \u1083 ;
148 $medialraZ ($wideconsonant) \u102d \u103d \u102f > \u1080 $1 \u102d \u1087 \u1083 ;
149 $medialraZ ($narrowconsonant) ($lowsignZ) ($highsignZ) > \u1083 $1 $2 $3 ; # low + high sign: \u1083 (narrow) / \u1084 (wide)
150 $medialraZ ($wideconsonant) ($lowsignZ) ($highsignZ) > \u1084 $1 $2 $3 ;
151 $medialraZ ($narrowconsonant) ($highsignZ) > \u107f $1 $2 ; # high sign only: \u107f (narrow) / \u1080 (wide)
152 $medialraZ ($wideconsonant) ($highsignZ) > \u1080 $1 $2 ;
153 $medialraZ ($narrowconsonant) \u1030 > \u103b $1 \u1034 ; # \u1030 is replaced by \u1034 after a medial-ra base
154 $medialraZ ($wideconsonant) \u1030 > \u107e $1 \u1034 ;
155 $medialraZ ($narrowconsonant) (\u102f) > \u103b $1 \u1033 ; # \u102f is replaced by \u1033; the capture $2 is not used in the output
156 $medialraZ ($wideconsonant) (\u102f) > \u107e $1 \u1033 ;
157 $medialraZ ($narrowconsonant) ($lowsignZ) > \u1081 $1 $2 ; # any other low sign: \u1081 (narrow) / \u1082 (wide)
158 $medialraZ ($wideconsonant) ($lowsignZ) > \u1082 $1 $2 ;
159 $medialraZ ($widenya) > \u1082 $1 ;
160 $medialraZ ($othernya) > \u103b \u106a ; # always emits \u106a, whichever member of $othernya matched
161 $medialraZ ($narrowconsonant) > \u103b $1 ; # no following sign: \u103b (narrow) / \u107e (wide)
162 $medialraZ ($wideconsonant) > \u107e $1 ;
163 \u1009 ($lowsignZ) > \u106a $1; # before a low sign \u1009 is replaced by \u106a
164 \u100A ($lowsignZ)> \u106B $1 ; ## NYA and NNYA: before a low sign \u100A is replaced by \u106B
165 \u103d \u102d > \u102d \u103d; # swap so that \u102d precedes \u103d
166 \u103a ($highsignZ) \u102f [\u1037\u1094\u1095] > \u103a $1 \u1033 \u1095; # after \u103a: \u102f becomes \u1033 and any of the three dot codes becomes \u1095
167 \u103a \u102f [\u1037\u1094\u1095] > \u103a \u1033 \u1095;
168 \u103a \u102f > \u103a \u1033;
169 # Kinzi combo: \u1064 followed by \u102e becomes the single code \u108c
170 \u1064 \u102e > \u108c ;
171 ##### Phase 3: a further pass that reorders sign sequences and composes or substitutes a few codes
172 ::Null;
173 ([\u103C\u103D\u103E]+) \u103B > \u103B $1; # \u103B is moved ahead of a preceding run of \u103C-\u103E
174 ([\u103D\u103E]+) \u103C > \u103C $1; # \u103C is moved ahead of a preceding run of \u103D/\u103E
175 \u103E\u103D > \u103D\u103E ; # swap so that \u103D precedes \u103E
176 \u1037 ([\u102D-\u1030\u1032\u1036]) > $1 \u1037; # \u1037 is moved after a following vowel sign or \u1036
177 ($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant)> $1 \u103A $2 $3; # between two consonants, \u103A is moved ahead of the single sign that preceded it
178 # Combine vowel and consonant signs
179 \u103d \u102f > \u1088;
180 \u1033 \u1094 > \u1033 \u1095; # Wider spacing on lower dot
181 ($medialraZ) ($consonant) ($highsignZ) \u102f > $1 $2 $3 \u1033; # after medial-ra form + consonant + high sign, \u102f is replaced by \u1033
182 ##### Phase 4. More reorderings of medials, run as a separate pass
183 ::Null;
184 ([\u103D\u103E]) \u103C > \u103C $1; # \u103C is moved ahead of a single preceding \u103D or \u103E
185 \u103E\u103D > \u103D\u103E ; # swap so that \u103D precedes \u103E
186 \u1038 ($vowelmedial) > $1 \u1038; # \u1038 is moved after a following vowel sign or medial
187 \u1038 ([\u1036\u1037\u103A]) > $1 \u1038; # ... and after a following \u1036, \u1037 or \u103A
188 \u1036 \u102f > \u102f \u1036; # NOTE(review): Phase 5 contains the reverse rule (\u102f \u1036 > \u1036 \u102f), which undoes this swap - confirm which order is intended
189 \u103a ([\u1064\u108b-\u108e]) \u102d \u102f > \u103a $1 \u102d \u1033; # after \u103a (optionally with a kinzi-type code) and \u102d, \u102f is replaced by \u1033
190 \u103a \u102d \u102f > \u103a \u102d \u1033;
191 #### Phase 5: final pass - reorders a few sign sequences, drops some \u103A occurrences, and collapses repeated signs
192 ::Null;
193 ($consonant) \u103B \u103A > $1 \u103A \u103B; # swap so that \u103A precedes \u103B after a consonant
194 ([\u103C\u103D\u103E]) \u103B > \u103B $1; # \u103B is moved ahead of a single preceding \u103C-\u103E
195 ([\u103D\u103E]) \u103C > \u103C $1; # \u103C is moved ahead of a single preceding \u103D or \u103E
196 \u103E\u103D > \u103D\u103E ; # swap so that \u103D precedes \u103E
197 ([\u102D-\u1030\u1032]) \u103A ($consonant) \u103A > $1 $2 \u103A; # the first of two \u103A around a consonant is dropped
198 \u102D \u103A > \u102D; # \u103A directly after \u102D, \u102E or \u102F is dropped
199 \u102E \u103A > \u102E;
200 \u102F \u103A > \u102F;
201 \u102D \u102E > \u102E; # \u102D before \u102E is dropped
202 \u102F \u1030 > \u102F; # \u1030 after \u102F is dropped
203 \u102B \u102B+ > \u102B; # from here through \u103E: a run of two or more of the same sign collapses to one
204 \u102C \u102C+ > \u102C;
205 \u102D \u102D+ > \u102D;
206 \u102E \u102E+ > \u102E;
207 \u102F \u102F+ > \u102F;
208 \u1030 \u1030+ > \u1030;
209 \u1031 \u1031+ > \u1031;
210 \u1032 \u1032+ > \u1032;
211 \u1036 \u1036+ > \u1036;
212 \u103A \u103A+ > \u103A;
213 \u103B \u103B+ > \u103B;
214 \u103C \u103C+ > \u103C;
215 \u103D \u103D+ > \u103D;
216 \u103E \u103E+ > \u103E;
217 # Visually identical orderings - standardize: each pair is rewritten into one fixed order
218 \u102f \u102D > \u102D \u102f ;
219 \u102f \u1036 > \u1036 \u102f ;
220 \u1039 \u1037 > \u1037 \u1039 ;
221 \u103c \u1032 > \u1032 \u103c ;
222 \u103c \u102e > \u102e \u103c ;
223 \u103d \u1088 > \u1088 ; # \u103d directly before \u1088 is dropped
224