]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
4 | # File: my_Zawgyi.txt | |
5 | # Generated from CLDR | |
6 | # | |
7 | ||
8 | # This transform converts Unicode Burmese text into Zawgyi font encoded | |
9 | # form. Zawgyi is a popular, non-standard encoding scheme in Myanmar | |
10 | # that uses the same code range as Myanmar Unicode but assigns different | |
11 | # characters or glyphs to some codepoints. In addition to character remapping, | |
12 | # context-based reordering of codepoints is needed to give readable | |
13 | # output when the output is displayed with a Zawgyi font such as | |
14 | # ZawgyiOne.ttf or ZawgyiOne2008.ttf. | |
15 | # | |
16 | # The transform is done in two main stages: | |
17 | # (1) Map all Unicode codepoints to their Zawgyi counterparts. | |
18 | # (2) Perform reordering. | |
19 | # Modern Burmese digits & Unicode code points. | |
20 | $nondigits = [^\u1040-\u1049]; # any character outside the digit range U+1040-U+1049 | |
21 | $consonant = [\u1000-\u1021]; # base-letter range captured by most of the reordering rules | |
22 | $narrowconsonant = [\u1001\u1002\u1004\u1005\u1007\u100b-\u100e\u1012\u1013\u1015-\u1017\u1019\u101d\u1020\u1025\u1026\u108f]; # bases that select the U+103B/107F/1081/1083 medial-ra outputs in Phase 2; note U+1025, U+1026 and U+108F lie outside $consonant | |
23 | $wideconsonant = [\u1000\u1003\u1006\u1009\u100a\u100f\u1010\u1011\u1018\u101c\u101e\u101f\u1021]; # bases that select the U+107E/1080/1082/1084 medial-ra outputs in Phase 2 | |
24 | $widenya = [\u100a\u106b]; # matched by the medial-ra + widenya rule in Phase 2 | |
25 | $othernya = [\u1009\u106a]; # matched by the medial-ra + othernya rule in Phase 2 | |
26 | $vowelsign = [\u102B-\u1030\u1032]; # used by the U+1025 + vowel sign + U+1038 rule in Phase 0 | |
27 | $vowelmedial = [\u102B-\u1030\u1032\u103c-\u103F]; # $vowelsign plus U+103C-U+103F; used when moving U+1038 in Phase 4 | |
28 | $ukinzi = [\u1004\u101b\u105a]\u103A\u1039; # a three-code-point sequence (not a set): one of U+1004/101B/105A, then U+103A U+1039 | |
29 | $medialraZ = [\u103b\u107e-\u1084]; # medial-ra code points as they appear after Phase 0 (the Z suffix presumably means Zawgyi-side values) | |
30 | $lowsignZ = [\u102f\u1030\u1037\u103a\u103c\u103d\u1087-\u108a]; # signs treated as below-base by the Na and medial-ra rules | |
31 | $highsignZ = [\u102d\u102e\u1032\u1036\u1039\u103d-\u103e\u1064]; # signs treated as above-base. NOTE(review): U+103D is also in $lowsignZ, so rules that test $highsignZ first win for it; confirm the U+103D-U+103E range is intended | |
32 | $subscriptitem = [\u1060-\u1063\u1064-\u1068\u106c\u106d\u1070-\u107c\u1085\u1093\u1096]; # the first two ranges are contiguous (U+1060-U+1068). NOTE(review): U+1069, which Phase 0 emits for U+1039 U+1008, is not in this set; confirm | |
33 | $vowelsAndConsonants = [\u1000-\u102a]; # used by the last E-vowel rule in Phase 1 (no medial between base and U+1031) | |
34 | #### Phase 0: CODEPOINT MAPPING FROM UNICODE TO ZAWGYI. Kinzi ($ukinzi) rules come first. Within one pass the first rule that matches at the cursor wins, so longer patterns must be listed before their shorter prefixes. | |
35 | $ukinzi ($consonant) \u102D \u1036 > $1 \u108e ; # longer U+102D U+1036 form before the plain U+102D rule | |
36 | $ukinzi ($consonant) \u102D > $1 \u108b ; | |
37 | $ukinzi ($consonant) \u102E > $1 \u108C ; | |
38 | $ukinzi ($consonant) \u1036 > $1 \u108D ; | |
39 | $ukinzi ($consonant) \u1031 > $1 \u1031 \u1064 ; | |
40 | $ukinzi ($consonant) \u103B \u102D \u102F > $1 \u103A \u1033 \u108B ; # kinzi + base + U+103B + vowel signs: most specific forms first | |
41 | $ukinzi ($consonant) \u103B \u102D > $1 \u103A \u108b ; | |
42 | $ukinzi ($consonant) \u103B \u102E \u102F > $1 \u103A \u108C \u1033 ; | |
43 | $ukinzi ($consonant) \u103B \u102E > $1 \u103A \u108C ; | |
44 | $ukinzi ($consonant) \u103B \u1036 > $1 \u103A \u108D ; | |
45 | $ukinzi ($consonant) \u103B > $1 \u103A \u1064 ; # general U+103B fallback; it must follow the five longer U+103B rules above or it matches first and they never fire | |
46 | $ukinzi ($consonant) \u103c > $1 \u103b \u1064; # Kinzi + medial ra | |
47 | $ukinzi \u102D > \u108B ; # kinzi directly followed by a sign, with no captured base | |
48 | $ukinzi \u102E > \u108C ; | |
49 | $ukinzi \u1036 > \u108D ; | |
50 | $ukinzi ($consonant) > $1 \u1064 ; # plain kinzi + base: emit the base, then U+1064 | |
51 | \u1025 ($vowelsign) \u1038 > \u106A $1 \u1038 ; # U+1025 + vowel sign + U+1038: U+1025 becomes U+106A, the rest is kept | |
52 | \u1025 \u102f \u1036 > \u1025 \u1036 \u1033 ; # U+1036 moves ahead and U+102F is replaced by U+1033 | |
53 | \u102D \u1036 > \u108E ; # two signs collapse to the single code point U+108E | |
54 | # Some composed lower output: pairs of signs that map to one code point. They are listed before the one-to-one shifts below, which would otherwise consume the first sign of each pair. | |
55 | \u103d \u103e > \u108a ; | |
56 | \u103e \u102f > \u1088 ; | |
57 | \u103E \u1030 > \u1089 ; | |
58 | \u103A > \u1039 ; # one-to-one remapping of U+103A-U+103F starts here | |
59 | \u103B > \u103A ; | |
60 | \u103C > \u103B ; | |
61 | \u103D > \u103C ; | |
62 | \u103E > \u103D ; | |
63 | \u103F > \u1086 ; | |
64 | ([\u1019]) \u103e \u1030 > $1 \u103d \u1034; # A special case with signs: after U+1019 the output is U+103D U+1034 rather than U+1089. The match starts at U+1019, so it is tried before the cursor reaches U+103E. | |
65 | \u102B \u103A > \u105A ; # the pair becomes the single code point U+105A; the match starts at U+102B, ahead of the U+103A shift above | |
66 | \u1039 \u1010 \u103d > \u1096 ; # Very special case: three code points become U+1096; listed before the plain U+1039 U+1010 rule so the longer pattern is tried first | |
67 | \u1039 \u1000 > \u1060 ; # from here: U+1039 + letter pairs, each replaced by a single code point | |
68 | \u1039 \u1001 > \u1061 ; | |
69 | \u1039 \u1002 > \u1062 ; | |
70 | \u1039 \u1003 > \u1063 ; | |
71 | \u1039 \u1005 > \u1065 ; | |
72 | \u1039 \u1006 > \u1067 ; | |
73 | \u1039 \u1007 > \u1068 ; | |
74 | \u1039 \u1008 > \u1069 ; | |
75 | \u1039 \u100B > \u106C ; | |
76 | \u1039 \u100C > \u106D ; | |
77 | \u1039 \u100D > \u106E ; | |
78 | \u100d \u1039 \u100E > \u106F ; # also consumes the leading U+100D; same output code point as the next rule | |
79 | \u1039 \u100E > \u106F ; | |
80 | \u1039 \u100F > \u1070 ; | |
81 | \u1039 \u1010 > \u1072 ; | |
82 | \u1039 \u1011 > \u1074 ; | |
83 | \u1039 \u1012 > \u1075 ; | |
84 | \u1039 \u1013 > \u1076 ; | |
85 | \u1039 \u1014 > \u1077 ; | |
86 | \u1039 \u1015 > \u1078 ; | |
87 | \u1039 \u1016 > \u1079 ; | |
88 | \u1039 \u1017 > \u107A ; | |
89 | \u1039 \u1018 > \u1093 ; | |
90 | \u1039 \u1019 > \u107C ; | |
91 | \u1039 \u101C > \u1085 ; | |
92 | \u100d\u1039\u100D > \u106E ; # three-code-point stacks replaced as a unit; the leading letter is consumed, so the match starts one position before the U+1039 rules above | |
93 | \u100F\u1039\u100D > \u1091 ; | |
94 | \u100B\u1039\u100C > \u1092 ; | |
95 | \u100B\u1039\u100B > \u1097 ; | |
96 | \u104E\u1004\u103A\u1038 > \u104E ; # the four-code-point sequence starting with U+104E is shortened to U+104E alone | |
97 | #### PHASE 1: Everything is now in Zawgyi code points. REORDERING RULES. | |
98 | ::Null; # pass boundary: the rules below run over the complete output of Phase 0 | |
99 | # E Vowel + medial ra. Move the e vowel (U+1031) and the medial ra (U+103B) in front of the base. | |
100 | ($consonant) \u103b \u1031 > \u1031 \u103b $1 ; | |
101 | ($consonant) \u103b > \u103b $1 ; # no U+1031 present: only U+103B moves in front | |
102 | ($consonant) \u103d \u1031 \u1037 > \u1031 $1 \u1094 \u103D ; # U+1037 is replaced by U+1094 and placed before U+103D | |
103 | ($consonant) (\u108a) \u1031 > \u1031 $1 $2 ; | |
104 | ($consonant) ([\u103a\u103d\u103e]+) \u1031 > \u1031 $1 $2 ; # one or more of these medials between base and U+1031 | |
105 | # Ra + kinzi: U+103B moves in front of the base, U+1064 stays after it | |
106 | ($consonant) \u1064 \u103b > \u103b $1 \u1064 ; | |
107 | # E vowel plus medials. Only U+103C is new here: a single U+103A or U+103D before U+1031 is already taken by the one-or-more rule above. | |
108 | ($consonant) ([\u103a\u103c-\u103d]) \u1031 > \u1031 $1 $2 ; | |
109 | # No medials intervening. | |
110 | ($vowelsAndConsonants) \u1031 > \u1031 $1 ; | |
111 | # Handle Na with lower modifiers. U+1014 is replaced by U+108F when a subscript or low sign follows; a trailing U+1037 is replaced by U+1094. | |
112 | \u1014 ($subscriptitem) > \u108f $1 ; | |
113 | \u1014 ($lowsignZ) ($highsignZ) \u1037 > \u108f $1 $2 \u1094; | |
114 | \u1014 ($highsignZ) ($lowsignZ) \u1037 > \u108f $1 $2 \u1094; | |
115 | \u1014 ($highsignZ) \u1037 > \u1014 $1 \u1094; # high sign only: U+1014 is kept, only the dot changes | |
116 | # Two medials | |
117 | \u103a \u103c > \u107d \u103c; | |
118 | # a special case | |
119 | \u1014 \u1032 \u1037 > \u1014 \u1032 \u1094; # NOTE(review): U+1032 is in $highsignZ, so the high-sign + U+1037 rule above already matches this input with the same output | |
120 | \u1014 \u1037 > \u1014 \u1094; # listed before the low-sign rules below, which would otherwise match U+1037 as a $lowsignZ member | |
121 | \u1014 \u1032 ($lowsignZ) \u1037 > \u108f $1 \u1032 \u1094; # NOTE(review): appears unreachable; the ($highsignZ) ($lowsignZ) U+1037 rule above matches the same input first and emits the signs in the other order. Confirm which order is wanted. | |
122 | \u1014 ($highsignZ) ($lowsignZ) > \u108f $1 $2; | |
123 | \u1014 ($lowsignZ) ($highsignZ) > \u108f $1 $2; | |
124 | \u1014 ($lowsignZ) \u1037 > \u108f $1 \u1094; | |
125 | \u1014 ($lowsignZ) > \u108f $1; | |
126 | # Move 1037 dot to right with other descenders: after a low sign (and any number of high signs) U+1037 is replaced by U+1094. | |
127 | ($lowsignZ) ($highsignZ*) \u1037 > $1 $2 \u1094; | |
128 | ($nondigits) \u1040 ([\u102B-\u103F]) > $1 \u101D $2; # digit U+1040 between a non-digit and a sign in U+102B-U+103F is replaced by the letter U+101D; needs a preceding character to match | |
129 | # Handle lack of 104E ၎ MYANMAR SYMBOL AFOREMENTIONED | |
130 | ($nondigits) \u104e > $1 \u1044; # U+104E after a non-digit is written as the digit U+1044 | |
131 | \u1031 \u1040 ($nondigits) > \u1031 \u101D $1; # same digit-to-letter swap once U+1031 has been moved in front | |
132 | \u1009 \u103A > \u1025 \u103A; | |
133 | \u1025 \u102E > \u1026; # the pair is replaced by the single code point U+1026 | |
134 | \u1037 \u103A > \u103A \u1037; | |
135 | ([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) > $2 $1; # swap so that the sign from the second set comes first | |
136 | # Medial plus vowel sign U: after medial ra + base, U+102F is replaced by U+1033 | |
137 | ($medialraZ) ($consonant) \u102f > $1 $2 \u1033; | |
138 | ## Phase 2: Further adjustments | |
139 | ::Null; # pass boundary: the rules below run over the complete output of Phase 1 | |
140 | # Handle consonant, subscripted consonant, medial ra: a trailing medial ra is replaced by a width-specific code point placed in front of the base. Every captured sign is re-emitted. | |
141 | ($narrowconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1083 $1 $2 $3 ; | |
142 | ($wideconsonant) ($subscriptitem) ($highsignZ) $medialraZ > \u1084 $1 $2 $3 ; # $3 restored: without it the captured high sign was deleted from the output, unlike the narrow rule above | |
143 | ($narrowconsonant) ($subscriptitem) $medialraZ > \u1081 $1 $2 ; | |
144 | ($wideconsonant) ($subscriptitem) $medialraZ > \u1082 $1 $2 ; | |
145 | \u103c \u1094 > \u103c \u1095 ; # after U+103C the dot U+1094 is replaced by U+1095 | |
146 | # Medial ra variations, context dependent. The output code point depends on base width (narrow/wide) and the signs that follow: low + high gives U+1083/U+1084, high only U+107F/U+1080, low only U+1081/U+1082, none U+103B/U+107E. More specific patterns are listed first. | |
147 | $medialraZ ($narrowconsonant) \u102d \u103d \u102f > \u107f $1 \u102d \u1087 \u1083 ; # NOTE(review): the output ends in U+1083, itself a $medialraZ member; the other U+102F rules in this group emit U+1033. Confirm. | |
148 | $medialraZ ($wideconsonant) \u102d \u103d \u102f > \u1080 $1 \u102d \u1087 \u1083 ; # NOTE(review): same trailing U+1083 as the rule above | |
149 | $medialraZ ($narrowconsonant) ($lowsignZ) ($highsignZ) > \u1083 $1 $2 $3 ; | |
150 | $medialraZ ($wideconsonant) ($lowsignZ) ($highsignZ) > \u1084 $1 $2 $3 ; | |
151 | $medialraZ ($narrowconsonant) ($highsignZ) > \u107f $1 $2 ; | |
152 | $medialraZ ($wideconsonant) ($highsignZ) > \u1080 $1 $2 ; | |
153 | $medialraZ ($narrowconsonant) \u1030 > \u103b $1 \u1034 ; # U+1030 is replaced by U+1034; listed before the general low-sign rules, which also match U+1030 | |
154 | $medialraZ ($wideconsonant) \u1030 > \u107e $1 \u1034 ; | |
155 | $medialraZ ($narrowconsonant) (\u102f) > \u103b $1 \u1033 ; # U+102F is captured but $2 is unused; U+1033 is written instead | |
156 | $medialraZ ($wideconsonant) (\u102f) > \u107e $1 \u1033 ; | |
157 | $medialraZ ($narrowconsonant) ($lowsignZ) > \u1081 $1 $2 ; | |
158 | $medialraZ ($wideconsonant) ($lowsignZ) > \u1082 $1 $2 ; | |
159 | $medialraZ ($widenya) > \u1082 $1 ; # U+100A is also in $wideconsonant, so this must precede the bare wide rule below | |
160 | $medialraZ ($othernya) > \u103b \u106a ; # $1 is not used: the output is always U+106A, whichever $othernya member matched | |
161 | $medialraZ ($narrowconsonant) > \u103b $1 ; | |
162 | $medialraZ ($wideconsonant) > \u107e $1 ; | |
163 | \u1009 ($lowsignZ) > \u106a $1; # before a low sign, U+1009 is replaced by U+106A | |
164 | \u100A ($lowsignZ)> \u106B $1 ; ## NYA and NNYA: before a low sign, U+100A is replaced by U+106B | |
165 | \u103d \u102d > \u102d \u103d; # swap so that U+102D comes first | |
166 | \u103a ($highsignZ) \u102f [\u1037\u1094\u1095] > \u103a $1 \u1033 \u1095; # after U+103A: U+102F becomes U+1033 and any of the three dot code points becomes U+1095 | |
167 | \u103a \u102f [\u1037\u1094\u1095] > \u103a \u1033 \u1095; | |
168 | \u103a \u102f > \u103a \u1033; | |
169 | # Kinzi combo: U+1064 followed by U+102E becomes the single code point U+108C | |
170 | \u1064 \u102e > \u108c ; | |
171 | ##### Phase 3: sign-order normalization over the output of Phase 2 | |
172 | ::Null; # pass boundary | |
173 | ([\u103C\u103D\u103E]+) \u103B > \u103B $1; # U+103B moves in front of any run of U+103C-U+103E | |
174 | ([\u103D\u103E]+) \u103C > \u103C $1; # U+103C moves in front of any run of U+103D/U+103E | |
175 | \u103E\u103D > \u103D\u103E ; # U+103D before U+103E | |
176 | \u1037 ([\u102D-\u1030\u1032\u1036]) > $1 \u1037; # U+1037 moves after the following sign | |
177 | ($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant)> $1 \u103A $2 $3; # between two base letters, U+103A is moved ahead of the single sign that preceded it | |
178 | # Combine vowel and consonant signs | |
179 | \u103d \u102f > \u1088; | |
180 | \u1033 \u1094 > \u1033 \u1095; # Wider spacing on lower dot | |
181 | ($medialraZ) ($consonant) ($highsignZ) \u102f > $1 $2 $3 \u1033; # like the Phase 1 medial + U+102F rule, but with one high sign between base and U+102F | |
182 | ##### Phase 4. More reorderings of medials | |
183 | ::Null; # pass boundary: the rules below run over the complete output of Phase 3 | |
184 | ([\u103D\u103E]) \u103C > \u103C $1; # single-sign form of the Phase 3 swap, applied again after Phase 3 rewrites | |
185 | \u103E\u103D > \u103D\u103E ; | |
186 | \u1038 ($vowelmedial) > $1 \u1038; # U+1038 moves after a following vowel or medial sign | |
187 | \u1038 ([\u1036\u1037\u103A]) > $1 \u1038; # and after U+1036, U+1037 or U+103A | |
188 | \u1036 \u102f > \u102f \u1036; # NOTE(review): Phase 5 contains the opposite swap for this pair | |
189 | \u103a ([\u1064\u108b-\u108e]) \u102d \u102f > \u103a $1 \u102d \u1033; # after U+103A (optionally with a kinzi-form code point), U+102D U+102F becomes U+102D U+1033 | |
190 | \u103a \u102d \u102f > \u103a \u102d \u1033; | |
191 | #### Phase 5: final cleanup over the output of Phase 4 (reordering, removal of redundant signs, canonical order) | |
192 | ::Null; # pass boundary | |
193 | ($consonant) \u103B \u103A > $1 \u103A \u103B; # after a base letter, U+103A goes before U+103B | |
194 | ([\u103C\u103D\u103E]) \u103B > \u103B $1; # same medial-order swaps as Phases 3 and 4, single-sign form | |
195 | ([\u103D\u103E]) \u103C > \u103C $1; | |
196 | \u103E\u103D > \u103D\u103E ; | |
197 | ([\u102D-\u1030\u1032]) \u103A ($consonant) \u103A > $1 $2 \u103A; # drops the first of two U+103A that surround a base letter after a vowel sign | |
198 | \u102D \u103A > \u102D; # U+103A directly after U+102D, U+102E or U+102F is deleted | |
199 | \u102E \u103A > \u102E; | |
200 | \u102F \u103A > \u102F; | |
201 | \u102D \u102E > \u102E; # of the pair, only U+102E is kept | |
202 | \u102F \u1030 > \u102F; # of the pair, only U+102F is kept | |
203 | \u102B \u102B+ > \u102B; # from here: a run of two or more identical signs collapses to one | |
204 | \u102C \u102C+ > \u102C; | |
205 | \u102D \u102D+ > \u102D; | |
206 | \u102E \u102E+ > \u102E; | |
207 | \u102F \u102F+ > \u102F; | |
208 | \u1030 \u1030+ > \u1030; | |
209 | \u1031 \u1031+ > \u1031; | |
210 | \u1032 \u1032+ > \u1032; | |
211 | \u1036 \u1036+ > \u1036; | |
212 | \u103A \u103A+ > \u103A; | |
213 | \u103B \u103B+ > \u103B; | |
214 | \u103C \u103C+ > \u103C; | |
215 | \u103D \u103D+ > \u103D; | |
216 | \u103E \u103E+ > \u103E; | |
217 | # Visually identical orderings - standardize | |
218 | \u102f \u102D > \u102D \u102f ; | |
219 | \u102f \u1036 > \u1036 \u102f ; # NOTE(review): reverses the Phase 4 swap of the same pair, so the final order is U+1036 U+102F; confirm which order is intended | |
220 | \u1039 \u1037 > \u1037 \u1039 ; | |
221 | \u103c \u1032 > \u1032 \u103c ; | |
222 | \u103c \u102e > \u102e \u103c ; | |
223 | \u103d \u1088 > \u1088 ; # U+103D directly before U+1088 is deleted | |
224 |