# [apple/icu.git] / icuSources / data / translit / Zawgyi_my.txt

# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html#License
#
# File: Zawgyi_my.txt
# Generated from CLDR
#

# This transform converts Zawgyi "encoded" Burmese into proper
# unicode. Zawgyi is a popular encoding scheme in Myanmar. It uses
# the Myanmar unicode range but assigns different characters or
# glyphs to some codepoints. In addition to the character mapping,
# there is reordering of codepoints needed to match the expected
# unicode order. This reordering is context-based.
#
# This transform is done in two main stages:
# (1) Map all Zawgyi codepoints to their Unicode counterpart.
# (2) Perform reordering.
# Character classes and fixed sequences shared by the rules below.
# Modern Burmese digits & Unicode code points.
$nondigits = [^\u1040-\u1049];  # Anything that is not a Myanmar digit 0-9
$consonant = [\u1000-\u1021];
$vowelsign = [\u102B-\u1030\u1032];  # Unicode vowel signs except E (1031)
$vowelsAndConsonants = [\u1000-\u102a];
$umedial = [\u103B-\u103E];    # Medial codepoints in Unicode
# Union of vowel signs and medials; also holds anusvara (1036), dot below
# (1037), asat (103A) and great sa (103F). Every member must be a well-formed
# \uXXXX escape: a malformed one silently drops that character from the set.
$vowelmedial = [\u102B-\u1030\u1032\u1036\u1037\u103A-\u103F];
$ukinzi = \u1004\u103A\u1039;  # Codepoints representing kinzi in Unicode
# Zawgyi medial ra has multiple representations
$zmedialra = [\u103B\u107E-\u1084];
# Space-like characters. The range \u2000-\u200d includes ZWSP (\u200b).
$wspace = [\u0020\u00a0\u1680\u2000-\u200d\u2060\u202f\u205f\u3000\ufeff];
####
#### STAGE 1: CODEPOINT MAPPING FROM ZAWGYI TO UNICODE
####
# When several rules of this pass could match at the same position, the one
# listed first is the one applied, so the order below is significant.
# Kinzi (predefined ligatures)
# Zawgyi writes the kinzi mark (\u1064) after its base consonant; the output
# places the Unicode kinzi sequence ($ukinzi) in front of that consonant.
# Move base character to the right
# Consonant + Zawgyi \u103A + kinzi: \u103A is emitted as \u103B.
($consonant) \u103A \u1064 → $ukinzi $1 \u103B;
($consonant) \u1064 → $ukinzi $1;
# Kinzi with no preceding consonant: map the mark alone.
\u1064 → $ukinzi;
# Special cases moving base character to right before vowel signs
# \u108B, \u108C, \u108D each expand to kinzi plus one sign
# (\u102D, \u102E, \u1036 respectively).
($consonant) \u108B → $ukinzi $1 \u102D;
($consonant) \u108C → $ukinzi $1 \u102E;
($consonant) \u108D → $ukinzi $1 \u1036;
# Special cases moving Kinzi block to left
($consonant) \u103A \u1033 \u108B → $ukinzi $1 \u103B \u102D \u102F;
($consonant) \u103A \u108b → $ukinzi $1 \u103B \u102D ;
($consonant) \u103A \u108C → $ukinzi $1 \u103B \u102E ;
($consonant) \u103A \u108D → $ukinzi $1 \u103B \u1036 ;
# \u108E produces no kinzi: it expands to \u102D \u1036 only.
($consonant) \u103A \u108e → $1 \u103B \u102D \u1036 ;
# Fallbacks when no consonant precedes the combined kinzi form.
\u108B → $ukinzi \u102D ;
\u108C → $ukinzi \u102E ;
\u108D → $ukinzi \u1036 ;
# Consonants (only the ones that have to change)
\u106A → \u1009 ;  # NYA
\u106B → \u100A ;
\u108F → \u1014 ;
\u1090 → \u101B ;
\u1086 → \u103F ;
# Medials. Each rule's output is not rescanned within this pass, so a
# codepoint produced here (e.g. \u103B) is not remapped by a later rule.
# Sets list their members directly: a "|" inside [...] is a literal vertical
# bar, not an alternation, and would cause ASCII "|" to be converted too.
# yapin
[\u103A\u107d] → \u103B ;
# yayit
($zmedialra)+ → \u103C ;
# wasway
\u103C* \u108A → \u103D \u103E;  # To avoid duplicate medials
\u103C → \u103D ;
# hatoh
[\u103D\u1087] → \u103E ;
# \u1088 / \u1089 expand to \u103E followed by vowel sign \u102F / \u1030.
\u1088 → \u103E \u102F ;
\u1089 → \u103E \u1030 ;
# Vowels
# \u1033 and \u1034 are folded into the vowel signs \u102F and \u1030.
\u1033 → \u102F ;
\u1034 → \u1030 ;
# asat
# Zawgyi uses \u1039 for asat; Unicode asat is \u103A.
\u1039 → \u103A ;
# lower dot
# Both Zawgyi dot variants collapse to the single dot below \u1037.
[\u1094\u1095] → \u1037 ;
# Special cases for 1025 vs 1009;
# \u1025 followed by asat or by one of the listed stacked forms is rewritten
# as \u1009 (NYA) plus asat, or plus virama \u1039 and the stacked consonant.
\u1025 \u1039 → \u1009 \u103a;
\u1025 \u1061 → \u1009 \u1039 \u1001;
\u1025 \u1062 → \u1009 \u1039 \u1002;
\u1025 \u1065 → \u1009 \u1039 \u1005;
\u1025 \u1068 → \u1009 \u1039 \u1007;
\u1025 \u1076 → \u1009 \u1039 \u1013;
\u1025 \u1078 → \u1009 \u1039 \u1015;
\u1025 \u107A → \u1009 \u1039 \u1017;
\u1025 \u1079 → \u1009 \u1039 \u1016;
# Stacked Consonants
# Each Zawgyi stacked form becomes virama \u1039 followed by the base
# consonant. Two entries in this list are not stacks: \u105A (-> \u102B
# \u103A) and \u108E (-> \u102D \u1036).
\u105A → \u102B \u103A ;
\u1060 → \u1039 \u1000 ;
\u1061 → \u1039 \u1001 ;
\u1062 → \u1039 \u1002 ;
\u1063 → \u1039 \u1003 ;
\u1065 → \u1039 \u1005 ;
# Two Zawgyi codepoints map to the same stacked consonant.
[\u1066\u1067] → \u1039 \u1006 ;
\u1068 → \u1039 \u1007 ;
\u1069 → \u1039 \u1008 ;
\u106C → \u1039 \u100B ;
\u106D → \u1039 \u100C ;
\u1070 → \u1039 \u100F ;
[\u1071\u1072] → \u1039 \u1010 ;
# \u1096 is the stacked \u1010 with an additional medial \u103D.
\u1096 → \u1039 \u1010 \u103D;
[\u1073\u1074] → \u1039 \u1011 ;
\u1075 → \u1039 \u1012 ;
\u1076 → \u1039 \u1013 ;
\u1077 → \u1039 \u1014 ;
\u1078 → \u1039 \u1015 ;
\u1079 → \u1039 \u1016 ;
\u107A → \u1039 \u1017 ;
[\u107B\u1093] → \u1039 \u1018 ;
\u107C → \u1039 \u1019 ;
\u1085 → \u1039 \u101C ;
\u108E → \u102D \u1036 ;
# Pre-defined ligatures
# Single Zawgyi codepoints that expand to consonant + virama + consonant.
\u106E → \u100D\u1039\u100D ;
\u106F → \u100D\u1039\u100E ;
\u1091 → \u100F\u1039\u100D ;
\u1092 → \u100B\u1039\u100C ;
\u1097 → \u100B\u1039\u100B ;
# \u104E is kept and followed by \u1004 \u103A \u1038.
\u104E → \u104E\u1004\u103A\u1038 ;
####
#### STAGE 1.01: Digits 0 and 4 used instead of letters
# Case of MYANMAR digit being used instead of a letter
# Digit zero (\u1040) is replaced by the letter \u101D and digit four
# (\u1044) by \u104E, but only where the neighbouring character is not
# another digit.
# Lone digit zero and four at start
# "::Null;" ends the previous pass; the rules after it run as a new pass.
::Null;
# "^" anchors the match at the start of the text.
^ \u1040 ($nondigits) → \u101D $1;
# The "|" in the replacement puts the cursor before the output, so the
# replaced text is examined again by this pass.
^ \u1044 ($nondigits) → | \u104E $1 ;
# Lone digit zero or four at end
# "$" anchors the match at the end of the text.
($nondigits) \u1040 $ → $1 \u101D;
($nondigits) \u1044 $ → $1 \u104e;
# Evowel and dependent vowel signs before 0 or 4 only
#   -> convert to the consonant.
([\u102b-\u103f]) \u1040 ($nondigits) → $1 \u101d $2;
([\u102b-\u103f]) \u1044 ($nondigits) → $1 \u104E $2;
####
#### STAGE 1.1: Drop spaces that sit directly in front of combining marks
####   and push the E vowel sign (\u1031) behind consonants and medials.
####   All codepoints are Unicode from here on; this pass begins turning
####   the semi-visual order into logical order.
####
::Null;
# A dot below (\u1037) that follows a space is moved in front of that space.
($wspace) \u1037 → \u1037 $1;
# Spaces before other marks are removed. The set deliberately leaves out
# E vowel (\u1031) and medial ra (\u103c), which keep their space for now.
($wspace+) ([\u102b-\u1030\u1032-\u103b\u103d\u103e]) → $2;
# Collapse repeated dot below.
\u1037+ → \u1037;
# E vowel written first: move it behind kinzi + consonant, behind a
# consonant that follows dot below, and behind medial ra + consonant.
\u1031+ $ukinzi ($consonant) → $ukinzi $1 \u1031;
\u1031+ \u1037+ ($consonant) → $1 \u1031 \u1037 ;
\u1031+ \u103c ($consonant) → $1 \u103c \u1031;
# Medials other than 103c that follow the consonant go before the 1031;
# a 103c is left in place for the next consonant.
\u1031+ ($consonant) ([\u103b\u103d\u103e]+) → $1 $2 \u1031;
# General case: E vowel moves behind a single consonant or independent vowel.
\u1031+ ($vowelsAndConsonants) → $1 \u1031;
####
#### STAGE 2: POST REORDERING RULES FOR UNICODE RENDERING
####
::Null;
# Medial ya followed by asat: asat goes first.
\u103b \u103a → \u103a \u103b;
# Zawgyi 1025 followed by vowel sign 102E is the single letter 1026.
\u1025 \u102E → \u1026;
# Asat before dot below is swapped so the dot comes first (Unicode NFC order).
\u103A\u1037 → \u1037\u103A;
# Anusvara (1036) moves behind any medials and vowel signs that follow it.
\u1036 ($umedial*) ($vowelsign+) → $1 $2 \u1036 ;
# Swap a sign from {102B 102C 102F 1030} with a following {102D 102E 1032}.
([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) → $2 $1;
# Medial ra that precedes a consonant moves behind it; other medials stay.
\u103C ($consonant) → $1 \u103C;
####
#### Stage 3
#### Place \u1036 and \u103C behind consonants.
::Null;
# A medial in front of virama + consonant moves behind the stacked consonant.
($umedial) \u1039 ($consonant) → \u1039 $2 $1;
# Medial ra in front of asat + virama + consonant moves behind the consonant.
\u103C \u103A \u1039 ($consonant) → \u103A \u1039 $1 \u103C;
# Anusvara moves behind any medials that follow it.
\u1036 ($umedial+) → $1 \u1036;
####
#### Stage 4
#### Reordering medials, dot below, contractions, E sign, and asat.
::Null;
# Reorder the medials
# Target order is 103B, 103C, 103D, 103E.
([\u103C\u103D\u103E]+) \u103B → \u103B $1;
([\u103D\u103E]+) \u103C → \u103C $1;
\u103E\u103D → \u103D\u103E ;
# Contractions with vowel signs
# Virama + consonant is moved in front of the E vowel / vowel signs that
# precede it.
([\u1031]+) ($vowelsign*) \u1039 ($consonant) → \u1039 $3 $1 $2;
($vowelsign+) \u1039 ($consonant) → \u1039 $2 $1;
# Move vowel sign E \u1031 after medials, but not across consonants
# Output order is: medials before E, medials after E, then E itself.
($umedial*) ([\u1031]+) ($umedial*) → $1 $3 $2;
# Reorder dot below after medials and vowel diacritics
\u1037 ([\u102D-\u1030\u1032\u1036\u103b-\u103e]+) → $1 \u1037;
# Move vowel signs after medials
($vowelsign+) ($umedial+) → $2 $1;
# Reorder modifiers and asat
# Between two consonants, asat is moved in front of a single preceding
# vowel sign, anusvara or medial.
($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant) → $1 \u103A $2 $3;
####
#### Stage 5.  More reorderings
#### Vowel signs after medials, sort medials,
####
::Null;
# Replace CA + YA with JHA after moving other things beyond the medials.
\u1005 \u103b → \u1008;
# More moving vowel signs after medials
# Handles one sign / one medial pair per match; the range here also
# covers \u1031.
([\u102b-\u1032]) ($umedial) → $2 $1;
# Sort the medials
# Same target order as Stage 4 (103B, 103C, 103D, 103E), one pair at a time.
([\u103C\u103D\u103E]) \u103B → \u103B $1;
([\u103D\u103E]) \u103C → \u103C $1;
\u103E\u103D → \u103D\u103E ;
# Move visarga after other signs
# Depends on $vowelmedial, defined with the other variables at the top.
\u1038 ($vowelmedial) → $1 \u1038;
# Reorder
# Anusvara goes after vowel sign \u102f.
\u1036 \u102f → \u102f \u1036;
###
### Stage 6
### Finish conflicting and extra diacritics. Remove some white space
###
::Null;
# Fix duplicate combiners
# A run of the same combining mark is reduced to a single occurrence.
\u102D \u102D+ → \u102D;
\u102E \u102E+ → \u102E;
\u102F \u102F+ → \u102F;
\u1030 \u1030+ → \u1030;
\u1032 \u1032+ → \u1032;
\u1036 \u1036+ → \u1036;
\u1037 \u1037+ → \u1037;
\u1039 \u1039+ → \u1039;
\u103a \u103a+ → \u103a;
\u103b \u103b+ → \u103b;
\u103c \u103c+ → \u103c;
\u103d \u103d+ → \u103d;
\u103e \u103e+ → \u103e; # http://unicode.org/cldr/trac/ticket/10386
# Fix overlapping signs
# \u102F followed by \u1030 or \u103a keeps only \u102F;
# \u102D followed by \u102E keeps only \u102E.
\u102F [\u1030\u103a] → \u102F;
\u102D \u102E → \u102E;
# Remove space directly before diacritics.
($wspace)+ ([\u102b-\u1032\u1036-\u103e]) → $2;
# Remove ZWSP at start and end
^ \u200b+ → ;
\u200b+ $ → ;
# Fix multiple spaces around ZWSP to single ZWSP.
# $wspace itself contains \u200b and rule quantifiers are greedy without
# backtracking, so the leading run must exclude ZWSP; otherwise it would
# consume the ZWSP and the literal \u200b after it could never match.
[$wspace - [\u200b]]* \u200b $wspace* → \u200b;
Commit	Line	Data
f3c0d7a5 A	1	# © 2016 and later: Unicode, Inc. and others.
	2	# License & terms of use: http://www.unicode.org/copyright.html#License
	3	#
	4	# File: Zawgyi_my.txt
	5	# Generated from CLDR
	6	#
	7
	8	# This transform converts Zawgyi "encoded" Burmese into proper
	9	# unicode. Zawgyi is a popular encoding scheme in Myanmar. It uses
	10	# the Myanmar unicode range but assigns different characters or
	11	# glyphs to some codepoints. In addition to the character mapping,
	12	# there is reordering of codepoints needed to match the expected
	13	# unicode order. This reordering is context-based.
	14	#
	15	# This transform is done in two main stages:
	16	# (1) Map all Zawgyi codepoints to their Unicode counterpart.
	17	# (2) Perform reordering.
	18	# Modern Burmese digits & Unicode code points.
	19	$nondigits = [^\u1040-\u1049];
	20	$consonant = [\u1000-\u1021];
	21	$vowelsign = [\u102B-\u1030\u1032]; # Unicode vowel signs except E (1031)
3d1f044b	22	$vowelsAndConsonants = [\u1000-\u102a];
f3c0d7a5	23	$umedial = [\u103B-\u103E]; # Medial codepoints in Unicode
3d1f044b	24	$vowelmedial = [\u102B-\u1030\u1032\1u36\u1037\u103A-\u103F]; # Union of vowel signs and medials
f3c0d7a5	25	$ukinzi = \u1004\u103A\u1039; # Codepoints representing kinzi in Unicode
3d1f044b	26	# Zawgyi medial ra has multiple representations
f3c0d7a5	27	$zmedialra = [\u103B\u107E-\u1084];
3d1f044b	28	$wspace = [\u0020\u00a0\u1680\u2000-\u200d\u2060\u202f\u205f\u3000\ufeff];
f3c0d7a5 A	29	####
	30	#### STAGE 1: CODEPOINT MAPPING FROM ZAWGYI TO UNICODE
	31	####
	32	# Kinzi (predefined ligatures)
	33	# Move base character to the right
	34	($consonant) \u103A \u1064 → $ukinzi $1 \u103B;
	35	($consonant) \u1064 → $ukinzi $1;
	36	\u1064 → $ukinzi;
3d1f044b A	37	# Special cases moving base character to right before vowel signs
3d1f044b A	38	($consonant) \u108B → $ukinzi $1 \u102D;
f3c0d7a5 A	39	($consonant) \u108C → $ukinzi $1 \u102E;
	40	($consonant) \u108D → $ukinzi $1 \u1036;
	41	# Special cases moving Kinzi block to left
	42	($consonant) \u103A \u1033 \u108B → $ukinzi $1 \u103B \u102D \u102F;
	43	($consonant) \u103A \u108b → $ukinzi $1 \u103B \u102D ;
f3c0d7a5 A	44	($consonant) \u103A \u108C → $ukinzi $1 \u103B \u102E ;
	45	($consonant) \u103A \u108D → $ukinzi $1 \u103B \u1036 ;
	46	($consonant) \u103A \u108e → $1 \u103B \u102D \u1036 ;
	47	\u108B → $ukinzi \u102D ;
	48	\u108C → $ukinzi \u102E ;
	49	\u108D → $ukinzi \u1036 ;
	50	# Consonants (only the ones that have to change)
f3c0d7a5 A	51	\u106A → \u1009 ; # NYA
	52	\u106B → \u100A ;
	53	\u108F → \u1014 ;
	54	\u1090 → \u101B ;
	55	\u1086 → \u103F ;
	56	# yapin
3d1f044b A	57	[\u103A\|\u107d] → \u103B ;
	58	# yayit
	59	($zmedialra)+ → \u103C ;
f3c0d7a5	60	# wasway
3d1f044b	61	\u103C* \u108A → \u103D \u103E; # To avoid duplicate medials
f3c0d7a5	62	\u103C → \u103D ;
f3c0d7a5	63	# hatoh
3d1f044b	64	[\u103D\|\u1087] → \u103E ;
f3c0d7a5 A	65	\u1088 → \u103E \u102F ;
f3c0d7a5 A	66	\u1089 → \u103E \u1030 ;
f3c0d7a5 A	67	# Vowels
	68	\u1033 → \u102F ;
	69	\u1034 → \u1030 ;
3d1f044b A	70	# asat
	71	\u1039 → \u103A ;
	72	# lower dot
	73	[\u1094\u1095] → \u1037 ;
	74	# Special cases for 1025 vs 1009;
	75	\u1025 \u1039 → \u1009 \u103a;
f3c0d7a5 A	76	\u1025 \u1061 → \u1009 \u1039 \u1001;
	77	\u1025 \u1062 → \u1009 \u1039 \u1002;
	78	\u1025 \u1065 → \u1009 \u1039 \u1005;
	79	\u1025 \u1068 → \u1009 \u1039 \u1007;
	80	\u1025 \u1076 → \u1009 \u1039 \u1013;
	81	\u1025 \u1078 → \u1009 \u1039 \u1015;
	82	\u1025 \u107A → \u1009 \u1039 \u1017;
	83	\u1025 \u1079 → \u1009 \u1039 \u1016;
f3c0d7a5	84	# Stacked Consonants
3d1f044b	85	\u105A → \u102B \u103A ;
f3c0d7a5 A	86	\u1060 → \u1039 \u1000 ;
	87	\u1061 → \u1039 \u1001 ;
	88	\u1062 → \u1039 \u1002 ;
	89	\u1063 → \u1039 \u1003 ;
	90	\u1065 → \u1039 \u1005 ;
3d1f044b	91	[\u1066\u1067] → \u1039 \u1006 ;
f3c0d7a5 A	92	\u1068 → \u1039 \u1007 ;
	93	\u1069 → \u1039 \u1008 ;
	94	\u106C → \u1039 \u100B ;
	95	\u106D → \u1039 \u100C ;
	96	\u1070 → \u1039 \u100F ;
3d1f044b	97	[\u1071\u1072] → \u1039 \u1010 ;
f3c0d7a5	98	\u1096 → \u1039 \u1010 \u103D;
3d1f044b	99	[\u1073\u1074] → \u1039 \u1011 ;
f3c0d7a5 A	100	\u1075 → \u1039 \u1012 ;
	101	\u1076 → \u1039 \u1013 ;
	102	\u1077 → \u1039 \u1014 ;
	103	\u1078 → \u1039 \u1015 ;
	104	\u1079 → \u1039 \u1016 ;
	105	\u107A → \u1039 \u1017 ;
3d1f044b	106	[\u107B\u1093] → \u1039 \u1018 ;
f3c0d7a5 A	107	\u107C → \u1039 \u1019 ;
f3c0d7a5 A	108	\u1085 → \u1039 \u101C ;
3d1f044b	109	\u108E → \u102D \u1036 ;
f3c0d7a5 A	110	# Pre-defined ligatures
	111	\u106E → \u100D\u1039\u100D ;
	112	\u106F → \u100D\u1039\u100E ;
	113	\u1091 → \u100F\u1039\u100D ;
	114	\u1092 → \u100B\u1039\u100C ;
	115	\u1097 → \u100B\u1039\u100B ;
	116	\u104E → \u104E\u1004\u103A\u1038 ;
f3c0d7a5	117	####
3d1f044b	118	#### STAGE 1.01: Digits 0 and 4 used instead of letters
f3c0d7a5	119	# Case of MYANMAR digit being used instead of a letter
3d1f044b A	120	# Lone digit zero and four at start
	121	::Null;
	122	^ \u1040 ($nondigits) → \u101D $1;
f3c0d7a5	123	^ \u1044 ($nondigits) → \| \u104E $1 ;
3d1f044b A	124	# Lone digit zero or four at end
	125	($nondigits) \u1040 $ → $1 \u101D;
	126	($nondigits) \u1044 $ → $1 \u104e;
	127	# Evowel and dependent vowel signs before 0 or 4 only
	128	# -> convert to the consonant.
	129	([\u102b-\u103f]) \u1040 ($nondigits) → $1 \u101d $2;
f3c0d7a5	130	([\u102b-\u103f]) \u1044 ($nondigits) → $1 \u104E $2;
3d1f044b A	131	####
	132	#### STAGE 1.1: Strip spaces immediately before combining characters.
	133	#### Move e-vowel after consonants and medials
	134	#### Now every codepoint is Unicode. This starts conversion
	135	#### from semi-visual order to logical order.
	136	####
	137	::Null;
	138	# Don't remove spaces before E vowel or medial Ra at this stage
	139	($wspace) \u1037 > \u1037 $1;
	140	($wspace+) ([\u102b-\u1030\u1032-\u103b\u103d\u103e]) → $2;
	141	# Remove a duplicate early
	142	\u1037+ → \u1037;
	143	# Move e-vowel after medials and consonants.
	144	\u1031+ $ukinzi ($consonant) > $ukinzi $1 \u1031;
	145	\u1031+ \u1037+ ($consonant) > $1 \u1031 \u1037 ;
	146	\u1031+ \u103c ($consonant) > $1 \u103c \u1031;
	147	# Move medials other than 103c before the 1031. Leave 103c for
	148	# the next consonant.
	149	\u1031+ ($consonant) ([\u103b\u103d\u103e]+) > $1 $2 \u1031;
	150	\u1031+ ($vowelsAndConsonants) > $1 \u1031;
	151	####
	152	#### STAGE 2: POST REORDERING RULES FOR UNICODE RENDERING
	153	####
	154	::Null;
	155	\u103b \u103a > \u103a \u103b;
f3c0d7a5	156	# Simpler replacements for Zawgyi 1025
f3c0d7a5 A	157	\u1025 \u102E → \u1026;
	158	# Asat and dot below reordering, to Unicode NFC.
	159	\u103A\u1037 → \u1037\u103A;
	160	# Reorder some vowel signs
	161	\u1036 ($umedial*) ($vowelsign+) → $1 $2 \u1036 ;
	162	([\u102B\u102C\u102F\u1030]) ([\u102D\u102E\u1032]) → $2 $1;
3d1f044b	163	# Move ra medial which precedes consonant, but not other medials.
f3c0d7a5	164	\u103C ($consonant) → $1 \u103C;
f3c0d7a5 A	165	####
f3c0d7a5 A	166	#### Stage 3
3d1f044b	167	#### Move \u1036, and \u103C after consonants.
f3c0d7a5	168	::Null;
3d1f044b	169	($umedial) \u1039 ($consonant) > \u1039 $2 $1;
f3c0d7a5 A	170	\u103C \u103A \u1039 ($consonant) → \u103A \u1039 $1 \u103C;
	171	\u1036 ($umedial+) → $1 \u1036;
	172	####
	173	#### Stage 4
	174	#### Reordering medials, dot below, contractions, E sign, and asat.
	175	::Null;
	176	# Reorder the medials
	177	([\u103C\u103D\u103E]+) \u103B → \u103B $1;
	178	([\u103D\u103E]+) \u103C → \u103C $1;
	179	\u103E\u103D → \u103D\u103E ;
	180	# Contractions with vowel signs
	181	([\u1031]+) ($vowelsign*) \u1039 ($consonant) → \u1039 $3 $1 $2;
	182	($vowelsign+) \u1039 ($consonant) → \u1039 $2 $1;
	183	# Move vowel sign E \u1031 after medials, but not across consonants
	184	($umedial) ([\u1031]+) ($umedial) → $1 $3 $2;
	185	# Reorder dot below after medials and vowel diacritics
3d1f044b	186	\u1037 ([\u102D-\u1030\u1032\u1036\u103b-\u103e]+) → $1 \u1037;
f3c0d7a5 A	187	# Move vowel signs after medials
	188	($vowelsign+) ($umedial+) → $2 $1;
	189	# Reorder modifiers and asat
	190	($consonant) ([\u102B-\u1032\u1036\u103B-\u103E]) \u103A ($consonant) → $1 \u103A $2 $3;
	191	####
	192	#### Stage 5. More reorderings
	193	#### Vowel signs after medials, sort medials,
	194	####
	195	::Null;
3d1f044b A	196	# Replace CA + YA with JHA after moving other things beyond the medials.
3d1f044b A	197	\u1005 \u103b → \u1008;
f3c0d7a5	198	# More moving vowel signs after medials
3d1f044b	199	([\u102b-\u1032]) ($umedial) → $2 $1;
f3c0d7a5 A	200	# Sort the medials
	201	([\u103C\u103D\u103E]) \u103B → \u103B $1;
	202	([\u103D\u103E]) \u103C → \u103C $1;
	203	\u103E\u103D → \u103D\u103E ;
3d1f044b A	204	# Move visarga after other signs
3d1f044b A	205	\u1038 ($vowelmedial) → $1 \u1038;
f3c0d7a5 A	206	# Reorder
	207	\u1036 \u102f → \u102f \u1036;
	208	###
	209	### Stage 6
3d1f044b	210	### Finish conflicting and extra diacritics. Remove some white space
f3c0d7a5 A	211	###
f3c0d7a5 A	212	::Null;
3d1f044b A	213	# Fix duplicate combiners
	214	\u102D \u102D+ → \u102D;
	215	\u102E \u102E+ → \u102E;
f3c0d7a5	216	\u102F \u102F+ → \u102F;
3d1f044b A	217	\u1030 \u1030+ → \u1030;
3d1f044b A	218	\u1032 \u1032+ → \u1032;
f3c0d7a5	219	\u1036 \u1036+ → \u1036;
3d1f044b A	220	\u1037 \u1037+ → \u1037;
	221	\u1039 \u1039+ → \u1039;
	222	\u103a \u103a+ → \u103a;
	223	\u103b \u103b+ → \u103b;
	224	\u103c \u103c+ → \u103c;
	225	\u103d \u103d+ → \u103d;
0f5d89e8	226	\u103e \u103e+ → \u103e; # http://unicode.org/cldr/trac/ticket/10386
3d1f044b A	227	# Fix overlapping signs
	228	\u102F [\u1030\u103a] → \u102F;
	229	\u102D \u102E → \u102E;
f3c0d7a5	230	# Remove space directly before diacritics.
3d1f044b A	231	($wspace)+ ([\u102b-\u1032\u1036-\u103e]) → $2;
	232	# Remove ZWSP at start and end
	233	^ \u200b+ → ;
	234	\u200b+ $ → ;
	235	# Fix multiple spaces around ZWSP to single ZWSP.
	236	$wspace* \u200b $wspace* → \u200b;
f3c0d7a5	237