]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/ThaiLogical_Latin.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / ThaiLogical_Latin.txt
1 # © 2016 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html#License
3 #
4 # File: ThaiLogical_Latin.txt
5 # Generated from CLDR
6 #
7
8 # Thai-Latin
9 # This set of rules follows ISO 11940
10 # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
11 # except that that document does not mention an implicit vowel, so we use o\u0323
12 #
13 # The transcription is fairly ugly, so we ought to also do the UNGEGN version
14 # see: http://www.eki.ee/wgrs/rom1_th.pdf
15 # and probably make that the main variant.
16 #
17 # Note: this is an internal file. The NFD/NFC is handled externally, in the index
18 # The insertion of spaces between words, the reversal of the vowels
19 # and the conversion of space to semicolon are done *outside* of these rules.
20 # So as far as these rules are concerned, the vowels are in logical order!
21 # Rules that would insert the implicit vowel (and remove it going the other way).
22 # They are COMMENTED out below because the implicit vowel positions cannot be predicted algorithmically.
23 #$consonant = [ก-ฮ];
24 #$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
25 #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
26 #\uE000 → o\u0323 ;
27 # ← o\u0323 ;
28 $notAbove = [^\p{ccc=0}\p{ccc=above}] ; # characters whose combining class is neither 0 nor "above"; the backward rules use it to skip marks sitting between a base letter and its above accent
29 $notBelow = [^\p{ccc=0}\p{ccc=below}] ; # characters whose combining class is neither 0 nor "below"; NOTE(review): in the rules shown here it is only mentioned in the commented-out $notHAccent sketch
30 # Consonants
31 # Warning: the 'h's need to be handled carefully! A plain h is both the last letter of the digraphs kh/ph/ch/th and the base of the accented letters used for HO HIP and HO NOKHUK.
32 # What we really want to say is the following, but we can't
33 # $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
34 # Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
35 $freeStandingBelow = [\u0325 ]; # U+0325 (combining ring below); NOTE(review): the PHINTHU rule later in this file uses a spacing tick instead of a ring below, so confirm this set is still what is intended
36 $hAccent = [ \u0304 \u0323]; # the two accents that the h rules attach to h: U+0304 (HO HIP) and U+0323 (HO NOKHUK)
37 $notHAccent0 = [^$freeStandingBelow$hAccent]; # one character that is neither a free-standing below mark nor an h accent
38 $notHAccent1 = $freeStandingBelow [^$hAccent]; # a free-standing below mark followed by one character that is not an h accent
39 ห → h\u0304 ; # THAI CHARACTER HO HIP (forward only; the reverse mapping is the next rule)
40 ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering: any non-above marks found between h and U+0304 are captured as $1 and re-emitted after the cursor so that later rules can convert them
41 ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
42 ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
43 ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
44 ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
45 ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
46 ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI (reverse only: plain kh followed by a free-standing below mark and then a character that is not an h accent)
47 ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI (plain kh whose next character is neither a free-standing below mark nor an h accent; presumably this keeps k + accented h from being read as KHO KHWAI)
48 ก ↔ k ; # THAI CHARACTER KO KAI (listed after all the longer rules that begin with k)
49 ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
50 ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
51 พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN (reverse only: plain ph followed by a free-standing below mark and then a character that is not an h accent)
52 พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN (plain ph whose next character is neither a free-standing below mark nor an h accent)
53 ป ↔ p ; # THAI CHARACTER PO PLA (listed after all the longer rules that begin with p)
54 ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
55 ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
56 ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG (reverse only: plain ch followed by a free-standing below mark and then a character that is not an h accent)
57 ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG (plain ch whose next character is neither a free-standing below mark nor an h accent)
58 จ ↔ c ; # THAI CHARACTER CHO CHAN (listed after all the longer rules that begin with c)
59 ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
60 ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
61 ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
62 ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
63 ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
64 ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN (reverse only: plain th followed by a free-standing below mark and then a character that is not an h accent)
65 ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN (plain th whose next character is neither a free-standing below mark nor an h accent)
66 #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous (that sequence is already THO THONG above). So it uses vertical tick.
67 ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
68 ต ↔ t ; # THAI CHARACTER TO TAO (listed after all the longer rules that begin with t)
69 # No rule in this file generates a lone g (it only appears inside ng), so ng needs no look-ahead context; don't worry about that. A stray g in reverse input is handled by the fallbacks at the end of the file.
70 ง ↔ ng ; # THAI CHARACTER NGO NGU
71 ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
72 น ↔ n ; # THAI CHARACTER NO NU
73 ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
74 ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
75 ด ↔ d ; # THAI CHARACTER DO DEK
76 บ ↔ b ; # THAI CHARACTER BO BAIMAI
77 ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
78 ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering: non-above marks between f and U+0304 are captured as $1 and re-emitted after the cursor for later rules
79 ม ↔ m ; # THAI CHARACTER MO MA
80 ย ↔ y ; # THAI CHARACTER YO YAK
81 ร ↔ r ; # THAI CHARACTER RO RUA
82 ฤ ↔ v ; # THAI CHARACTER RU
83 ฦ ↔ ł ; # THAI CHARACTER LU
84 ว ↔ w ; # THAI CHARACTER WO WAEN
85 ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA*** NOTE(review): the meaning of the *** marker is not explained in this file
86 ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering: non-above marks between s + U+0323 and U+0304 are captured as $1 and re-emitted after the cursor for later rules
87 ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI (listed before SO SUA, whose romanization is a prefix of this one)
88 ส → s\u0304 ; # THAI CHARACTER SO SUA*** (forward only; the reverse mapping is the next rule)
89 ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering: non-above marks between s and U+0304 are captured as $1 and re-emitted after the cursor for later rules
90 ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
91 ล ↔ l ; # THAI CHARACTER LO LING
92 ฟ ↔ f ; # THAI CHARACTER FO FAN
93 อ ↔ x ; # THAI CHARACTER O ANG
94 ซ ↔ s ; # THAI CHARACTER SO SO
95 # vowels (SARA I is not here; its rule is placed near the end of the file)
96 \u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
97 า → a\u0304 ; # THAI CHARACTER SARA AA (forward only; the reverse mapping is the next rule)
98 า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering: non-above marks between a and U+0304 are captured as $1 and re-emitted after the cursor for later rules
99 # We deviate from ISO for SARA AM for disambiguation
100 ำ → a \u0309; # THAI CHARACTER SARA AM (forward only; the reverse mapping is the next rule)
101 ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering: non-above marks between a and U+0309 are captured as $1 and re-emitted after the cursor for later rules
102 ะ ↔ a ; # THAI CHARACTER SARA A (listed after all the longer rules that begin with a)
103 \u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
104 \u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering: non-above marks between i and U+0304 are captured as $1 and re-emitted after the cursor for later rules
105 \u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
106 \u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering: non-above marks between u + U+0323 and U+0304 are captured as $1 and re-emitted after the cursor for later rules
107 \u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
108 \u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
109 \u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering: non-above marks between u and U+0304 are captured as $1 and re-emitted after the cursor for later rules
110 \u0E38 ↔ u ; # THAI CHARACTER SARA U (listed after all the longer rules that begin with u)
111 ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
112 # ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT (rule disabled; XXX looks like a placeholder, no Latin target is assigned here)
113 เ ↔ e ; # THAI CHARACTER SARA E
114 แ ↔ æ ; # THAI CHARACTER SARA AE
115 โ ↔ o ; # THAI CHARACTER SARA O
116 ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
117 ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
118 ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
119 \u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
120 \u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
121 \u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
122 \u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
123 \u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
124 \u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
125 \u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
126 # We deviate from ISO for disambiguation (this applies to NIKHAHIT, the next rule)
127 \u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT
128 ๏ ↔ '§' ; # THAI CHARACTER FONGMAN
129 ๐ ↔ 0 ; # THAI DIGIT ZERO
130 ๑ ↔ 1 ; # THAI DIGIT ONE
131 ๒ ↔ 2 ; # THAI DIGIT TWO
132 ๓ ↔ 3 ; # THAI DIGIT THREE
133 ๔ ↔ 4 ; # THAI DIGIT FOUR
134 ๕ ↔ 5 ; # THAI DIGIT FIVE
135 ๖ ↔ 6 ; # THAI DIGIT SIX
136 ๗ ↔ 7 ; # THAI DIGIT SEVEN
137 ๘ ↔ 8 ; # THAI DIGIT EIGHT
138 ๙ ↔ 9 ; # THAI DIGIT NINE
139 ๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU (quoted: an unquoted | is the cursor marker in these rules)
140 ๛ ↔ » ; # THAI CHARACTER KHOMUT
141 ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
142 # The rules below were moved down to the end; presumably so that the longer rules sharing their first character (e.g. i + U+0304, i + U+0323) are tried before the one-character rule - TODO confirm
143 #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
144 \u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
145 \u0E34 ↔ i ; # THAI CHARACTER SARA I (listed after all the longer rules that begin with i)
146 # fallbacks (reverse direction only): Latin letters that reach this point unconverted are rewritten to a letter that has a rule above; the leading | leaves the cursor before the replacement so it is transliterated again
147 | k ← g ;
148 | k ← h ; # a plain h that is not part of a digraph or an accented h
149 | c ← j ;
150 | k ← q ;
151 | s ← z ;
152 :: (lower); # parenthesized, so per ICU rule syntax it applies in the reverse (Latin to Thai) direction only: lowercase the Latin text
153