]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/ThaiLogical_Latin.txt
ICU-6.2.4.tar.gz
[apple/icu.git] / icuSources / data / translit / ThaiLogical_Latin.txt
CommitLineData
374ca955
A
1#--------------------------------------------------------------------
2# Copyright (c) 1999-2004, International Business Machines
3# Corporation and others. All Rights Reserved.
4#--------------------------------------------------------------------
5
6# Thai-Latin
7# This set of rules follows ISO 11940
8# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
9# except that ISO 11940 does not mention an implicit vowel, so we use ọ
10#
11# The transcription is fairly ugly, so we ought to also do the UNGEGN version
12# see: http://www.eki.ee/wgrs/rom1_th.pdf
13# and probably make that the main variant.
14
15# Note: this is an internal file. The NFD/NFC is handled externally, in the index
16# The insertion of spaces between words, the reversal of the vowels
17# and the conversion of space to semicolon are done *outside* of these rules.
18# So as far as these rules are concerned, the vowels are in logical order!
19
20# insert implicit vowel (and remove it going the other way)
21# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
22#$consonant = [ก-ฮ];
23#$vowel = [ะ-ฺเ-ไ็];
24
25#{ ( $consonant ) } [^$vowel ] > | $1  ;
26# > ọ ;
27# < ọ ;
28
29$notAbove = [^\p{ccc=0}\p{ccc=above}] ; # any character whose combining class is neither 0 nor "above"; used as ($notAbove*) in the backward rules to skip marks that may sit between a base letter and its macron
30$notBelow = [^\p{ccc=0}\p{ccc=below}] ; # any character whose combining class is neither 0 nor "below"; only mentioned in the commented-out $notHAccent below, not used by any rule visible here
31
32# Consonants
33# Warning: the 'h's need to be handled carefully!
34# What we really want to say is the following, but we can't
35# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
36
37# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
38$freeStandingBelow = [\u0325 ]; # U+0325 COMBINING RING BELOW
39$hAccent = [ ̄ ̣]; # the two marks that, following an h, form the letters for HO HIP and HO NOKHUK
40$notHAccent0 = [^$freeStandingBelow$hAccent]; # one character that is neither a free-standing below mark nor an h accent (context for the two-way kh/ph/ch/th rules)
41$notHAccent1 = $freeStandingBelow [^$hAccent]; # a free-standing below mark followed by a character that is not an h accent (context for the backward-only kh/ph/ch/th rules)
42
43ห > h̄ ; # THAI CHARACTER HO HIP
44 ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering
45ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK
46
47ข <> k̄h ; # THAI CHARACTER KHO KHAI
48ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT
49ฅ <> kʹh ; # THAI CHARACTER KHO KHON
50ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG
51ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
52ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
53ก <> k ; # THAI CHARACTER KO KAI
54
55ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO
56ผ <> p̄h ; # THAI CHARACTER PHO PHUNG
57พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
58พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
59ป <> p ; # THAI CHARACTER PO PLA
60
61ฉ <> c̄h ; # THAI CHARACTER CHO CHING
62ฌ <> c̣h ; # THAI CHARACTER CHO CHOE
63ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
64ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
65จ <> c ; # THAI CHARACTER CHO CHAN
66
67ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN
68ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO
69ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO
70ถ <> t̄h ; # THAI CHARACTER THO THUNG
71ธ <> ṭh ; # THAI CHARACTER THO THONG
72ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
73ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
74#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses a vertical tick.
75ฏ <> t̩ ; # THAI CHARACTER TO PATAK
76ต <> t ; # THAI CHARACTER TO TAO
77
78# Since the forward rules never generate a singleton g (g only appears as part of ng), we don't need to worry about ambiguity here.
79ง <> ng ; # THAI CHARACTER NGO NGU
80ณ <> ṇ ; # THAI CHARACTER NO NEN
81น <> n ; # THAI CHARACTER NO NU
82
83ญ <> ỵ ; # THAI CHARACTER YO YING
84ฎ <> ḍ ; # THAI CHARACTER DO CHADA
85ด <> d ; # THAI CHARACTER DO DEK
86
87บ <> b ; # THAI CHARACTER BO BAIMAI
88ฝ <> f̄ ; # THAI CHARACTER FO FA
89 ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering
90
91ม <> m ; # THAI CHARACTER MO MA
92ย <> y ; # THAI CHARACTER YO YAK
93ร <> r ; # THAI CHARACTER RO RUA
94ฤ <> v ; # THAI CHARACTER RU
95ฦ <> ł ; # THAI CHARACTER LU
96ว <> w ; # THAI CHARACTER WO WAEN
97
98ศ <> ṣ̄ ; # THAI CHARACTER SO SALA***
99 ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering
100ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI
101ส > s̄ ; # THAI CHARACTER SO SUA***
102 ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering
103
104ฬ <> ḷ ; # THAI CHARACTER LO CHULA
105ล <> l ; # THAI CHARACTER LO LING
106ฟ <> f ; # THAI CHARACTER FO FAN
107
108อ <> x ; # THAI CHARACTER O ANG
109ซ <> s ; # THAI CHARACTER SO SO
110
111# vowels
112
113 ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT
114
115า > ā ; # THAI CHARACTER SARA AA
116 า | $1 < a ($notAbove*) ̄; # backward case, account for reordering
117
118# We deviate from ISO for SARA AM for disambiguation
119ำ > a ̉; # THAI CHARACTER SARA AM
120 ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering
121
122ะ <> a ; # THAI CHARACTER SARA A
123 ี <> ī ; # THAI CHARACTER SARA II
124 ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering
125
126 ื <> ụ̄ ; # THAI CHARACTER SARA UEE
127 ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
128
129 ึ <> ụ ; # THAI CHARACTER SARA UE
130 ู <> ū ; # THAI CHARACTER SARA UU
131 ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering
132
133 ุ <> u ; # THAI CHARACTER SARA U
134
135ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI
136
137# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT
138
139เ <> e ; # THAI CHARACTER SARA E
140แ <> æ ; # THAI CHARACTER SARA AE
141โ <> o ; # THAI CHARACTER SARA O
142ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN
143ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI
144ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO
145 ็ <> ̆ ; # THAI CHARACTER MAITAIKHU
146 ่ <> ̀ ; # THAI CHARACTER MAI EK
147 ้ <> ̂ ; # THAI CHARACTER MAI THO
148 ๊ <> ́ ; # THAI CHARACTER MAI TRI
149 ๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA
150 ์ <> ̒ ; # THAI CHARACTER THANTHAKHAT
151 ๎ <> '~' ; # THAI CHARACTER YAMAKKAN
152
153# We deviate from ISO for disambiguation
154 ํ <> ̊ ; # THAI CHARACTER NIKHAHIT
155
156๏ <> § ; # THAI CHARACTER FONGMAN
157
158๐ <> 0 ; # THAI DIGIT ZERO
159๑ <> 1 ; # THAI DIGIT ONE
160๒ <> 2 ; # THAI DIGIT TWO
161๓ <> 3 ; # THAI DIGIT THREE
162๔ <> 4 ; # THAI DIGIT FOUR
163๕ <> 5 ; # THAI DIGIT FIVE
164๖ <> 6 ; # THAI DIGIT SIX
165๗ <> 7 ; # THAI DIGIT SEVEN
166๘ <> 8 ; # THAI DIGIT EIGHT
167๙ <> 9 ; # THAI DIGIT NINE
168
169๚ <> '||' ; # THAI CHARACTER ANGKHANKHU
170
171๛ <> » ; # THAI CHARACTER KHOMUT
172ๆ <> « ; # THAI CHARACTER MAIYAMOK
173
174# moved down to make shorter first
175#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
176 ฺ <> ˌ ; # THAI CHARACTER PHINTHU
177 ิ <> i ; # THAI CHARACTER SARA I
178
179# fallbacks: backward-only (<) rules for Latin letters not consumed by any rule above. The | leaves the cursor in front of the replacement letter, so that letter is then matched again by the earlier rules.
180
181| k < g ; # a g that is not part of "ng" is treated as k
182| k < h ; # an h not consumed by any earlier rule is treated as k
183| c < j ; # j is treated as c
184| k < q ; # q is treated as k
185| s < z ; # z is treated as s
186
187:: (lower); # transform rule with an empty forward part: lowercasing is applied only in the backward (Latin to Thai) direction