]> git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/data/translit/ThaiLogical_Latin.txt
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / ThaiLogical_Latin.txt
... / ...
CommitLineData
1# ***************************************************************************
2# *
3# * Copyright (C) 2004-2016, International Business Machines
4# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5# *
6# ***************************************************************************
7# File: ThaiLogical_Latin.txt
8# Generated from CLDR
9#
10
11# Thai-Latin
12# This set of rules follows ISO 11940
13# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
14# except that the referenced document does not mention an implicit vowel, so we use o\u0323
15#
16# The transcription is fairly ugly, so we ought to also do the UNGEGN version
17# see: http://www.eki.ee/wgrs/rom1_th.pdf
18# and probably make that the main variant.
19#
20# Note: this is an internal file. The NFD/NFC is handled externally, in the index
21# The insertion of spaces between words, the reversal of the vowels
22# and the conversion of space to semicolon are done *outside* of these rules.
23# So as far as these rules are concerned, the vowels are in logical order!
24# insert implicit vowel (and remove it going the other way)
25# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
26#$consonant = [ก-ฮ];
27#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
28#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
29#\uE000 → o\u0323 ;
30# ← o\u0323 ;
31$notAbove = [^\p{ccc=0}\p{ccc=above}] ; # any character whose combining class is neither 0 nor "above"; used as ($notAbove*) in the backward rules to skip marks sitting between a base letter and its above accent
32$notBelow = [^\p{ccc=0}\p{ccc=below}] ; # counterpart for "below"; in the rules shown here it appears only in the commented-out $notHAccent sketch
33# Consonants
34# Warning: the 'h's need to be handled carefully!
35# What we really want to say is the following, but we can't
36# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
37# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
38$freeStandingBelow = [\u0325 ]; # U+0325 COMBINING RING BELOW
39$hAccent = [ \u0304 \u0323]; # U+0304 macron and U+0323 dot below, the accents carried by the two romanized h letters (h\u0304, h\u0323)
40$notHAccent0 = [^$freeStandingBelow$hAccent]; # one character that is neither a ring below nor an h accent
41$notHAccent1 = $freeStandingBelow [^$hAccent]; # a ring below followed by one character that is not an h accent
42ห → h\u0304 ; # THAI CHARACTER HO HIP
43ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
44ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
45ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
46ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
47ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
48ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
49ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI (backward only: "kh" followed by a ring below that is not itself followed by an h accent; the ph/ch/th rules below use the same pair of guards)
50ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI (backward: "kh" followed by a character that is neither a ring below nor an h accent, presumably so that k + accented h is not consumed as "kh")
51ก ↔ k ; # THAI CHARACTER KO KAI
52ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
53ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
54พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
55พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
56ป ↔ p ; # THAI CHARACTER PO PLA
57ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
58ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
59ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
60ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
61จ ↔ c ; # THAI CHARACTER CHO CHAN
62ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
63ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
64ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
65ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
66ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
67ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
68ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
69#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
70ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
71ต ↔ t ; # THAI CHARACTER TO TAO
72# since there is no singleton g (generated), don't worry about that.
73ง ↔ ng ; # THAI CHARACTER NGO NGU
74ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
75น ↔ n ; # THAI CHARACTER NO NU
76ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
77ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
78ด ↔ d ; # THAI CHARACTER DO DEK
79บ ↔ b ; # THAI CHARACTER BO BAIMAI
80ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
81ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
82ม ↔ m ; # THAI CHARACTER MO MA
83ย ↔ y ; # THAI CHARACTER YO YAK
84ร ↔ r ; # THAI CHARACTER RO RUA
85ฤ ↔ v ; # THAI CHARACTER RU
86ฦ ↔ ł ; # THAI CHARACTER LU
87ว ↔ w ; # THAI CHARACTER WO WAEN
88ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
89ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
90ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
91ส → s\u0304 ; # THAI CHARACTER SO SUA***
92ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
93ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
94ล ↔ l ; # THAI CHARACTER LO LING
95ฟ ↔ f ; # THAI CHARACTER FO FAN
96อ ↔ x ; # THAI CHARACTER O ANG
97ซ ↔ s ; # THAI CHARACTER SO SO
98# vowels
99\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
100า → a\u0304 ; # THAI CHARACTER SARA AA
101า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
102# We deviate from ISO for SARA AM for disambiguation
103ำ → a \u0309; # THAI CHARACTER SARA AM (U+0309 COMBINING HOOK ABOVE)
104ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
105ะ ↔ a ; # THAI CHARACTER SARA A
106\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
107\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
108\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
109\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
110\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
111\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
112\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
113\u0E38 ↔ u ; # THAI CHARACTER SARA U
114ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
115# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
116เ ↔ e ; # THAI CHARACTER SARA E
117แ ↔ æ ; # THAI CHARACTER SARA AE
118โ ↔ o ; # THAI CHARACTER SARA O
119ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
120ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
121ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
122\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
123\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
124\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
125\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
126\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
127\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
128\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
129# We deviate from ISO for disambiguation
130\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT (U+030A COMBINING RING ABOVE)
131๏ ↔ '§' ; # THAI CHARACTER FONGMAN
132๐ ↔ 0 ; # THAI DIGIT ZERO
133๑ ↔ 1 ; # THAI DIGIT ONE
134๒ ↔ 2 ; # THAI DIGIT TWO
135๓ ↔ 3 ; # THAI DIGIT THREE
136๔ ↔ 4 ; # THAI DIGIT FOUR
137๕ ↔ 5 ; # THAI DIGIT FIVE
138๖ ↔ 6 ; # THAI DIGIT SIX
139๗ ↔ 7 ; # THAI DIGIT SEVEN
140๘ ↔ 8 ; # THAI DIGIT EIGHT
141๙ ↔ 9 ; # THAI DIGIT NINE
142๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
143๛ ↔ » ; # THAI CHARACTER KHOMUT
144ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
145# moved down to make shorter first
146#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
147\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
148\u0E34 ↔ i ; # THAI CHARACTER SARA I (bare i; listed after the longer i\u0304 and i\u0323 rules so those are tried first)
149# fallbacks: backward-only rules for Latin letters that have no rule of their own above; the leading | puts the cursor before the replacement so it is matched again by the regular rules
150| k ← g ;
151| k ← h ;
152| c ← j ;
153| k ← q ;
154| s ← z ;
155:: (lower); # parenthesized transform: lowercasing applies only in the reverse (Latin-to-Thai) direction
156