]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/ThaiLogical_Latin.txt
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / ThaiLogical_Latin.txt
1 # ***************************************************************************
2 # *
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5 # *
6 # ***************************************************************************
7 # File: ThaiLogical_Latin.txt
8 # Generated from CLDR
9 #
10
11 # Thai-Latin
12 # This set of rules follows ISO 11940
13 # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
14 # except that the standard does not mention an implicit vowel, so we use o\u0323
15 #
16 # The transcription is fairly ugly, so we ought to also do the UNGEGN version
17 # see: http://www.eki.ee/wgrs/rom1_th.pdf
18 # and probably make that the main variant.
19 #
20 # Note: this is an internal file. The NFD/NFC is handled externally, in the index.
21 # The insertion of spaces between words, the reversal of the vowels
22 # and the conversion of space to semicolon are done *outside* of these rules.
23 # So as far as these rules are concerned, the vowels are in logical order!
24 # insert implicit vowel (and remove it going the other way)
25 # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
26 #$consonant = [ก-ฮ];
27 #$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
28 #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
29 #\uE000 → o\u0323 ;
30 # ← o\u0323 ;
31 $notAbove = [^\p{ccc=0}\p{ccc=above}] ; # any character whose canonical combining class is neither 0 nor "above"
32 $notBelow = [^\p{ccc=0}\p{ccc=below}] ; # any character whose canonical combining class is neither 0 nor "below"
33 # Consonants
34 # Warning: the 'h's need to be handled carefully!
35 # What we really want to say is the following, but we can't
36 # $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
37 # Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
38 $freeStandingBelow = [\u0325 ]; # U+0325 COMBINING RING BELOW
39 $hAccent = [ \u0304 \u0323]; # U+0304 COMBINING MACRON and U+0323 COMBINING DOT BELOW, the two accents used on 'h' in the rules below
40 $notHAccent0 = [^$freeStandingBelow$hAccent]; # one character that is neither a free-standing-below mark nor an h accent
41 $notHAccent1 = $freeStandingBelow [^$hAccent]; # a free-standing-below mark followed by one character that is not an h accent
42 ห → h\u0304 ; # THAI CHARACTER HO HIP
43 ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
44 ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
45 ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
46 ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
47 ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
48 ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
49 ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
50 ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
51 ก ↔ k ; # THAI CHARACTER KO KAI
52 ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
53 ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
54 พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
55 พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
56 ป ↔ p ; # THAI CHARACTER PO PLA
57 ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
58 ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
59 ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
60 ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
61 จ ↔ c ; # THAI CHARACTER CHO CHAN
62 ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
63 ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
64 ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
65 ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
66 ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
67 ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
68 ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
69 #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
70 ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK
71 ต ↔ t ; # THAI CHARACTER TO TAO
72 # since there is no singleton g (generated), don't worry about that.
73 ง ↔ ng ; # THAI CHARACTER NGO NGU
74 ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
75 น ↔ n ; # THAI CHARACTER NO NU
76 ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
77 ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
78 ด ↔ d ; # THAI CHARACTER DO DEK
79 บ ↔ b ; # THAI CHARACTER BO BAIMAI
80 ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
81 ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
82 ม ↔ m ; # THAI CHARACTER MO MA
83 ย ↔ y ; # THAI CHARACTER YO YAK
84 ร ↔ r ; # THAI CHARACTER RO RUA
85 ฤ ↔ v ; # THAI CHARACTER RU
86 ฦ ↔ ł ; # THAI CHARACTER LU
87 ว ↔ w ; # THAI CHARACTER WO WAEN
88 ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
89 ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
90 ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
91 ส → s\u0304 ; # THAI CHARACTER SO SUA***
92 ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
93 ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
94 ล ↔ l ; # THAI CHARACTER LO LING
95 ฟ ↔ f ; # THAI CHARACTER FO FAN
96 อ ↔ x ; # THAI CHARACTER O ANG
97 ซ ↔ s ; # THAI CHARACTER SO SO
98 # vowels
99 \u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
100 า → a\u0304 ; # THAI CHARACTER SARA AA
101 า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
102 # We deviate from ISO for SARA AM for disambiguation
103 ำ → a \u0309; # THAI CHARACTER SARA AM
104 ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
105 ะ ↔ a ; # THAI CHARACTER SARA A
106 \u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
107 \u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
108 \u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
109 \u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
110 \u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
111 \u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
112 \u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
113 \u0E38 ↔ u ; # THAI CHARACTER SARA U
114 ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI
115 # ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
116 เ ↔ e ; # THAI CHARACTER SARA E
117 แ ↔ æ ; # THAI CHARACTER SARA AE
118 โ ↔ o ; # THAI CHARACTER SARA O
119 ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN
120 ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
121 ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO
122 \u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU
123 \u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK
124 \u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO
125 \u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI
126 \u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA
127 \u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT
128 \u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN
129 # We deviate from ISO for disambiguation
130 \u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT
131 ๏ ↔ '§' ; # THAI CHARACTER FONGMAN
132 ๐ ↔ 0 ; # THAI DIGIT ZERO
133 ๑ ↔ 1 ; # THAI DIGIT ONE
134 ๒ ↔ 2 ; # THAI DIGIT TWO
135 ๓ ↔ 3 ; # THAI DIGIT THREE
136 ๔ ↔ 4 ; # THAI DIGIT FOUR
137 ๕ ↔ 5 ; # THAI DIGIT FIVE
138 ๖ ↔ 6 ; # THAI DIGIT SIX
139 ๗ ↔ 7 ; # THAI DIGIT SEVEN
140 ๘ ↔ 8 ; # THAI DIGIT EIGHT
141 ๙ ↔ 9 ; # THAI DIGIT NINE
142 ๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU
143 ๛ ↔ » ; # THAI CHARACTER KHOMUT
144 ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
145 # moved down so that shorter rules (e.g. bare i) come after the longer rules above; NOTE(review): presumably because earlier rules take precedence - confirm
146 #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
147 \u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU
148 \u0E34 ↔ i ; # THAI CHARACTER SARA I
149 # fallbacks (Latin-to-Thai direction only): a Latin letter not consumed by any rule above is rewritten to another Latin letter
150 | k ← g ; # the leading '|' puts the cursor before the replacement, so the new letter is matched against the rules again
151 | k ← h ; # bare h, i.e. one not consumed as part of an accented h or of kh/ph/ch/th above
152 | c ← j ;
153 | k ← q ;
154 | s ← z ;
155 :: (lower); # parenthesized, so it applies to the reverse (Latin-to-Thai) direction only: lowercases the Latin text
156