]>
Commit | Line | Data |
---|---|---|
1 | # *************************************************************************** | |
2 | # * | |
3 | # * Copyright (C) 2004-2016, International Business Machines | |
4 | # * Corporation; Unicode, Inc.; and others. All Rights Reserved. | |
5 | # * | |
6 | # *************************************************************************** | |
7 | # File: ThaiLogical_Latin.txt | |
8 | # Generated from CLDR | |
9 | # | |
10 | ||
11 | # Thai-Latin | |
12 | # This set of rules follows ISO 11940 | |
13 | # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf | |
14 | # except that ISO 11940 does not mention an implicit vowel, so we use o\u0323 | |
15 | # | |
16 | # The transcription is fairly ugly, so we ought to also do the UNGEGN version | |
17 | # see: http://www.eki.ee/wgrs/rom1_th.pdf | |
18 | # and probably make that the main variant. | |
19 | # | |
20 | # Note: this is an internal file. The NFD/NFC is handled externally, in the index | |
21 | # The insertion of spaces between words, the reversal of the vowels | |
22 | # and the conversion of space to semicolon are done *outside* of these rules. | |
23 | # So as far as these rules are concerned, the vowels are in logical order! | |
24 | # insert implicit vowel (and remove it going the other way) | |
25 | # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically | |
26 | #$consonant = [ก-ฮ]; | |
27 | #$vowel = [ะ-\u0E3Aเ-ไ\u0E47]; | |
28 | #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ; | |
29 | #\uE000 → o\u0323 ; | |
30 | # ← o\u0323 ; | |
31 | $notAbove = [^\p{ccc=0}\p{ccc=above}] ; | |
32 | $notBelow = [^\p{ccc=0}\p{ccc=below}] ; | |
33 | # Consonants | |
34 | # Warning: the 'h's need to be handled carefully! Going backward, an 'h' that closes a digraph (kh, ph, ch, th) must be told apart from an 'h' that carries its own accent (h\u0304, h\u0323). | |
35 | # What we really want to say is the following (an 'h' that is not followed by either h accent, allowing for other marks in between), but we can't | |
36 | # $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ; | |
37 | # Since the only accents we care about that could cause problems are free-standing accents below, we use two approximations instead: $notHAccent0 (next character is neither a free-standing below accent nor an h accent) and $notHAccent1 (a free-standing below accent followed by something that is not an h accent): | |
38 | $freeStandingBelow = [\u0325 ]; | |
39 | $hAccent = [ \u0304 \u0323]; | |
40 | $notHAccent0 = [^$freeStandingBelow$hAccent]; | |
41 | $notHAccent1 = $freeStandingBelow [^$hAccent]; | |
42 | ห → h\u0304 ; # THAI CHARACTER HO HIP | |
43 | ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering: tolerate other (non-above) combining marks sitting between the h and its \u0304, and keep them after the Thai letter | |
44 | ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK | |
45 | ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI | |
46 | ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT | |
47 | ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON | |
48 | ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG | |
49 | ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI (backward only: kh followed by a free-standing below accent and then a non-h accent) | |
50 | ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI (backward context: kh not followed by an h accent or a free-standing below accent) | |
51 | ก ↔ k ; # THAI CHARACTER KO KAI | |
52 | ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO | |
53 | ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG | |
54 | พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN | |
55 | พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN | |
56 | ป ↔ p ; # THAI CHARACTER PO PLA | |
57 | ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING | |
58 | ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE | |
59 | ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG | |
60 | ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG | |
61 | จ ↔ c ; # THAI CHARACTER CHO CHAN | |
62 | ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN | |
63 | ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO | |
64 | ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO | |
65 | ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG | |
66 | ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG | |
67 | ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN | |
68 | ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN | |
69 | #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous (it would collide with THO THONG, t\u0323h). So it uses a vertical line below (\u0329). | |
70 | ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK | |
71 | ต ↔ t ; # THAI CHARACTER TO TAO | |
72 | # the ng digraph is safe: since there is no singleton g (generated), don't worry about that. | |
73 | ง ↔ ng ; # THAI CHARACTER NGO NGU | |
74 | ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN | |
75 | น ↔ n ; # THAI CHARACTER NO NU | |
76 | ญ ↔ y\u0323 ; # THAI CHARACTER YO YING | |
77 | ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA | |
78 | ด ↔ d ; # THAI CHARACTER DO DEK | |
79 | บ ↔ b ; # THAI CHARACTER BO BAIMAI | |
80 | ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA | |
81 | ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering | |
82 | ม ↔ m ; # THAI CHARACTER MO MA | |
83 | ย ↔ y ; # THAI CHARACTER YO YAK | |
84 | ร ↔ r ; # THAI CHARACTER RO RUA | |
85 | ฤ ↔ v ; # THAI CHARACTER RU | |
86 | ฦ ↔ ł ; # THAI CHARACTER LU | |
87 | ว ↔ w ; # THAI CHARACTER WO WAEN | |
88 | ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA*** | |
89 | ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering | |
90 | ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI | |
91 | ส → s\u0304 ; # THAI CHARACTER SO SUA*** | |
92 | ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering | |
93 | ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA | |
94 | ล ↔ l ; # THAI CHARACTER LO LING | |
95 | ฟ ↔ f ; # THAI CHARACTER FO FAN | |
96 | อ ↔ x ; # THAI CHARACTER O ANG | |
97 | ซ ↔ s ; # THAI CHARACTER SO SO | |
98 | # vowels, followed by tone marks and other signs, digits and punctuation | |
99 | \u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT | |
100 | า → a\u0304 ; # THAI CHARACTER SARA AA | |
101 | า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering | |
102 | # We deviate from ISO for SARA AM for disambiguation: it is written as a + \u0309 (hook above) | |
103 | ำ → a \u0309; # THAI CHARACTER SARA AM | |
104 | ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering | |
105 | ะ ↔ a ; # THAI CHARACTER SARA A | |
106 | \u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II | |
107 | \u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering | |
108 | \u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE | |
109 | \u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering | |
110 | \u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE | |
111 | \u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU | |
112 | \u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering | |
113 | \u0E38 ↔ u ; # THAI CHARACTER SARA U | |
114 | ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI | |
115 | # ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT | |
116 | เ ↔ e ; # THAI CHARACTER SARA E | |
117 | แ ↔ æ ; # THAI CHARACTER SARA AE | |
118 | โ ↔ o ; # THAI CHARACTER SARA O | |
119 | ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN | |
120 | ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI | |
121 | ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO | |
122 | \u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU | |
123 | \u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK | |
124 | \u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO | |
125 | \u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI | |
126 | \u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA | |
127 | \u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT | |
128 | \u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN | |
129 | # We deviate from ISO for NIKHAHIT for disambiguation: it is written as \u030A (ring above) | |
130 | \u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT | |
131 | ๏ ↔ '§' ; # THAI CHARACTER FONGMAN | |
132 | ๐ ↔ 0 ; # THAI DIGIT ZERO | |
133 | ๑ ↔ 1 ; # THAI DIGIT ONE | |
134 | ๒ ↔ 2 ; # THAI DIGIT TWO | |
135 | ๓ ↔ 3 ; # THAI DIGIT THREE | |
136 | ๔ ↔ 4 ; # THAI DIGIT FOUR | |
137 | ๕ ↔ 5 ; # THAI DIGIT FIVE | |
138 | ๖ ↔ 6 ; # THAI DIGIT SIX | |
139 | ๗ ↔ 7 ; # THAI DIGIT SEVEN | |
140 | ๘ ↔ 8 ; # THAI DIGIT EIGHT | |
141 | ๙ ↔ 9 ; # THAI DIGIT NINE | |
142 | ๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU | |
143 | ๛ ↔ » ; # THAI CHARACTER KHOMUT | |
144 | ๆ ↔ « ; # THAI CHARACTER MAIYAMOK | |
145 | # moved down to make shorter first -- NOTE(review): presumably the two rules below were placed last so that the bare 'i' of SARA I is only tried after the longer i\u0304 and i\u0323 rules above; confirm | |
146 | #Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses a spacing tick below (ˌ). | |
147 | \u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU | |
148 | \u0E34 ↔ i ; # THAI CHARACTER SARA I | |
149 | # fallbacks (backward direction only): a Latin g, h, j, q or z left unmatched by the rules above is rewritten to k, c or s, with the cursor placed before the replacement so that it is transliterated again; the closing ':: (lower)' step names a transform for the backward direction only | |
150 | | k ← g ; | |
151 | | k ← h ; | |
152 | | c ← j ; | |
153 | | k ← q ; | |
154 | | s ← z ; | |
155 | :: (lower); | |
156 |