]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | # © 2016 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html#License | |
3 | # | |
73c04bcf | 4 | # File: ThaiLogical_Latin.txt |
f3c0d7a5 | 5 | # Generated from CLDR |
374ca955 | 6 | # |
2ca993e8 A |
7 | |
8 | # Thai-Latin | |
9 | # This set of rules follows ISO 11940 | |
10 | # see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf | |
11 | # except that that document does not mention an implicit vowel, so we use o\u0323 | |
12 | # | |
13 | # The transcription is fairly ugly, so we ought to also do the UNGEGN version | |
14 | # see: http://www.eki.ee/wgrs/rom1_th.pdf | |
15 | # and probably make that the main variant. | |
16 | # | |
17 | # Note: this is an internal file. The NFD/NFC is handled externally, in the index | |
18 | # The insertion of spaces between words, the reversal of the vowels | |
19 | # and the conversion of space to semicolon are done *outside* of these rules. | |
20 | # So as far as these rules are concerned, the vowels are in logical order! | |
21 | # insert implicit vowel (and remove it going the other way) | |
22 | # COMMENTED out: the implicit vowel positions cannot be predicted algorithmically | |
23 | #$consonant = [ก-ฮ]; | |
24 | #$vowel = [ะ-\u0E3Aเ-ไ\u0E47]; | |
25 | #{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ; | |
26 | #\uE000 → o\u0323 ; | |
27 | # ← o\u0323 ; | |
374ca955 A |
28 | $notAbove = [^\p{ccc=0}\p{ccc=above}] ; |
29 | $notBelow = [^\p{ccc=0}\p{ccc=below}] ; | |
2ca993e8 A |
30 | # Consonants |
31 | # Warning: the 'h's need to be handled carefully! | |
32 | # What we really want to say is the following, but we can't | |
33 | # $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ; | |
34 | # Since the only accents we care about that could cause problems are free-standing accents below, we use instead: | |
51004dcb A |
35 | $freeStandingBelow = [\u0325 ]; |
36 | $hAccent = [ \u0304 \u0323]; | |
374ca955 A |
37 | $notHAccent0 = [^$freeStandingBelow$hAccent]; |
38 | $notHAccent1 = $freeStandingBelow [^$hAccent]; | |
729e4ab9 | 39 | ห → h\u0304 ; # THAI CHARACTER HO HIP |
51004dcb | 40 | ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering |
729e4ab9 A |
41 | ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK |
42 | ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI | |
43 | ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT | |
44 | ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON | |
45 | ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG | |
46 | ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI | |
47 | ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI | |
48 | ก ↔ k ; # THAI CHARACTER KO KAI | |
49 | ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO | |
50 | ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG | |
51 | พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN | |
52 | พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN | |
53 | ป ↔ p ; # THAI CHARACTER PO PLA | |
54 | ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING | |
55 | ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE | |
56 | ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG | |
57 | ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG | |
58 | จ ↔ c ; # THAI CHARACTER CHO CHAN | |
59 | ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN | |
60 | ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO | |
61 | ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO | |
62 | ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG | |
63 | ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG | |
64 | ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN | |
65 | ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN | |
2ca993e8 | 66 | #Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses a vertical line below (\u0329) instead. |
729e4ab9 A |
67 | ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK |
68 | ต ↔ t ; # THAI CHARACTER TO TAO | |
2ca993e8 | 69 | # since there is no singleton g (generated), don't worry about that. |
729e4ab9 A |
70 | ง ↔ ng ; # THAI CHARACTER NGO NGU |
71 | ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN | |
72 | น ↔ n ; # THAI CHARACTER NO NU | |
51004dcb | 73 | ญ ↔ y\u0323 ; # THAI CHARACTER YO YING |
729e4ab9 A |
74 | ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA |
75 | ด ↔ d ; # THAI CHARACTER DO DEK | |
76 | บ ↔ b ; # THAI CHARACTER BO BAIMAI | |
77 | ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA | |
51004dcb | 78 | ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering |
729e4ab9 A |
79 | ม ↔ m ; # THAI CHARACTER MO MA |
80 | ย ↔ y ; # THAI CHARACTER YO YAK | |
81 | ร ↔ r ; # THAI CHARACTER RO RUA | |
82 | ฤ ↔ v ; # THAI CHARACTER RU | |
83 | ฦ ↔ ł ; # THAI CHARACTER LU | |
84 | ว ↔ w ; # THAI CHARACTER WO WAEN | |
85 | ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA*** | |
51004dcb | 86 | ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering |
729e4ab9 A |
87 | ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI |
88 | ส → s\u0304 ; # THAI CHARACTER SO SUA*** | |
51004dcb | 89 | ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering |
729e4ab9 A |
90 | ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA |
91 | ล ↔ l ; # THAI CHARACTER LO LING | |
92 | ฟ ↔ f ; # THAI CHARACTER FO FAN | |
93 | อ ↔ x ; # THAI CHARACTER O ANG | |
94 | ซ ↔ s ; # THAI CHARACTER SO SO | |
2ca993e8 | 95 | # vowels |
729e4ab9 A |
96 | \u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT |
97 | า → a\u0304 ; # THAI CHARACTER SARA AA | |
51004dcb | 98 | า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering |
2ca993e8 | 99 | # We deviate from ISO for SARA AM for disambiguation |
51004dcb A |
100 | ำ → a \u0309; # THAI CHARACTER SARA AM |
101 | ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering | |
729e4ab9 A |
102 | ะ ↔ a ; # THAI CHARACTER SARA A |
103 | \u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II | |
51004dcb | 104 | \u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering |
729e4ab9 | 105 | \u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE |
51004dcb | 106 | \u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering |
729e4ab9 A |
107 | \u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE |
108 | \u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU | |
51004dcb | 109 | \u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering |
729e4ab9 A |
110 | \u0E38 ↔ u ; # THAI CHARACTER SARA U |
111 | ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI | |
2ca993e8 | 112 | # ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT |
729e4ab9 A |
113 | เ ↔ e ; # THAI CHARACTER SARA E |
114 | แ ↔ æ ; # THAI CHARACTER SARA AE | |
115 | โ ↔ o ; # THAI CHARACTER SARA O | |
116 | ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN | |
117 | ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI | |
118 | ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO | |
119 | \u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU | |
120 | \u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK | |
121 | \u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO | |
122 | \u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI | |
123 | \u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA | |
124 | \u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT | |
125 | \u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN | |
2ca993e8 | 126 | # We deviate from ISO for disambiguation |
51004dcb | 127 | \u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT |
729e4ab9 A |
128 | ๏ ↔ '§' ; # THAI CHARACTER FONGMAN |
129 | ๐ ↔ 0 ; # THAI DIGIT ZERO | |
130 | ๑ ↔ 1 ; # THAI DIGIT ONE | |
131 | ๒ ↔ 2 ; # THAI DIGIT TWO | |
132 | ๓ ↔ 3 ; # THAI DIGIT THREE | |
133 | ๔ ↔ 4 ; # THAI DIGIT FOUR | |
134 | ๕ ↔ 5 ; # THAI DIGIT FIVE | |
135 | ๖ ↔ 6 ; # THAI DIGIT SIX | |
136 | ๗ ↔ 7 ; # THAI DIGIT SEVEN | |
137 | ๘ ↔ 8 ; # THAI DIGIT EIGHT | |
138 | ๙ ↔ 9 ; # THAI DIGIT NINE | |
139 | ๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU | |
140 | ๛ ↔ » ; # THAI CHARACTER KHOMUT | |
141 | ๆ ↔ « ; # THAI CHARACTER MAIYAMOK | |
2ca993e8 A |
142 | # moved down to make shorter first |
143 | #Note: PHINTHU deviates from ISO since the under-ring causes canonical problems. So it uses a spacing tick below. | |
729e4ab9 A |
144 | \u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU |
145 | \u0E34 ↔ i ; # THAI CHARACTER SARA I | |
2ca993e8 | 146 | # fallbacks |
729e4ab9 A |
147 | | k ← g ; |
148 | | k ← h ; | |
149 | | c ← j ; | |
150 | | k ← q ; | |
151 | | s ← z ; | |
374ca955 | 152 | :: (lower); |
2ca993e8 | 153 |