-#--------------------------------------------------------------------
-# Copyright (c) 1999-2004, International Business Machines
-# Corporation and others. All Rights Reserved.
-#--------------------------------------------------------------------
+# © 2016 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html#License
+#
+# File: ThaiLogical_Latin.txt
+# Generated from CLDR
+#
# Thai-Latin
# This set of rules follows ISO 11940
-# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
-# except that that does not mention an implicit vowel, so we use ọ
+# see http://homepage.mac.com/sirbinks/pdf/Thai.r2.pdf
+# except that the standard does not mention an implicit vowel, so we use o\u0323
#
# The transcription is fairly ugly, so we ought to also do the UNGEGN version
-# see: http://www.eki.ee/wgrs/rom1_th.pdf
+# see: http://www.eki.ee/wgrs/rom1_th.pdf
# and probably make that the main variant.
-
+#
# Note: this is an internal file. The NFD/NFC is handled externally, in the index
# The insertion of spaces between words, the reversal of the vowels
# and the conversion of space to semicolon are done *outside* of these rules.
# So as far as these rules are concerned, the vowels are in logical order!
-
# insert implicit vowel (and remove it going the other way)
# COMMENTED out: the implicit vowel positions cannot be predicted algorithmically
#$consonant = [ก-ฮ];
-#$vowel = [ะ-ฺเ-ไ็];
-
-#{ ( $consonant ) } [^$vowel ] > | $1 ;
-# > ọ ;
-# < ọ ;
-
+#$vowel = [ะ-\u0E3Aเ-ไ\u0E47];
+#{ ( $consonant ) } [^$vowel \uE000] → | $1 \uE000 ;
+#\uE000 → o\u0323 ;
+# ← o\u0323 ;
# Marks whose canonical combining class is neither 0 nor above. Canonical reordering
# can leave such marks between a base letter and its above-accent, which is why the
# backward rules skip over them with ($notAbove*).
$notAbove = [^\p{ccc=0}\p{ccc=above}] ;
# Counterpart for below-accents. No active rule visible in this file references it;
# it is only mentioned in the commented-out $notHAccent sketch.
$notBelow = [^\p{ccc=0}\p{ccc=below}] ;
-
# Consonants
# Warning: the 'h's need to be handled carefully!
# What we really want to say is the following, but we can't
-# $notHAccent = !($notAbove* ̄ | $notBelow* ̣) ;
-
+# $notHAccent = !($notAbove* \u0304 | $notBelow* \u0323) ;
# Since the only accents we care about that could cause problems are free-standing accents below, we use instead:
$freeStandingBelow = [\u0325 ]; # U+0325 COMBINING RING BELOW
-$hAccent = [ ̄ ̣];
# The two accents that the rules in this file attach to a Latin h:
# U+0304 COMBINING MACRON (HO HIP) and U+0323 COMBINING DOT BELOW (HO NOKHUK).
+$hAccent = [ \u0304 \u0323];
# Context 0: the next character is neither a free-standing below accent nor an h-accent.
$notHAccent0 = [^$freeStandingBelow$hAccent];
# Context 1: a free-standing below accent followed by something that is not an h-accent.
$notHAccent1 = $freeStandingBelow [^$hAccent];
-
-ห > h̄ ; # THAI CHARACTER HO HIP
- ห | $1 < h ($notAbove*) ̄; # backward case, account for reordering
-ฮ <> ḥ ; # THAI CHARACTER HO NOKHUK
-
-ข <> k̄h ; # THAI CHARACTER KHO KHAI
-ฃ <> ḳ̄h ; # THAI CHARACTER KHO KHUAT
-ฅ <> kʹh ; # THAI CHARACTER KHO KHON
-ฆ <> ḳh ; # THAI CHARACTER KHO RAKHANG
-ค < kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
-ค <> kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
-ก <> k ; # THAI CHARACTER KO KAI
-
-ภ <> p̣h ; # THAI CHARACTER PHO SAMPHAO
-ผ <> p̄h ; # THAI CHARACTER PHO PHUNG
-พ < ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
-พ <> ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
-ป <> p ; # THAI CHARACTER PO PLA
-
-ฉ <> c̄h ; # THAI CHARACTER CHO CHING
-ฌ <> c̣h ; # THAI CHARACTER CHO CHOE
-ช < ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
-ช <> ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
-จ <> c ; # THAI CHARACTER CHO CHAN
-
-ฐ <> ṭ̄h ; # THAI CHARACTER THO THAN
-ฑ <> ṯh ; # THAI CHARACTER THO NANGMONTHO
-ฒ <> tʹh ; # THAI CHARACTER THO PHUTHAO
-ถ <> t̄h ; # THAI CHARACTER THO THUNG
-ธ <> ṭh ; # THAI CHARACTER THO THONG
-ท < th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
-ท <> th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
# Rule order matters: accented and multi-letter Latin forms are listed before the
# bare letter they start with (for example k\u0304h and kh come before k).
# HO HIP is split into a forward-only rule plus a backward rule. The backward rule
# accepts marks between h and the macron, captures them as $1, and re-emits them after
# the cursor (|) so that later rules still convert them.
+ห → h\u0304 ; # THAI CHARACTER HO HIP
+ห | $1 ← h ($notAbove*) \u0304; # backward case, account for reordering
+ฮ ↔ h\u0323 ; # THAI CHARACTER HO NOKHUK
+ข ↔ k\u0304h ; # THAI CHARACTER KHO KHAI
+ฃ ↔ k\u0323\u0304h ; # THAI CHARACTER KHO KHUAT
+ฅ ↔ kʹh ; # THAI CHARACTER KHO KHON
+ฆ ↔ k\u0323h ; # THAI CHARACTER KHO RAKHANG
# Plain Xh digraphs (kh, ph, ch, th): the context after } belongs to the Latin-side
# pattern and keeps the backward match from consuming an h that carries its own accent.
# The $notHAccent1 variant is backward-only because the following two-way rule already
# supplies the forward mapping.
+ค ← kh } $notHAccent1 ; # THAI CHARACTER KHO KHWAI
+ค ↔ kh } $notHAccent0 ; # THAI CHARACTER KHO KHWAI
+ก ↔ k ; # THAI CHARACTER KO KAI
+ภ ↔ p\u0323h ; # THAI CHARACTER PHO SAMPHAO
+ผ ↔ p\u0304h ; # THAI CHARACTER PHO PHUNG
+พ ← ph } $notHAccent1 ; # THAI CHARACTER PHO PHAN
+พ ↔ ph } $notHAccent0 ; # THAI CHARACTER PHO PHAN
+ป ↔ p ; # THAI CHARACTER PO PLA
+ฉ ↔ c\u0304h ; # THAI CHARACTER CHO CHING
+ฌ ↔ c\u0323h ; # THAI CHARACTER CHO CHOE
+ช ← ch } $notHAccent1 ; # THAI CHARACTER CHO CHANG
+ช ↔ ch } $notHAccent0 ; # THAI CHARACTER CHO CHANG
+จ ↔ c ; # THAI CHARACTER CHO CHAN
+ฐ ↔ t\u0323\u0304h ; # THAI CHARACTER THO THAN
+ฑ ↔ t\u0331h ; # THAI CHARACTER THO NANGMONTHO
+ฒ ↔ tʹh ; # THAI CHARACTER THO PHUTHAO
+ถ ↔ t\u0304h ; # THAI CHARACTER THO THUNG
+ธ ↔ t\u0323h ; # THAI CHARACTER THO THONG
+ท ← th } $notHAccent1 ; # THAI CHARACTER THO THAHAN
+ท ↔ th } $notHAccent0 ; # THAI CHARACTER THO THAHAN
#Note: TO PATAK deviates from ISO since t-dotunder + h would be ambiguous. So it uses vertical tick.
-ฏ <> t̩ ; # THAI CHARACTER TO PATAK
-ต <> t ; # THAI CHARACTER TO TAO
-
+ฏ ↔ t\u0329 ; # THAI CHARACTER TO PATAK (t + U+0329 COMBINING VERTICAL LINE BELOW)
+ต ↔ t ; # THAI CHARACTER TO TAO (bare t, listed after every longer t-form)
# since there is no singleton g (generated), don't worry about that.
-ง <> ng ; # THAI CHARACTER NGO NGU
-ณ <> ṇ ; # THAI CHARACTER NO NEN
-น <> n ; # THAI CHARACTER NO NU
-
-ญ <> ỵ ; # THAI CHARACTER YO YING
-ฎ <> ḍ ; # THAI CHARACTER DO CHADA
-ด <> d ; # THAI CHARACTER DO DEK
-
-บ <> b ; # THAI CHARACTER BO BAIMAI
-ฝ <> f̄ ; # THAI CHARACTER FO FA
- ฝ | $1 < f ($notAbove*) ̄; # backward case, account for reordering
-
-ม <> m ; # THAI CHARACTER MO MA
-ย <> y ; # THAI CHARACTER YO YAK
-ร <> r ; # THAI CHARACTER RO RUA
-ฤ <> v ; # THAI CHARACTER RU
-ฦ <> ł ; # THAI CHARACTER LU
-ว <> w ; # THAI CHARACTER WO WAEN
-
-ศ <> ṣ̄ ; # THAI CHARACTER SO SALA***
- ศ | $1 < s ̣ ($notAbove*) ̄; # backward case, account for reordering
-ษ <> s̄ʹ ; # THAI CHARACTER SO RUSI
-ส > s̄ ; # THAI CHARACTER SO SUA***
- ส | $1 < s ($notAbove*) ̄; # backward case, account for reordering
-
-ฬ <> ḷ ; # THAI CHARACTER LO CHULA
-ล <> l ; # THAI CHARACTER LO LING
-ฟ <> f ; # THAI CHARACTER FO FAN
-
-อ <> x ; # THAI CHARACTER O ANG
-ซ <> s ; # THAI CHARACTER SO SO
-
+ง ↔ ng ; # THAI CHARACTER NGO NGU
+ณ ↔ n\u0323 ; # THAI CHARACTER NO NEN
+น ↔ n ; # THAI CHARACTER NO NU
+ญ ↔ y\u0323 ; # THAI CHARACTER YO YING
+ฎ ↔ d\u0323 ; # THAI CHARACTER DO CHADA
+ด ↔ d ; # THAI CHARACTER DO DEK
+บ ↔ b ; # THAI CHARACTER BO BAIMAI
# FO FA: the two-way rule handles f immediately followed by the macron; the extra
# backward rule also accepts marks in between and re-emits them ($1) after the cursor.
+ฝ ↔ f\u0304 ; # THAI CHARACTER FO FA
+ฝ | $1 ← f ($notAbove*) \u0304; # backward case, account for reordering
+ม ↔ m ; # THAI CHARACTER MO MA
+ย ↔ y ; # THAI CHARACTER YO YAK
+ร ↔ r ; # THAI CHARACTER RO RUA
+ฤ ↔ v ; # THAI CHARACTER RU
+ฦ ↔ ł ; # THAI CHARACTER LU
+ว ↔ w ; # THAI CHARACTER WO WAEN
# The s-family is ordered longest Latin form first: s + dot below + macron, then
# s + macron + prime, then s + macron; the bare s (SO SO) comes last in this group.
+ศ ↔ s\u0323\u0304 ; # THAI CHARACTER SO SALA***
+ศ | $1 ← s \u0323 ($notAbove*) \u0304; # backward case, account for reordering
+ษ ↔ s\u0304ʹ ; # THAI CHARACTER SO RUSI
+ส → s\u0304 ; # THAI CHARACTER SO SUA***
+ส | $1 ← s ($notAbove*) \u0304; # backward case, account for reordering
+ฬ ↔ l\u0323 ; # THAI CHARACTER LO CHULA
+ล ↔ l ; # THAI CHARACTER LO LING
+ฟ ↔ f ; # THAI CHARACTER FO FAN
+อ ↔ x ; # THAI CHARACTER O ANG
+ซ ↔ s ; # THAI CHARACTER SO SO
# vowels
-
- ั <> ạ ; # THAI CHARACTER MAI HAN-AKAT
-
-า > ā ; # THAI CHARACTER SARA AA
- า | $1 < a ($notAbove*) ̄; # backward case, account for reordering
-
# Accented a-forms are listed ahead of the bare a rule (SARA A) that appears further down.
+\u0E31 ↔ a\u0323 ; # THAI CHARACTER MAI HAN-AKAT
+า → a\u0304 ; # THAI CHARACTER SARA AA
+า | $1 ← a ($notAbove*) \u0304; # backward case, account for reordering
# We deviate from ISO for SARA AM for disambiguation
-ำ > a ̉; # THAI CHARACTER SARA AM
- ำ | $1 < a ($notAbove*) ̉ ; # backward case, account for reordering
-
-ะ <> a ; # THAI CHARACTER SARA A
- ี <> ī ; # THAI CHARACTER SARA II
- ี | $1 < i ($notAbove*) ̄ ; # backward case, account for reordering
-
- ื <> ụ̄ ; # THAI CHARACTER SARA UEE
- ื | $1 < u ̣ ($notAbove*) ̄ ; # backward case, account for reordering
-
- ึ <> ụ ; # THAI CHARACTER SARA UE
- ู <> ū ; # THAI CHARACTER SARA UU
- ู | $1 < u ($notAbove*) ̄ ; # backward case, account for reordering
-
- ุ <> u ; # THAI CHARACTER SARA U
-
-ฯ <> ‡ ; # THAI CHARACTER PAIYANNOI
-
-# ฿ <> XXX ; # THAI CURRENCY SYMBOL BAHT
-
-เ <> e ; # THAI CHARACTER SARA E
-แ <> æ ; # THAI CHARACTER SARA AE
-โ <> o ; # THAI CHARACTER SARA O
-ใ <> ı ; # THAI CHARACTER SARA AI MAIMUAN
-ไ <> ị ; # THAI CHARACTER SARA AI MAIMALAI
-ๅ <> ɨ ; # THAI CHARACTER LAKKHANGYAO
- ็ <> ̆ ; # THAI CHARACTER MAITAIKHU
- ่ <> ̀ ; # THAI CHARACTER MAI EK
- ้ <> ̂ ; # THAI CHARACTER MAI THO
- ๊ <> ́ ; # THAI CHARACTER MAI TRI
- ๋ <> ̌ ; # THAI CHARACTER MAI CHATTAWA
- ์ <> ̒ ; # THAI CHARACTER THANTHAKHAT
- ๎ <> '~' ; # THAI CHARACTER YAMAKKAN
-
+ำ → a \u0309; # THAI CHARACTER SARA AM (a + U+0309 COMBINING HOOK ABOVE)
+ำ | $1 ← a ($notAbove*) \u0309 ; # backward case, account for reordering
+ะ ↔ a ; # THAI CHARACTER SARA A
+\u0E35 ↔ i\u0304 ; # THAI CHARACTER SARA II
+\u0E35 | $1 ← i ($notAbove*) \u0304 ; # backward case, account for reordering
+\u0E37 ↔ u\u0323\u0304 ; # THAI CHARACTER SARA UEE
+\u0E37 | $1 ← u \u0323 ($notAbove*) \u0304 ; # backward case, account for reordering
+\u0E36 ↔ u\u0323 ; # THAI CHARACTER SARA UE
+\u0E39 ↔ u\u0304 ; # THAI CHARACTER SARA UU
+\u0E39 | $1 ← u ($notAbove*) \u0304 ; # backward case, account for reordering
+\u0E38 ↔ u ; # THAI CHARACTER SARA U
+ฯ ↔ ‡ ; # THAI CHARACTER PAIYANNOI (U+2021 DOUBLE DAGGER)
+# ฿ ↔ XXX ; # THAI CURRENCY SYMBOL BAHT
+เ ↔ e ; # THAI CHARACTER SARA E
+แ ↔ æ ; # THAI CHARACTER SARA AE
+โ ↔ o ; # THAI CHARACTER SARA O
+ใ ↔ ı ; # THAI CHARACTER SARA AI MAIMUAN (U+0131 LATIN SMALL LETTER DOTLESS I)
+ไ ↔ i\u0323 ; # THAI CHARACTER SARA AI MAIMALAI
+ๅ ↔ ɨ ; # THAI CHARACTER LAKKHANGYAO (U+0268 LATIN SMALL LETTER I WITH STROKE)
# Thai signs and tone marks map one-to-one onto Latin combining marks.
+\u0E47 ↔ \u0306 ; # THAI CHARACTER MAITAIKHU (U+0306 COMBINING BREVE)
+\u0E48 ↔ \u0300 ; # THAI CHARACTER MAI EK (U+0300 COMBINING GRAVE ACCENT)
+\u0E49 ↔ \u0302 ; # THAI CHARACTER MAI THO (U+0302 COMBINING CIRCUMFLEX ACCENT)
+\u0E4A ↔ \u0301 ; # THAI CHARACTER MAI TRI (U+0301 COMBINING ACUTE ACCENT)
+\u0E4B ↔ \u030C ; # THAI CHARACTER MAI CHATTAWA (U+030C COMBINING CARON)
+\u0E4C ↔ \u0312 ; # THAI CHARACTER THANTHAKHAT (U+0312 COMBINING TURNED COMMA ABOVE)
+\u0E4E ↔ '~' ; # THAI CHARACTER YAMAKKAN (quoted literal tilde)
# We deviate from ISO for disambiguation
- ํ <> ̊ ; # THAI CHARACTER NIKHAHIT
-
-๏ <> § ; # THAI CHARACTER FONGMAN
-
-๐ <> 0 ; # THAI DIGIT ZERO
-๑ <> 1 ; # THAI DIGIT ONE
-๒ <> 2 ; # THAI DIGIT TWO
-๓ <> 3 ; # THAI DIGIT THREE
-๔ <> 4 ; # THAI DIGIT FOUR
-๕ <> 5 ; # THAI DIGIT FIVE
-๖ <> 6 ; # THAI DIGIT SIX
-๗ <> 7 ; # THAI DIGIT SEVEN
-๘ <> 8 ; # THAI DIGIT EIGHT
-๙ <> 9 ; # THAI DIGIT NINE
-
-๚ <> '||' ; # THAI CHARACTER ANGKHANKHU
-
-๛ <> » ; # THAI CHARACTER KHOMUT
-ๆ <> « ; # THAI CHARACTER MAIYAMOK
-
+\u0E4D ↔ \u030A ; # THAI CHARACTER NIKHAHIT (U+030A COMBINING RING ABOVE)
+๏ ↔ '§' ; # THAI CHARACTER FONGMAN
# Thai digits map one-to-one onto ASCII digits.
+๐ ↔ 0 ; # THAI DIGIT ZERO
+๑ ↔ 1 ; # THAI DIGIT ONE
+๒ ↔ 2 ; # THAI DIGIT TWO
+๓ ↔ 3 ; # THAI DIGIT THREE
+๔ ↔ 4 ; # THAI DIGIT FOUR
+๕ ↔ 5 ; # THAI DIGIT FIVE
+๖ ↔ 6 ; # THAI DIGIT SIX
+๗ ↔ 7 ; # THAI DIGIT SEVEN
+๘ ↔ 8 ; # THAI DIGIT EIGHT
+๙ ↔ 9 ; # THAI DIGIT NINE
+๚ ↔ '||' ; # THAI CHARACTER ANGKHANKHU (quoted literal: two vertical bars)
+๛ ↔ » ; # THAI CHARACTER KHOMUT
+ๆ ↔ « ; # THAI CHARACTER MAIYAMOK
# moved down to make shorter first
#Note: PHINTHU deviates from ISO since underring causes canonical problems. So it uses spacing tick below.
- ฺ <> ˌ ; # THAI CHARACTER PHINTHU
- ิ <> i ; # THAI CHARACTER SARA I
-
+\u0E3A ↔ ˌ ; # THAI CHARACTER PHINTHU (U+02CC MODIFIER LETTER LOW VERTICAL LINE)
+\u0E34 ↔ i ; # THAI CHARACTER SARA I (bare i, listed after the i\u0304 and i\u0323 rules)
# fallbacks
-
-| k < g ;
-| k < h ;
-| c < j ;
-| k < q ;
-| s < z ;
-
# Backward-only fallbacks for Latin letters that have no standalone rule above.
# Each one is rewritten to a letter that does have a rule; the cursor (|) is left
# before the replacement so the replacement itself is then converted by the rules above.
+| k ← g ;
+| k ← h ;
+| c ← j ;
+| k ← q ;
+| s ← z ;
# Transform rule with only a parenthesized ID: in ICU rule syntax this applies in the
# reverse (Latin to Thai) direction only, lowercasing the Latin input.
:: (lower);
+