]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/Arab_Latn.txt
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / translit / Arab_Latn.txt
CommitLineData
f3c0d7a5
A
1# © 2016 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html#License
3#
2ca993e8 4# File: Arab_Latn.txt
f3c0d7a5 5# Generated from CLDR
73c04bcf 6#
2ca993e8
A
7
8# Generally follows UNGEGN
9# http://www.eki.ee/wgrs/rom1_ar.pdf
10# Occasionally deviates in the direction of ISO 233
11# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
12# a) where required for disambiguation.
13# b) with underdot instead of cedilla for letters like SAD,
14# since those are explicitly in Unicode for transliteration.
15# c) with extra non-Arabic-language letters, like PEH
16#
17# Does *not* do assimilation of "al", nor hyphenation.
18# While it could be done, we need to determine whether a prefix "al" could
19# occur other than as the definite article (since no space is used).
729e4ab9 20:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ;
374ca955 21:: NFKD (NFC);
51004dcb
A
22$disambig = \u0331 ;
23$disambig2 = \u0330 ;
24$under = \u0323 ;
73c04bcf 25$descender = ˌ;
2ca993e8
A
26$notAbove = [[:^ccc=0:] & [:^ccc=230:]];
27# non-letters
729e4ab9
A
28[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
29[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
30٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
31٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
2ca993e8 32# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
729e4ab9
A
33، ↔ ',' ; # ARABIC COMMA
34؛ ↔ ';' ; # ARABIC SEMICOLON
35؟ ↔ '?' ; # ARABIC QUESTION MARK
36٪ ↔ '%' ; # ARABIC PERCENT SIGN
37۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
38۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
39۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
40۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
41۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
42۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
43۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
44۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
45۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
46۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
47٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
48١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
49٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
50٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
51٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
52٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
53٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
54٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
55٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
56٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
2ca993e8
A
57# letters
58# long vowels
729e4ab9
A
59\u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
60\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
61\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
2ca993e8 62# longer items moved here to prevent masking
729e4ab9
A
63ث ↔ t h $disambig ; # ARABIC LETTER THEH
64ذ ↔ d h $disambig ; # ARABIC LETTER THAL
65ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
66ص ↔ s $under ; # ARABIC LETTER SAD
67ض ↔ d $under ; # ARABIC LETTER DAD
68ط ↔ t $under ; # ARABIC LETTER TAH
69ظ ↔ z $under ; # ARABIC LETTER ZAH
70غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
2ca993e8
A
71# WARNING: special case
72# ←t, umlaut, half-ring below→ will be canonically reordered as ←t, half-ring below, umlaut→
73# so in the reverse (Latin → Arabic) direction, we have to skip over (but preserve) the half-ring below (or other marks like it)
74# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
729e4ab9
A
75ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
76ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
2ca993e8 77# non-Arabic language
729e4ab9
A
78ژ ↔ z h $disambig ; # ARABIC LETTER JEH
79ڭ ↔ n $disambig g ; # ARABIC LETTER NG
80ۋ ↔ v $disambig ; # ARABIC LETTER VE
81ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
82ښ ↔ s $descender; # ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE
2ca993e8 83# Arabic language
729e4ab9
A
84ء ↔ ʾ ; # ARABIC LETTER HAMZA
85ا ↔ a $under; # ARABIC LETTER ALEF
86ب ↔ b ; # ARABIC LETTER BEH
87ت ↔ t ; # ARABIC LETTER TEH
88ج ↔ j ; # ARABIC LETTER JEEM
89ح ↔ h $under ; # ARABIC LETTER HAH
90خ ↔ k h $disambig ; # ARABIC LETTER KHAH
91د ↔ d ; # ARABIC LETTER DAL
92ر ↔ r ; # ARABIC LETTER REH
93ز ↔ z ; # ARABIC LETTER ZAIN
94س ↔ s ; # ARABIC LETTER SEEN
95ع ↔ ʿ ; # ARABIC LETTER AIN
96ـ → ; # ARABIC TATWEEL
97ف ↔ f ; # ARABIC LETTER FEH
98ق ↔ q ; # ARABIC LETTER QAF
99ک ↔ k $disambig ; # ARABIC LETTER KEHEH
100ك ↔ k ; # ARABIC LETTER KAF
101ل ↔ l ; # ARABIC LETTER LAM
102م ↔ m ; # ARABIC LETTER MEEM
103ن ↔ n ; # ARABIC LETTER NOON
104ه ↔ h ; # ARABIC LETTER HEH
105و ↔ w ; # ARABIC LETTER WAW
106ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
107ي ↔ y ; # ARABIC LETTER YEH
108\u064B ↔ aⁿ ; # ARABIC FATHATAN
109\u064C ↔ uⁿ ; # ARABIC DAMMATAN
110\u064D ↔ iⁿ ; # ARABIC KASRATAN
111\u064E ↔ a ; # ARABIC FATHA
112\u064F ↔ u ; # ARABIC DAMMA
113\u0650 ↔ i ; # ARABIC KASRA
51004dcb
A
114\u0651 ↔ \u0303 ; # ARABIC SHADDA
115\u0652 ↔ \u030A ; # ARABIC SUKUN
2ca993e8 116# special combining marks
51004dcb
A
117\u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE
118\u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE
119\u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW
2ca993e8 120# Some non-Arabic language (not in UNGEGN)
729e4ab9
A
121پ ↔ p ; # ARABIC LETTER PEH
122چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
123ڤ ↔ v ; # ARABIC LETTER VEH
2ca993e8
A
124# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
125# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
729e4ab9 126گ ↔ g ; # ARABIC LETTER GAF
2ca993e8 127# fallbacks
729e4ab9
A
128| s ← c } [eiy]; # reverse only: "c" before e, i or y is rewritten to "s"; the leading | puts the cursor before the output so it is re-processed by the rules
129| k ← c ; # reverse only: any other "c" is rewritten to "k" (must come after the more specific rule above)
130| i ← e ; # reverse only: "e" is rewritten to "i" and re-processed
131| u ← o ; # reverse only: "o" is rewritten to "u" and re-processed
132| ks ← x ; # reverse only: "x" is rewritten to "ks" and re-processed
133| n ← ‎ⁿ;
374ca955
A
134:: (lower) ;
135::NFC (NFD);
73c04bcf 136:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] );
2ca993e8 137