]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/translit/Arab_Latn.txt
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Arab_Latn.txt
CommitLineData
73c04bcf
A
1# ***************************************************************************
2# *
2ca993e8 3# * Copyright (C) 2004-2016, International Business Machines
73c04bcf
A
4# * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5# *
6# ***************************************************************************
2ca993e8 7# File: Arab_Latn.txt
46f4442e 8# Generated from CLDR
73c04bcf 9#
2ca993e8
A
10
11# Generally follows UNGEGN
12# http://www.eki.ee/wgrs/rom1_ar.pdf
13# Occasionally deviates in the direction of ISO 233
14# http://homepage.mac.com/sirbinks/pdf/Arabic.pdf
15# a) where required for disambiguation.
16# b) with underdot instead of cedilla for letters like SAD,
17# since those are explicitly in Unicode for transliteration.
18# c) with extra non-Arabic-language letters, like PEH
19#
20# Does *not* do assimilation of "al", nor hyphenation.
21# While it could be done, we need to determine whether a prefix "al" could
22# occur other than as the definite article (since no space is used).
729e4ab9 23:: [[:Arabic:][:block=ARABIC:][‎ⁿ،؛؟ـ\u064B-\u0655٠-٬۰-۹﷼ښ]] ;
374ca955 24:: NFKD (NFC);
51004dcb
A
25$disambig = \u0331 ;
26$disambig2 = \u0330 ;
27$under = \u0323 ;
73c04bcf 28$descender = ˌ;
2ca993e8
A
29$notAbove = [[:^ccc=0:] & [:^ccc=230:]];
30# non-letters
729e4ab9
A
31[:Nd:]{٫}[:Nd:] ↔ [:Nd:]{','}[:Nd:] ; # ARABIC DECIMAL SEPARATOR
32[:Nd:]{٬}[:Nd:] ↔ [:Nd:]{'.'}[:Nd:] ; # ARABIC THOUSANDS SEPARATOR
33٫ ↔ ',' $disambig ; # ARABIC DECIMAL SEPARATOR
34٬ ↔ '.' $disambig ; # ARABIC THOUSANDS SEPARATOR
2ca993e8 35# ٭ ↔ ; # ARABIC FIVE POINTED STAR // no need to transliterate
729e4ab9
A
36، ↔ ',' ; # ARABIC COMMA
37؛ ↔ ';' ; # ARABIC SEMICOLON
38؟ ↔ '?' ; # ARABIC QUESTION MARK
39٪ ↔ '%' ; # ARABIC PERCENT SIGN
40۰ ↔ 0 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ZERO
41۱ ↔ 1 $disambig ; # EXTENDED ARABIC-INDIC DIGIT ONE
42۲ ↔ 2 $disambig ; # EXTENDED ARABIC-INDIC DIGIT TWO
43۳ ↔ 3 $disambig ; # EXTENDED ARABIC-INDIC DIGIT THREE
44۴ ↔ 4 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FOUR
45۵ ↔ 5 $disambig ; # EXTENDED ARABIC-INDIC DIGIT FIVE
46۶ ↔ 6 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SIX
47۷ ↔ 7 $disambig ; # EXTENDED ARABIC-INDIC DIGIT SEVEN
48۸ ↔ 8 $disambig ; # EXTENDED ARABIC-INDIC DIGIT EIGHT
49۹ ↔ 9 $disambig ; # EXTENDED ARABIC-INDIC DIGIT NINE
50٠ ↔ 0 ; # ARABIC-INDIC DIGIT ZERO
51١ ↔ 1 ; # ARABIC-INDIC DIGIT ONE
52٢ ↔ 2 ; # ARABIC-INDIC DIGIT TWO
53٣ ↔ 3 ; # ARABIC-INDIC DIGIT THREE
54٤ ↔ 4 ; # ARABIC-INDIC DIGIT FOUR
55٥ ↔ 5 ; # ARABIC-INDIC DIGIT FIVE
56٦ ↔ 6 ; # ARABIC-INDIC DIGIT SIX
57٧ ↔ 7 ; # ARABIC-INDIC DIGIT SEVEN
58٨ ↔ 8 ; # ARABIC-INDIC DIGIT EIGHT
59٩ ↔ 9 ; # ARABIC-INDIC DIGIT NINE
2ca993e8
A
60# letters
61# long vowels
729e4ab9
A
62\u064Eا↔ a\u0304 ; # ARABIC FATHA, ARABIC LETTER ALEF
63\u064Fو ↔ u\u0304 ; # ARABIC DAMMA, ARABIC LETTER WAW
64\u0650ي ↔ i\u0304 ; # ARABIC KASRA, ARABIC LETTER YEH
2ca993e8 65# longer items moved here to prevent masking
729e4ab9
A
66ث ↔ t h $disambig ; # ARABIC LETTER THEH
67ذ ↔ d h $disambig ; # ARABIC LETTER THAL
68ش ↔ s h $disambig ; # ARABIC LETTER SHEEN
69ص ↔ s $under ; # ARABIC LETTER SAD
70ض ↔ d $under ; # ARABIC LETTER DAD
71ط ↔ t $under ; # ARABIC LETTER TAH
72ظ ↔ z $under ; # ARABIC LETTER ZAH
73غ ↔ g h $disambig ; # ARABIC LETTER GHAIN
2ca993e8
A
74# WARNING: special case
75# <t, umlaut, half-ring below> will be canonically ordered as <t, half-ring below, umlaut>,
76# so in the reverse (Latin-to-Arabic) direction we have to skip over (but preserve) the half-ring below (or other marks like it)
77# ة\u0655 ← t\u0339\u0308 ; # LATIN SMALL LETTER T, COMBINING RIGHT HALF RING BELOW, COMBINING DIAERESIS
729e4ab9
A
78ة ↔ t \u0308 ; # ARABIC LETTER TEH MARBUTA
79ة | $1 ← t ($notAbove+) \u0308 ; # ARABIC LETTER TEH MARBUTA
2ca993e8 80# non-Arabic language
729e4ab9
A
81ژ ↔ z h $disambig ; # ARABIC LETTER JEH
82ڭ ↔ n $disambig g ; # ARABIC LETTER NG
83ۋ ↔ v $disambig ; # ARABIC LETTER VE
84ی ↔ y $disambig2 ; # ARABIC LETTER FARSI YEH
85ښ ↔ s $descender;
2ca993e8 86# Arabic language
729e4ab9
A
87ء ↔ ʾ ; # ARABIC LETTER HAMZA
88ا ↔ a $under; # ARABIC LETTER ALEF
89ب ↔ b ; # ARABIC LETTER BEH
90ت ↔ t ; # ARABIC LETTER TEH
91ج ↔ j ; # ARABIC LETTER JEEM
92ح ↔ h $under ; # ARABIC LETTER HAH
93خ ↔ k h $disambig ; # ARABIC LETTER KHAH
94د ↔ d ; # ARABIC LETTER DAL
95ر ↔ r ; # ARABIC LETTER REH
96ز ↔ z ; # ARABIC LETTER ZAIN
97س ↔ s ; # ARABIC LETTER SEEN
98ع ↔ ʿ ; # ARABIC LETTER AIN
99ـ → ; # ARABIC TATWEEL
100ف ↔ f ; # ARABIC LETTER FEH
101ق ↔ q ; # ARABIC LETTER QAF
102ک ↔ k $disambig ; # ARABIC LETTER KEHEH
103ك ↔ k ; # ARABIC LETTER KAF
104ل ↔ l ; # ARABIC LETTER LAM
105م ↔ m ; # ARABIC LETTER MEEM
106ن ↔ n ; # ARABIC LETTER NOON
107ه ↔ h ; # ARABIC LETTER HEH
108و ↔ w ; # ARABIC LETTER WAW
109ى ↔ y $disambig ; # ARABIC LETTER ALEF MAKSURA
110ي ↔ y ; # ARABIC LETTER YEH
111\u064B ↔ aⁿ ; # ARABIC FATHATAN
112\u064C ↔ uⁿ ; # ARABIC DAMMATAN
113\u064D ↔ iⁿ ; # ARABIC KASRATAN
114\u064E ↔ a ; # ARABIC FATHA
115\u064F ↔ u ; # ARABIC DAMMA
116\u0650 ↔ i ; # ARABIC KASRA
51004dcb
A
117\u0651 ↔ \u0303 ; # ARABIC SHADDA
118\u0652 ↔ \u030A ; # ARABIC SUKUN
2ca993e8 119# special combining marks
51004dcb
A
120\u0653 ↔ \u0302 ; # ARABIC MADDAH ABOVE
121\u0654 ↔ \u0309 ; # ARABIC HAMZA ABOVE
122\u0655 ↔ \u0339 ; # ARABIC HAMZA BELOW
2ca993e8 123# Some non-Arabic language (not in UNGEGN)
729e4ab9
A
124پ ↔ p ; # ARABIC LETTER PEH
125چ ↔ c h $disambig ; # ARABIC LETTER TCHEH
126ڤ ↔ v ; # ARABIC LETTER VEH
2ca993e8
A
127# ڥ ↔ v $disambig ; # ARABIC LETTER FEH WITH THREE DOTS BELOW
128# ڢ ↔ f $disambig ; # ARABIC LETTER FEH WITH DOT MOVED BELOW
729e4ab9 129گ ↔ g ; # ARABIC LETTER GAF
2ca993e8 130# fallbacks
729e4ab9
A
131| s ← c } [eiy];
132| k ← c ;
133| i ← e ;
134| u ← o ;
135| ks ← x ;
136| n ← ‎ⁿ;
374ca955
A
137:: (lower) ;
138::NFC (NFD);
73c04bcf 139:: ( [[:Latin:] [%,.0-9;?ʾ-ʿ\u0302-\u0304\u0308-\u030A\u0323\u0330-\u0331\u0339;ˌ]] );
2ca993e8 140