X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..249c4c5ea9376c24572daf9c2effa7484a282f14:/icuSources/data/translit/Latin_ASCII.txt diff --git a/icuSources/data/translit/Latin_ASCII.txt b/icuSources/data/translit/Latin_ASCII.txt index d76b1f5b..c4c275e4 100644 --- a/icuSources/data/translit/Latin_ASCII.txt +++ b/icuSources/data/translit/Latin_ASCII.txt @@ -1,16 +1,29 @@ -# *************************************************************************** -# * -# * Copyright (C) 2004-2013, International Business Machines -# * Corporation; Unicode, Inc.; and others. All Rights Reserved. -# * -# *************************************************************************** +# © 2016 and later: Unicode, Inc. and others. +# License & terms of use: http://www.unicode.org/copyright.html#License +# # File: Latin_ASCII.txt -# Generated from CLDR +# Generated from CLDR +# + +# This handles only Latin, Common, and IDEOGRAPHIC NUMBER ZERO (Han). # :: [[:Latin:][:Common:][:Inherited:][ã]] ; +# +# Don't want NFKD, because that would convert things like superscripts and +# subscripts, which we do not want. So the individual transforms below +# include an appropriate subset of the NFKD ones. +# Here we remove accents from Latin characters or digits. We then recompose to permit rules +# such as mapping NOT EQUAL TO to an ASCII equivalent e.g. "!=" if we choose to. +# :: NFD() ; -[:Latin:] { [:Mn:]+ â ; # maps to nothing; remove all Mn following Latin letter +[[:Latin:][0-9]] { [:Mn:]+ â ; # maps to nothing; remove all Mn following Latin letter/digit :: NFC() ; +# +# Some of the following mappings (noted) are from CLDR â¹character-fallback⺠data. +# (Note, here "â¹character-fallbackâº" uses U+2039/U+203A to avoid XML issues) +# +# Latin letters and IPA +# à â AE ; # 00C6;LATIN CAPITAL LETTER AE (from â¹character-fallbackâº) à â D ; # 00D0;LATIN CAPITAL LETTER ETH à â O ; # 00D8;LATIN CAPITAL LETTER O WITH STROKE @@ -23,7 +36,7 @@ Ä â D ; # 0110;LATIN CAPITAL LETTER D WITH STROKE Ä â d ; # 0111;LATIN SMALL LETTER D WITH STROKE Ħ â H ; # 0126;LATIN CAPITAL LETTER H WITH STROKE -ħ â h ; # 0126;LATIN CAPITAL LETTER H WITH STROKE +ħ â h ; # 0126;LATIN SMALL LETTER H WITH STROKE ı â i ; # 0131;LATIN SMALL LETTER DOTLESS I IJ â IJ ; # 0132;LATIN CAPITAL LIGATURE IJ (compat) ij â ij ; # 0133;LATIN SMALL LIGATURE IJ (compat) @@ -158,13 +171,6 @@ ʦ â ts ; # 02A6;LATIN SMALL LETTER TS DIGRAPH ʪ â ls ; # 02AA;LATIN SMALL LETTER LS DIGRAPH Ê« â lz ; # 02AB;LATIN SMALL LETTER LZ DIGRAPH -ʹ â \' ; # 02B9;MODIFIER LETTER PRIME -ʺ â \" ; # 02BA;MODIFIER LETTER DOUBLE PRIME -Ê» â \' ; # 02BB;MODIFIER LETTER TURNED COMMA -ʼ â \' ; # 02BC;MODIFIER LETTER APOSTROPHE -ʽ â \' ; # 02BD;MODIFIER LETTER REVERSED COMMA -ʾ â 2 ; # 02BE;MODIFIER LETTER RIGHT HALF RING (transliteration for Arabic hamza) -Ê¿ â 3 ; # 02BF;MODIFIER LETTER LEFT HALF RING (transliteration for Arabic ain) á´ â A ; # 1D00;LATIN LETTER SMALL CAPITAL A á´ â AE ; # 1D01;LATIN LETTER SMALL CAPITAL AE á´ â B ; # 1D03;LATIN LETTER SMALL CAPITAL BARRED B @@ -229,6 +235,7 @@ ỽ â v ; # 1EFD;LATIN SMALL LETTER MIDDLE-WELSH V Ỿ â Y ; # 1EFE;LATIN CAPITAL LETTER Y WITH LOOP ỿ â y ; # 1EFF;LATIN SMALL LETTER Y WITH LOOP +# Presentation forms ï¬ â ff ; # FB00;LATIN SMALL LIGATURE FF (compat) ï¬ â fi ; # FB01;LATIN SMALL LIGATURE FI (compat) ï¬ â fl ; # FB02;LATIN SMALL LIGATURE FL (compat) @@ -236,6 +243,7 @@ ï¬ â ffl ; # FB04;LATIN SMALL LIGATURE FFL (compat) ï¬ â st ; # FB05;LATIN SMALL LIGATURE LONG S T (compat) ï¬ â st ; # FB06;LATIN SMALL LIGATURE ST (compat) +# Fullwidth A â A ; # FF21;FULLWIDTH LATIN CAPITAL LETTER A (compat) ï¼¢ â B ; # FF22;FULLWIDTH LATIN CAPITAL LETTER B (compat) ï¼£ â C ; # FF23;FULLWIDTH LATIN CAPITAL LETTER C (compat) @@ -288,6 +296,9 @@ ï½ â x ; # FF58;FULLWIDTH LATIN SMALL LETTER X (compat) ï½ â y ; # FF59;FULLWIDTH LATIN SMALL LETTER Y (compat) ï½ â z ; # FF5A;FULLWIDTH LATIN SMALL LETTER Z (compat) +# +# Currency and letterlike +# © â '(C)' ; # 00A9;COPYRIGHT SIGN (from â¹character-fallbackâº) ® â '(R)' ; # 00AE;REGISTERED SIGN (from â¹character-fallbackâº) â â CE ; # 20A0;EURO-CURRENCY SIGN (from â¹character-fallbackâº) @@ -295,6 +306,7 @@ ⣠â 'Fr.' ; # 20A3;FRENCH FRANC SIGN (from â¹character-fallbackâº) ⤠â 'L.' ; # 20A4;LIRA SIGN (from â¹character-fallbackâº) ⧠â Pts ; # 20A7;PESETA SIGN (from â¹character-fallbackâº) +⺠â TL ; # 20BA;TURKISH LIRA SIGN (from â¹character-fallbackâº) â¹ â Rs ; # 20B9;INDIAN RUPEE SIGN (from â¹character-fallbackâº) â â 'a/c' ; # 2100;ACCOUNT OF (compat) â â 'a/s' ; # 2101;ADDRESSED TO THE SUBJECT (compat) @@ -335,6 +347,9 @@ â â e ; # 2147;DOUBLE-STRUCK ITALIC SMALL E (compat) â â i ; # 2148;DOUBLE-STRUCK ITALIC SMALL I (compat) â â j ; # 2149;DOUBLE-STRUCK ITALIC SMALL J (compat) +# +# Squared Latin +# ã± â hPa ; # 3371;SQUARE HPA (compat) ã² â da ; # 3372;SQUARE DA (compat) ã³ â AU ; # 3373;SQUARE AU (compat) @@ -416,6 +431,9 @@ ã â Wb ; # 33DD;SQUARE WB (compat) ã â 'V/m' ; # 33DE;SQUARE V OVER M (compat) (from â¹character-fallbackâº) ã â 'A/m' ; # 33DF;SQUARE A OVER M (compat) (from â¹character-fallbackâº) +# +# Enclosed Latin +# â â '(a)' ; # 249C;PARENTHESIZED LATIN SMALL LETTER A (compat) â â '(b)' ; # 249D;PARENTHESIZED LATIN SMALL LETTER B (compat) â â '(c)' ; # 249E;PARENTHESIZED LATIN SMALL LETTER C (compat) @@ -442,6 +460,9 @@ â³ â '(x)' ; # 24B3;PARENTHESIZED LATIN SMALL LETTER X (compat) â´ â '(y)' ; # 24B4;PARENTHESIZED LATIN SMALL LETTER Y (compat) âµ â '(z)' ; # 24B5;PARENTHESIZED LATIN SMALL LETTER Z (compat) +# +# Roman numerals +# â â I ; # 2160;ROMAN NUMERAL ONE (compat) â ¡ â II ; # 2161;ROMAN NUMERAL TWO (compat) â ¢ â III ; # 2162;ROMAN NUMERAL THREE (compat) @@ -474,6 +495,9 @@ â ½ â c ; # 217D;SMALL ROMAN NUMERAL ONE HUNDRED (compat) â ¾ â d ; # 217E;SMALL ROMAN NUMERAL FIVE HUNDRED (compat) â ¿ â m ; # 217F;SMALL ROMAN NUMERAL ONE THOUSAND (compat) +# +# Fractions +# ¼ â ' 1/4' ; # 00BC;VULGAR FRACTION ONE QUARTER (from â¹character-fallbackâº) ½ â ' 1/2' ; # 00BD;VULGAR FRACTION ONE HALF (from â¹character-fallbackâº) ¾ â ' 3/4' ; # 00BE;VULGAR FRACTION THREE QUARTERS (from â¹character-fallbackâº) @@ -490,6 +514,9 @@ â â ' 5/8' ; # 215D;VULGAR FRACTION FIVE EIGHTHS (from â¹character-fallbackâº) â â ' 7/8' ; # 215E;VULGAR FRACTION SEVEN EIGHTHS (from â¹character-fallbackâº) â â ' 1/' ; # 215F;FRACTION NUMERATOR ONE (from â¹character-fallbackâº) +# +# Enclosed numeric +# â´ â '(1)' ; # 2474;PARENTHESIZED DIGIT ONE (compat) âµ â '(2)' ; # 2475;PARENTHESIZED DIGIT TWO (compat) ⶠâ '(3)' ; # 2476;PARENTHESIZED DIGIT THREE (compat) @@ -530,6 +557,9 @@ â â '18.' ; # 2499;NUMBER EIGHTEEN FULL STOP (compat) â â '19.' ; # 249A;NUMBER NINETEEN FULL STOP (compat) â â '20.' ; # 249B;NUMBER TWENTY FULL STOP (compat) +# +# Other numeric (ideographic and fullwidth) +# ã â 0 ; # 3007;IDEOGRAPHIC NUMBER ZERO ï¼ â 0 ; # FF10;FULLWIDTH DIGIT ZERO (compat) ï¼ â 1 ; # FF11;FULLWIDTH DIGIT ONE (compat) @@ -541,6 +571,9 @@ ï¼ â 7 ; # FF17;FULLWIDTH DIGIT SEVEN (compat) ï¼ â 8 ; # FF18;FULLWIDTH DIGIT EIGHT (compat) ï¼ â 9 ; # FF19;FULLWIDTH DIGIT NINE (compat) +# +# Spaces +# \u00A0 â ' ' ; # 00A0;NO-BREAK SPACE \u2002 â ' ' ; # 2002;EN SPACE (compat) \u2003 â ' ' ; # 2003;EM SPACE (compat) @@ -553,6 +586,16 @@ \u200A â ' ' ; # 200A;HAIR SPACE (compat) \u205F â ' ' ; # 205F;MEDIUM MATHEMATICAL SPACE (compat) \u3000 â ' ' ; # 3000;IDEOGRAPHIC SPACE (from â¹character-fallbackâº) +# +# Quotes, apostrophes +# +ʹ â \' ; # 02B9;MODIFIER LETTER PRIME +ʺ â \" ; # 02BA;MODIFIER LETTER DOUBLE PRIME +Ê» â \' ; # 02BB;MODIFIER LETTER TURNED COMMA +ʼ â \' ; # 02BC;MODIFIER LETTER APOSTROPHE +ʽ â \' ; # 02BD;MODIFIER LETTER REVERSED COMMA +Ë â \' ; # 02C8;MODIFIER LETTER VERTICAL LINE +Ë â '`' ; # 02CB;MODIFIER LETTER GRAVE ACCENT â â \' ; # 2018;LEFT SINGLE QUOTATION MARK (from â¹character-fallbackâº) â â \' ; # 2019;RIGHT SINGLE QUOTATION MARK (from â¹character-fallbackâº) â â ',' ; # 201A;SINGLE LOW-9 QUOTATION MARK (from â¹character-fallbackâº) @@ -571,6 +614,9 @@ » â '>>' ; # 00BB;RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (from â¹character-fallbackâº) â¹ â '<' ; # 2039;SINGLE LEFT-POINTING ANGLE QUOTATION MARK ⺠â '>' ; # 203A;SINGLE RIGHT-POINTING ANGLE QUOTATION MARK +# +# Dashes, hyphens... +# \u00AD â '-' ; # 00AD;SOFT HYPHEN (from â¹character-fallbackâº) â â '-' ; # 2010;HYPHEN (from â¹character-fallbackâº) â â '-' ; # 2011;NON-BREAKING HYPHEN (from â¹character-fallbackâº) @@ -583,6 +629,15 @@ ï¹ â '-' ; # FE58;SMALL EM DASH (compat) ï¹£ â '-' ; # FE63;SMALL HYPHEN-MINUS (compat) ï¼ â '-' ; # FF0D;FULLWIDTH HYPHEN-MINUS (compat) +# +# Other misc punctuation and symbols +# +Ë â '<' ; # 02C2;MODIFIER LETTER LEFT ARROWHEAD +Ë â '>' ; # 02C3;MODIFIER LETTER RIGHT ARROWHEAD +Ë â '^' ; # 02C4;MODIFIER LETTER UP ARROWHEAD +Ë â '^' ; # 02C6;MODIFIER LETTER CIRCUMFLEX ACCENT +Ë â ':' ; # 02D0;MODIFIER LETTER TRIANGULAR COLON +Ë â '~' ; # 02DC;SMALL TILDE â â '||' ; # 2016;DOUBLE VERTICAL LINE ⤠â '.' ; # 2024;ONE DOT LEADER (compat) ⥠â '..' ; # 2025;TWO DOT LEADER (compat) @@ -595,6 +650,7 @@ â â '?!' ; # 2048;QUESTION EXCLAMATION MARK (compat) â â '!?' ; # 2049;EXCLAMATION QUESTION MARK (compat) â â '*' ; # 204E;LOW ASTERISK +# CJK ã â ',' ; # 3001;IDEOGRAPHIC COMMA ã â '.' ; # 3002;IDEOGRAPHIC FULL STOP ã â '<' ; # 3008;LEFT ANGLE BRACKET @@ -607,6 +663,7 @@ ã â ']' ; # 3019;RIGHT WHITE TORTOISE SHELL BRACKET ã â '[' ; # 301A;LEFT WHITE SQUARE BRACKET ã â ']' ; # 301B;RIGHT WHITE SQUARE BRACKET +# Vertical and small forms ï¸ â ',' ; # FE10;PRESENTATION FORM FOR VERTICAL COMMA (compat) ï¸ â ',' ; # FE11;PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA (compat) ï¸ â '.' ; # FE12;PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP (compat) @@ -652,6 +709,7 @@ ﹩ â '$' ; # FE69;SMALL DOLLAR SIGN (compat) ﹪ â '%' ; # FE6A;SMALL PERCENT SIGN (compat) ﹫ â '@' ; # FE6B;SMALL COMMERCIAL AT (compat) +# Fullwidth and halfwidth ï¼ â '!' ; # FF01;FULLWIDTH EXCLAMATION MARK (compat) ï¼ â '#' ; # FF03;FULLWIDTH NUMBER SIGN (compat) ï¼ â '$' ; # FF04;FULLWIDTH DOLLAR SIGN (compat) @@ -685,8 +743,13 @@ ï½ â '))' ; # FF60;FULLWIDTH RIGHT WHITE PARENTHESIS (compat)(from â¹character-fallbackâº) 。 â '.' ; # FF61;HALFWIDTH IDEOGRAPHIC FULL STOP (compat) 、 â ',' ; # FF64;HALFWIDTH IDEOGRAPHIC COMMA (compat) +# +# Other math operators (non-ASCII-range) +# à â '*' ; # 00D7;MULTIPLICATION SIGN ÷ â '/' ; # 00F7;DIVISION SIGN +Ë â '+' ; # 02D6;MODIFIER LETTER PLUS SIGN +Ë â '-' ; # 02D7;MODIFIER LETTER MINUS SIGN â â '-' ; # 2212;MINUS SIGN (from â¹character-fallbackâº) â â '/' ; # 2215;DIVISION SLASH (from â¹character-fallbackâº) â â '\' ; # 2216;SET MINUS (from â¹character-fallbackâº) @@ -699,3 +762,4 @@ â©´ â '::=' ; # 2A74;DOUBLE COLON EQUAL (compat) ⩵ â '==' ; # 2A75;TWO CONSECUTIVE EQUALS SIGNS (compat) ⩶ â '===' ; # 2A76;THREE CONSECUTIVE EQUALS SIGNS (compat) +