]> git.saurik.com Git - apple/icu.git/blob - icuSources/data/translit/Latin_NumericPinyin.txt
ICU-57166.0.1.tar.gz
[apple/icu.git] / icuSources / data / translit / Latin_NumericPinyin.txt
1 # ***************************************************************************
2 # *
3 # * Copyright (C) 2004-2016, International Business Machines
4 # * Corporation; Unicode, Inc.; and others. All Rights Reserved.
5 # *
6 # ***************************************************************************
7 # File: Latin_NumericPinyin.txt
8 # Generated from CLDR
9 #
10
11 # According to the pinyin definitions I've been able to find:
12 # 'a', 'e' are the preferred bases
13 # otherwise 'o'
14 # otherwise last vowel
15 # The trailing forms (finals) of syllables are the following:
16 # "a", "ai", "ao", "an", "ang",
17 # "o", "ou", "ong",
18 # "e", "ei", "er", "en", "eng",
19 # "i", "ia", "iao", "ie", "iu", "ian", "in", "iang", "ing", "iong",
20 # "u", "ua", "uo", "uai", "ui", "uan", "un", "uang", "ueng",
21 # "ü", "üe", "üan", "ün"
22 # so the letters the tone will 'hop' over are: i, o, n, u (and the sequences "on" and "ng")
# Forward direction (Latin -> NumericPinyin). Text is decomposed first so that
# each tone mark is a separate combining character the rules below can match.
# The parenthesised NFC is what this line applies when the rule set is run in
# the reverse direction.
23 ::NFD (NFC);
# Combining marks treated as tone marks: U+0304 macron, U+0301 acute,
# U+030C caron, U+0300 grave, U+0306 breve. The mark-to-digit mapping itself is
# done by the Pinyin-NumericPinyin transform, which is not shown here.
24 $tone = [\u0304\u0301\u030C\u0300\u0306] ;
25 # Move the tone to the end of a syllable, and convert to number.
# Rules are tried in order, so the most specific pattern comes first.
# 1) After an "e", a tone mark followed by "r" hops over the "r". The leading
#    "e" is outside the braces, so it is context only and is left untouched.
26 e {($tone) r} → r &Pinyin-NumericPinyin($1);
# 2) A tone mark followed by one of i, o, n, u, "on" or "ng" hops over that one
#    element: the element ($2) is emitted first, then the converted tone ($1).
27 ($tone) ( [i o n u {o n} {n g}]) → $2 &Pinyin-NumericPinyin($1);
# 3) Any other tone mark is converted where it stands.
28 ($tone) → &Pinyin-NumericPinyin($1);
29 # The following backs up until it finds the right vowel, then deposits the tone.
# These are reverse-direction rules (NumericPinyin -> Latin): with the left
# arrow, the pattern to match is on the right and the replacement on the left.
# Vowels include u + U+0308 (decomposed u-diaeresis) in both cases, and v/V
# (presumably the ASCII stand-in for u-diaeresis -- confirm against callers).
30 $vowel = [aAeEiIoOuU {u\u0308} {U\u0308} vV];
# Consonants are the ASCII letters that are not in $vowel.
31 $consonant = [[a-z A-Z] - [$vowel]];
# Tone numbers accepted after a syllable.
32 $digit = [1-5];
# In each of the next three rules, $1 is the vowel that receives the tone,
# $2 is the rest of the syllable, and $3 is the digit, which is converted by
# the NumericPinyin-Pinyin transform (not shown here) and placed right after $1.
# Rules are tried in order, giving the priority: a/e, then o, then last vowel.
# 1) An a or e, followed by any further vowels and consonants, takes the tone.
33 $1 &NumericPinyin-Pinyin($3) $2 ← ([aAeE]) ($vowel* $consonant*) ($digit);
# 2) Otherwise an o takes it, provided no a/e appears among the vowels after it.
34 $1 &NumericPinyin-Pinyin($3) $2 ← ([oO]) ([$vowel-[aeAE]]* $consonant*) ($digit);
# 3) Otherwise the vowel directly before the trailing consonants takes it.
35 $1 &NumericPinyin-Pinyin($3) $2 ← ($vowel) ($consonant*) ($digit);
# 4) Fallback: a digit directly after any letter is converted in place; the
#    letter is outside the braces, so it is context only and is not rewritten.
36 &NumericPinyin-Pinyin($1) ← [:letter:] {($digit)};
# Final normalization: compose (NFC) in the forward direction; the
# parenthesised NFD is what this line applies in the reverse direction.
37 ::NFC (NFD);
38