]>
Commit | Line | Data |
---|---|---|
b75a7d8f A |
1 | // -*- Coding: utf-8; -*- |
2 | //-------------------------------------------------------------------- | |
3 | // Copyright (c) 1999-2002, International Business Machines | |
4 | // Corporation and others. All Rights Reserved. | |
5 | //-------------------------------------------------------------------- | |
6 | // THIS IS A MACHINE-GENERATED FILE | |
7 | // Tool: dumpicurules.bat | |
8 | // Source: ../../../impl/data/Transliterator_Cyrillic_Latin.txt | |
9 | // Date: Sat Jul 27 10:31:01 2002 | |
10 | //-------------------------------------------------------------------- | |
11 | ||
12 | // Cyrillic_Latin | |
13 | ||
14 | t_Cyrl_Latn { | |
15 | Rule { | |
16 | //-------------------------------------------------------------------- | |
17 | //-------------------------------------------------------------------- | |
18 | //-------------------------------------------------------------------- | |
19 | // TODO: add the remaining characters (the commented-out XXX entries below). | |
20 | // Variants for Russian-English and Russian-German should also be added. | |
21 | // They can use this rule set as a base and then remap individual cases, | |
22 | // e.g. "a $hat" (the mapping for я) to "ya" or "ja". | |
23 | ||
24 | // :: [\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]] ; | |
25 | //## WARNING, \u0308 must be added to the generated filters, in both directions ### | |
26 | // MINIMAL FILTER: Cyrillic-Latin | |
27 | ":: [\u0308\u0102-\u0103\u0114-\u0115\u011E-\u011F\u012C-\u012D\u014E-\u014F\u016C-\u016D\u0306\u0400-\u045F\u0490-\u0495\u0498-\u0499\u04C1-\u04C2\u04D0-\u04DF\u04E2-\u04E7\u04EC-\u04F5\u04F8-\u04F9\u1E1C-\u1E1D\u1EAE-\u1EB7\u1FB0\u1FB8\u1FD0\u1FD8\u1FE0\u1FE8] ;" | |
28 | ":: NFD (NFC) ;" | |
29 | ||
30 | "$modprime = \u02B9;" | |
31 | "$modprime2 = \u02BA;" | |
32 | ||
33 | "$grave = \u0300;" | |
34 | "$acute = \u0301;" | |
35 | "$hat = \u0302;" | |
36 | "$breve = \u0306 ;" | |
37 | "$dot = \u0307 ;" | |
38 | "$caron = \u030C ;" | |
39 | "$comma = \u0326 ;" | |
40 | "$under = \u0331 ;" | |
41 | ||
42 | // Moved up ahead of the "Normal order" rules below so that they are not masked | |
43 | ||
44 | "я <> a $hat ;" // CYRILLIC SMALL LETTER YA | |
45 | "Я <> A $hat ;" // CYRILLIC CAPITAL LETTER YA | |
46 | ||
47 | "ч <> c $caron ;" // CYRILLIC SMALL LETTER CHE | |
48 | "Ч <> C $caron;" // CYRILLIC CAPITAL LETTER CHE | |
49 | // ҷ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH DESCENDER | |
50 | // Ҷ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER | |
51 | // ӌ <> XXX ; # CYRILLIC SMALL LETTER KHAKASSIAN CHE | |
52 | // Ӌ <> XXX ; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE | |
53 | // ҹ <> XXX ; # CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE | |
54 | // Ҹ <> XXX ; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE | |
55 | ||
56 | "э <> e $acute;" // CYRILLIC SMALL LETTER E | |
57 | "Э <> E $acute;" // CYRILLIC CAPITAL LETTER E | |
58 | "є <> e $hat;" // CYRILLIC SMALL LETTER UKRAINIAN IE | |
59 | "Є <> E $hat;" // CYRILLIC CAPITAL LETTER UKRAINIAN IE | |
60 | ||
61 | "ш <> s $caron ;" // CYRILLIC SMALL LETTER SHA | |
62 | "Ш <> S $caron ;" // CYRILLIC CAPITAL LETTER SHA | |
63 | "щ <> s $hat ;" // CYRILLIC SMALL LETTER SHCHA | |
64 | "Щ <> S $hat;" // CYRILLIC CAPITAL LETTER SHCHA | |
65 | ||
66 | "ѕ <> z $hat ;" // CYRILLIC SMALL LETTER DZE | |
67 | "Ѕ <> Z $hat;" // CYRILLIC CAPITAL LETTER DZE | |
68 | // ӡ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN DZE | |
69 | // Ӡ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE | |
70 | ||
71 | "ю <> u $hat ;" // CYRILLIC SMALL LETTER YU | |
72 | "Ю <> U $hat ;" // CYRILLIC CAPITAL LETTER YU | |
73 | ||
74 | "і <> i $acute;" // CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I | |
75 | "І <> I $acute;" // CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I | |
76 | "ј <> j $caron;" // CYRILLIC SMALL LETTER JE | |
77 | "Ј <> J $caron;" // CYRILLIC CAPITAL LETTER JE | |
78 | ||
79 | "љ <> l $hat ;" // CYRILLIC SMALL LETTER LJE | |
80 | "Љ <> L $hat ;" // CYRILLIC CAPITAL LETTER LJE | |
81 | "њ <> n $hat ;" // CYRILLIC SMALL LETTER NJE | |
82 | "Њ <> N $hat ;" // CYRILLIC CAPITAL LETTER NJE | |
83 | ||
84 | "ћ <> c $acute ;" // CYRILLIC SMALL LETTER TSHE | |
85 | "Ћ <> C $acute ;" // CYRILLIC CAPITAL LETTER TSHE | |
86 | ||
87 | "џ <> d $hat ;" // CYRILLIC SMALL LETTER DZHE | |
88 | "Џ <> D $hat ;" // CYRILLIC CAPITAL LETTER DZHE | |
89 | ||
90 | // Normal order | |
91 | ||
92 | "а <> a ;" // CYRILLIC SMALL LETTER A | |
93 | "А <> A ;" // CYRILLIC CAPITAL LETTER A | |
94 | "ә <> \u0259 ;" // CYRILLIC SMALL LETTER SCHWA | |
95 | "Ә <> \u018F ;" // CYRILLIC CAPITAL LETTER SCHWA | |
96 | "ӕ <> \u00E6 ;" // CYRILLIC SMALL LIGATURE A IE | |
97 | "Ӕ <> \u00C6 ;" // CYRILLIC CAPITAL LIGATURE A IE | |
98 | "б <> b ;" // CYRILLIC SMALL LETTER BE | |
99 | "Б <> B ;" // CYRILLIC CAPITAL LETTER BE | |
100 | "в <> v ;" // CYRILLIC SMALL LETTER VE | |
101 | "В <> V ;" // CYRILLIC CAPITAL LETTER VE | |
102 | ||
103 | "ґ <> g $grave ;" // CYRILLIC SMALL LETTER GHE WITH UPTURN | |
104 | "Ґ <> G $grave ;" // CYRILLIC CAPITAL LETTER GHE WITH UPTURN | |
105 | "ғ <> g $dot ;" // CYRILLIC SMALL LETTER GHE WITH STROKE | |
106 | "Ғ <> G $dot;" // CYRILLIC CAPITAL LETTER GHE WITH STROKE | |
107 | "ҕ <> g $breve;" // CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK | |
108 | "Ҕ <> G $breve;" // CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK | |
109 | "г <> g ;" // CYRILLIC SMALL LETTER GHE | |
110 | "Г <> G ;" // CYRILLIC CAPITAL LETTER GHE | |
111 | ||
112 | "д <> d;" // CYRILLIC SMALL LETTER DE | |
113 | "Д <> D;" // CYRILLIC CAPITAL LETTER DE | |
114 | "ђ <> đ ;" // CYRILLIC SMALL LETTER DJE | |
115 | "Ђ <> Đ ;" // CYRILLIC CAPITAL LETTER DJE | |
116 | "ҙ <> z $comma ;" // CYRILLIC SMALL LETTER ZE WITH DESCENDER | |
117 | "Ҙ <> Z $comma ;" // CYRILLIC CAPITAL LETTER ZE WITH DESCENDER | |
118 | "е <> e ;" // CYRILLIC SMALL LETTER IE | |
119 | "Е <> E;" // CYRILLIC CAPITAL LETTER IE | |
120 | ||
121 | "ж <> z $caron;" // CYRILLIC SMALL LETTER ZHE | |
122 | "Ж <> Z $caron;" // CYRILLIC CAPITAL LETTER ZHE | |
123 | ||
124 | // җ <> XXX ; # CYRILLIC SMALL LETTER ZHE WITH DESCENDER | |
125 | // Җ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER | |
126 | ||
127 | "з <> z ;" // CYRILLIC SMALL LETTER ZE | |
128 | "З <> Z;" // CYRILLIC CAPITAL LETTER ZE | |
129 | ||
130 | "й <> j ;" // CYRILLIC SMALL LETTER SHORT I | |
131 | "Й <> J ;" // CYRILLIC CAPITAL LETTER SHORT I | |
132 | "и <> i ;" // CYRILLIC SMALL LETTER I | |
133 | "И <> I ;" // CYRILLIC CAPITAL LETTER I | |
134 | ||
135 | "к <> k ;" // CYRILLIC SMALL LETTER KA | |
136 | "К <> K;" // CYRILLIC CAPITAL LETTER KA | |
137 | ||
138 | // қ <> XXX ; # CYRILLIC SMALL LETTER KA WITH DESCENDER | |
139 | // Қ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER | |
140 | // ӄ <> XXX ; # CYRILLIC SMALL LETTER KA WITH HOOK | |
141 | // Ӄ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH HOOK | |
142 | // ҡ <> XXX ; # CYRILLIC SMALL LETTER BASHKIR KA | |
143 | // Ҡ <> XXX ; # CYRILLIC CAPITAL LETTER BASHKIR KA | |
144 | // ҟ <> XXX ; # CYRILLIC SMALL LETTER KA WITH STROKE | |
145 | // Ҟ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH STROKE | |
146 | // ҝ <> XXX ; # CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE | |
147 | // Ҝ <> XXX ; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE | |
148 | "л <> l ;" // CYRILLIC SMALL LETTER EL | |
149 | "Л <> L;" // CYRILLIC CAPITAL LETTER EL | |
150 | ||
151 | "м <> m ;" // CYRILLIC SMALL LETTER EM | |
152 | "М <> M ;" // CYRILLIC CAPITAL LETTER EM | |
153 | "н <> n ;" // CYRILLIC SMALL LETTER EN | |
154 | "Н <> N;" // CYRILLIC CAPITAL LETTER EN | |
155 | // ң <> XXX ; # CYRILLIC SMALL LETTER EN WITH DESCENDER | |
156 | // Ң <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER | |
157 | // ӈ <> XXX ; # CYRILLIC SMALL LETTER EN WITH HOOK | |
158 | // Ӈ <> XXX ; # CYRILLIC CAPITAL LETTER EN WITH HOOK | |
159 | // ҥ <> XXX ; # CYRILLIC SMALL LIGATURE EN GHE | |
160 | // Ҥ <> XXX ; # CYRILLIC CAPITAL LIGATURE EN GHE | |
161 | ||
162 | "о <> o ;" // CYRILLIC SMALL LETTER O | |
163 | "О <> O ;" // CYRILLIC CAPITAL LETTER O | |
164 | // ө <> XXX ; # CYRILLIC SMALL LETTER BARRED O | |
165 | // Ө <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O | |
166 | "п <> p ;" // CYRILLIC SMALL LETTER PE | |
167 | "П <> P ;" // CYRILLIC CAPITAL LETTER PE | |
168 | // ҧ <> XXX ; # CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK | |
169 | // Ҧ <> XXX ; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK | |
170 | // ҁ <> XXX ; # CYRILLIC SMALL LETTER KOPPA | |
171 | // Ҁ <> XXX ; # CYRILLIC CAPITAL LETTER KOPPA | |
172 | "р <> r ;" // CYRILLIC SMALL LETTER ER | |
173 | "Р <> R ;" // CYRILLIC CAPITAL LETTER ER | |
174 | // ҏ <> XXX ; # CYRILLIC SMALL LETTER ER WITH TICK | |
175 | // Ҏ <> XXX ; # CYRILLIC CAPITAL LETTER ER WITH TICK | |
176 | "с <> s ;" // CYRILLIC SMALL LETTER ES | |
177 | "С <> S ;" // CYRILLIC CAPITAL LETTER ES | |
178 | // ҫ <> XXX ; # CYRILLIC SMALL LETTER ES WITH DESCENDER | |
179 | // Ҫ <> XXX ; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER | |
180 | "т <> t ;" // CYRILLIC SMALL LETTER TE | |
181 | "Т <> T ;" // CYRILLIC CAPITAL LETTER TE | |
182 | // ҭ <> XXX ; # CYRILLIC SMALL LETTER TE WITH DESCENDER | |
183 | // Ҭ <> XXX ; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER | |
184 | ||
185 | "у <> u ;" // CYRILLIC SMALL LETTER U | |
186 | "У <> U ;" // CYRILLIC CAPITAL LETTER U | |
187 | // ү <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U | |
188 | // Ү <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U | |
189 | // ұ <> XXX ; # CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE | |
190 | // Ұ <> XXX ; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE | |
191 | // ѹ <> XXX ; # CYRILLIC SMALL LETTER UK | |
192 | // Ѹ <> XXX ; # CYRILLIC CAPITAL LETTER UK | |
193 | "ф <> f ;" // CYRILLIC SMALL LETTER EF | |
194 | "Ф <> F ;" // CYRILLIC CAPITAL LETTER EF | |
195 | "х <> h ;" // CYRILLIC SMALL LETTER HA | |
196 | "Х <> H;" // CYRILLIC CAPITAL LETTER HA | |
197 | // ҳ <> XXX ; # CYRILLIC SMALL LETTER HA WITH DESCENDER | |
198 | // Ҳ <> XXX ; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER | |
199 | // һ <> XXX ; # CYRILLIC SMALL LETTER SHHA | |
200 | // Һ <> XXX ; # CYRILLIC CAPITAL LETTER SHHA | |
201 | // ѡ <> XXX ; # CYRILLIC SMALL LETTER OMEGA | |
202 | // Ѡ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA | |
203 | // ѿ <> XXX ; # CYRILLIC SMALL LETTER OT | |
204 | // Ѿ <> XXX ; # CYRILLIC CAPITAL LETTER OT | |
205 | // ѽ <> XXX ; # CYRILLIC SMALL LETTER OMEGA WITH TITLO | |
206 | // Ѽ <> XXX ; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO | |
207 | // ѻ <> XXX ; # CYRILLIC SMALL LETTER ROUND OMEGA | |
208 | // Ѻ <> XXX ; # CYRILLIC CAPITAL LETTER ROUND OMEGA | |
209 | "ц <> c ;" // CYRILLIC SMALL LETTER TSE | |
210 | "Ц <> C;" // CYRILLIC CAPITAL LETTER TSE | |
211 | // ҵ <> XXX ; # CYRILLIC SMALL LIGATURE TE TSE | |
212 | // Ҵ <> XXX ; # CYRILLIC CAPITAL LIGATURE TE TSE | |
213 | ||
214 | // ҽ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE | |
215 | // Ҽ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE | |
216 | // ҿ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER | |
217 | // Ҿ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER | |
218 | ||
219 | ||
220 | "Ъ <> $modprime2 $under ;" // CYRILLIC CAPITAL LETTER HARD SIGN | |
221 | "ъ <> $modprime2 ;" // CYRILLIC SMALL LETTER HARD SIGN | |
222 | "Ь <> $modprime $under ;" // CYRILLIC CAPITAL LETTER SOFT SIGN | |
223 | "ь <> $modprime ;" // CYRILLIC SMALL LETTER SOFT SIGN | |
224 | ||
225 | "ы <> y ;" // CYRILLIC SMALL LETTER YERU | |
226 | "Ы <> Y ;" // CYRILLIC CAPITAL LETTER YERU | |
227 | ||
228 | // ҍ <> XXX ; # CYRILLIC SMALL LETTER SEMISOFT SIGN | |
229 | // Ҍ <> XXX ; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN | |
230 | // ѣ <> XXX ; # CYRILLIC SMALL LETTER YAT | |
231 | // Ѣ <> XXX ; # CYRILLIC CAPITAL LETTER YAT | |
232 | ||
233 | // ѥ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED E | |
234 | // Ѥ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED E | |
235 | // ѧ <> XXX ; # CYRILLIC SMALL LETTER LITTLE YUS | |
236 | // Ѧ <> XXX ; # CYRILLIC CAPITAL LETTER LITTLE YUS | |
237 | // ѫ <> XXX ; # CYRILLIC SMALL LETTER BIG YUS | |
238 | // Ѫ <> XXX ; # CYRILLIC CAPITAL LETTER BIG YUS | |
239 | // ѩ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS | |
240 | // Ѩ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS | |
241 | // ѭ <> XXX ; # CYRILLIC SMALL LETTER IOTIFIED BIG YUS | |
242 | // Ѭ <> XXX ; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS | |
243 | // ѯ <> XXX ; # CYRILLIC SMALL LETTER KSI | |
244 | // Ѯ <> XXX ; # CYRILLIC CAPITAL LETTER KSI | |
245 | // ѱ <> XXX ; # CYRILLIC SMALL LETTER PSI | |
246 | // Ѱ <> XXX ; # CYRILLIC CAPITAL LETTER PSI | |
247 | // ѳ <> XXX ; # CYRILLIC SMALL LETTER FITA | |
248 | // Ѳ <> XXX ; # CYRILLIC CAPITAL LETTER FITA | |
249 | // ѵ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA | |
250 | // Ѵ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA | |
251 | // ҩ <> XXX ; # CYRILLIC SMALL LETTER ABKHASIAN HA | |
252 | // Ҩ <> XXX ; # CYRILLIC CAPITAL LETTER ABKHASIAN HA | |
253 | // Ӏ <> XXX ; # CYRILLIC LETTER PALOCHKA | |
254 | //## ӑ <> XXX ; # CYRILLIC SMALL LETTER A | |
255 | //## Ӑ <> XXX ; # CYRILLIC CAPITAL LETTER A | |
256 | //## ӓ <> XXX ; # CYRILLIC SMALL LETTER A | |
257 | //## Ӓ <> XXX ; # CYRILLIC CAPITAL LETTER A | |
258 | //## ӛ <> XXX ; # CYRILLIC SMALL LETTER SCHWA | |
259 | //## Ӛ <> XXX ; # CYRILLIC CAPITAL LETTER SCHWA | |
260 | //## ѓ <> XXX ; # CYRILLIC SMALL LETTER GHE | |
261 | //## Ѓ <> XXX ; # CYRILLIC CAPITAL LETTER GHE | |
262 | //## ѐ <> XXX ; # CYRILLIC SMALL LETTER IE | |
263 | //## Ѐ <> XXX ; # CYRILLIC CAPITAL LETTER IE | |
264 | //## ё <> XXX ; # CYRILLIC SMALL LETTER IE | |
265 | //## Ё <> XXX ; # CYRILLIC CAPITAL LETTER IE | |
266 | //## ӗ <> XXX ; # CYRILLIC SMALL LETTER IE | |
267 | //## Ӗ <> XXX ; # CYRILLIC CAPITAL LETTER IE | |
268 | //## ӂ <> XXX ; # CYRILLIC SMALL LETTER ZHE | |
269 | //## Ӂ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE | |
270 | //## ӝ <> XXX ; # CYRILLIC SMALL LETTER ZHE | |
271 | //## Ӝ <> XXX ; # CYRILLIC CAPITAL LETTER ZHE | |
272 | //## ӟ <> XXX ; # CYRILLIC SMALL LETTER ZE | |
273 | //## Ӟ <> XXX ; # CYRILLIC CAPITAL LETTER ZE | |
274 | //## ѝ <> XXX ; # CYRILLIC SMALL LETTER I | |
275 | //## Ѝ <> XXX ; # CYRILLIC CAPITAL LETTER I | |
276 | //## ӣ <> XXX ; # CYRILLIC SMALL LETTER I | |
277 | //## Ӣ <> XXX ; # CYRILLIC CAPITAL LETTER I | |
278 | //## ӥ <> XXX ; # CYRILLIC SMALL LETTER I | |
279 | //## Ӥ <> XXX ; # CYRILLIC CAPITAL LETTER I | |
280 | //## ї <> XXX ; # CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I | |
281 | //## Ї <> XXX ; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I | |
282 | //## ӧ <> XXX ; # CYRILLIC SMALL LETTER O | |
283 | //## Ӧ <> XXX ; # CYRILLIC CAPITAL LETTER O | |
284 | //## ӫ <> XXX ; # CYRILLIC SMALL LETTER BARRED O | |
285 | //## Ӫ <> XXX ; # CYRILLIC CAPITAL LETTER BARRED O | |
286 | //## ќ <> XXX ; # CYRILLIC SMALL LETTER KA | |
287 | //## Ќ <> XXX ; # CYRILLIC CAPITAL LETTER KA | |
288 | //## ӯ <> XXX ; # CYRILLIC SMALL LETTER U | |
289 | //## Ӯ <> XXX ; # CYRILLIC CAPITAL LETTER U | |
290 | //## ў <> XXX ; # CYRILLIC SMALL LETTER U | |
291 | //## Ў <> XXX ; # CYRILLIC CAPITAL LETTER U | |
292 | //## ӱ <> XXX ; # CYRILLIC SMALL LETTER U | |
293 | //## Ӱ <> XXX ; # CYRILLIC CAPITAL LETTER U | |
294 | //## ӳ <> XXX ; # CYRILLIC SMALL LETTER U | |
295 | //## Ӳ <> XXX ; # CYRILLIC CAPITAL LETTER U | |
296 | //## ӵ <> XXX ; # CYRILLIC SMALL LETTER CHE | |
297 | //## Ӵ <> XXX ; # CYRILLIC CAPITAL LETTER CHE | |
298 | //## ӹ <> XXX ; # CYRILLIC SMALL LETTER YERU | |
299 | //## Ӹ <> XXX ; # CYRILLIC CAPITAL LETTER YERU | |
300 | //## ӭ <> XXX ; # CYRILLIC SMALL LETTER E | |
301 | //## Ӭ <> XXX ; # CYRILLIC CAPITAL LETTER E | |
302 | //## ѷ <> XXX ; # CYRILLIC SMALL LETTER IZHITSA | |
303 | //## Ѷ <> XXX ; # CYRILLIC CAPITAL LETTER IZHITSA | |
304 | ||
305 | // Completeness: reverse-only ("<") rules for Latin letters with no mapping above (q, w, x) | |
306 | "$ignore = [[:Mark:]''] * ;" | |
307 | "| k < q ;" | |
308 | "| K < Q ;" | |
309 | "| u < w ;" | |
310 | "| U < W ;" | |
311 | "| KS < X } $ignore [:UppercaseLetter:] ;" | |
312 | "| KS < [:UppercaseLetter:] $ignore { X ;" | |
313 | "| Ks < X ;" | |
314 | "| ks < x ;" | |
315 | ||
316 | ":: NFC (NFD) ;" | |
317 | // note: a global filter is more efficient, but MUST include all source chars!! | |
318 | // :: ([\\u0000-\u007E \u02B9 \u02BA [:Cyrillic:] [:Latin:] [:nonspacing mark:]]); | |
319 | // MINIMAL FILTER: Latin-Cyrillic | |
320 | ":: ( [\u0308A-Za-z\u00C0-\u00CF\u00D1-\u00D6\u00D9-\u00DD\u00E0-\u00EF\u00F1-\u00F6\u00F9-\u00FD\u00FF-\u0125\u0128-\u0130\u0134-\u0137\u0139-\u013E\u0143-\u0148\u014C-\u0151\u0154-\u0165\u0168-\u017E\u018F\u01A0-\u01A1\u01AF-\u01B0\u01CD-\u01DC\u01DE-\u01E3\u01E6-\u01F0\u01F4-\u01F5\u01F8-\u021B\u021E-\u021F\u0226-\u0233\u0259\u02B9-\u02BA\u0300-\u0302\u0306-\u0307\u030C\u0326\u0331\u0340-\u0341\u0344\u0374\u0385-\u0386\u0388-\u038A\u038C\u038E-\u0390\u03AC-\u03B0\u03CC-\u03CE\u03D3\u0400\u0403\u040C-\u040E\u0419\u0439\u0450\u0453\u045C-\u045E\u04C1-\u04C2\u04D0-\u04D1\u04D6-\u04D7\u1E00-\u1E99\u1E9B\u1EA0-\u1EF9\u1F02-\u1F05\u1F0A-\u1F0D\u1F12-\u1F15\u1F1A-\u1F1D\u1F22-\u1F25\u1F2A-\u1F2D\u1F32-\u1F35\u1F3A-\u1F3D\u1F42-\u1F45\u1F4A-\u1F4D\u1F52-\u1F55\u1F5B\u1F5D\u1F62-\u1F65\u1F6A-\u1F6D\u1F70-\u1F7D\u1F82-\u1F85\u1F8A-\u1F8D\u1F92-\u1F95\u1F9A-\u1F9D\u1FA2-\u1FA5\u1FAA-\u1FAD\u1FB0\u1FB2\u1FB4\u1FB8\u1FBA-\u1FBB\u1FC2\u1FC4\u1FC8-\u1FCB\u1FCD-\u1FCE\u1FD0\u1FD2-\u1FD3\u1FD8\u1FDA-\u1FDB\u1FDD-\u1FDE\u1FE0\u1FE2-\u1FE3\u1FE8\u1FEA-\u1FEB\u1FED-\u1FEE\u1FF2\u1FF4\u1FF8-\u1FFB\u212A-\u212B] ) ;" | |
321 | } | |
322 | } |