]> git.saurik.com Git - apple/icu.git/blob - icuSources/python/icutools/databuilder/test/filtration_test.py
ICU-66108.tar.gz
[apple/icu.git] / icuSources / python / icutools / databuilder / test / filtration_test.py
1 # Copyright (C) 2018 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html
3
4 import io as pyio
5 import json
6 import os
7 import unittest
8
9 from .. import InFile
10 from ..comment_stripper import CommentStripper
11 from ..filtration import Filter
12
# File stems (locale identifiers, no directory or extension) that each filter
# in this module is evaluated against. Entries are grouped by language below
# purely for readability; the list itself is flat.
EXAMPLE_FILE_STEMS = [
    # af
    "af_NA", "af_VARIANT", "af_ZA_VARIANT", "af_ZA", "af",
    # ar / ars
    "ar", "ar_SA", "ars",
    # bs
    "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
    # en
    "en_001", "en_150", "en_DE", "en_GB", "en_US",
    # root
    "root",
    # sr
    "sr_BA", "sr_CS",
    "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME", "sr_Cyrl",
    "sr_Latn_BA", "sr_Latn_CS", "sr_Latn_ME_VARIANT", "sr_Latn_ME", "sr_Latn",
    "sr_ME", "sr",
    # vai
    "vai_Latn_LR", "vai_Latn", "vai_LR", "vai_Vaii_LR", "vai_Vaii", "vai",
    # yue
    "yue",
    # zh
    "zh_CN",
    "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG", "zh_Hans",
    "zh_Hant_HK", "zh_Hant_MO", "zh_Hant_TW", "zh_Hant",
    "zh_HK", "zh_MO", "zh_SG", "zh_TW", "zh",
]
70
71
class TestIO(object):
    """IO object the tests pass as the second argument to Filter.create_from_json.

    Its only capability is loading LOCALE_DEPS.json from the ``sample_data``
    directory that sits next to this test module.
    """

    def __init__(self):
        pass

    def read_locale_deps(self, tree):
        """Load the sample LOCALE_DEPS.json for ``tree``.

        Args:
            tree: Name of the data tree, e.g. "locales".

        Returns:
            The object produced by ``json.load`` for
            ``sample_data/<tree>/LOCALE_DEPS.json`` when ``tree`` is one of
            "brkitr", "locales" or "rbnf"; ``None`` for any other tree, in
            which case no file is opened.
        """
        if tree in ("brkitr", "locales", "rbnf"):
            deps_path = os.path.join(
                os.path.dirname(__file__),
                "sample_data",
                tree,
                "LOCALE_DEPS.json"
            )
            # The stream is wrapped in CommentStripper before it reaches the
            # JSON parser.
            with pyio.open(deps_path, "r", encoding="utf-8-sig") as deps_file:
                return json.load(CommentStripper(deps_file))
        return None
86
87
class FiltrationTest(unittest.TestCase):
    """Checks which EXAMPLE_FILE_STEMS each kind of Filter matches.

    Every test builds a filter with Filter.create_from_json (using TestIO for
    locale dependency data) and hands it to _check_filter together with the
    stems that are expected to match.
    """

    def test_exclude(self):
        # An "exclude" filter is expected to match nothing.
        exclude_filter = Filter.create_from_json({
            "filterType": "exclude"
        }, TestIO())
        self._check_filter(exclude_filter, [])

    def test_default_whitelist(self):
        # No "filterType" given: the default filter type is exercised.
        listed = [
            "ars",
            "zh_Hans"
        ]
        whitelist_filter = Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO())
        self._check_filter(whitelist_filter, listed)

    def test_default_blacklist(self):
        # Everything except the two blacklisted stems should match.
        survivors = set(EXAMPLE_FILE_STEMS) - {"ars", "zh_Hans"}
        blacklist_filter = Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO())
        self._check_filter(blacklist_filter, survivors)

    def test_language_whitelist(self):
        language_filter = Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }, TestIO())
        expected = [
            "root",
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ]
        self._check_filter(language_filter, expected)

    def test_language_blacklist(self):
        # All "af" stems drop out; every other stem should still match.
        survivors = set(EXAMPLE_FILE_STEMS) - {
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
        }
        language_filter = Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }, TestIO())
        self._check_filter(language_filter, survivors)

    def test_regex_whitelist(self):
        regex_filter = Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO())
        expected = [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ]
        self._check_filter(regex_filter, expected)

    def test_regex_blacklist(self):
        # The stems hit by either pattern drop out; the rest should match.
        survivors = set(EXAMPLE_FILE_STEMS) - {"ar", "ar_SA", "ars", "zh"}
        regex_filter = Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO())
        self._check_filter(regex_filter, survivors)

    def test_locale_basic(self):
        locale_filter = Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }, TestIO())
        expected = [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ]
        self._check_filter(locale_filter, expected)

    def test_locale_no_children(self):
        locale_filter = Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO())
        expected = [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ]
        self._check_filter(locale_filter, expected)

    def test_locale_include_scripts(self):
        locale_filter = Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO())
        expected = [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME_VARIANT",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ]
        self._check_filter(locale_filter, expected)

    def test_locale_no_children_include_scripts(self):
        locale_filter = Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO())
        expected = [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ]
        self._check_filter(locale_filter, expected)

    def test_union(self):
        # The expectation is the stems of the first member followed by the
        # stems of the second member.
        union_filter = Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }, TestIO())
        expected = [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ]
        self._check_filter(union_filter, expected)

    def test_hk_deps_normal(self):
        # Checked against the default "locales" tree.
        hk_filter = Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO())
        expected = [
            "root",
            "zh_Hant",
            "zh_Hant_HK",
            "zh_HK",
        ]
        self._check_filter(hk_filter, expected)

    def test_hk_deps_rbnf(self):
        # Same whitelist as test_hk_deps_normal, checked against "rbnf" paths.
        hk_filter = Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO())
        expected = [
            "root",
            "yue",
            "zh_Hant_HK",
            "zh_HK",
        ]
        self._check_filter(hk_filter, expected, "rbnf")

    def test_no_alias_parent_structure(self):
        # Same whitelist again, checked against "brkitr" paths.
        hk_filter = Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO())
        expected = [
            "root",
            "zh_HK",
            "zh",
        ]
        self._check_filter(hk_filter, expected, "brkitr")

    def _check_filter(self, filter, expected_matches, tree="locales"):
        """Compare ``filter`` against ``expected_matches`` stem by stem.

        For every stem in EXAMPLE_FILE_STEMS an InFile named
        ``<tree>/<stem>.txt`` is passed to ``filter.match``; the result must
        equal whether the stem appears in ``expected_matches``. Entries of
        ``expected_matches`` that are not in EXAMPLE_FILE_STEMS are never
        looked at. The stem is used as the assertion message.
        """
        for stem in EXAMPLE_FILE_STEMS:
            candidate = InFile("%s/%s.txt" % (tree, stem))
            did_match = filter.match(candidate)
            should_match = stem in expected_matches
            self.assertEqual(did_match, should_match, stem)
419
# Export the test for the runner.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13, so
# the suite is built through the default loader instead; it collects the same
# "test*" methods of FiltrationTest.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)