# Source: git.saurik.com Git - apple/icu.git/blob - icuSources/data/buildtool/test/filtration_test.py
# Snapshot: ICU-64252.0.1.tar.gz
# Path: [apple/icu.git] / icuSources / data / buildtool / test / filtration_test.py
1 # Copyright (C) 2018 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html
3
4 import unittest
5
6 from .. import InFile
7 from ..filtration import Filter
8
# Locale file stems that every test feeds through the filter under test.
# Each row holds the stems sharing one leading language subtag; the order of
# the elements is the order in which the stems are checked.
EXAMPLE_FILE_STEMS = [
    "af_NA", "af_ZA", "af",
    "ar", "ar_SA", "ars",
    "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
    "en_001", "en_150", "en_DE", "en_GB", "en_US",
    "root",
    "sr_BA", "sr_CS",
    "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME", "sr_Cyrl",
    "sr_Latn_BA", "sr_Latn_CS", "sr_Latn_ME", "sr_Latn",
    "sr_ME", "sr",
    "vai_Latn_LR", "vai_Latn", "vai_LR", "vai_Vaii_LR", "vai_Vaii", "vai",
    "zh_CN",
    "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG", "zh_Hans",
    "zh_Hant_HK", "zh_Hant_MO", "zh_Hant_TW", "zh_Hant",
    "zh_HK", "zh_MO", "zh_SG", "zh_TW", "zh",
]
62
class FiltrationTest(unittest.TestCase):
    """Checks which example locale stems each kind of Filter accepts.

    Every test builds a Filter from a JSON-style dict and passes it to
    _check_filter together with the stems that filter is expected to match.
    """

    def test_exclude(self):
        # An "exclude" filter is expected to match none of the stems.
        rejecting_filter = Filter.create_from_json({"filterType": "exclude"})
        self._check_filter(rejecting_filter, [])

    def test_default_whitelist(self):
        # Without a filterType, a whitelist is expected to match exactly the
        # stems it names.
        spec = {
            "whitelist": ["ars", "zh_Hans"],
        }
        self._check_filter(Filter.create_from_json(spec), ["ars", "zh_Hans"])

    def test_default_blacklist(self):
        # Without a filterType, a blacklist is expected to match every stem
        # except the ones it names.
        remaining = set(EXAMPLE_FILE_STEMS)
        for stem in ("ars", "zh_Hans"):
            remaining.remove(stem)
        spec = {
            "blacklist": ["ars", "zh_Hans"],
        }
        self._check_filter(Filter.create_from_json(spec), remaining)

    def test_language_whitelist(self):
        # A "language" whitelist is expected to match root plus every stem
        # belonging to one of the listed languages.
        spec = {
            "filterType": "language",
            "whitelist": ["af", "bs"],
        }
        expected = [
            "root",
            "af_NA", "af_ZA", "af",
            "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
        ]
        self._check_filter(Filter.create_from_json(spec), expected)

    def test_language_blacklist(self):
        # A "language" blacklist is expected to drop every stem of the listed
        # language and keep all the others.
        remaining = set(EXAMPLE_FILE_STEMS)
        for stem in ("af_NA", "af_ZA", "af"):
            remaining.remove(stem)
        spec = {
            "filterType": "language",
            "blacklist": ["af"],
        }
        self._check_filter(Filter.create_from_json(spec), remaining)

    def test_regex_whitelist(self):
        # A "regex" whitelist is expected to match the stems that satisfy at
        # least one of the patterns.
        spec = {
            "filterType": "regex",
            "whitelist": [r"^ar.*$", r"^zh$"],
        }
        expected = ["ar", "ar_SA", "ars", "zh"]
        self._check_filter(Filter.create_from_json(spec), expected)

    def test_regex_blacklist(self):
        # A "regex" blacklist is expected to drop the stems that satisfy at
        # least one of the patterns and keep all the others.
        remaining = set(EXAMPLE_FILE_STEMS)
        for stem in ("ar", "ar_SA", "ars", "zh"):
            remaining.remove(stem)
        spec = {
            "filterType": "regex",
            "blacklist": [r"^ar.*$", r"^zh$"],
        }
        self._check_filter(Filter.create_from_json(spec), remaining)

    def test_locale_basic(self):
        spec = {
            "filterType": "locale",
            "whitelist": [
                # Default scripts: sr => Cyrl, vai => Vaii, zh => Hans
                "bs_BA",     # an alias to bs_Latn_BA
                "en_DE",
                "sr",        # language given without a script
                "vai_Latn",  # language given with a non-default script
                "zh_Hans",   # language given with its default script
            ],
        }
        expected = [
            "root",
            # bs: the full dependency tree of bs_BA.
            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
            # en: the full dependency tree of en_DE.
            "en", "en_DE", "en_150", "en_001",
            # sr: Cyrl (the default script) is included, Latn is not.
            "sr", "sr_BA", "sr_CS",
            "sr_Cyrl", "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME",
            # vai: Latn is included, Vaii is NOT.
            "vai_Latn", "vai_Latn_LR",
            # zh: Hans is included, Hant is NOT.
            "zh", "zh_CN", "zh_SG",
            "zh_Hans", "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG",
        ]
        self._check_filter(Filter.create_from_json(spec), expected)

    def test_locale_no_children(self):
        spec = {
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # Same requests as in test_locale_basic; see the notes there.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans",
            ],
        }
        expected = [
            "root",
            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
            "en", "en_DE", "en_150", "en_001",
            "sr",
            "vai_Latn",
            "zh", "zh_Hans",
        ]
        self._check_filter(Filter.create_from_json(spec), expected)

    def test_locale_include_scripts(self):
        spec = {
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # Same requests as in test_locale_basic; see the notes there.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans",
            ],
        }
        expected = [
            "root",
            # bs: includeScripts only applies to language-only requests
            # (those without a region).
            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
            # en: the full dependency tree of en_DE.
            "en", "en_DE", "en_150", "en_001",
            # sr: Latn is included too, since no particular script was
            # requested.
            "sr_BA", "sr_CS",
            "sr_Cyrl_BA", "sr_Cyrl_CS", "sr_Cyrl_ME", "sr_Cyrl",
            "sr_Latn_BA", "sr_Latn_CS", "sr_Latn_ME", "sr_Latn",
            "sr_ME", "sr",
            # vai: Vaii is NOT included; the script was explicitly requested.
            "vai_Latn_LR", "vai_Latn",
            # zh: Hant is NOT included; the script was explicitly requested.
            "zh_CN", "zh_SG",
            "zh_Hans_CN", "zh_Hans_HK", "zh_Hans_MO", "zh_Hans_SG", "zh_Hans",
            "zh",
        ]
        self._check_filter(Filter.create_from_json(spec), expected)

    def test_locale_no_children_include_scripts(self):
        spec = {
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # Same requests as in test_locale_basic; see the notes there.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans",
            ],
        }
        expected = [
            "root",
            # bs: includeScripts only applies to language-only requests
            # (those without a region).
            "bs_BA", "bs_Latn_BA", "bs_Latn", "bs",
            # en: the full dependency tree of en_DE.
            "en", "en_DE", "en_150", "en_001",
            # sr: Cyrl and Latn are included, but no other children.
            "sr", "sr_Cyrl", "sr_Latn",
            # vai: only the requested script.
            "vai_Latn",
            # zh: only the requested script.
            "zh", "zh_Hans",
        ]
        self._check_filter(Filter.create_from_json(spec), expected)

    def test_union(self):
        # A "union" filter is expected to match whatever any of its member
        # filters matches.
        spec = {
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": ["ars", "zh_Hans"],
                },
                {
                    "filterType": "regex",
                    "whitelist": [r"^bs.*$", r"^zh$"],
                },
            ],
        }
        expected = [
            "ars", "zh_Hans",
            "bs_BA", "bs_Cyrl_BA", "bs_Cyrl", "bs_Latn_BA", "bs_Latn", "bs",
            "zh",
        ]
        self._check_filter(Filter.create_from_json(spec), expected)

    def _check_filter(self, filter, expected_matches):
        """Assert that `filter` matches exactly the expected example stems.

        Each stem in EXAMPLE_FILE_STEMS is wrapped as the InFile
        "locales/<stem>.txt" and passed to filter.match(); the result must
        equal whether the stem occurs in `expected_matches`. The stem is used
        as the assertion message. Entries of `expected_matches` that are not
        in EXAMPLE_FILE_STEMS are never looked at.
        """
        for stem in EXAMPLE_FILE_STEMS:
            matched = filter.match(InFile("locales/%s.txt" % stem))
            self.assertEqual(matched, stem in expected_matches, stem)
351
# Export the test suite for the runner.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13, so
# the suite is built with the default loader's loadTestsFromTestCase(), which
# collects the same "test*" methods and is available on older versions too.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)