# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Reconstructed from the new-file diff published at
# X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/a01113dcd0f39d5da295ef82785beff9ed86fe38..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/python/icutools/databuilder/test/filtration_test.py

import io as pyio
import json
import os
import unittest

from .. import InFile
from ..comment_stripper import CommentStripper
from ..filtration import Filter

# File stems that every filter under test is evaluated against. Each stem is
# turned into "<tree>/<stem>.txt" by FiltrationTest._check_filter.
EXAMPLE_FILE_STEMS = [
    "af_NA",
    "af_VARIANT",
    "af_ZA_VARIANT",
    "af_ZA",
    "af",
    "ar",
    "ar_SA",
    "ars",
    "bs_BA",
    "bs_Cyrl_BA",
    "bs_Cyrl",
    "bs_Latn_BA",
    "bs_Latn",
    "bs",
    "en_001",
    "en_150",
    "en_DE",
    "en_GB",
    "en_US",
    "root",
    "sr_BA",
    "sr_CS",
    "sr_Cyrl_BA",
    "sr_Cyrl_CS",
    "sr_Cyrl_ME",
    "sr_Cyrl",
    "sr_Latn_BA",
    "sr_Latn_CS",
    "sr_Latn_ME_VARIANT",
    "sr_Latn_ME",
    "sr_Latn",
    "sr_ME",
    "sr",
    "vai_Latn_LR",
    "vai_Latn",
    "vai_LR",
    "vai_Vaii_LR",
    "vai_Vaii",
    "vai",
    "yue",
    "zh_CN",
    "zh_Hans_CN",
    "zh_Hans_HK",
    "zh_Hans_MO",
    "zh_Hans_SG",
    "zh_Hans",
    "zh_Hant_HK",
    "zh_Hant_MO",
    "zh_Hant_TW",
    "zh_Hant",
    "zh_HK",
    "zh_MO",
    "zh_SG",
    "zh_TW",
    "zh"
]


class TestIO(object):
    """I/O helper handed to Filter.create_from_json by the tests.

    Locale dependency data is read from the "sample_data" directory that
    sits next to this test module.
    """

    def __init__(self):
        pass

    def read_locale_deps(self, tree):
        """Load the sample LOCALE_DEPS.json for the given tree.

        Returns None for any tree other than "brkitr", "locales" or "rbnf".
        Otherwise the file is opened as UTF-8 (a leading BOM is tolerated
        via "utf-8-sig"), wrapped in CommentStripper and parsed as JSON.
        """
        if tree not in ("brkitr", "locales", "rbnf"):
            return None
        with pyio.open(os.path.join(
            os.path.dirname(__file__),
            "sample_data",
            tree,
            "LOCALE_DEPS.json"
        ), "r", encoding="utf-8-sig") as f:
            # NOTE(review): CommentStripper presumably removes comments so
            # that the stream is valid JSON -- confirm in comment_stripper.
            return json.load(CommentStripper(f))


class FiltrationTest(unittest.TestCase):
    """Checks which EXAMPLE_FILE_STEMS each filter configuration matches."""

    # "exclude" filter: no stem is expected to match.
    def test_exclude(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "exclude"
        }, TestIO()), [
        ])

    # No explicit filterType, whitelist only.
    def test_default_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "whitelist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans"
        ])

    # No explicit filterType, blacklist only: everything but the listed stems.
    def test_default_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ars")
        expected_matches.remove("zh_Hans")
        self._check_filter(Filter.create_from_json({
            "blacklist": [
                "ars",
                "zh_Hans"
            ]
        }, TestIO()), expected_matches)

    # "language" filter with a whitelist.
    def test_language_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "whitelist": [
                "af",
                "bs"
            ]
        }, TestIO()), [
            "root",
            "af_NA",
            "af_VARIANT",
            "af_ZA_VARIANT",
            "af_ZA",
            "af",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs"
        ])

    # "language" filter with a blacklist.
    def test_language_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("af_NA")
        expected_matches.remove("af_VARIANT")
        expected_matches.remove("af_ZA_VARIANT")
        expected_matches.remove("af_ZA")
        expected_matches.remove("af")
        self._check_filter(Filter.create_from_json({
            "filterType": "language",
            "blacklist": [
                "af"
            ]
        }, TestIO()), expected_matches)

    # "regex" filter with a whitelist.
    def test_regex_whitelist(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "whitelist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), [
            "ar",
            "ar_SA",
            "ars",
            "zh"
        ])

    # "regex" filter with a blacklist.
    def test_regex_blacklist(self):
        expected_matches = set(EXAMPLE_FILE_STEMS)
        expected_matches.remove("ar")
        expected_matches.remove("ar_SA")
        expected_matches.remove("ars")
        expected_matches.remove("zh")
        self._check_filter(Filter.create_from_json({
            "filterType": "regex",
            "blacklist": [
                r"^ar.*$",
                r"^zh$"
            ]
        }, TestIO()), expected_matches)

    # "locale" filter with default options.
    def test_locale_basic(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                # Default scripts:
                # sr => Cyrl
                # vai => Vaii
                # zh => Hans
                "bs_BA",  # is an alias to bs_Latn_BA
                "en_DE",
                "sr",  # Language with no script
                "vai_Latn",  # Language with non-default script
                "zh_Hans"  # Language with default script
            ]
        }, TestIO()), [
            "root",
            # bs: should include the full dependency tree of bs_BA
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl, the default, but not Latn.
            "sr",
            "sr_BA",
            "sr_CS",
            "sr_Cyrl",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            # vai: include Latn but NOT Vaii.
            "vai_Latn",
            "vai_Latn_LR",
            # zh: include Hans but NOT Hant.
            "zh",
            "zh_CN",
            "zh_SG",
            "zh_Hans",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG"
        ])

    # "locale" filter with includeChildren disabled.
    def test_locale_no_children(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "en",
            "en_DE",
            "en_150",
            "en_001",
            "sr",
            "vai_Latn",
            "zh",
            "zh_Hans",
        ])

    # "locale" filter with includeScripts enabled.
    def test_locale_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Latn, since no particular script was requested.
            "sr_BA",
            "sr_CS",
            "sr_Cyrl_BA",
            "sr_Cyrl_CS",
            "sr_Cyrl_ME",
            "sr_Cyrl",
            "sr_Latn_BA",
            "sr_Latn_CS",
            "sr_Latn_ME_VARIANT",
            "sr_Latn_ME",
            "sr_Latn",
            "sr_ME",
            "sr",
            # vai: do NOT include Vaii; the script was explicitly requested.
            "vai_Latn_LR",
            "vai_Latn",
            # zh: do NOT include Hant; the script was explicitly requested.
            "zh_CN",
            "zh_SG",
            "zh_Hans_CN",
            "zh_Hans_HK",
            "zh_Hans_MO",
            "zh_Hans_SG",
            "zh_Hans",
            "zh"
        ])

    # "locale" filter with includeChildren disabled and includeScripts enabled.
    def test_locale_no_children_include_scripts(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "includeChildren": False,
            "includeScripts": True,
            "whitelist": [
                # See comments in test_locale_basic.
                "bs_BA",
                "en_DE",
                "sr",
                "vai_Latn",
                "zh_Hans"
            ]
        }, TestIO()), [
            "root",
            # bs: includeScripts only works for language-only (without region)
            "bs_BA",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            # en: should include the full dependency tree of en_DE
            "en",
            "en_DE",
            "en_150",
            "en_001",
            # sr: include Cyrl and Latn but no other children
            "sr",
            "sr_Cyrl",
            "sr_Latn",
            # vai: include only the requested script
            "vai_Latn",
            # zh: include only the requested script
            "zh",
            "zh_Hans",
        ])

    # "union" filter combining a default whitelist with a regex whitelist.
    def test_union(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "union",
            "unionOf": [
                {
                    "whitelist": [
                        "ars",
                        "zh_Hans"
                    ]
                },
                {
                    "filterType": "regex",
                    "whitelist": [
                        r"^bs.*$",
                        r"^zh$"
                    ]
                }
            ]
        }, TestIO()), [
            "ars",
            "zh_Hans",
            "bs_BA",
            "bs_Cyrl_BA",
            "bs_Cyrl",
            "bs_Latn_BA",
            "bs_Latn",
            "bs",
            "zh"
        ])

    # zh_HK against the default "locales" tree.
    def test_hk_deps_normal(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_Hant",
            "zh_Hant_HK",
            "zh_HK",
        ])

    # Same filter as test_hk_deps_normal, evaluated against the "rbnf" tree.
    def test_hk_deps_rbnf(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "yue",
            "zh_Hant_HK",
            "zh_HK",
        ], "rbnf")

    # Same filter again, evaluated against the "brkitr" tree.
    def test_no_alias_parent_structure(self):
        self._check_filter(Filter.create_from_json({
            "filterType": "locale",
            "whitelist": [
                "zh_HK"
            ]
        }, TestIO()), [
            "root",
            "zh_HK",
            "zh",
        ], "brkitr")

    def _check_filter(self, filter, expected_matches, tree="locales"):
        """Assert that `filter` matches exactly the expected file stems.

        Every stem in EXAMPLE_FILE_STEMS is wrapped as
        InFile("<tree>/<stem>.txt") and passed to filter.match(); the result
        must equal whether the stem appears in expected_matches. The stem is
        used as the assertion message so a failure names the offending file.

        Args:
            filter: object exposing match(InFile), as returned by
                Filter.create_from_json.
            expected_matches: iterable of stems that should match. Entries
                that are not in EXAMPLE_FILE_STEMS are never checked.
            tree: directory prefix used to build the InFile path.
        """
        # Build the lookup set once; callers pass either a list or a set.
        expected = frozenset(expected_matches)
        for file_stem in EXAMPLE_FILE_STEMS:
            is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))
            expected_match = file_stem in expected
            self.assertEqual(is_match, expected_match, file_stem)


# Export the test for the runner.
# unittest.makeSuite() was deprecated in Python 3.11 and removed in 3.13;
# the loader call below is the equivalent that works on all versions.
suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)