]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/python/icutools/databuilder/test/filtration_test.py
ICU-66108.tar.gz
[apple/icu.git] / icuSources / python / icutools / databuilder / test / filtration_test.py
diff --git a/icuSources/python/icutools/databuilder/test/filtration_test.py b/icuSources/python/icutools/databuilder/test/filtration_test.py
new file mode 100644 (file)
index 0000000..416223b
--- /dev/null
@@ -0,0 +1,421 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io as pyio
+import json
+import os
+import unittest
+
+from .. import InFile
+from ..comment_stripper import CommentStripper
+from ..filtration import Filter
+
+EXAMPLE_FILE_STEMS = [  # File stems (no directory, no ".txt") that _check_filter runs every filter against.
+    "af_NA",
+    "af_VARIANT",
+    "af_ZA_VARIANT",
+    "af_ZA",
+    "af",
+    "ar",
+    "ar_SA",
+    "ars",
+    "bs_BA",
+    "bs_Cyrl_BA",
+    "bs_Cyrl",
+    "bs_Latn_BA",
+    "bs_Latn",
+    "bs",
+    "en_001",
+    "en_150",
+    "en_DE",
+    "en_GB",
+    "en_US",
+    "root",
+    "sr_BA",
+    "sr_CS",
+    "sr_Cyrl_BA",
+    "sr_Cyrl_CS",
+    "sr_Cyrl_ME",
+    "sr_Cyrl",
+    "sr_Latn_BA",
+    "sr_Latn_CS",
+    "sr_Latn_ME_VARIANT",
+    "sr_Latn_ME",
+    "sr_Latn",
+    "sr_ME",
+    "sr",
+    "vai_Latn_LR",
+    "vai_Latn",
+    "vai_LR",
+    "vai_Vaii_LR",
+    "vai_Vaii",
+    "vai",
+    "yue",
+    "zh_CN",
+    "zh_Hans_CN",
+    "zh_Hans_HK",
+    "zh_Hans_MO",
+    "zh_Hans_SG",
+    "zh_Hans",
+    "zh_Hant_HK",
+    "zh_Hant_MO",
+    "zh_Hant_TW",
+    "zh_Hant",
+    "zh_HK",
+    "zh_MO",
+    "zh_SG",
+    "zh_TW",
+    "zh"
+]
+
+
+class TestIO(object):  # I/O helper passed as the second argument to Filter.create_from_json in these tests.
+    def __init__(self):
+        pass
+
+    def read_locale_deps(self, tree):  # Return the parsed sample_data/<tree>/LOCALE_DEPS.json stored beside this file.
+        if tree not in ("brkitr", "locales", "rbnf"):
+            return None  # Only the three trees named above are looked up; any other tree yields None.
+        with pyio.open(os.path.join(
+                os.path.dirname(__file__),
+                "sample_data",
+                tree,
+                "LOCALE_DEPS.json"
+                ), "r", encoding="utf-8-sig") as f:  # "utf-8-sig" skips a leading BOM if the file has one.
+            return json.load(CommentStripper(f))  # Stream is wrapped first; presumably strips comments - confirm in comment_stripper.
+
+
+class FiltrationTest(unittest.TestCase):
+
+    def test_exclude(self):  # An "exclude" filter is expected to match none of the example stems.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "exclude"
+        }, TestIO()), [
+        ])
+
+    def test_default_whitelist(self):  # No filterType given: a whitelist should match exactly the listed stems.
+        self._check_filter(Filter.create_from_json({
+            "whitelist": [
+                "ars",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "ars",
+            "zh_Hans"
+        ])
+
+    def test_default_blacklist(self):  # No filterType given: a blacklist should match every stem except the listed ones.
+        expected_matches = set(EXAMPLE_FILE_STEMS)
+        expected_matches.remove("ars")
+        expected_matches.remove("zh_Hans")
+        self._check_filter(Filter.create_from_json({
+            "blacklist": [
+                "ars",
+                "zh_Hans"
+            ]
+        }, TestIO()), expected_matches)
+
+    def test_language_whitelist(self):  # "language" whitelist: expect root plus every af* and bs* stem.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "language",
+            "whitelist": [
+                "af",
+                "bs"
+            ]
+        }, TestIO()), [
+            "root",
+            "af_NA",
+            "af_VARIANT",
+            "af_ZA_VARIANT",
+            "af_ZA",
+            "af",
+            "bs_BA",
+            "bs_Cyrl_BA",
+            "bs_Cyrl",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs"
+        ])
+
+    def test_language_blacklist(self):  # "language" blacklist of af: expect everything except af and its af_* variants.
+        expected_matches = set(EXAMPLE_FILE_STEMS)
+        expected_matches.remove("af_NA")
+        expected_matches.remove("af_VARIANT")
+        expected_matches.remove("af_ZA_VARIANT")
+        expected_matches.remove("af_ZA")
+        expected_matches.remove("af")
+        self._check_filter(Filter.create_from_json({
+            "filterType": "language",
+            "blacklist": [
+                "af"
+            ]
+        }, TestIO()), expected_matches)
+
+    def test_regex_whitelist(self):  # "regex" whitelist: expect the stems that match at least one pattern.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "regex",
+            "whitelist": [
+                r"^ar.*$",
+                r"^zh$"
+            ]
+        }, TestIO()), [
+            "ar",
+            "ar_SA",
+            "ars",
+            "zh"
+        ])
+
+    def test_regex_blacklist(self):  # "regex" blacklist: expect every stem except those matching a pattern.
+        expected_matches = set(EXAMPLE_FILE_STEMS)
+        expected_matches.remove("ar")
+        expected_matches.remove("ar_SA")
+        expected_matches.remove("ars")
+        expected_matches.remove("zh")
+        self._check_filter(Filter.create_from_json({
+            "filterType": "regex",
+            "blacklist": [
+                r"^ar.*$",
+                r"^zh$"
+            ]
+        }, TestIO()), expected_matches)
+
+    def test_locale_basic(self):  # "locale" filter with no extra options; expectations below keep descendants of each request.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                # Default scripts:
+                # sr => Cyrl
+                # vai => Vaii
+                # zh => Hans
+                "bs_BA", # is an alias to bs_Latn_BA
+                "en_DE",
+                "sr", # Language with no script
+                "vai_Latn", # Language with non-default script
+                "zh_Hans" # Language with default script
+            ]
+        }, TestIO()), [
+            "root",
+            # bs: should include the full dependency tree of bs_BA
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Cyrl, the default, but not Latn.
+            "sr",
+            "sr_BA",
+            "sr_CS",
+            "sr_Cyrl",
+            "sr_Cyrl_BA",
+            "sr_Cyrl_CS",
+            "sr_Cyrl_ME",
+            # vai: include Latn but NOT Vaii.
+            "vai_Latn",
+            "vai_Latn_LR",
+            # zh: include Hans but NOT Hant.
+            "zh",
+            "zh_CN",
+            "zh_SG",
+            "zh_Hans",
+            "zh_Hans_CN",
+            "zh_Hans_HK",
+            "zh_Hans_MO",
+            "zh_Hans_SG"
+        ])
+
+    def test_locale_no_children(self):  # includeChildren=False: expect the requested locales and what they depend on, no descendants.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeChildren": False,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "root",
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            "sr",
+            "vai_Latn",
+            "zh",
+            "zh_Hans",
+        ])
+
+    def test_locale_include_scripts(self):  # includeScripts=True: a script-less request (sr) should pull in all of its scripts.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeScripts": True,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "root",
+            # bs: includeScripts only works for language-only (without region)
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Latn, since no particular script was requested.
+            "sr_BA",
+            "sr_CS",
+            "sr_Cyrl_BA",
+            "sr_Cyrl_CS",
+            "sr_Cyrl_ME",
+            "sr_Cyrl",
+            "sr_Latn_BA",
+            "sr_Latn_CS",
+            "sr_Latn_ME_VARIANT",
+            "sr_Latn_ME",
+            "sr_Latn",
+            "sr_ME",
+            "sr",
+            # vai: do NOT include Vaii; the script was explicitly requested.
+            "vai_Latn_LR",
+            "vai_Latn",
+            # zh: do NOT include Hant; the script was explicitly requested.
+            "zh_CN",
+            "zh_SG",
+            "zh_Hans_CN",
+            "zh_Hans_HK",
+            "zh_Hans_MO",
+            "zh_Hans_SG",
+            "zh_Hans",
+            "zh"
+        ])
+
+    def test_locale_no_children_include_scripts(self):  # Both options at once: sr gains sr_Cyrl and sr_Latn, but no regional children.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "includeChildren": False,
+            "includeScripts": True,
+            "whitelist": [
+                # See comments in test_locale_basic.
+                "bs_BA",
+                "en_DE",
+                "sr",
+                "vai_Latn",
+                "zh_Hans"
+            ]
+        }, TestIO()), [
+            "root",
+            # bs: includeScripts only works for language-only (without region)
+            "bs_BA",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            # en: should include the full dependency tree of en_DE
+            "en",
+            "en_DE",
+            "en_150",
+            "en_001",
+            # sr: include Cyrl and Latn but no other children
+            "sr",
+            "sr_Cyrl",
+            "sr_Latn",
+            # vai: include only the requested script
+            "vai_Latn",
+            # zh: include only the requested script
+            "zh",
+            "zh_Hans",
+        ])
+
+    def test_union(self):  # "union" filter: expect a stem to match when any filter listed in unionOf accepts it.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "union",
+            "unionOf": [
+                {
+                    "whitelist": [
+                        "ars",
+                        "zh_Hans"
+                    ]
+                },
+                {
+                    "filterType": "regex",
+                    "whitelist": [
+                        r"^bs.*$",
+                        r"^zh$"
+                    ]
+                }
+            ]
+        }, TestIO()), [
+            "ars",
+            "zh_Hans",
+            "bs_BA",
+            "bs_Cyrl_BA",
+            "bs_Cyrl",
+            "bs_Latn_BA",
+            "bs_Latn",
+            "bs",
+            "zh"
+        ])
+
+    def test_hk_deps_normal(self):  # Default "locales" tree: zh_HK is expected to bring in zh_Hant_HK, zh_Hant and root.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                "zh_HK"
+            ]
+        }, TestIO()), [
+            "root",
+            "zh_Hant",
+            "zh_Hant_HK",
+            "zh_HK",
+        ])
+
+    def test_hk_deps_rbnf(self):  # "rbnf" tree: zh_HK is expected to bring in yue and zh_Hant_HK rather than zh_Hant.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                "zh_HK"
+            ]
+        }, TestIO()), [
+            "root",
+            "yue",
+            "zh_Hant_HK",
+            "zh_HK",
+        ], "rbnf")
+
+    def test_no_alias_parent_structure(self):  # "brkitr" tree: zh_HK is expected to bring in only zh and root.
+        self._check_filter(Filter.create_from_json({
+            "filterType": "locale",
+            "whitelist": [
+                "zh_HK"
+            ]
+        }, TestIO()), [
+            "root",
+            "zh_HK",
+            "zh",
+        ], "brkitr")
+
+    def _check_filter(self, filter, expected_matches, tree="locales"):  # Assert filter.match() agrees with expected_matches for each stem.
+        for file_stem in EXAMPLE_FILE_STEMS:
+            is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))  # Path tested is "<tree>/<stem>.txt".
+            expected_match = file_stem in expected_matches  # Stems absent from expected_matches must NOT match.
+            self.assertEqual(is_match, expected_match, file_stem)  # file_stem is the failure message.
+
+# Export the test for the runner (unittest.makeSuite is deprecated since 3.11 and removed in Python 3.13)
+suite = unittest.defaultTestLoader.loadTestsFromTestCase(FiltrationTest)