--- /dev/null
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io as pyio
+import json
+import os
+import unittest
+
+from .. import InFile
+from ..comment_stripper import CommentStripper
+from ..filtration import Filter
+
+EXAMPLE_FILE_STEMS = [
+ "af_NA",
+ "af_VARIANT",
+ "af_ZA_VARIANT",
+ "af_ZA",
+ "af",
+ "ar",
+ "ar_SA",
+ "ars",
+ "bs_BA",
+ "bs_Cyrl_BA",
+ "bs_Cyrl",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ "en_001",
+ "en_150",
+ "en_DE",
+ "en_GB",
+ "en_US",
+ "root",
+ "sr_BA",
+ "sr_CS",
+ "sr_Cyrl_BA",
+ "sr_Cyrl_CS",
+ "sr_Cyrl_ME",
+ "sr_Cyrl",
+ "sr_Latn_BA",
+ "sr_Latn_CS",
+ "sr_Latn_ME_VARIANT",
+ "sr_Latn_ME",
+ "sr_Latn",
+ "sr_ME",
+ "sr",
+ "vai_Latn_LR",
+ "vai_Latn",
+ "vai_LR",
+ "vai_Vaii_LR",
+ "vai_Vaii",
+ "vai",
+ "yue",
+ "zh_CN",
+ "zh_Hans_CN",
+ "zh_Hans_HK",
+ "zh_Hans_MO",
+ "zh_Hans_SG",
+ "zh_Hans",
+ "zh_Hant_HK",
+ "zh_Hant_MO",
+ "zh_Hant_TW",
+ "zh_Hant",
+ "zh_HK",
+ "zh_MO",
+ "zh_SG",
+ "zh_TW",
+ "zh"
+]
+
+
+class TestIO(object):
+ def __init__(self):
+ pass
+
+ def read_locale_deps(self, tree):
+ if tree not in ("brkitr", "locales", "rbnf"):
+ return None
+ with pyio.open(os.path.join(
+ os.path.dirname(__file__),
+ "sample_data",
+ tree,
+ "LOCALE_DEPS.json"
+ ), "r", encoding="utf-8-sig") as f:
+ return json.load(CommentStripper(f))
+
+
+class FiltrationTest(unittest.TestCase):
+
+ def test_exclude(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "exclude"
+ }, TestIO()), [
+ ])
+
+ def test_default_whitelist(self):
+ self._check_filter(Filter.create_from_json({
+ "whitelist": [
+ "ars",
+ "zh_Hans"
+ ]
+ }, TestIO()), [
+ "ars",
+ "zh_Hans"
+ ])
+
+ def test_default_blacklist(self):
+ expected_matches = set(EXAMPLE_FILE_STEMS)
+ expected_matches.remove("ars")
+ expected_matches.remove("zh_Hans")
+ self._check_filter(Filter.create_from_json({
+ "blacklist": [
+ "ars",
+ "zh_Hans"
+ ]
+ }, TestIO()), expected_matches)
+
+ def test_language_whitelist(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "language",
+ "whitelist": [
+ "af",
+ "bs"
+ ]
+ }, TestIO()), [
+ "root",
+ "af_NA",
+ "af_VARIANT",
+ "af_ZA_VARIANT",
+ "af_ZA",
+ "af",
+ "bs_BA",
+ "bs_Cyrl_BA",
+ "bs_Cyrl",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs"
+ ])
+
+ def test_language_blacklist(self):
+ expected_matches = set(EXAMPLE_FILE_STEMS)
+ expected_matches.remove("af_NA")
+ expected_matches.remove("af_VARIANT")
+ expected_matches.remove("af_ZA_VARIANT")
+ expected_matches.remove("af_ZA")
+ expected_matches.remove("af")
+ self._check_filter(Filter.create_from_json({
+ "filterType": "language",
+ "blacklist": [
+ "af"
+ ]
+ }, TestIO()), expected_matches)
+
+ def test_regex_whitelist(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "regex",
+ "whitelist": [
+ r"^ar.*$",
+ r"^zh$"
+ ]
+ }, TestIO()), [
+ "ar",
+ "ar_SA",
+ "ars",
+ "zh"
+ ])
+
+ def test_regex_blacklist(self):
+ expected_matches = set(EXAMPLE_FILE_STEMS)
+ expected_matches.remove("ar")
+ expected_matches.remove("ar_SA")
+ expected_matches.remove("ars")
+ expected_matches.remove("zh")
+ self._check_filter(Filter.create_from_json({
+ "filterType": "regex",
+ "blacklist": [
+ r"^ar.*$",
+ r"^zh$"
+ ]
+ }, TestIO()), expected_matches)
+
+ def test_locale_basic(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "whitelist": [
+ # Default scripts:
+ # sr => Cyrl
+ # vai => Vaii
+ # zh => Hans
+ "bs_BA", # is an alias to bs_Latn_BA
+ "en_DE",
+ "sr", # Language with no script
+ "vai_Latn", # Language with non-default script
+ "zh_Hans" # Language with default script
+ ]
+ }, TestIO()), [
+ "root",
+ # bs: should include the full dependency tree of bs_BA
+ "bs_BA",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ # en: should include the full dependency tree of en_DE
+ "en",
+ "en_DE",
+ "en_150",
+ "en_001",
+ # sr: include Cyrl, the default, but not Latn.
+ "sr",
+ "sr_BA",
+ "sr_CS",
+ "sr_Cyrl",
+ "sr_Cyrl_BA",
+ "sr_Cyrl_CS",
+ "sr_Cyrl_ME",
+ # vai: include Latn but NOT Vaii.
+ "vai_Latn",
+ "vai_Latn_LR",
+ # zh: include Hans but NOT Hant.
+ "zh",
+ "zh_CN",
+ "zh_SG",
+ "zh_Hans",
+ "zh_Hans_CN",
+ "zh_Hans_HK",
+ "zh_Hans_MO",
+ "zh_Hans_SG"
+ ])
+
+ def test_locale_no_children(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "includeChildren": False,
+ "whitelist": [
+ # See comments in test_locale_basic.
+ "bs_BA",
+ "en_DE",
+ "sr",
+ "vai_Latn",
+ "zh_Hans"
+ ]
+ }, TestIO()), [
+ "root",
+ "bs_BA",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ "en",
+ "en_DE",
+ "en_150",
+ "en_001",
+ "sr",
+ "vai_Latn",
+ "zh",
+ "zh_Hans",
+ ])
+
+ def test_locale_include_scripts(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "includeScripts": True,
+ "whitelist": [
+ # See comments in test_locale_basic.
+ "bs_BA",
+ "en_DE",
+ "sr",
+ "vai_Latn",
+ "zh_Hans"
+ ]
+ }, TestIO()), [
+ "root",
+ # bs: includeScripts only works for language-only (without region)
+ "bs_BA",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ # en: should include the full dependency tree of en_DE
+ "en",
+ "en_DE",
+ "en_150",
+ "en_001",
+ # sr: include Latn, since no particular script was requested.
+ "sr_BA",
+ "sr_CS",
+ "sr_Cyrl_BA",
+ "sr_Cyrl_CS",
+ "sr_Cyrl_ME",
+ "sr_Cyrl",
+ "sr_Latn_BA",
+ "sr_Latn_CS",
+ "sr_Latn_ME_VARIANT",
+ "sr_Latn_ME",
+ "sr_Latn",
+ "sr_ME",
+ "sr",
+ # vai: do NOT include Vaii; the script was explicitly requested.
+ "vai_Latn_LR",
+ "vai_Latn",
+ # zh: do NOT include Hant; the script was explicitly requested.
+ "zh_CN",
+ "zh_SG",
+ "zh_Hans_CN",
+ "zh_Hans_HK",
+ "zh_Hans_MO",
+ "zh_Hans_SG",
+ "zh_Hans",
+ "zh"
+ ])
+
+ def test_locale_no_children_include_scripts(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "includeChildren": False,
+ "includeScripts": True,
+ "whitelist": [
+ # See comments in test_locale_basic.
+ "bs_BA",
+ "en_DE",
+ "sr",
+ "vai_Latn",
+ "zh_Hans"
+ ]
+ }, TestIO()), [
+ "root",
+ # bs: includeScripts only works for language-only (without region)
+ "bs_BA",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ # en: should include the full dependency tree of en_DE
+ "en",
+ "en_DE",
+ "en_150",
+ "en_001",
+ # sr: include Cyrl and Latn but no other children
+ "sr",
+ "sr_Cyrl",
+ "sr_Latn",
+ # vai: include only the requested script
+ "vai_Latn",
+ # zh: include only the requested script
+ "zh",
+ "zh_Hans",
+ ])
+
+ def test_union(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "union",
+ "unionOf": [
+ {
+ "whitelist": [
+ "ars",
+ "zh_Hans"
+ ]
+ },
+ {
+ "filterType": "regex",
+ "whitelist": [
+ r"^bs.*$",
+ r"^zh$"
+ ]
+ }
+ ]
+ }, TestIO()), [
+ "ars",
+ "zh_Hans",
+ "bs_BA",
+ "bs_Cyrl_BA",
+ "bs_Cyrl",
+ "bs_Latn_BA",
+ "bs_Latn",
+ "bs",
+ "zh"
+ ])
+
+ def test_hk_deps_normal(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "whitelist": [
+ "zh_HK"
+ ]
+ }, TestIO()), [
+ "root",
+ "zh_Hant",
+ "zh_Hant_HK",
+ "zh_HK",
+ ])
+
+ def test_hk_deps_rbnf(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "whitelist": [
+ "zh_HK"
+ ]
+ }, TestIO()), [
+ "root",
+ "yue",
+ "zh_Hant_HK",
+ "zh_HK",
+ ], "rbnf")
+
+ def test_no_alias_parent_structure(self):
+ self._check_filter(Filter.create_from_json({
+ "filterType": "locale",
+ "whitelist": [
+ "zh_HK"
+ ]
+ }, TestIO()), [
+ "root",
+ "zh_HK",
+ "zh",
+ ], "brkitr")
+
+ def _check_filter(self, filter, expected_matches, tree="locales"):
+ for file_stem in EXAMPLE_FILE_STEMS:
+ is_match = filter.match(InFile("%s/%s.txt" % (tree, file_stem)))
+ expected_match = file_stem in expected_matches
+ self.assertEqual(is_match, expected_match, file_stem)
+
+# Export the test for the runner
+suite = unittest.makeSuite(FiltrationTest)