]>
git.saurik.com Git - apple/icu.git/blob - icuSources/data/buildtool/filtration.py
1 # Copyright (C) 2018 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html
# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

import re
import sys
from abc import abstractmethod
from collections import defaultdict

from .locale_dependencies import data as DEPENDENCY_DATA
from .request_types import *
# Note: for this to be a proper abstract class, it should extend abc.ABC.
# There is no nice way to do this that works in both Python 2 and 3.
# TODO(ICU-20301): Make this inherit from abc.ABC.
class Filter(object):
    """Base class for filters that decide which data files to build."""

    @staticmethod
    def create_from_json(json_data):
        """Factory: build the concrete Filter named by json_data["filterType"].

        Defaults to "file-stem" when no "filterType" key is present.
        Prints an error to stderr and returns None for an unknown type.
        """
        if "filterType" in json_data:
            filter_type = json_data["filterType"]
        else:
            filter_type = "file-stem"

        if filter_type == "file-stem":
            return FileStemFilter(json_data)
        elif filter_type == "language":
            return LanguageFilter(json_data)
        elif filter_type == "regex":
            return RegexFilter(json_data)
        elif filter_type == "exclude":
            return ExclusionFilter()
        elif filter_type == "union":
            return UnionFilter(json_data)
        elif filter_type == "locale":
            return LocaleFilter(json_data)
        else:
            print("Error: Unknown filterType option: %s" % filter_type, file=sys.stderr)
            return None

    def filter(self, request):
        """Return [request] if it survives this filter, else []."""
        if not request.apply_file_filter(self):
            return []
        # Sanity check: every file left in the request must pass match().
        for file in request.all_input_files():
            assert self.match(file)
        return [request]

    @classmethod
    def _file_to_file_stem(cls, file):
        """Return file's basename without directory or extension."""
        start = file.filename.rfind("/")
        limit = file.filename.rfind(".")
        return file.filename[start+1:limit]

    @abstractmethod
    def match(self, file):
        """Return True if *file* passes this filter; subclasses implement."""
        pass
class InclusionFilter(Filter):
    """Filter that matches every file (used as the catch-all default)."""
    def match(self, file):
        return True
class ExclusionFilter(Filter):
    """Filter that matches no file (the "exclude" filterType)."""
    def match(self, file):
        return False
class WhitelistBlacklistFilter(Filter):
    """Filter configured from JSON with either a "whitelist" or a
    "blacklist" of entries; subclasses decide how a file stem is tested.
    """

    def __init__(self, json_data):
        if "whitelist" in json_data:
            self.is_whitelist = True
            self.whitelist = json_data["whitelist"]
        else:
            assert "blacklist" in json_data, "Need either whitelist or blacklist: %s" % str(json_data)
            self.is_whitelist = False
            self.blacklist = json_data["blacklist"]

    def match(self, file):
        file_stem = self._file_to_file_stem(file)
        return self._should_include(file_stem)

    @abstractmethod
    def _should_include(self, file_stem):
        """Return True if the given file stem should be included."""
        pass
class FileStemFilter(WhitelistBlacklistFilter):
    """Whitelist/blacklist filter keyed on the whole file stem."""
    def _should_include(self, file_stem):
        if self.is_whitelist:
            return file_stem in self.whitelist
        else:
            return file_stem not in self.blacklist
class LanguageFilter(WhitelistBlacklistFilter):
    """Whitelist/blacklist filter keyed on the language subtag (the text
    before the first underscore of the file stem)."""
    def _should_include(self, file_stem):
        language = file_stem.split("_")[0]
        if language == "root":
            # Always include root.txt
            return True
        if self.is_whitelist:
            return language in self.whitelist
        else:
            return language not in self.blacklist
class RegexFilter(WhitelistBlacklistFilter):
    """Whitelist/blacklist filter whose entries are regular expressions
    matched against the file stem."""

    def __init__(self, *args):
        # TODO(ICU-20301): Change this to: super().__init__(*args)
        super(RegexFilter, self).__init__(*args)
        # Pre-compile the patterns once so _should_include() is cheap.
        if self.is_whitelist:
            self.whitelist = [re.compile(pat) for pat in self.whitelist]
        else:
            self.blacklist = [re.compile(pat) for pat in self.blacklist]

    def _should_include(self, file_stem):
        if self.is_whitelist:
            # Include iff some whitelist pattern matches.
            for pattern in self.whitelist:
                if pattern.match(file_stem):
                    return True
            return False
        else:
            # Include iff no blacklist pattern matches.
            for pattern in self.blacklist:
                if pattern.match(file_stem):
                    return False
            return True
class UnionFilter(Filter):
    """Filter matching when ANY of its sub-filters matches."""

    def __init__(self, json_data):
        # Collect the sub-filters.
        self.sub_filters = []
        for filter_json in json_data["unionOf"]:
            self.sub_filters.append(Filter.create_from_json(filter_json))

    def match(self, file):
        """Match iff any of the sub-filters match."""
        for filter in self.sub_filters:
            if filter.match(file):
                return True
        return False
# Matches stems like "sr_Latn" (2-3 letter language + 4-letter script);
# group(1) captures the bare language subtag.
LANGUAGE_SCRIPT_REGEX = re.compile(r"^([a-z]{2,3})_[A-Z][a-z]{3}$")
# Matches stems that are a bare 2- or 3-letter language subtag, e.g. "en".
LANGUAGE_ONLY_REGEX = re.compile(r"^[a-z]{2,3}$")
class LocaleFilter(Filter):
    """Filter including a whitelist of locales plus, configurably, their
    ancestors, descendants, and script variants."""

    def __init__(self, json_data):
        self.locales_requested = set()
        self.locales_required = set()
        # includeChildren (default True): also match descendants of a
        # requested locale. includeScripts (default False): also match
        # lang_Script variants via the bare language subtag.
        self.include_children = json_data.get("includeChildren", True)
        self.include_scripts = json_data.get("includeScripts", False)

        # Compute the requested and required locales.
        for locale in json_data["whitelist"]:
            self._add_locale_and_parents(locale)

    def _add_locale_and_parents(self, locale):
        # Store the locale as *requested*
        self.locales_requested.add(locale)
        # Store the locale and its dependencies as *required*
        while locale is not None:
            self.locales_required.add(locale)
            locale = self._get_parent_locale(locale)

    def match(self, file):
        locale = self._file_to_file_stem(file)

        # A locale is *required* if it is *requested* or an ancestor of a
        # *requested* locale.
        if locale in self.locales_required:
            return True

        # Resolve include_scripts and include_children.
        return self._match_recursive(locale)

    def _match_recursive(self, locale):
        # Base case: return True if we reached a *requested* locale,
        # or False if we ascend out of the locale tree.
        if locale is None:
            return False
        if locale in self.locales_requested:
            return True

        # Check for alternative scripts.
        # This causes sr_Latn to check sr instead of going directly to root.
        if self.include_scripts:
            match = LANGUAGE_SCRIPT_REGEX.match(locale)
            if match and self._match_recursive(match.group(1)):
                return True

        # Check if we are a descendant of a *requested* locale.
        if self.include_children:
            parent = self._get_parent_locale(locale)
            if self._match_recursive(parent):
                return True

        return False

    @classmethod
    def _get_parent_locale(cls, locale):
        """Return the parent of *locale* per the dependency data, or None
        when there is no parent (top of the locale tree)."""
        if locale in DEPENDENCY_DATA["parents"]:
            return DEPENDENCY_DATA["parents"][locale]
        if locale in DEPENDENCY_DATA["aliases"]:
            return DEPENDENCY_DATA["aliases"][locale]
        # A bare language subtag falls back to root.
        if LANGUAGE_ONLY_REGEX.match(locale):
            return "root"
        i = locale.rfind("_")
        if i < 0:
            # No underscore and not a bare language: presumably "root"
            # itself, which has no parent. TODO(review): confirm against the
            # upstream file; the original lines here were lost in transit.
            return None
        # Truncate the final _subtag to obtain the parent locale.
        return locale[:i]
def apply_filters(requests, config):
    """Runs the filters and returns a new list of requests."""
    requests = _apply_file_filters(requests, config)
    requests = _apply_resource_filters(requests, config)
    return requests
def _apply_file_filters(old_requests, config):
    """Filters out entire files."""
    filters = _preprocess_file_filters(old_requests, config)
    new_requests = []
    for request in old_requests:
        category = request.category
        if category in filters:
            # The category's filter decides whether to keep the request.
            new_requests += filters[category].filter(request)
        else:
            # No filter configured for this category: keep it unchanged.
            new_requests.append(request)
    return new_requests
def _preprocess_file_filters(requests, config):
    """Build and return a dict mapping request category -> Filter."""
    all_categories = set(
        request.category
        for request in requests
    )
    # Requests without a category are never filtered; discard (rather than
    # remove) avoids a KeyError when every request has a category.
    all_categories.discard(None)
    all_categories = list(sorted(all_categories))
    json_data = config.filters_json_data
    filters = {}
    for category in all_categories:
        if "featureFilters" in json_data and category in json_data["featureFilters"]:
            # An explicit per-category filter takes precedence.
            filters[category] = Filter.create_from_json(
                json_data["featureFilters"][category]
            )
        elif "localeFilter" in json_data and category[-5:] == "_tree":
            # Locale-tree categories fall back to the shared locale filter.
            filters[category] = Filter.create_from_json(
                json_data["localeFilter"]
            )
    # Warn about filter config entries that match no known category.
    if "featureFilters" in json_data:
        for category in json_data["featureFilters"]:
            if category not in all_categories:
                print("Warning: category %s is not known" % category, file=sys.stderr)
    return filters
class ResourceFilterInfo(object):
    """Collects resource-filter rules for one request category and rewrites
    the matching genrb requests to consume generated filter files."""

    def __init__(self, category):
        self.category = category
        # Directory (under the build's TMP_DIR) holding generated filter files.
        self.filter_tmp_dir = "filters/%s" % category
        # All three are populated later by apply_to_requests/_set_files.
        self.input_files = None
        self.filter_files = None
        self.rules_by_file = None

    def apply_to_requests(self, all_requests):
        # Call this method only once per list of requests.
        assert self.input_files is None
        for request in all_requests:
            if request.category != self.category:
                continue
            if not isinstance(request, AbstractExecutionRequest):
                continue
            if request.tool != IcuTool("genrb"):
                continue
            if not request.input_files:
                continue
            self._set_files(request.input_files)
            # Make genrb depend on the generated filter files and pass
            # --filterDir so it reads them.
            request.dep_targets += [self.filter_files[:]]
            arg_str = "--filterDir {TMP_DIR}/%s" % self.filter_tmp_dir
            request.args = "%s %s" % (arg_str, request.args)

        # Make sure we found the target request
        if self.input_files is None:
            print("WARNING: Category not found: %s" % self.category, file=sys.stderr)
            self.input_files = []
            self.filter_files = []
            self.rules_by_file = []

    def _set_files(self, files):
        # Note: The input files to genrb for a certain category should always
        # be the same. For example, there are often two genrb calls: one for
        # --writePoolBundle, and the other for --usePoolBundle. They are both
        # expected to have the same list of input files.
        if self.input_files is not None:
            assert self.input_files == files
            return
        self.input_files = list(files)
        # One filter file per input file, named after the input's basename.
        self.filter_files = [
            TmpFile("%s/%s" % (self.filter_tmp_dir, basename))
            for basename in (
                file.filename[file.filename.rfind("/")+1:]
                for file in files
            )
        ]
        # One (initially empty) rule list per input file, kept in lockstep.
        self.rules_by_file = [[] for _ in range(len(files))]

    def add_rules(self, file_filter, rules):
        """Append *rules* to every input file accepted by *file_filter*."""
        for file, rule_list in zip(self.input_files, self.rules_by_file):
            if file_filter.match(file):
                rule_list += rules

    def make_requests(self):
        """Return requests that materialize the filter files on disk."""
        # Map from rule list to filter files with that rule list
        unique_rules = defaultdict(list)
        for filter_file, rules in zip(self.filter_files, self.rules_by_file):
            unique_rules[tuple(rules)].append(filter_file)

        new_requests = []
        i = 0
        for rules, filter_files in unique_rules.items():
            # Print the rule list once, then copy it to any duplicates.
            base_filter_file = filter_files[0]
            new_requests += [
                PrintFileRequest(
                    name = "%s_print_%d" % (self.category, i),
                    output_file = base_filter_file,
                    content = self._generate_resource_filter_txt(rules)
                )
            ]
            i += 1
            for filter_file in filter_files[1:]:
                new_requests += [
                    CopyRequest(
                        name = "%s_copy_%d" % (self.category, i),
                        input_file = base_filter_file,
                        output_file = filter_file
                    )
                ]
                i += 1
        return new_requests

    @classmethod
    def _generate_resource_filter_txt(cls, rules):
        """Render a rule list as the text of a genrb filter file."""
        result = "# Caution: This file is automatically generated\n\n"
        result += "\n".join(rules)
        return result
357 def _apply_resource_filters(all_requests
, config
):
358 """Creates filters for looking within resource bundle files."""
359 json_data
= config
.filters_json_data
360 if "resourceFilters" not in json_data
:
364 for entry
in json_data
["resourceFilters"]:
366 file_filter
= Filter
.create_from_json(entry
["files"])
368 file_filter
= InclusionFilter()
369 for category
in entry
["categories"]:
370 # not defaultdict because we need to pass arguments to the constructor
371 if category
not in collected
:
372 filter_info
= ResourceFilterInfo(category
)
373 filter_info
.apply_to_requests(all_requests
)
374 collected
[category
] = filter_info
376 filter_info
= collected
[category
]
377 filter_info
.add_rules(file_filter
, entry
["rules"])
379 # Add the filter generation requests to the beginning so that by default
380 # they are made before genrb gets run (order is required by windirect)
382 for filter_info
in collected
.values():
383 new_requests
+= filter_info
.make_requests()
384 new_requests
+= all_requests