]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | # Copyright (C) 2018 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | # Python 2/3 Compatibility (ICU-20299) | |
5 | # TODO(ICU-20301): Remove this. | |
6 | from __future__ import print_function | |
7 | ||
8 | import argparse | |
9 | import glob as pyglob | |
340931cb | 10 | import io as pyio |
3d1f044b A |
11 | import json |
12 | import os | |
13 | import sys | |
14 | ||
15 | from . import * | |
16 | from .comment_stripper import CommentStripper | |
17 | from .request_types import CopyRequest | |
340931cb | 18 | from .renderers import makefile, common_exec |
3d1f044b | 19 | from . import filtration, utils |
3d1f044b A |
20 | |
# Command-line interface. The parser lives at module level so that main() can
# call flag_parser.parse_args(argv). RawDescriptionHelpFormatter keeps the line
# breaks of the description below exactly as written.
flag_parser = argparse.ArgumentParser(
    description = """Generates rules for building ICU binary data files from text
and other input files in source control.

Use the --mode option to declare how to execute those rules, either exporting
the rules to a Makefile or spawning child processes to run them immediately:

--mode=gnumake prints a Makefile to standard out.
--mode=unix-exec spawns child processes in a Unix-like environment.
--mode=windows-exec spawns child processes in a Windows-like environment.
--mode=bazel-exec spawns child processes in a Bazel build environment.

Tips for --mode=unix-exec
=========================

Create two empty directories for out_dir and tmp_dir. They will get filled
with a lot of intermediate files.

Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source:

$ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder ...

Once icutools.databuilder finishes, you have compiled the data, but you have
not packaged it into a .dat or .so file. This is done by the separate pkgdata
tool in bin. Read the docs of pkgdata:

$ LD_LIBRARY_PATH=lib ./bin/pkgdata --help

Example command line to call pkgdata:

$ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
-O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst

where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
The above command will create icudt63l.dat in the tmpdir.

Command-Line Arguments
======================
""",
    formatter_class = argparse.RawDescriptionHelpFormatter
)

# --mode is the only mandatory flag; it gets its own group so that --help lists
# it under "required arguments" rather than among the optional ones.
arg_group_required = flag_parser.add_argument_group("required arguments")
arg_group_required.add_argument(
    "--mode",
    help = "What to do with the generated rules.",
    choices = ["gnumake", "unix-exec", "windows-exec", "bazel-exec"],
    required = True
)

# Flags that apply to every mode.
flag_parser.add_argument(
    "--src_dir",
    help = "Path to data source folder (icu4c/source/data).",
    default = "."
)
flag_parser.add_argument(
    "--filter_file",
    metavar = "PATH",
    help = "Path to an ICU data filter JSON file.",
    default = None
)
flag_parser.add_argument(
    "--include_uni_core_data",
    help = "Include the full Unicode core data in the dat file.",
    default = False,
    action = "store_true"
)
flag_parser.add_argument(
    "--seqmode",
    help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
    choices = ["sequential", "parallel"],
    default = "sequential"
)
flag_parser.add_argument(
    "--verbose",
    help = "Print more verbose output (default false).",
    default = False,
    action = "store_true"
)

# Flags consumed only when the rules are executed directly (every mode except
# gnumake). bazel-exec is listed because it reads these values as well.
arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec, windows-exec, and bazel-exec modes")
arg_group_exec.add_argument(
    "--out_dir",
    help = "Path to where to save output data files.",
    default = "icudata"
)
arg_group_exec.add_argument(
    "--tmp_dir",
    help = "Path to where to save temporary files.",
    default = "icutmp"
)
arg_group_exec.add_argument(
    "--tool_dir",
    help = "Path to where to find binary tools (genrb, etc).",
    default = "../bin"
)
arg_group_exec.add_argument(
    "--tool_cfg",
    help = "The build configuration of the tools. Used in 'windows-exec' mode only.",
    default = "x86/Debug"
)
121 | ||
122 | ||
class Config(object):
    """Build settings derived from the parsed flags and the optional filter file.

    Attributes set by __init__:
        max_parallel: True when --seqmode is "parallel".
        include_uni_core_data: value of --include_uni_core_data.
        filters_json_data: parsed filter file contents, or {} without one.
        filter_dir: absolute directory of the filter file, or the placeholder
            "ERROR_NO_FILTER_FILE" when no filter file was given.
        coll_han_type: "collationUCAData" from the filter file
            (default "unihan").
        strategy: "strategy" from the filter file (default "subtractive").
        use_pool_bundle: "usePoolBundle" from the filter file (default True).
    """

    def __init__(self, args):
        """Populate the configuration from an argparse namespace.

        Reads args.seqmode, args.include_uni_core_data and args.filter_file.
        Exits the process with status 1 if the filter file cannot be read.
        """
        # Process arguments
        self.max_parallel = (args.seqmode == "parallel")

        # Boolean: Whether to include core Unicode data files in the .dat file
        self.include_uni_core_data = args.include_uni_core_data

        # Default fields before processing filter file
        self.filters_json_data = {}
        self.filter_dir = "ERROR_NO_FILTER_FILE"

        # Process filter file
        if args.filter_file:
            try:
                # Decode explicitly (tolerating a BOM) instead of relying on
                # the platform default encoding; this matches how the other
                # JSON inputs are opened in IO._read_json.
                with pyio.open(args.filter_file, "r", encoding="utf-8-sig") as f:
                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
                    self._parse_filter_file(f)
            except IOError:
                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
                # sys.exit rather than the bare exit(), which only exists when
                # the site module has been loaded.
                sys.exit(1)
            self.filter_dir = os.path.abspath(os.path.dirname(args.filter_file))

        # Either "unihan" or "implicithan"
        self.coll_han_type = self.filters_json_data.get("collationUCAData", "unihan")

        # Either "additive" or "subtractive"
        self.strategy = self.filters_json_data.get("strategy", "subtractive")

        # True or False (could be extended later to support enum/list)
        self.use_pool_bundle = self.filters_json_data.get("usePoolBundle", True)

    def _parse_filter_file(self, f):
        """Load the filter file from the open text stream f.

        Stores the parsed data in self.filters_json_data. If the optional
        'jsonschema' package is installed, schema violations are reported as
        warnings on stderr; they do not abort processing.
        """
        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
        try:
            import hjson
            self.filters_json_data = hjson.load(f)
        except ImportError:
            self.filters_json_data = json.load(CommentStripper(f))

        # Optionally pre-validate the JSON schema before further processing.
        # Some schema errors will be caught later, but this step ensures
        # maximal validity.
        try:
            import jsonschema
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
            return

        schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
        with open(schema_path) as schema_f:
            schema = json.load(CommentStripper(schema_f))
        validator = jsonschema.Draft4Validator(schema)
        # The validator already holds the schema; passing it a second time to
        # iter_errors() is deprecated in jsonschema.
        for error in validator.iter_errors(self.filters_json_data):
            print("WARNING: ICU data filter JSON file:", error.message,
                "at", "".join(
                    "[%d]" % part if isinstance(part, int) else ".%s" % part
                    for part in error.absolute_path
                ),
                file=sys.stderr)
189 | ||
190 | ||
def add_copy_input_requests(requests, config, common_vars):
    """Prepend copy requests that stage every input file for the build.

    Collects each InFile referenced by the given requests (including InFile
    entries in a request's dep_targets, when it has that attribute). For each
    entry in config.filters_json_data["fileReplacements"]["replacements"] a
    CopyRequest is emitted that copies the replacement file over the input;
    every remaining input gets a CopyRequest from its SrcFile of the same
    name.

    Args:
        requests: list of request objects; not modified.
        config: Config whose filters_json_data contains "fileReplacements".
        common_vars: unused here; kept for interface compatibility.

    Returns:
        A new list: the copy requests followed by the original requests.
    """
    files_to_copy = set()
    for request in requests:
        # Work on a copy so the += below can never mutate a list owned by the
        # request (all_input_files() is not known to return a fresh list).
        request_files = list(request.all_input_files())
        # Also add known dependency txt files as possible inputs.
        # This is required for translit rule files.
        if hasattr(request, "dep_targets"):
            request_files += [
                f for f in request.dep_targets if isinstance(f, InFile)
            ]
        files_to_copy.update(f for f in request_files if isinstance(f, InFile))

    result = []

    json_data = config.filters_json_data["fileReplacements"]
    dirname = json_data["directory"]
    for directive in json_data["replacements"]:
        # A directive is either {"src": ..., "dest": ...} or a bare file name.
        # Test for the mapping form: bare names may be `unicode` rather than
        # `str` on Python 2, so comparing the type against str is unreliable.
        if isinstance(directive, dict):
            input_file = LocalFile(dirname, directive["src"])
            output_file = InFile(directive["dest"])
        else:
            input_file = LocalFile(dirname, directive)
            output_file = InFile(directive)
        result.append(
            CopyRequest(
                # len(result) is the running index of the copy requests.
                name = "input_copy_%d" % len(result),
                input_file = input_file,
                output_file = output_file
            )
        )
        # discard(), not remove(): a replacement whose target is not a known
        # input, or is listed twice, must not abort with a bare KeyError.
        files_to_copy.discard(output_file)

    # Sort so the request numbering is stable from run to run; plain set
    # iteration order depends on string hashing.
    for f in sorted(files_to_copy, key=lambda in_file: in_file.filename):
        result.append(
            CopyRequest(
                name = "input_copy_%d" % len(result),
                input_file = SrcFile(f.filename),
                output_file = f
            )
        )

    result += requests
    return result
239 | ||
240 | ||
340931cb A |
class IO(object):
    """I/O operations required when computing the build actions"""

    def __init__(self, src_dir):
        """Remember src_dir, the directory that all lookups are relative to."""
        self.src_dir = src_dir

    def glob(self, pattern):
        """Return the sorted paths under src_dir that match pattern.

        The paths are relative to src_dir and always use '/' as separator.
        """
        matched_paths = sorted(pyglob.glob(os.path.join(self.src_dir, pattern)))
        # Strip off the src_dir prefix so we are left with a relative path.
        # relpath() is used instead of slicing off len(src_dir)+1 characters,
        # which drops the first character of every result when src_dir ends
        # with a separator or is empty.
        relative_paths = [os.path.relpath(v, self.src_dir) for v in matched_paths]
        # For the purposes of icutools.databuilder, force Unix-style directory separators.
        # Within the Python code, including BUILDRULES.py and user-provided config files,
        # directory separators are normalized to '/', including on Windows platforms.
        return [v.replace("\\", "/") for v in relative_paths]

    def read_locale_deps(self, tree):
        """Return the parsed contents of <tree>/LOCALE_DEPS.json under src_dir."""
        return self._read_json("%s/LOCALE_DEPS.json" % tree)

    def _read_json(self, filename):
        """Parse the JSON file at src_dir/filename via CommentStripper.

        The file is decoded as UTF-8; a leading byte-order mark is ignored.
        """
        with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f:
            return json.load(CommentStripper(f))
262 | ||
263 | ||
def main(argv):
    """Generate the ICU data build rules and export or execute them.

    Args:
        argv: command-line arguments, without the program name.

    Returns:
        0 after printing the Makefile in gnumake mode, the result of
        common_exec.run() in the *-exec modes, or 1 for an unsupported mode.
        Exits with status 1 if BUILDRULES cannot be imported from --src_dir.
    """
    args = flag_parser.parse_args(argv)
    config = Config(args)

    if args.mode == "gnumake":
        makefile_vars = {
            "SRC_DIR": "$(srcdir)",
            "IN_DIR": "$(srcdir)",
            "INDEX_NAME": "res_index"
        }
        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
        # In Makefile mode every variable expands to a reference to itself;
        # the concrete values are emitted through makefile_vars.
        common = {
            key: "$(%s)" % key
            for key in list(makefile_vars.keys()) + makefile_env
        }
        common["FILTERS_DIR"] = config.filter_dir
        common["CWD_DIR"] = os.getcwd()
    else:
        makefile_vars = None
        common = {
            "SRC_DIR": args.src_dir,
            "IN_DIR": args.src_dir,
            "OUT_DIR": args.out_dir,
            "TMP_DIR": args.tmp_dir,
            "FILTERS_DIR": config.filter_dir,
            "CWD_DIR": os.getcwd(),
            "INDEX_NAME": "res_index",
            # TODO: Pull this from configure script:
            "ICUDATA_CHAR": "l"
        }

    # Automatically load BUILDRULES from the src_dir
    sys.path.append(args.src_dir)
    try:
        import BUILDRULES
    except ImportError:
        print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr)
        sys.exit(1)

    io = IO(args.src_dir)
    requests = BUILDRULES.generate(config, io, common)

    if "fileReplacements" in config.filters_json_data:
        # With file replacements the inputs are first staged in TMP_DIR/in,
        # so IN_DIR is redirected there.
        tmp_in_dir = "{TMP_DIR}/in".format(**common)
        if makefile_vars:
            makefile_vars["IN_DIR"] = tmp_in_dir
        else:
            common["IN_DIR"] = tmp_in_dir
        requests = add_copy_input_requests(requests, config, common)

    requests = filtration.apply_filters(requests, config, io)
    requests = utils.flatten_requests(requests, config, common)

    build_dirs = utils.compute_directories(requests)

    if args.mode == "gnumake":
        print(makefile.get_gnumake_rules(
            build_dirs,
            requests,
            makefile_vars,
            common_vars = common
        ))
        return 0

    # Every *-exec mode goes through the same executor and differs only in
    # the platform name (plus tool_cfg, which only windows-exec passes).
    exec_platforms = {
        "windows-exec": "windows",
        "unix-exec": "unix",
        "bazel-exec": "bazel",
    }
    platform = exec_platforms.get(args.mode)
    if platform is None:
        # Diagnostics go to stderr; stdout is where gnumake mode writes the
        # Makefile.
        print("Mode not supported: %s" % args.mode, file=sys.stderr)
        return 1

    exec_kwargs = {
        "platform": platform,
        "build_dirs": build_dirs,
        "requests": requests,
        "common_vars": common,
        "tool_dir": args.tool_dir,
        "verbose": args.verbose,
    }
    if args.mode == "windows-exec":
        exec_kwargs["tool_cfg"] = args.tool_cfg
    return common_exec.run(**exec_kwargs)
358 | ||
if __name__ == "__main__":
    # Use sys.exit(): the bare exit() helper is injected by the site module
    # for interactive use and is absent when Python runs without it (-S).
    sys.exit(main(sys.argv[1:]))