]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | # Copyright (C) 2018 and later: Unicode, Inc. and others. |
2 | # License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | # Python 2/3 Compatibility (ICU-20299) | |
5 | # TODO(ICU-20301): Remove this. | |
6 | from __future__ import print_function | |
7 | ||
8 | import argparse | |
9 | import glob as pyglob | |
10 | import json | |
11 | import os | |
12 | import sys | |
13 | ||
14 | from . import * | |
15 | from .comment_stripper import CommentStripper | |
16 | from .request_types import CopyRequest | |
17 | from .renderers import makefile, unix_exec, windows_exec | |
18 | from . import filtration, utils | |
19 | import BUILDRULES | |
20 | ||
# Long-form help text printed above the option list. The parser uses
# RawDescriptionHelpFormatter, so this text is shown exactly as written here.
# NOTE(review): column alignment inside this text could not be recovered from
# the reviewed copy of the file -- confirm against the canonical source.
_DESCRIPTION = """Generates rules for building ICU binary data files from text
and other input files in source control.

Use the --mode option to declare how to execute those rules, either exporting
the rules to a Makefile or spawning child processes to run them immediately:

--mode=gnumake prints a Makefile to standard out.
--mode=unix-exec spawns child processes in a Unix-like environment.
--mode=windows-exec spawns child processes in a Windows-like environment.

Tips for --mode=unix-exec
=========================

Create two empty directories for out_dir and tmp_dir. They will get filled
with a lot of intermediate files.

Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source:

$ LD_LIBRARY_PATH=lib PYTHONPATH=data python3 -m buildtool ...

Once buildtool finishes, you have compiled the data, but you have not packaged
it into a .dat or .so file. This is done by the separate pkgdata tool in bin.
Read the docs of pkgdata:

$ LD_LIBRARY_PATH=lib ./bin/pkgdata --help

Example command line to call pkgdata:

$ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
-O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst

where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
The above command will create icudt63l.dat in the tmpdir.

Command-Line Arguments
======================
"""

flag_parser = argparse.ArgumentParser(
    description=_DESCRIPTION,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)

# --mode is the only mandatory flag; it gets its own group so that the help
# output lists it under "required arguments".
arg_group_required = flag_parser.add_argument_group("required arguments")
arg_group_required.add_argument(
    "--mode",
    required=True,
    choices=["gnumake", "unix-exec", "windows-exec"],
    help="What to do with the generated rules.",
)

# Options that apply to every mode.
flag_parser.add_argument(
    "--src_dir",
    default=".",
    help="Path to data source folder (icu4c/source/data).",
)
flag_parser.add_argument(
    "--filter_file",
    default=None,
    metavar="PATH",
    help="Path to an ICU data filter JSON file.",
)
flag_parser.add_argument(
    "--include_uni_core_data",
    action="store_true",
    default=False,
    help="Include the full Unicode core data in the dat file.",
)
flag_parser.add_argument(
    "--seqmode",
    default="sequential",
    choices=["sequential", "parallel"],
    help=(
        "Whether to optimize rules to be run sequentially (fewer threads) "
        "or in parallel (many threads). Defaults to 'sequential', which is "
        "better for unix-exec and windows-exec modes. 'parallel' is often "
        "better for massively parallel build systems."
    ),
)

# Options that only matter when the rules are executed directly.
arg_group_exec = flag_parser.add_argument_group(
    "arguments for unix-exec and windows-exec modes"
)
arg_group_exec.add_argument(
    "--out_dir",
    default="icudata",
    help="Path to where to save output data files.",
)
arg_group_exec.add_argument(
    "--tmp_dir",
    default="icutmp",
    help="Path to where to save temporary files.",
)
arg_group_exec.add_argument(
    "--tool_dir",
    default="../bin",
    help="Path to where to find binary tools (genrb, etc).",
)
arg_group_exec.add_argument(
    "--tool_cfg",
    default="x86/Debug",
    help="The build configuration of the tools. Used in 'windows-exec' mode only.",
)
115 | ||
116 | ||
class Config(object):
    """Build configuration derived from the parsed command-line arguments.

    Attributes set by the constructor:
        max_parallel: True when ``args.seqmode`` is "parallel".
        include_uni_core_data: Whether to include core Unicode data files in
            the .dat file (copied from ``args.include_uni_core_data``).
        filters_json_data: Parsed contents of the filter file, or ``{}`` when
            no filter file was given.
        coll_han_type: Either "unihan" or "implicithan". Defaults to "unihan"
            and is overridden by the filter file's "collationUCAData" key.
    """

    def __init__(self, args):
        """Populate the configuration from *args*.

        Args:
            args: The argparse namespace; ``seqmode``,
                ``include_uni_core_data`` and ``filter_file`` are read.

        Raises:
            SystemExit: With status 1 when the filter file cannot be read.
        """
        # Process arguments
        self.max_parallel = (args.seqmode == "parallel")

        # Boolean: Whether to include core Unicode data files in the .dat file
        self.include_uni_core_data = args.include_uni_core_data

        # Default fields before processing filter file
        self.filters_json_data = {}

        # Process filter file
        if args.filter_file:
            try:
                with open(args.filter_file, "r") as f:
                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
                    self._parse_filter_file(f)
            except IOError:
                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
                # sys.exit rather than the bare exit() helper: the latter is
                # injected by the site module and is not guaranteed to exist.
                sys.exit(1)

        # Either "unihan" or "implicithan"
        self.coll_han_type = "unihan"
        if "collationUCAData" in self.filters_json_data:
            self.coll_han_type = self.filters_json_data["collationUCAData"]

    def _parse_filter_file(self, f):
        """Load the filter file *f* into ``self.filters_json_data``.

        Args:
            f: An open, readable text file object for the filter file.

        Schema problems are only reported as warnings on stderr; they do not
        abort the build.
        """
        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
        try:
            import hjson
            self.filters_json_data = hjson.load(f)
        except ImportError:
            self.filters_json_data = json.load(CommentStripper(f))

        # Optionally pre-validate the JSON schema before further processing.
        # Some schema errors will be caught later, but this step ensures
        # maximal validity.
        try:
            import jsonschema
            schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
            with open(schema_path) as schema_f:
                schema = json.load(CommentStripper(schema_f))
            validator = jsonschema.Draft4Validator(schema)
            # The validator already holds the schema; passing it again to
            # iter_errors is redundant and deprecated in newer jsonschema.
            for error in validator.iter_errors(self.filters_json_data):
                print("WARNING: ICU data filter JSON file:", error.message,
                    "at", "".join(
                        "[%d]" % part if isinstance(part, int) else ".%s" % part
                        for part in error.absolute_path
                    ),
                    file=sys.stderr)
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
171 | ||
172 | ||
def add_copy_input_requests(requests, config, common_vars):
    """Prepend CopyRequests that stage every input file used by *requests*.

    Files named as a "dest" in the filter's "fileReplacements" section are
    copied from the replacement directory (as LocalFile sources); every other
    InFile read by a request is copied from its SrcFile of the same filename.

    Args:
        requests: The build requests whose input files are to be staged.
        config: Object whose ``filters_json_data["fileReplacements"]`` holds
            the "directory" and "replacements" entries.
        common_vars: Not used by this function.

    Returns:
        A new list: the copy requests followed by the original requests.

    Raises:
        KeyError: If a replacement's "dest" is not an input of any request.
    """
    # Every InFile consumed by any request still needs a copy rule.
    pending = set()
    for req in requests:
        pending.update(
            candidate
            for candidate in req.all_input_files()
            if isinstance(candidate, InFile)
        )

    replacement_spec = config.filters_json_data["fileReplacements"]
    replacement_dir = replacement_spec["directory"]

    # The running length of copy_requests doubles as the unique name suffix.
    copy_requests = []

    # Replaced files are copied from the replacement directory...
    for directive in replacement_spec["replacements"]:
        replacement_src = LocalFile(replacement_dir, directive["src"])
        replaced_dest = InFile(directive["dest"])
        copy_requests.append(CopyRequest(
            name="input_copy_%d" % len(copy_requests),
            input_file=replacement_src,
            output_file=replaced_dest,
        ))
        pending.remove(replaced_dest)

    # ...and everything that is left is copied from the source tree.
    for in_file in pending:
        copy_requests.append(CopyRequest(
            name="input_copy_%d" % len(copy_requests),
            input_file=SrcFile(in_file.filename),
            output_file=in_file,
        ))

    copy_requests.extend(requests)
    return copy_requests
210 | ||
211 | ||
def main():
    """Generate the ICU data build rules and emit or execute them.

    Parses the command line, builds the Config, generates the requests via
    BUILDRULES, applies filters, flattens them, optionally adds input-copy
    requests for "fileReplacements", and finally dispatches on ``--mode``.

    Returns:
        The process exit status: 0 after printing a Makefile, the result of
        the renderer's ``run`` for the exec modes, or 1 for an unknown mode.
    """
    args = flag_parser.parse_args()
    config = Config(args)

    if args.mode == "gnumake":
        makefile_vars = {
            "SRC_DIR": "$(srcdir)",
            "IN_DIR": "$(srcdir)",
            "INDEX_NAME": "res_index"
        }
        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
        # In Makefile mode every common variable is a $(NAME) reference.
        common = {
            key: "$(%s)" % key
            for key in list(makefile_vars.keys()) + makefile_env
        }
        common["GLOB_DIR"] = args.src_dir
    else:
        # No Makefile variables exist in the exec modes. The name must still
        # be bound, because the fileReplacements handling below tests it;
        # leaving it unbound raised UnboundLocalError in these modes.
        makefile_vars = None
        common = {
            # GLOB_DIR is used now, whereas IN_DIR is used during execution phase.
            # There is no useful distinction in unix-exec or windows-exec mode.
            "GLOB_DIR": args.src_dir,
            "SRC_DIR": args.src_dir,
            "IN_DIR": args.src_dir,
            "OUT_DIR": args.out_dir,
            "TMP_DIR": args.tmp_dir,
            "INDEX_NAME": "res_index",
            # TODO: Pull this from configure script:
            "ICUDATA_CHAR": "l"
        }

    def glob(pattern):
        """Return the sorted matches of *pattern* relative to src_dir."""
        result_paths = pyglob.glob("{GLOB_DIR}/{PATTERN}".format(
            GLOB_DIR = args.src_dir,
            PATTERN = pattern
        ))
        # For the purposes of buildtool, force Unix-style directory separators.
        # The slice drops the leading "<src_dir>/" from each match.
        return [v.replace("\\", "/")[len(args.src_dir)+1:] for v in sorted(result_paths)]

    requests = BUILDRULES.generate(config, glob, common)
    requests = filtration.apply_filters(requests, config)
    requests = utils.flatten_requests(requests, config, common)

    if "fileReplacements" in config.filters_json_data:
        # Inputs are staged under TMP_DIR/in so that replaced files can
        # stand in for the originals.
        tmp_in_dir = "{TMP_DIR}/in".format(**common)
        if makefile_vars is not None:
            makefile_vars["IN_DIR"] = tmp_in_dir
        else:
            common["IN_DIR"] = tmp_in_dir
        requests = add_copy_input_requests(requests, config, common)

    build_dirs = utils.compute_directories(requests)

    if args.mode == "gnumake":
        print(makefile.get_gnumake_rules(
            build_dirs,
            requests,
            makefile_vars,
            common_vars = common
        ))
    elif args.mode == "windows-exec":
        return windows_exec.run(
            build_dirs = build_dirs,
            requests = requests,
            common_vars = common,
            tool_dir = args.tool_dir,
            tool_cfg = args.tool_cfg
        )
    elif args.mode == "unix-exec":
        return unix_exec.run(
            build_dirs = build_dirs,
            requests = requests,
            common_vars = common,
            tool_dir = args.tool_dir
        )
    else:
        # Diagnostics go to stderr, like the other messages in this file.
        print("Mode not supported: %s" % args.mode, file=sys.stderr)
        return 1
    return 0
290 | ||
if __name__ == "__main__":
    # Use sys.exit: the bare exit() helper is installed by the site module
    # for interactive use and is not guaranteed to be available.
    sys.exit(main())