1 # Copyright (C) 2018 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html
4 # Python 2/3 Compatibility (ICU-20299)
5 # TODO(ICU-20301): Remove this.
6 from __future__
import print_function
15 from .comment_stripper
import CommentStripper
16 from .request_types
import CopyRequest
17 from .renderers
import makefile
, unix_exec
, windows_exec
18 from . import filtration
, utils
# Command-line interface. The description is printed verbatim by
# RawDescriptionHelpFormatter, so its blank lines and indentation are
# significant.
flag_parser = argparse.ArgumentParser(
    description = """Generates rules for building ICU binary data files from text
and other input files in source control.

Use the --mode option to declare how to execute those rules, either exporting
the rules to a Makefile or spawning child processes to run them immediately:

  --mode=gnumake prints a Makefile to standard out.
  --mode=unix-exec spawns child processes in a Unix-like environment.
  --mode=windows-exec spawns child processes in a Windows-like environment.

Tips for --mode=unix-exec
=========================

Create two empty directories for out_dir and tmp_dir. They will get filled
with a lot of intermediate files.

Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source:

  $ LD_LIBRARY_PATH=lib PYTHONPATH=data python3 -m buildtool ...

Once buildtool finishes, you have compiled the data, but you have not packaged
it into a .dat or .so file. This is done by the separate pkgdata tool in bin.
Read the docs of pkgdata:

  $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help

Example command line to call pkgdata:

  $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
      -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst

where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
The above command will create icudt63l.dat in the tmpdir.

Command-Line Arguments
======================
""",
    formatter_class = argparse.RawDescriptionHelpFormatter
)

# Options that must always be given.
arg_group_required = flag_parser.add_argument_group("required arguments")
arg_group_required.add_argument(
    "--mode",
    help = "What to do with the generated rules.",
    choices = ["gnumake", "unix-exec", "windows-exec"],
    required = True
)

# Options that apply to every mode.
flag_parser.add_argument(
    "--src_dir",
    help = "Path to data source folder (icu4c/source/data).",
    # NOTE(review): default reconstructed -- confirm against upstream.
    default = "."
)
flag_parser.add_argument(
    "--filter_file",
    metavar = "PATH",
    help = "Path to an ICU data filter JSON file.",
    required = False
)
flag_parser.add_argument(
    "--include_uni_core_data",
    help = "Include the full Unicode core data in the dat file.",
    default = False,
    action = "store_true"
)
flag_parser.add_argument(
    "--seqmode",
    help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
    choices = ["sequential", "parallel"],
    default = "sequential"
)

# Options that only matter when the rules are executed directly.
arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes")
arg_group_exec.add_argument(
    "--out_dir",
    help = "Path to where to save output data files.",
    # NOTE(review): default reconstructed -- confirm against upstream.
    default = "icudata"
)
arg_group_exec.add_argument(
    "--tmp_dir",
    help = "Path to where to save temporary files.",
    # NOTE(review): default reconstructed -- confirm against upstream.
    default = "icutmp"
)
arg_group_exec.add_argument(
    "--tool_dir",
    help = "Path to where to find binary tools (genrb, etc).",
    # NOTE(review): default reconstructed -- confirm against upstream.
    default = "../bin"
)
arg_group_exec.add_argument(
    "--tool_cfg",
    help = "The build configuration of the tools. Used in 'windows-exec' mode only.",
    default = "x86/Debug"
)
class Config(object):
    """Build settings derived from the parsed flags and the filter file.

    Attributes set by the constructor:
        max_parallel: True when ``args.seqmode`` is "parallel".
        include_uni_core_data: copied from ``args.include_uni_core_data``.
        filters_json_data: parsed contents of the filter file, or ``{}``
            when no filter file was given.
        coll_han_type: value of the "collationUCAData" filter key, or
            "unihan" when that key is absent.
    """

    def __init__(self, args):
        """Populate the configuration from parsed command-line arguments.

        Args:
            args: object exposing ``seqmode``, ``include_uni_core_data`` and
                ``filter_file`` attributes (the argparse namespace).

        Raises:
            SystemExit: with status 1 when the filter file cannot be read.
        """
        # Process arguments
        self.max_parallel = (args.seqmode == "parallel")

        # Boolean: Whether to include core Unicode data files in the .dat file
        self.include_uni_core_data = args.include_uni_core_data

        # Default fields before processing filter file
        self.filters_json_data = {}

        # Process filter file
        if args.filter_file:
            try:
                with open(args.filter_file, "r") as f:
                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
                    self._parse_filter_file(f)
            except IOError:
                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
                # An explicitly requested filter that cannot be read is fatal;
                # continuing would silently build unfiltered data.
                sys.exit(1)

        # Either "unihan" or "implicithan"
        self.coll_han_type = self.filters_json_data.get("collationUCAData", "unihan")

    def _parse_filter_file(self, f):
        """Load the filter definition from ``f`` into ``filters_json_data``.

        Args:
            f: open text-mode file object positioned at the start of the
                filter file.
        """
        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
        # Only the import is guarded so that parse errors are not mistaken
        # for a missing package.
        try:
            import hjson
        except ImportError:
            hjson = None
        if hjson is not None:
            self.filters_json_data = hjson.load(f)
        else:
            self.filters_json_data = json.load(CommentStripper(f))

        # Optionally pre-validate the JSON schema before further processing.
        # Some schema errors will be caught later, but this step reports
        # problems early when the 'jsonschema' package is installed.
        try:
            import jsonschema
        except ImportError:
            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
            return

        schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
        with open(schema_path) as schema_f:
            schema = json.load(CommentStripper(schema_f))
        validator = jsonschema.Draft4Validator(schema)
        # The validator is already bound to the schema; passing it again to
        # iter_errors() is redundant and deprecated in current jsonschema.
        for error in validator.iter_errors(self.filters_json_data):
            # Render the path to the offending element, e.g. ".a[0].b".
            location = "".join(
                "[%d]" % part if isinstance(part, int) else ".%s" % part
                for part in error.absolute_path
            )
            print("WARNING: ICU data filter JSON file:", error.message,
                "at", location,
                file=sys.stderr)
def add_copy_input_requests(requests, config, common_vars):
    """Prepend copy requests for every input file used by ``requests``.

    Files named in the "fileReplacements" section of the filter data are
    copied from the replacement directory; every other ``InFile`` input is
    copied from its source-tree counterpart.

    Args:
        requests: list of request objects exposing ``all_input_files()``.
        config: object whose ``filters_json_data["fileReplacements"]`` holds
            a "directory" string and a "replacements" list of mappings with
            "src" and "dest" keys.
        common_vars: not used by this function; kept for interface
            compatibility with callers.

    Returns:
        A new list: the generated copy requests followed by ``requests``.

    Raises:
        KeyError: if "fileReplacements" is absent, or if a replacement's
            "dest" is not an input file of any request.
    """
    # Every InFile consumed by some request needs to be copied.
    files_to_copy = set()
    for request in requests:
        for f in request.all_input_files():
            if isinstance(f, InFile):
                files_to_copy.add(f)

    result = []
    next_id = 0  # sequence number that keeps copy request names unique

    json_data = config.filters_json_data["fileReplacements"]
    dirname = json_data["directory"]
    for directive in json_data["replacements"]:
        input_file = LocalFile(dirname, directive["src"])
        output_file = InFile(directive["dest"])
        result.append(CopyRequest(
            name = "input_copy_%d" % next_id,
            input_file = input_file,
            output_file = output_file
        ))
        next_id += 1
        # The replacement supplies this file, so it must not also be copied
        # from the source tree below.
        files_to_copy.remove(output_file)

    # Everything that was not replaced comes straight from the source tree.
    for f in files_to_copy:
        result.append(CopyRequest(
            name = "input_copy_%d" % next_id,
            input_file = SrcFile(f.filename),
            output_file = f
        ))
        next_id += 1

    result += requests
    return result
def main():
    """Generate the ICU data build rules and render or execute them.

    Parses the command line, builds the request list, applies filters and
    file replacements, then either prints a Makefile (gnumake mode) or runs
    the rules through the Windows or Unix executor.

    Returns:
        0 after printing a Makefile, the executor's return value in the
        exec modes, or 1 when the mode is not supported.
    """
    args = flag_parser.parse_args()
    config = Config(args)

    if args.mode == "gnumake":
        makefile_vars = {
            "SRC_DIR": "$(srcdir)",
            "IN_DIR": "$(srcdir)",
            "INDEX_NAME": "res_index"
        }
        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
        # In Makefile mode every variable expands to a make reference.
        common = {
            key: "$(%s)" % key
            for key in list(makefile_vars.keys()) + makefile_env
        }
        common["GLOB_DIR"] = args.src_dir
    else:
        makefile_vars = None
        common = {
            # GLOB_DIR is used now, whereas IN_DIR is used during execution phase.
            # There is no useful distinction in unix-exec or windows-exec mode.
            "GLOB_DIR": args.src_dir,
            "SRC_DIR": args.src_dir,
            "IN_DIR": args.src_dir,
            "OUT_DIR": args.out_dir,
            "TMP_DIR": args.tmp_dir,
            "INDEX_NAME": "res_index",
            # TODO: Pull this from configure script:
            "ICUDATA_CHAR": "l"
        }

    def glob(pattern):
        """Return the sorted matches of ``pattern`` relative to src_dir."""
        result_paths = pyglob.glob("{GLOB_DIR}/{PATTERN}".format(
            GLOB_DIR = args.src_dir,
            PATTERN = pattern
        ))
        # relpath is used instead of slicing off len(src_dir)+1 characters,
        # which cuts at the wrong offset when src_dir ends in a separator.
        # For the purposes of buildtool, force Unix-style directory separators.
        return [
            os.path.relpath(v, args.src_dir).replace("\\", "/")
            for v in sorted(result_paths)
        ]

    requests = BUILDRULES.generate(config, glob, common)
    requests = filtration.apply_filters(requests, config)
    requests = utils.flatten_requests(requests, config, common)

    if "fileReplacements" in config.filters_json_data:
        # Replaced inputs are staged in a scratch directory, so the rules
        # must read their inputs from there instead of the source tree.
        tmp_in_dir = "{TMP_DIR}/in".format(**common)
        if makefile_vars:
            makefile_vars["IN_DIR"] = tmp_in_dir
        else:
            common["IN_DIR"] = tmp_in_dir
        requests = add_copy_input_requests(requests, config, common)

    build_dirs = utils.compute_directories(requests)

    if args.mode == "gnumake":
        print(makefile.get_gnumake_rules(
            build_dirs,
            requests,
            makefile_vars,
            common_vars = common
        ))
    elif args.mode == "windows-exec":
        return windows_exec.run(
            build_dirs = build_dirs,
            requests = requests,
            common_vars = common,
            tool_dir = args.tool_dir,
            tool_cfg = args.tool_cfg
        )
    elif args.mode == "unix-exec":
        return unix_exec.run(
            build_dirs = build_dirs,
            requests = requests,
            common_vars = common,
            tool_dir = args.tool_dir
        )
    else:
        # Diagnostics go to stderr so they never mix with Makefile output.
        print("Mode not supported: %s" % args.mode, file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())