X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/a01113dcd0f39d5da295ef82785beff9ed86fe38..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/python/icutools/databuilder/__main__.py?ds=inline

diff --git a/icuSources/python/icutools/databuilder/__main__.py b/icuSources/python/icutools/databuilder/__main__.py
new file mode 100644
index 00000000..a6a387d9
--- /dev/null
+++ b/icuSources/python/icutools/databuilder/__main__.py
@@ -0,0 +1,360 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+# Python 2/3 Compatibility (ICU-20299)
+# TODO(ICU-20301): Remove this.
+from __future__ import print_function
+
+import argparse
+import glob as pyglob
+import io as pyio
+import json
+import os
+import sys
+
+from . import *
+from .comment_stripper import CommentStripper
+from .request_types import CopyRequest
+from .renderers import makefile, common_exec
+from . import filtration, utils
+
+flag_parser = argparse.ArgumentParser(
+    description = """Generates rules for building ICU binary data files from text
+and other input files in source control.
+
+Use the --mode option to declare how to execute those rules, either exporting
+the rules to a Makefile or spawning child processes to run them immediately:
+
+  --mode=gnumake prints a Makefile to standard out.
+  --mode=unix-exec spawns child processes in a Unix-like environment.
+  --mode=windows-exec spawns child processes in a Windows-like environment.
+
+Tips for --mode=unix-exec
+=========================
+
+Create two empty directories for out_dir and tmp_dir. They will get filled
+with a lot of intermediate files.
+
+Set LD_LIBRARY_PATH to include the lib directory. e.g., from icu4c/source:
+
+    $ LD_LIBRARY_PATH=lib PYTHONPATH=python python3 -m icutools.databuilder ...
+
+Once icutools.databuilder finishes, you have compiled the data, but you have
+not packaged it into a .dat or .so file. This is done by the separate pkgdata
+tool in bin. Read the docs of pkgdata:
+
+    $ LD_LIBRARY_PATH=lib ./bin/pkgdata --help
+
+Example command line to call pkgdata:
+
+    $ LD_LIBRARY_PATH=lib ./bin/pkgdata -m common -p icudt63l -c \\
+      -O data/icupkg.inc -s $OUTDIR -d $TMPDIR $TMPDIR/icudata.lst
+
+where $OUTDIR and $TMPDIR are your out and tmp directories, respectively.
+The above command will create icudt63l.dat in the tmpdir.
+
+Command-Line Arguments
+======================
+""",
+    formatter_class = argparse.RawDescriptionHelpFormatter
+)
+
+arg_group_required = flag_parser.add_argument_group("required arguments")
+arg_group_required.add_argument(
+    "--mode",
+    help = "What to do with the generated rules.",
+    choices = ["gnumake", "unix-exec", "windows-exec", "bazel-exec"],
+    required = True
+)
+
+flag_parser.add_argument(
+    "--src_dir",
+    help = "Path to data source folder (icu4c/source/data).",
+    default = "."
+)
+flag_parser.add_argument(
+    "--filter_file",
+    metavar = "PATH",
+    help = "Path to an ICU data filter JSON file.",
+    default = None
+)
+flag_parser.add_argument(
+    "--include_uni_core_data",
+    help = "Include the full Unicode core data in the dat file.",
+    default = False,
+    action = "store_true"
+)
+flag_parser.add_argument(
+    "--seqmode",
+    help = "Whether to optimize rules to be run sequentially (fewer threads) or in parallel (many threads). Defaults to 'sequential', which is better for unix-exec and windows-exec modes. 'parallel' is often better for massively parallel build systems.",
+    choices = ["sequential", "parallel"],
+    default = "sequential"
+)
+flag_parser.add_argument(
+    "--verbose",
+    help = "Print more verbose output (default false).",
+    default = False,
+    action = "store_true"
+)
+
+arg_group_exec = flag_parser.add_argument_group("arguments for unix-exec and windows-exec modes")
+arg_group_exec.add_argument(
+    "--out_dir",
+    help = "Path to where to save output data files.",
+    default = "icudata"
+)
+arg_group_exec.add_argument(
+    "--tmp_dir",
+    help = "Path to where to save temporary files.",
+    default = "icutmp"
+)
+arg_group_exec.add_argument(
+    "--tool_dir",
+    help = "Path to where to find binary tools (genrb, etc).",
+    default = "../bin"
+)
+arg_group_exec.add_argument(
+    "--tool_cfg",
+    help = "The build configuration of the tools. Used in 'windows-exec' mode only.",
+    default = "x86/Debug"
+)
+
+
+class Config(object):
+
+    def __init__(self, args):
+        # Process arguments
+        self.max_parallel = (args.seqmode == "parallel")
+
+        # Boolean: Whether to include core Unicode data files in the .dat file
+        self.include_uni_core_data = args.include_uni_core_data
+
+        # Default fields before processing filter file
+        self.filters_json_data = {}
+        self.filter_dir = "ERROR_NO_FILTER_FILE"
+
+        # Process filter file
+        if args.filter_file:
+            try:
+                with open(args.filter_file, "r") as f:
+                    print("Note: Applying filters from %s." % args.filter_file, file=sys.stderr)
+                    self._parse_filter_file(f)
+            except IOError:
+                print("Error: Could not read filter file %s." % args.filter_file, file=sys.stderr)
+                exit(1)
+            self.filter_dir = os.path.abspath(os.path.dirname(args.filter_file))
+
+        # Either "unihan" or "implicithan"
+        self.coll_han_type = "unihan"
+        if "collationUCAData" in self.filters_json_data:
+            self.coll_han_type = self.filters_json_data["collationUCAData"]
+
+        # Either "additive" or "subtractive"
+        self.strategy = "subtractive"
+        if "strategy" in self.filters_json_data:
+            self.strategy = self.filters_json_data["strategy"]
+
+        # True or False (could be extended later to support enum/list)
+        self.use_pool_bundle = True
+        if "usePoolBundle" in self.filters_json_data:
+            self.use_pool_bundle = self.filters_json_data["usePoolBundle"]
+
+    def _parse_filter_file(self, f):
+        # Use the Hjson parser if it is available; otherwise, use vanilla JSON.
+        try:
+            import hjson
+            self.filters_json_data = hjson.load(f)
+        except ImportError:
+            self.filters_json_data = json.load(CommentStripper(f))
+
+        # Optionally pre-validate the JSON schema before further processing.
+        # Some schema errors will be caught later, but this step ensures
+        # maximal validity.
+        try:
+            import jsonschema
+            schema_path = os.path.join(os.path.dirname(__file__), "filtration_schema.json")
+            with open(schema_path) as schema_f:
+                schema = json.load(CommentStripper(schema_f))
+            validator = jsonschema.Draft4Validator(schema)
+            for error in validator.iter_errors(self.filters_json_data, schema):
+                print("WARNING: ICU data filter JSON file:", error.message,
+                    "at", "".join(
+                        "[%d]" % part if isinstance(part, int) else ".%s" % part
+                        for part in error.absolute_path
+                    ),
+                    file=sys.stderr)
+        except ImportError:
+            print("Tip: to validate your filter file, install the Pip package 'jsonschema'", file=sys.stderr)
+            pass
+
+
+def add_copy_input_requests(requests, config, common_vars):
+    files_to_copy = set()
+    for request in requests:
+        request_files = request.all_input_files()
+        # Also add known dependency txt files as possible inputs.
+        # This is required for translit rule files.
+        if hasattr(request, "dep_targets"):
+            request_files += [
+                f for f in request.dep_targets if isinstance(f, InFile)
+            ]
+        for f in request_files:
+            if isinstance(f, InFile):
+                files_to_copy.add(f)
+
+    result = []
+    id = 0
+
+    json_data = config.filters_json_data["fileReplacements"]
+    dirname = json_data["directory"]
+    for directive in json_data["replacements"]:
+        if type(directive) == str:
+            input_file = LocalFile(dirname, directive)
+            output_file = InFile(directive)
+        else:
+            input_file = LocalFile(dirname, directive["src"])
+            output_file = InFile(directive["dest"])
+        result += [
+            CopyRequest(
+                name = "input_copy_%d" % id,
+                input_file = input_file,
+                output_file = output_file
+            )
+        ]
+        files_to_copy.remove(output_file)
+        id += 1
+
+    for f in files_to_copy:
+        result += [
+            CopyRequest(
+                name = "input_copy_%d" % id,
+                input_file = SrcFile(f.filename),
+                output_file = f
+            )
+        ]
+        id += 1
+
+    result += requests
+    return result
+
+
+class IO(object):
+    """I/O operations required when computing the build actions"""
+
+    def __init__(self, src_dir):
+        self.src_dir = src_dir
+
+    def glob(self, pattern):
+        absolute_paths = pyglob.glob(os.path.join(self.src_dir, pattern))
+        # Strip off the absolute path suffix so we are left with a relative path.
+        relative_paths = [v[len(self.src_dir)+1:] for v in sorted(absolute_paths)]
+        # For the purposes of icutools.databuilder, force Unix-style directory separators.
+        # Within the Python code, including BUILDRULES.py and user-provided config files,
+        # directory separators are normalized to '/', including on Windows platforms.
+        return [v.replace("\\", "/") for v in relative_paths]
+
+    def read_locale_deps(self, tree):
+        return self._read_json("%s/LOCALE_DEPS.json" % tree)
+
+    def _read_json(self, filename):
+        with pyio.open(os.path.join(self.src_dir, filename), "r", encoding="utf-8-sig") as f:
+            return json.load(CommentStripper(f))
+
+
+def main(argv):
+    args = flag_parser.parse_args(argv)
+    config = Config(args)
+
+    if args.mode == "gnumake":
+        makefile_vars = {
+            "SRC_DIR": "$(srcdir)",
+            "IN_DIR": "$(srcdir)",
+            "INDEX_NAME": "res_index"
+        }
+        makefile_env = ["ICUDATA_CHAR", "OUT_DIR", "TMP_DIR"]
+        common = {
+            key: "$(%s)" % key
+            for key in list(makefile_vars.keys()) + makefile_env
+        }
+        common["FILTERS_DIR"] = config.filter_dir
+        common["CWD_DIR"] = os.getcwd()
+    else:
+        makefile_vars = None
+        common = {
+            "SRC_DIR": args.src_dir,
+            "IN_DIR": args.src_dir,
+            "OUT_DIR": args.out_dir,
+            "TMP_DIR": args.tmp_dir,
+            "FILTERS_DIR": config.filter_dir,
+            "CWD_DIR": os.getcwd(),
+            "INDEX_NAME": "res_index",
+            # TODO: Pull this from configure script:
+            "ICUDATA_CHAR": "l"
+        }
+
+    # Automatically load BUILDRULES from the src_dir
+    sys.path.append(args.src_dir)
+    try:
+        import BUILDRULES
+    except ImportError:
+        print("Cannot find BUILDRULES! Did you set your --src_dir?", file=sys.stderr)
+        sys.exit(1)
+
+    io = IO(args.src_dir)
+    requests = BUILDRULES.generate(config, io, common)
+
+    if "fileReplacements" in config.filters_json_data:
+        tmp_in_dir = "{TMP_DIR}/in".format(**common)
+        if makefile_vars:
+            makefile_vars["IN_DIR"] = tmp_in_dir
+        else:
+            common["IN_DIR"] = tmp_in_dir
+        requests = add_copy_input_requests(requests, config, common)
+
+    requests = filtration.apply_filters(requests, config, io)
+    requests = utils.flatten_requests(requests, config, common)
+
+    build_dirs = utils.compute_directories(requests)
+
+    if args.mode == "gnumake":
+        print(makefile.get_gnumake_rules(
+            build_dirs,
+            requests,
+            makefile_vars,
+            common_vars = common
+        ))
+    elif args.mode == "windows-exec":
+        return common_exec.run(
+            platform = "windows",
+            build_dirs = build_dirs,
+            requests = requests,
+            common_vars = common,
+            tool_dir = args.tool_dir,
+            tool_cfg = args.tool_cfg,
+            verbose = args.verbose,
+        )
+    elif args.mode == "unix-exec":
+        return common_exec.run(
+            platform = "unix",
+            build_dirs = build_dirs,
+            requests = requests,
+            common_vars = common,
+            tool_dir = args.tool_dir,
+            verbose = args.verbose,
+        )
+    elif args.mode == "bazel-exec":
+        return common_exec.run(
+            platform = "bazel",
+            build_dirs = build_dirs,
+            requests = requests,
+            common_vars = common,
+            tool_dir = args.tool_dir,
+            verbose = args.verbose,
+        )
+    else:
+        print("Mode not supported: %s" % args.mode)
+        return 1
+    return 0
+
+if __name__ == "__main__":
+    exit(main(sys.argv[1:]))