]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/BUILDRULES.py
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / data / BUILDRULES.py
CommitLineData
3d1f044b
A
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html

# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
6from __future__ import print_function
7
8from buildtool import *
9from buildtool import locale_dependencies
10from buildtool import utils
11from buildtool.request_types import *
12
13import os
14import sys
15import xml.etree.ElementTree as ET
16
17
def generate(config, glob, common_vars):
    """Assemble every build request for the ICU data package.

    Args:
        config: Build configuration; forwarded unchanged to each rule function.
        glob: Callable that takes a pattern such as "misc/*" and returns the
            matching file names.
        common_vars: Mapping of placeholder values; forwarded unchanged to
            each rule function.

    Returns:
        A list of request objects whose last element is the "icudata_list"
        ListRequest.

    Raises:
        SystemExit: With status 1, after an error message on stderr, when
            glob("misc/*") matches nothing.
    """
    if not glob("misc/*"):
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        # sys.exit() instead of the bare exit(): the latter is only injected
        # by the site module and is missing under "python -S".
        sys.exit(1)

    requests = []

    # Rules that each produce a fixed set of outputs, in build-file order.
    flat_rules = [
        generate_cnvalias,
        generate_confusables,
        generate_conversion_mappings,
        generate_brkitr_brk,
        generate_stringprep,
        generate_brkitr_dictionaries,
        generate_normalization,
        generate_coll_ucadata,
        generate_full_unicore_data,
        generate_unames,
        generate_ulayout,
        generate_misc,
        generate_curr_supplemental,
        generate_translit,
    ]
    for rule in flat_rules:
        requests += rule(config, glob, common_vars)

    # Res Tree Files
    # (input dirname, output dirname, deprecates XML file, use pool bundle, extra dependencies)
    tree_rules = [
        ("locales", None, "icu-locale-deprecates.xml", True, []),
        ("curr", "curr", "icu-locale-deprecates.xml", True, []),
        ("lang", "lang", "icu-locale-deprecates.xml", True, []),
        ("region", "region", "icu-locale-deprecates.xml", True, []),
        ("zone", "zone", "icu-locale-deprecates.xml", True, []),
        ("unit", "unit", "icu-locale-deprecates.xml", True, []),
        (
            "coll", "coll", "icu-coll-deprecates.xml", False,
            # Depends on timezoneTypes.res and keyTypeData.res.
            # TODO: We should not need this dependency to build collation.
            # TODO: Bake keyTypeData.res into the common library?
            [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")],
        ),
        (
            "brkitr", "brkitr", "icu-locale-deprecates.xml", False,
            [DepTarget("brkitr_brk"), DepTarget("dictionaries")],
        ),
        ("rbnf", "rbnf", "icu-rbnf-deprecates.xml", False, []),
    ]
    for sub_dir, out_sub_dir, xml_filename, use_pool_bundle, dep_targets in tree_rules:
        requests += generate_tree(
            config, glob, common_vars,
            sub_dir,
            out_sub_dir,
            xml_filename,
            use_pool_bundle,
            dep_targets)

    # Listing of the package contents, written to a temporary file.
    requests.append(
        ListRequest(
            name="icudata_list",
            variable_name="icudata_all_output_files",
            output_file=TmpFile("icudata.lst"),
            include_tmp=False
        )
    )

    return requests
118
119
def generate_cnvalias(config, glob, common_vars):
    """Return the request that builds cnvalias.icu (UConv name aliases).

    A single gencnval run over mappings/convrtrs.txt. The three parameters
    are not used by this rule.
    """
    request = SingleExecutionRequest(
        name="cnvalias",
        category="cnvalias",
        dep_targets=[],
        input_files=[InFile("mappings/convrtrs.txt")],
        output_files=[OutFile("cnvalias.icu")],
        tool=IcuTool("gencnval"),
        args="-s {IN_DIR} -d {OUT_DIR} {INPUT_FILES[0]}",
        format_with={},
    )
    return [request]
137
138
def generate_confusables(config, glob, common_vars):
    """Return the request that builds confusables.cfu.

    A single gencfu run over the two confusables text files in unidata/,
    scheduled after the "cnvalias" target. The three parameters are not used
    by this rule.
    """
    sources = [
        InFile("unidata/confusables.txt"),
        InFile("unidata/confusablesWholeScript.txt"),
    ]
    gencfu_args = " ".join([
        "-d {OUT_DIR} -i {OUT_DIR}",
        "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]}",
        "-o {OUTPUT_FILES[0]}",
    ])
    request = SingleExecutionRequest(
        name="confusables",
        category="confusables",
        dep_targets=[DepTarget("cnvalias")],
        input_files=sources,
        output_files=[OutFile("confusables.cfu")],
        tool=IcuTool("gencfu"),
        args=gencfu_args,
        format_with={},
    )
    return [request]
158
159
def generate_conversion_mappings(config, glob, common_vars):
    """Return the request that builds one .cnv file per mappings/*.ucm file.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "conversion_mappings" makeconv request.
    """
    # Each output is named after its input with the directory and the
    # extension stripped.
    dir_len = len("mappings/")
    ext_len = len(".ucm")

    ucm_files = [InFile(path) for path in glob("mappings/*.ucm")]
    cnv_files = []
    for ucm in ucm_files:
        stem = ucm.filename[dir_len:-ext_len]
        cnv_files.append(OutFile("%s.cnv" % stem))

    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    request = RepeatedOrSingleExecutionRequest(
        name="conversion_mappings",
        category="conversion_mappings",
        dep_targets=[],
        input_files=ucm_files,
        output_files=cnv_files,
        tool=IcuTool("makeconv"),
        args="-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
        format_with={},
        repeat_with={
            "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(ucm.filename for ucm in ucm_files)
        },
    )
    return [request]
180
181
def generate_brkitr_brk(config, glob, common_vars):
    """Return the request that builds one .brk file per brkitr/rules/*.txt file.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "brkitr_brk" genbrk request, which is
        scheduled after the "cnvalias" target.
    """
    dir_len = len("brkitr/rules/")
    ext_len = len(".txt")

    rule_files = [InFile(path) for path in glob("brkitr/rules/*.txt")]
    brk_files = [
        OutFile("brkitr/%s.brk" % rule.filename[dir_len:-ext_len])
        for rule in rule_files
    ]

    request = RepeatedExecutionRequest(
        name="brkitr_brk",
        category="brkitr_rules",
        dep_targets=[DepTarget("cnvalias")],
        input_files=rule_files,
        output_files=brk_files,
        tool=IcuTool("genbrk"),
        args="-d {OUT_DIR} -i {OUT_DIR} -c -r {IN_DIR}/{INPUT_FILE} -o {OUTPUT_FILE}",
        format_with={},
        repeat_with={},
    )
    return [request]
201
202
def generate_stringprep(config, glob, common_vars):
    """Return the request that builds one .spp file per sprep/*.txt file.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "stringprep" gensprep request. Its
        only dependency is unidata/NormalizationCorrections.txt.
    """
    dir_len = len("sprep/")
    ext_len = len(".txt")

    profile_files = [InFile(path) for path in glob("sprep/*.txt")]
    # The bundle name is the input file name without directory or extension;
    # it is reused for the output file name and on the command line.
    bundles = [profile.filename[dir_len:-ext_len] for profile in profile_files]
    spp_files = [OutFile("%s.spp" % bundle) for bundle in bundles]

    gensprep_args = (
        "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
        "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt"
    )
    request = RepeatedExecutionRequest(
        name="stringprep",
        category="stringprep",
        dep_targets=[InFile("unidata/NormalizationCorrections.txt")],
        input_files=profile_files,
        output_files=spp_files,
        tool=IcuTool("gensprep"),
        args=gensprep_args,
        format_with={},
        repeat_with={"BUNDLE_NAME": bundles},
    )
    return [request]
224
225
def generate_brkitr_dictionaries(config, glob, common_vars):
    """Return the request that builds one .dict file per dictionary text file.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "dictionaries" gendict request.

    Raises:
        KeyError: If brkitr/dictionaries/ contains a .txt file with no entry
            in the per-dictionary option table below.
    """
    # gendict options for each known dictionary, keyed by input file name.
    options_by_file = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
    }
    dir_len = len("brkitr/dictionaries/")
    ext_len = len(".txt")

    word_lists = [InFile(path) for path in glob("brkitr/dictionaries/*.txt")]
    dict_files = []
    for word_list in word_lists:
        stem = word_list.filename[dir_len:-ext_len]
        dict_files.append(OutFile("brkitr/%s.dict" % stem))
    per_file_options = [options_by_file[word_list.filename] for word_list in word_lists]

    request = RepeatedExecutionRequest(
        name="dictionaries",
        category="brkitr_dictionaries",
        dep_targets=[],
        input_files=word_lists,
        output_files=dict_files,
        tool=IcuTool("gendict"),
        args="-i {OUT_DIR} -c {EXTRA_OPTIONS} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
        format_with={},
        repeat_with={"EXTRA_OPTIONS": per_file_options},
    )
    return [request]
255
256
def generate_normalization(config, glob, common_vars):
    """Return the request that copies every in/*.nrm file except nfc.nrm.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "normalization" icupkg request.

    Raises:
        ValueError: If in/nfc.nrm is not among the files matched by glob.
    """
    nrm_sources = [InFile(path) for path in glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    nrm_sources.remove(InFile("in/nfc.nrm"))

    # Outputs keep the input name minus the leading "in/".
    dir_len = len("in/")
    nrm_outputs = [OutFile(src.filename[dir_len:]) for src in nrm_sources]

    request = RepeatedExecutionRequest(
        name="normalization",
        category="normalization",
        dep_targets=[],
        input_files=nrm_sources,
        output_files=nrm_outputs,
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
        format_with={},
        repeat_with={},
    )
    return [request]
276
277
def generate_coll_ucadata(config, glob, common_vars):
    """Return the request that builds coll/ucadata.icu.

    Args:
        config: Build configuration; config.coll_han_type selects which
            in/coll/ucadata-<type>.icu file is used as input.
        glob: Unused.
        common_vars: Unused.

    Returns:
        A one-element list holding the "coll_ucadata" icupkg request.
    """
    # Collation Dependency File (ucadata.icu)
    source_name = "in/coll/ucadata-%s.icu" % config.coll_han_type
    request = SingleExecutionRequest(
        name="coll_ucadata",
        category="coll_ucadata",
        dep_targets=[],
        input_files=[InFile(source_name)],
        output_files=[OutFile("coll/ucadata.icu")],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
294
295
def generate_full_unicore_data(config, glob, common_vars):
    """Return the request that copies the core Unicode property files.

    The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu,
    ubidi.icu) are hardcoded in the common DLL and therefore not included in
    the data package any more. They are not built by default but need to be
    built for ICU4J data, both in the .jar and in the .dat file (if ICU4J
    uses the .dat file). See ICU-4497.

    Args:
        config: Build configuration; nothing is generated unless
            config.include_uni_core_data is truthy.
        glob: Unused.
        common_vars: Unused.

    Returns:
        An empty list when the core data is not requested, otherwise a
        one-element list holding the "unicore" icupkg request.
    """
    if not config.include_uni_core_data:
        return []

    basenames = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    input_files = [InFile("in/%s" % bn) for bn in basenames]
    output_files = [OutFile(bn) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name="unicore",
            category="unicore",
            # dep_targets, format_with and repeat_with are passed explicitly
            # as empty values, the same way the other RepeatedExecutionRequest
            # rules in this file spell them out.
            dep_targets=[],
            input_files=input_files,
            output_files=output_files,
            tool=IcuTool("icupkg"),
            args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with={},
            repeat_with={}
        )
    ]
324
325
def generate_unames(config, glob, common_vars):
    """Return the request that builds unames.icu (Unicode character names).

    A single icupkg run over in/unames.icu. The three parameters are not
    used by this rule.
    """
    request = SingleExecutionRequest(
        name="unames",
        category="unames",
        dep_targets=[],
        input_files=[InFile("in/unames.icu")],
        output_files=[OutFile("unames.icu")],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
342
343
def generate_ulayout(config, glob, common_vars):
    """Return the request that builds ulayout.icu (text layout properties).

    A single icupkg run over in/ulayout.icu. The target name and category
    are both "ulayout". The three parameters are not used by this rule.
    """
    target = "ulayout"
    source = InFile("in/%s.icu" % target)
    product = OutFile("%s.icu" % target)
    request = SingleExecutionRequest(
        name=target,
        category=target,
        dep_targets=[],
        input_files=[source],
        output_files=[product],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [request]
361
362
def generate_misc(config, glob, common_vars):
    """Return the request that builds one .res file per misc/*.txt file.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "misc_res" genrb request.
    """
    dir_len = len("misc/")
    ext_len = len(".txt")

    txt_files = [InFile(path) for path in glob("misc/*.txt")]
    # genrb is given the bare file name because -s already points at misc/.
    txt_names = [txt.filename[dir_len:] for txt in txt_files]
    res_files = [OutFile("%s.res" % name[:-ext_len]) for name in txt_names]

    request = RepeatedExecutionRequest(
        name="misc_res",
        category="misc",
        dep_targets=[],
        input_files=txt_files,
        output_files=res_files,
        tool=IcuTool("genrb"),
        args="-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} -k -q {INPUT_BASENAME}",
        format_with={},
        repeat_with={"INPUT_BASENAME": txt_names},
    )
    return [request]
385
386
def generate_curr_supplemental(config, glob, common_vars):
    """Return the request that builds curr/supplementalData.res.

    A single genrb run over curr/supplementalData.txt. The three parameters
    are not used by this rule.
    """
    txt_name = "supplementalData.txt"
    request = SingleExecutionRequest(
        name="curr_supplemental_res",
        category="curr_supplemental",
        dep_targets=[],
        input_files=[InFile("curr/supplementalData.txt")],
        output_files=[OutFile("curr/supplementalData.res")],
        tool=IcuTool("genrb"),
        args="-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} -k {INPUT_BASENAME}",
        format_with={"INPUT_BASENAME": txt_name},
    )
    return [request]
408
409
def generate_translit(config, glob, common_vars):
    """Return the request that builds the transliterator resource bundles.

    Only root.txt, en.txt and el.txt are compiled by genrb. Every other
    translit/*.txt file is listed as a dependency of the request instead of
    as an input.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Unused.

    Returns:
        A one-element list holding the "translit_res" genrb request.
    """
    compiled = [
        InFile("translit/%s.txt" % stem)
        for stem in ("root", "en", "el")
    ]
    every_txt = set(InFile(path) for path in glob("translit/*.txt"))
    # Sorted so the dependency list has a stable order.
    other_txt = sorted(every_txt - set(compiled))

    dir_len = len("translit/")
    ext_len = len(".txt")
    names = [src.filename[dir_len:] for src in compiled]
    res_files = [OutFile("translit/%s.res" % name[:-ext_len]) for name in names]

    request = RepeatedOrSingleExecutionRequest(
        name="translit_res",
        category="translit",
        dep_targets=other_txt,
        input_files=compiled,
        output_files=res_files,
        tool=IcuTool("genrb"),
        args="-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} -k {INPUT_BASENAME}",
        format_with={},
        repeat_with={"INPUT_BASENAME": utils.SpaceSeparatedList(names)},
    )
    return [request]
442
443
def generate_tree(
        config,
        glob,
        common_vars,
        sub_dir,
        out_sub_dir,
        xml_filename,
        use_pool_bundle,
        dep_targets):
    """Return the requests that build one resource-bundle tree.

    Requests are emitted in this order: the pool bundle (only when
    use_pool_bundle is true), the per-file .res bundles, the generated index
    .txt file, and the index .res file compiled from it.

    Args:
        config: Unused.
        glob: Callable returning the file names that match a pattern.
        common_vars: Mapping of placeholder values; expanded into the pool
            bundle option and the index file names.
        sub_dir: Input directory name; also the prefix of every target name.
        out_sub_dir: Output directory name, or a falsy value to write to the
            top of the output directory.
        xml_filename: Name of the deprecates XML file, resolved relative to
            the directory containing this script.
        use_pool_bundle: Whether to write a pool bundle and compile the tree
            against it.
        dep_targets: Extra dependencies for the bundle requests. The list is
            not modified.

    Returns:
        The list of requests described above.

    Raises:
        ValueError: If the XML root has a child that is neither <alias> nor
            <emptyLocale>, or if sub_dir is "curr" and
            curr/supplementalData.txt is not among the matched files.
    """
    tree_category = "%s_tree" % sub_dir
    if out_sub_dir:
        out_prefix = "%s/" % out_sub_dir
    else:
        out_prefix = ""

    # TODO: Clean this up for curr
    txt_files = [InFile(path) for path in glob("%s/*.txt" % sub_dir)]
    if sub_dir == "curr":
        # This file is compiled by its own rule, generate_curr_supplemental.
        txt_files.remove(InFile("curr/supplementalData.txt"))
    dir_len = len(sub_dir) + 1
    ext_len = len(".txt")
    txt_names = [txt.filename[dir_len:] for txt in txt_files]
    res_files = [
        OutFile("%s%s.res" % (out_prefix, name[:-ext_len]))
        for name in txt_names
    ]

    requests = []

    # Generate Pool Bundle
    if use_pool_bundle:
        pool_res = OutFile("%spool.res" % out_prefix)
        pool_target = "%s_pool_write" % sub_dir
        pool_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX=out_prefix,
            **common_vars
        )
        requests.append(
            SingleExecutionRequest(
                name=pool_target,
                category=tree_category,
                dep_targets=dep_targets,
                input_files=txt_files,
                output_files=[pool_res],
                tool=IcuTool("genrb"),
                args="-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                     "--writePoolBundle -k "
                     "{INPUT_BASENAMES_SPACED}",
                format_with={
                    "IN_SUB_DIR": sub_dir,
                    "OUT_PREFIX": out_prefix,
                    "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(txt_names),
                },
            )
        )
        # Build a new list so the caller's dep_targets is left untouched.
        dep_targets = dep_targets + [DepTarget(pool_target)]
    else:
        pool_option = ""

    # Generate Res File Tree
    requests.append(
        RepeatedOrSingleExecutionRequest(
            name="%s_res" % sub_dir,
            category=tree_category,
            dep_targets=dep_targets,
            input_files=txt_files,
            output_files=res_files,
            tool=IcuTool("genrb"),
            args="-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                 "{EXTRA_OPTION} -k "
                 "{INPUT_BASENAME}",
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "EXTRA_OPTION": pool_option,
            },
            repeat_with={
                "INPUT_BASENAME": utils.SpaceSeparatedList(txt_names),
            },
        )
    )

    # Generate index txt file
    # Locales named in the deprecates XML are left out of the index; the
    # attribute holding the locale name depends on the element's tag.
    locale_attr_by_tag = {"alias": "from", "emptyLocale": "locale"}
    xml_path = os.path.join(os.path.dirname(__file__), xml_filename)
    excluded_stems = set()
    for node in ET.parse(xml_path).getroot():
        if node.tag not in locale_attr_by_tag:
            raise ValueError("Unknown tag in deprecates XML: %s" % node.tag)
        excluded_stems.add(node.attrib[locale_attr_by_tag[node.tag]])
    # root is never listed in the index either.
    excluded_stems.add("root")

    indexed_files = [
        txt for txt in txt_files
        if txt.filename.rsplit("/", 1)[-1][:-ext_len] not in excluded_stems
    ]

    # Only the locales tree passes a CLDR version to the index request.
    if sub_dir == "locales":
        cldr_version = locale_dependencies.data["cldrVersion"]
    else:
        cldr_version = None

    index_txt = TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR=sub_dir,
        **common_vars
    ))
    index_txt_target = "%s_index_txt" % sub_dir
    requests.append(
        IndexTxtRequest(
            name=index_txt_target,
            category=tree_category,
            input_files=indexed_files,
            output_file=index_txt,
            cldr_version=cldr_version,
        )
    )

    # Generate index res file
    index_res = OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX=out_prefix,
        **common_vars
    ))
    requests.append(
        SingleExecutionRequest(
            name="%s_index_res" % sub_dir,
            category=tree_category,
            dep_targets=[DepTarget(index_txt_target)],
            input_files=[],
            output_files=[index_res],
            tool=IcuTool("genrb"),
            args="-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                 "-k "
                 "{INDEX_NAME}.txt",
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
            },
        )
    )

    return requests