]> git.saurik.com Git - apple/icu.git/blame - icuSources/data/BUILDRULES.py
ICU-66108.tar.gz
[apple/icu.git] / icuSources / data / BUILDRULES.py
CommitLineData
3d1f044b
A
1# Copyright (C) 2018 and later: Unicode, Inc. and others.
2# License & terms of use: http://www.unicode.org/copyright.html
3
4# Python 2/3 Compatibility (ICU-20299)
5# TODO(ICU-20301): Remove this.
6from __future__ import print_function
7
340931cb
A
8from icutools.databuilder import *
9from icutools.databuilder import utils
10from icutools.databuilder.request_types import *
3d1f044b
A
11
12import os
13import sys
3d1f044b
A
14
15
def generate(config, io, common_vars):
    """Build the complete list of data build requests.

    Args:
        config: Build configuration. This function reads
            ``config.use_pool_bundle`` and forwards the object to every
            ``generate_*`` helper.
        io: File access object. ``io.glob`` is used here to check that the
            data source directory can be found; the object is also forwarded
            to every helper.
        common_vars: Shared format variables, forwarded to every helper.

    Returns:
        A list of request objects, ending with the ``icudata_list``
        ``ListRequest``.

    Raises:
        SystemExit: With status 1 when nothing matches ``misc/*``.
    """
    if not io.glob("misc/*"):
        print("Error: Cannot find data directory; please specify --src_dir", file=sys.stderr)
        # sys.exit() is used instead of the bare exit() helper: exit() is
        # injected by the site module for interactive sessions and is not
        # guaranteed to be defined (for example under "python -S").
        sys.exit(1)

    requests = []

    # Generators that only need the three common arguments, in build order.
    simple_generators = (
        generate_cnvalias,
        generate_ulayout,
        generate_confusables,
        generate_conversion_mappings,
        generate_brkitr_brk,
        generate_stringprep,
        generate_brkitr_dictionaries,
        generate_normalization,
        generate_coll_ucadata,
        generate_full_unicore_data,
        generate_unames,
        generate_misc,
        generate_curr_supplemental,
        generate_translit,
    )
    for build_requests in simple_generators:
        requests += build_requests(config, io, common_vars)

    # Res Tree Files
    # (input dirname, output dirname, use pool bundle, dep targets)
    use_pool = config.use_pool_bundle
    tree_specs = [
        ("locales", None, use_pool, []),
        ("curr", "curr", use_pool, []),
        ("lang", "lang", use_pool, []),
        ("region", "region", use_pool, []),
        ("zone", "zone", use_pool, []),
        ("unit", "unit", use_pool, []),
        # Never use pool bundle for coll, brkitr, or rbnf
        (
            "coll",
            "coll",
            False,
            # Depends on timezoneTypes.res and keyTypeData.res.
            # TODO: We should not need this dependency to build collation.
            # TODO: Bake keyTypeData.res into the common library?
            [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")],
        ),
        ("brkitr", "brkitr", False, [DepTarget("brkitr_brk"), DepTarget("dictionaries")]),
        ("rbnf", "rbnf", False, []),
    ]
    for sub_dir, out_sub_dir, tree_uses_pool, tree_deps in tree_specs:
        requests += generate_tree(
            config, io, common_vars,
            sub_dir,
            out_sub_dir,
            tree_uses_pool,
            tree_deps)

    requests += [
        ListRequest(
            name = "icudata_list",
            variable_name = "icudata_all_output_files",
            output_file = TmpFile("icudata.lst"),
            include_tmp = False
        )
    ]

    return requests
110
111
def generate_cnvalias(config, io, common_vars):
    """Return the request that builds the converter name alias table.

    Runs the ``gencnval`` tool on ``mappings/convrtrs.txt`` to produce
    ``cnvalias.icu``. The three parameters are accepted for signature parity
    with the other generators; none of them is read here.
    """
    # UConv Name Aliases
    alias_request = SingleExecutionRequest(
        name="cnvalias",
        category="cnvalias",
        dep_targets=[],
        input_files=[InFile("mappings/convrtrs.txt")],
        output_files=[OutFile("cnvalias.icu")],
        tool=IcuTool("gencnval"),
        args="-s {IN_DIR} -d {OUT_DIR} {INPUT_FILES[0]}",
        format_with={},
    )
    return [alias_request]
129
130
def generate_confusables(config, io, common_vars):
    """Return the request that builds ``confusables.cfu``.

    Runs ``gencfu`` with ``unidata/confusables.txt`` as the rules input (-r)
    and ``unidata/confusablesWholeScript.txt`` as the whole-script input (-w).
    The request depends on the ``cnvalias`` target. None of the three
    parameters is read here.
    """
    # CONFUSABLES
    sources = [
        InFile("unidata/confusables.txt"),
        InFile("unidata/confusablesWholeScript.txt"),
    ]
    target = OutFile("confusables.cfu")
    command_line = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
        "-o {OUTPUT_FILES[0]}"
    )
    return [
        SingleExecutionRequest(
            name="confusables",
            category="confusables",
            dep_targets=[DepTarget("cnvalias")],
            input_files=sources,
            output_files=[target],
            tool=IcuTool("gencfu"),
            args=command_line,
            format_with={},
        )
    ]
150
151
def generate_conversion_mappings(config, io, common_vars):
    """Return the request that compiles every ``mappings/*.ucm`` file.

    Each ``mappings/<name>.ucm`` found through ``io.glob`` is paired with an
    output ``<name>.cnv`` and handed to the ``makeconv`` tool. ``config`` and
    ``common_vars`` are not read here.
    """
    # UConv Conversion Table Files
    dir_prefix_len = len("mappings/")
    extension_len = len(".ucm")

    ucm_files = [InFile(path) for path in io.glob("mappings/*.ucm")]
    cnv_files = []
    for ucm in ucm_files:
        stem = ucm.filename[dir_prefix_len:-extension_len]
        cnv_files.append(OutFile("%s.cnv" % stem))

    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    mapping_request = RepeatedOrSingleExecutionRequest(
        name="conversion_mappings",
        category="conversion_mappings",
        dep_targets=[],
        input_files=ucm_files,
        output_files=cnv_files,
        tool=IcuTool("makeconv"),
        args="-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
        format_with={},
        repeat_with={
            "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(ucm.filename for ucm in ucm_files)
        },
    )
    return [mapping_request]
172
173
def generate_brkitr_brk(config, io, common_vars):
    """Return the request that compiles break iterator rule files.

    Each ``brkitr/rules/<name>.txt`` found through ``io.glob`` is paired with
    an output ``brkitr/<name>.brk`` and compiled by ``genbrk``. The request
    depends on the ``cnvalias`` and ``ulayout`` targets. ``config`` and
    ``common_vars`` are not read here.
    """
    # BRK Files
    dir_prefix_len = len("brkitr/rules/")
    extension_len = len(".txt")

    rule_files = [InFile(path) for path in io.glob("brkitr/rules/*.txt")]
    brk_files = []
    for rule_file in rule_files:
        stem = rule_file.filename[dir_prefix_len:-extension_len]
        brk_files.append(OutFile("brkitr/%s.brk" % stem))

    command_line = (
        "-d {OUT_DIR} -i {OUT_DIR} "
        "-c -r {IN_DIR}/{INPUT_FILE} "
        "-o {OUTPUT_FILE}"
    )
    return [
        RepeatedExecutionRequest(
            name="brkitr_brk",
            category="brkitr_rules",
            dep_targets=[DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files=rule_files,
            output_files=brk_files,
            tool=IcuTool("genbrk"),
            args=command_line,
            format_with={},
            repeat_with={},
        )
    ]
193
194
def generate_stringprep(config, io, common_vars):
    """Return the request that builds the StringPrep profile files.

    Each ``sprep/<bundle>.txt`` found through ``io.glob`` is paired with an
    output ``<bundle>.spp`` and processed by ``gensprep``; the bundle name is
    substituted into the command line once per input. The request lists
    ``unidata/NormalizationCorrections.txt`` as a dependency. ``config`` and
    ``common_vars`` are not read here.
    """
    # SPP FILES
    dir_prefix_len = len("sprep/")
    extension_len = len(".txt")

    profile_files = [InFile(path) for path in io.glob("sprep/*.txt")]
    spp_files = [
        OutFile("%s.spp" % src.filename[dir_prefix_len:-extension_len])
        for src in profile_files
    ]
    bundles = [src.filename[dir_prefix_len:-extension_len] for src in profile_files]

    command_line = (
        "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
        "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt"
    )
    return [
        RepeatedExecutionRequest(
            name="stringprep",
            category="stringprep",
            dep_targets=[InFile("unidata/NormalizationCorrections.txt")],
            input_files=profile_files,
            output_files=spp_files,
            tool=IcuTool("gensprep"),
            args=command_line,
            format_with={},
            repeat_with={"BUNDLE_NAME": bundles},
        )
    ]
216
217
def generate_brkitr_dictionaries(config, io, common_vars):
    """Return the request that compiles break iterator dictionaries.

    Each ``brkitr/dictionaries/<name>.txt`` found through ``io.glob`` is
    paired with an output ``brkitr/<name>.dict`` and compiled by ``gendict``
    with per-dictionary extra options. ``config`` and ``common_vars`` are not
    read here.

    Raises:
        KeyError: If a globbed dictionary file has no entry in the extra
            options table below.
    """
    # Dict Files
    dir_prefix_len = len("brkitr/dictionaries/")
    extension_len = len(".txt")

    dict_sources = [InFile(path) for path in io.glob("brkitr/dictionaries/*.txt")]
    dict_outputs = [
        OutFile("brkitr/%s.dict" % src.filename[dir_prefix_len:-extension_len])
        for src in dict_sources
    ]

    # gendict options keyed by the input file path.
    options_by_path = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00",
    }
    per_file_options = []
    for src in dict_sources:
        per_file_options.append(options_by_path[src.filename])

    command_line = (
        "-i {OUT_DIR} "
        "-c {EXTRA_OPTIONS} "
        "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
    )
    return [
        RepeatedExecutionRequest(
            name="dictionaries",
            category="brkitr_dictionaries",
            dep_targets=[],
            input_files=dict_sources,
            output_files=dict_outputs,
            tool=IcuTool("gendict"),
            args=command_line,
            format_with={},
            repeat_with={"EXTRA_OPTIONS": per_file_options},
        )
    ]
247
248
def generate_normalization(config, io, common_vars):
    """Return the request that repackages the normalization data files.

    Every ``in/*.nrm`` file found through ``io.glob``, except ``in/nfc.nrm``,
    is run through ``icupkg`` to produce an output with the same base name.
    ``config`` and ``common_vars`` are not read here.

    Raises:
        ValueError: If ``in/nfc.nrm`` is not among the globbed files
            (raised by ``list.remove``).
    """
    # NRM Files
    dir_prefix_len = len("in/")

    nrm_sources = [InFile(path) for path in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    nrm_sources.remove(InFile("in/nfc.nrm"))

    nrm_outputs = []
    for src in nrm_sources:
        nrm_outputs.append(OutFile(src.filename[dir_prefix_len:]))

    nrm_request = RepeatedExecutionRequest(
        name="normalization",
        category="normalization",
        dep_targets=[],
        input_files=nrm_sources,
        output_files=nrm_outputs,
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
        format_with={},
        repeat_with={},
    )
    return [nrm_request]
268
269
def generate_coll_ucadata(config, io, common_vars):
    """Return the request that produces ``coll/ucadata.icu``.

    The input file is ``in/coll/ucadata-<type>.icu`` where ``<type>`` is
    ``config.coll_han_type``; it is run through ``icupkg``. ``io`` and
    ``common_vars`` are not read here.
    """
    # Collation Dependency File (ucadata.icu)
    source_path = "in/coll/ucadata-%s.icu" % config.coll_han_type
    ucadata_request = SingleExecutionRequest(
        name="coll_ucadata",
        category="coll_ucadata",
        dep_targets=[],
        input_files=[InFile(source_path)],
        output_files=[OutFile("coll/ucadata.icu")],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [ucadata_request]
286
287
def generate_full_unicore_data(config, io, common_vars):
    """Return the request for the core Unicode data files, if enabled.

    When ``config.include_uni_core_data`` is false, no request is produced
    and an empty list is returned. Otherwise ``pnames.icu``, ``uprops.icu``,
    ``ucase.icu``, ``ubidi.icu`` and ``nfc.nrm`` are each taken from ``in/``
    and run through ``icupkg``. ``io`` and ``common_vars`` are not read here.
    """
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    # See ICU-4497.
    if config.include_uni_core_data:
        core_names = [
            "pnames.icu",
            "uprops.icu",
            "ucase.icu",
            "ubidi.icu",
            "nfc.nrm",
        ]
        core_sources = [InFile("in/%s" % name) for name in core_names]
        core_outputs = [OutFile(name) for name in core_names]
        return [
            RepeatedExecutionRequest(
                name="unicore",
                category="unicore",
                input_files=core_sources,
                output_files=core_outputs,
                tool=IcuTool("icupkg"),
                args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            )
        ]

    return []
316
317
def generate_unames(config, io, common_vars):
    """Return the request that produces ``unames.icu``.

    Runs ``icupkg`` on ``in/unames.icu``. None of the three parameters is
    read here.
    """
    # Unicode Character Names
    unames_request = SingleExecutionRequest(
        name="unames",
        category="unames",
        dep_targets=[],
        input_files=[InFile("in/unames.icu")],
        output_files=[OutFile("unames.icu")],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [unames_request]
334
335
def generate_ulayout(config, io, common_vars):
    """Return the request that produces ``ulayout.icu``.

    Runs ``icupkg`` on ``in/ulayout.icu``; the request name and category are
    both ``ulayout``. None of the three parameters is read here.
    """
    # Unicode text layout properties
    target_name = "ulayout"
    source = InFile("in/" + target_name + ".icu")
    output = OutFile(target_name + ".icu")
    layout_request = SingleExecutionRequest(
        name=target_name,
        category=target_name,
        dep_targets=[],
        input_files=[source],
        output_files=[output],
        tool=IcuTool("icupkg"),
        args="-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
        format_with={},
    )
    return [layout_request]
353
354
def generate_misc(config, io, common_vars):
    """Return the request that compiles the miscellaneous resource files.

    Each ``misc/<name>.txt`` found through ``io.glob`` is paired with an
    output ``<name>.res`` and compiled by ``genrb``; the input base name is
    substituted into the command line once per input. ``config`` and
    ``common_vars`` are not read here.
    """
    # Misc Data Res Files
    dir_prefix_len = len("misc/")
    extension_len = len(".txt")

    misc_sources = [InFile(path) for path in io.glob("misc/*.txt")]
    basenames = []
    for src in misc_sources:
        basenames.append(src.filename[dir_prefix_len:])
    res_outputs = [OutFile("%s.res" % name[:-extension_len]) for name in basenames]

    command_line = (
        "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
        "-k -q "
        "{INPUT_BASENAME}"
    )
    return [
        RepeatedExecutionRequest(
            name="misc_res",
            category="misc",
            dep_targets=[],
            input_files=misc_sources,
            output_files=res_outputs,
            tool=IcuTool("genrb"),
            args=command_line,
            format_with={},
            repeat_with={"INPUT_BASENAME": basenames},
        )
    ]
377
378
def generate_curr_supplemental(config, io, common_vars):
    """Return the request that compiles ``curr/supplementalData.txt``.

    Runs ``genrb`` to produce ``curr/supplementalData.res``. None of the
    three parameters is read here.
    """
    # Currency Supplemental Res File
    command_line = (
        "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    supplemental_request = SingleExecutionRequest(
        name="curr_supplemental_res",
        category="curr_supplemental",
        dep_targets=[],
        input_files=[InFile("curr/supplementalData.txt")],
        output_files=[OutFile("curr/supplementalData.res")],
        tool=IcuTool("genrb"),
        args=command_line,
        format_with={"INPUT_BASENAME": "supplementalData.txt"},
    )
    return [supplemental_request]
400
401
def generate_translit(config, io, common_vars):
    """Return the request that compiles the transliterator resource bundles.

    Only ``translit/root.txt``, ``translit/en.txt`` and ``translit/el.txt``
    are compiled (by ``genrb``) into ``translit/<name>.res``. Every other
    ``translit/*.txt`` file found through ``io.glob`` is passed, sorted, as a
    dependency of the request. ``config`` and ``common_vars`` are not read
    here.
    """
    dir_prefix_len = len("translit/")
    extension_len = len(".txt")

    compiled_sources = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt"),
    ]

    # All remaining translit text files are dependencies, not inputs.
    all_translit = set(InFile(path) for path in io.glob("translit/*.txt"))
    other_sources = sorted(all_translit - set(compiled_sources))

    basenames = [src.filename[dir_prefix_len:] for src in compiled_sources]
    res_outputs = []
    for name in basenames:
        res_outputs.append(OutFile("translit/%s.res" % name[:-extension_len]))

    command_line = (
        "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
        "-k "
        "{INPUT_BASENAME}"
    )
    return [
        RepeatedOrSingleExecutionRequest(
            name="translit_res",
            category="translit",
            dep_targets=other_sources,
            input_files=compiled_sources,
            output_files=res_outputs,
            tool=IcuTool("genrb"),
            args=command_line,
            format_with={},
            repeat_with={
                "INPUT_BASENAME": utils.SpaceSeparatedList(basenames)
            },
        )
    ]
434
435
def generate_tree(
        config,
        io,
        common_vars,
        sub_dir,
        out_sub_dir,
        use_pool_bundle,
        dep_targets):
    """Return the requests that build one resource bundle tree.

    Args:
        config: Build configuration; not read in this function.
        io: File access object; ``io.glob`` lists the ``<sub_dir>/*.txt``
            inputs and ``io.read_locale_deps`` supplies the locale
            dependency data.
        common_vars: Format variables expanded into the pool bundle option
            and the index file names.
        sub_dir: Input directory name of the tree.
        out_sub_dir: Output directory name; a false value means the outputs
            get no directory prefix.
        use_pool_bundle: When true, an extra request writes ``pool.res`` and
            the tree request is told to use it.
        dep_targets: Dependencies for the generated requests. The list is
            not modified.

    Returns:
        A list holding the optional pool bundle request, the resource tree
        request and the index request, in that order.
    """
    category = "%s_tree" % sub_dir
    if out_sub_dir:
        out_prefix = "%s/" % out_sub_dir
    else:
        out_prefix = ""

    # TODO: Clean this up for curr
    tree_sources = [InFile(path) for path in io.glob("%s/*.txt" % sub_dir)]
    if sub_dir == "curr":
        tree_sources.remove(InFile("curr/supplementalData.txt"))

    basename_start = len(sub_dir) + 1
    basenames = [src.filename[basename_start:] for src in tree_sources]
    res_outputs = []
    for name in basenames:
        res_outputs.append(OutFile("%s%s.res" % (out_prefix, name[:-4])))

    requests = []

    # Generate Pool Bundle
    if not use_pool_bundle:
        pool_option = ""
    else:
        pool_outputs = [OutFile("%spool.res" % out_prefix)]
        pool_target_name = "%s_pool_write" % sub_dir
        pool_option = "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
            OUT_PREFIX=out_prefix,
            **common_vars
        )
        pool_request = SingleExecutionRequest(
            name=pool_target_name,
            category=category,
            dep_targets=dep_targets,
            input_files=tree_sources,
            output_files=pool_outputs,
            tool=IcuTool("genrb"),
            args="-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
                "--writePoolBundle -k "
                "{INPUT_BASENAMES_SPACED}",
            format_with={
                "IN_SUB_DIR": sub_dir,
                "OUT_PREFIX": out_prefix,
                "INPUT_BASENAMES_SPACED": utils.SpaceSeparatedList(basenames),
            },
        )
        requests.append(pool_request)
        # Build a new list so the caller's dep_targets stays untouched.
        dep_targets = dep_targets + [DepTarget(pool_target_name)]

    # Generate Res File Tree
    tree_request = RepeatedOrSingleExecutionRequest(
        name="%s_res" % sub_dir,
        category=category,
        dep_targets=dep_targets,
        input_files=tree_sources,
        output_files=res_outputs,
        tool=IcuTool("genrb"),
        args="-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
            "{EXTRA_OPTION} -k "
            "{INPUT_BASENAME}",
        format_with={
            "IN_SUB_DIR": sub_dir,
            "OUT_PREFIX": out_prefix,
            "EXTRA_OPTION": pool_option,
        },
        repeat_with={
            "INPUT_BASENAME": utils.SpaceSeparatedList(basenames)
        },
    )
    requests.append(tree_request)

    # Generate res_index file
    # Exclude the deprecated locale variants and root; see ICU-20628. This
    # could be data-driven, but we do not want to perform I/O in this script
    # (for example, we do not want to read from an XML file).
    skipped_stems = {
        "ja_JP_TRADITIONAL",
        "th_TH_TRADITIONAL",
        "de_",
        "de__PHONEBOOK",
        "es_",
        "es__TRADITIONAL",
        "root",
    }
    # Put alias locales in a separate structure; see ICU-20627
    locale_deps = io.read_locale_deps(sub_dir)
    alias_stems = set()
    if "aliases" in locale_deps:
        alias_stems = set(locale_deps["aliases"].keys())

    alias_sources = []
    installed_sources = []
    for src in tree_sources:
        stem = IndexRequest.locale_file_stem(src)
        if stem not in skipped_stems:
            if stem in alias_stems:
                alias_sources.append(src)
            else:
                installed_sources.append(src)

    # Only the locales tree carries the CLDR version into its index.
    if sub_dir == "locales":
        cldr_version = locale_deps["cldrVersion"]
    else:
        cldr_version = None

    index_txt_path = "{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
        IN_SUB_DIR=sub_dir,
        **common_vars
    )
    index_txt = TmpFile(index_txt_path)
    index_res_path = "{OUT_PREFIX}{INDEX_NAME}.res".format(
        OUT_PREFIX=out_prefix,
        **common_vars
    )
    index_res = OutFile(index_res_path)

    index_request = IndexRequest(
        name="%s_index_txt" % sub_dir,
        category=category,
        installed_files=installed_sources,
        alias_files=alias_sources,
        txt_file=index_txt,
        output_file=index_res,
        cldr_version=cldr_version,
        args="-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
            "-k "
            "{INDEX_NAME}.txt",
        format_with={
            "IN_SUB_DIR": sub_dir,
            "OUT_PREFIX": out_prefix,
        },
    )
    requests.append(index_request)

    return requests