# icuSources/data/BUILDRULES.py
# Copyright (C) 2018 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Python 2/3 Compatibility (ICU-20299)
# TODO(ICU-20301): Remove this.
from __future__ import print_function

import sys

from icutools.databuilder import *
from icutools.databuilder import utils
from icutools.databuilder.request_types import *
16 def generate(config
, io
, common_vars
):
19 if len(io
.glob("misc/*")) == 0:
20 print("Error: Cannot find data directory; please specify --src_dir", file=sys
.stderr
)
23 requests
+= generate_cnvalias(config
, io
, common_vars
)
24 requests
+= generate_ulayout(config
, io
, common_vars
)
25 requests
+= generate_confusables(config
, io
, common_vars
)
26 requests
+= generate_conversion_mappings(config
, io
, common_vars
)
27 requests
+= generate_brkitr_brk(config
, io
, common_vars
)
28 requests
+= generate_stringprep(config
, io
, common_vars
)
29 requests
+= generate_brkitr_dictionaries(config
, io
, common_vars
)
30 requests
+= generate_normalization(config
, io
, common_vars
)
31 requests
+= generate_coll_ucadata(config
, io
, common_vars
)
32 requests
+= generate_full_unicore_data(config
, io
, common_vars
)
33 requests
+= generate_unames(config
, io
, common_vars
)
34 requests
+= generate_misc(config
, io
, common_vars
)
35 requests
+= generate_curr_supplemental(config
, io
, common_vars
)
36 requests
+= generate_translit(config
, io
, common_vars
)
39 # (input dirname, output dirname, resfiles.mk path, mk version var, mk source var, use pool file, dep files)
40 requests
+= generate_tree(config
, io
, common_vars
,
43 config
.use_pool_bundle
,
46 requests
+= generate_tree(config
, io
, common_vars
,
49 config
.use_pool_bundle
,
52 requests
+= generate_tree(config
, io
, common_vars
,
55 config
.use_pool_bundle
,
58 requests
+= generate_tree(config
, io
, common_vars
,
61 config
.use_pool_bundle
,
64 requests
+= generate_tree(config
, io
, common_vars
,
67 config
.use_pool_bundle
,
70 requests
+= generate_tree(config
, io
, common_vars
,
73 config
.use_pool_bundle
,
76 requests
+= generate_tree(config
, io
, common_vars
,
79 # Never use pool bundle for coll, brkitr, or rbnf
81 # Depends on timezoneTypes.res and keyTypeData.res.
82 # TODO: We should not need this dependency to build collation.
83 # TODO: Bake keyTypeData.res into the common library?
84 [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])
86 requests
+= generate_tree(config
, io
, common_vars
,
89 # Never use pool bundle for coll, brkitr, or rbnf
91 [DepTarget("brkitr_brk"), DepTarget("dictionaries")])
93 requests
+= generate_tree(config
, io
, common_vars
,
96 # Never use pool bundle for coll, brkitr, or rbnf
102 name
= "icudata_list",
103 variable_name
= "icudata_all_output_files",
104 output_file
= TmpFile("icudata.lst"),
def generate_cnvalias(config, io, common_vars):
    # UConv Name Aliases: build cnvalias.icu from the converter-alias table.
    input_file = InFile("mappings/convrtrs.txt")
    output_file = OutFile("cnvalias.icu")
    return [
        SingleExecutionRequest(
            name = "cnvalias",
            category = "cnvalias",
            # gencnval has no build-time dependencies.
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("gencnval"),
            args = "-s {IN_DIR} -d {OUT_DIR} "
                "{IN_DIR}/{INPUT_FILES[0]}",
            # NOTE(review): the args continuation and format_with were not
            # visible in the corrupted source; restored from upstream — verify.
            format_with = {}
        )
    ]
def generate_confusables(config, io, common_vars):
    # CFU (confusables) file: compiled by gencfu from the two Unicode
    # confusables source tables.
    txt1 = InFile("unidata/confusables.txt")
    txt2 = InFile("unidata/confusablesWholeScript.txt")
    cfu = OutFile("confusables.cfu")
    return [
        SingleExecutionRequest(
            name = "confusables",
            category = "confusables",
            # gencfu reads converter aliases, so cnvalias must be built first.
            dep_targets = [DepTarget("cnvalias")],
            input_files = [txt1, txt2],
            output_files = [cfu],
            tool = IcuTool("gencfu"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
                "-o {OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
def generate_conversion_mappings(config, io, common_vars):
    # UConv Conversion Table Files: one .cnv per .ucm mapping file.
    input_files = [InFile(filename) for filename in io.glob("mappings/*.ucm")]
    # filename[9:-4] strips the "mappings/" prefix and ".ucm" suffix.
    output_files = [OutFile("%s.cnv" % v.filename[9:-4]) for v in input_files]
    # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
    return [
        RepeatedOrSingleExecutionRequest(
            name = "conversion_mappings",
            category = "conversion_mappings",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("makeconv"),
            args = "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
            format_with = {},
            repeat_with = {
                # Renamed loop variable: the original shadowed the builtin `file`.
                "INPUT_FILE_PLACEHOLDER": utils.SpaceSeparatedList(
                    f.filename for f in input_files)
            }
        )
    ]
def generate_brkitr_brk(config, io, common_vars):
    # BRK Files: compiled break-iterator rules.
    input_files = [InFile(filename) for filename in io.glob("brkitr/rules/*.txt")]
    # filename[13:-4] strips the "brkitr/rules/" prefix and ".txt" suffix.
    output_files = [OutFile("brkitr/%s.brk" % v.filename[13:-4]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            # Name "brkitr_brk" is referenced as a DepTarget by the res-tree rules.
            name = "brkitr_brk",
            category = "brkitr_rules",
            dep_targets = [DepTarget("cnvalias"), DepTarget("ulayout")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genbrk"),
            args = "-d {OUT_DIR} -i {OUT_DIR} "
                "-c -r {IN_DIR}/{INPUT_FILE} "
                "-o {OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]
def generate_stringprep(config, io, common_vars):
    # SPP files: compiled StringPrep profiles, one per sprep/*.txt.
    input_files = [InFile(filename) for filename in io.glob("sprep/*.txt")]
    # filename[6:-4] strips the "sprep/" prefix and ".txt" suffix.
    output_files = [OutFile("%s.spp" % v.filename[6:-4]) for v in input_files]
    bundle_names = [v.filename[6:-4] for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "stringprep",
            category = "stringprep",
            # gensprep reads NormalizationCorrections.txt via the -m flag below.
            dep_targets = [InFile("unidata/NormalizationCorrections.txt")],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gensprep"),
            args = "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
                "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt",
            format_with = {},
            repeat_with = {
                "BUNDLE_NAME": bundle_names
            }
        )
    ]
def generate_brkitr_dictionaries(config, io, common_vars):
    # Dictionary files for dictionary-based break iteration.
    input_files = [InFile(filename) for filename in io.glob("brkitr/dictionaries/*.txt")]
    # filename[20:-4] strips the "brkitr/dictionaries/" prefix and ".txt" suffix.
    output_files = [OutFile("brkitr/%s.dict" % v.filename[20:-4]) for v in input_files]
    # Per-dictionary gendict options: storage format and codepoint transform.
    extra_options_map = {
        "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
        "brkitr/dictionaries/cjdict.txt": "--uchars",
        "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
        "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
        "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
    }
    extra_optionses = [extra_options_map[v.filename] for v in input_files]
    return [
        RepeatedExecutionRequest(
            # Name "dictionaries" is referenced as a DepTarget by the brkitr tree.
            name = "dictionaries",
            category = "brkitr_dictionaries",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("gendict"),
            args = "-i {OUT_DIR} "
                "-c {EXTRA_OPTIONS} "
                "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {
                "EXTRA_OPTIONS": extra_optionses
            }
        )
    ]
def generate_normalization(config, io, common_vars):
    # NRM files: pre-built normalization data, repackaged with icupkg to the
    # target endianness/charset.
    input_files = [InFile(filename) for filename in io.glob("in/*.nrm")]
    # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
    input_files.remove(InFile("in/nfc.nrm"))
    # filename[3:] strips the "in/" prefix.
    output_files = [OutFile(v.filename[3:]) for v in input_files]
    return [
        RepeatedExecutionRequest(
            name = "normalization",
            category = "normalization",
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
            format_with = {},
            repeat_with = {}
        )
    ]
def generate_coll_ucadata(config, io, common_vars):
    # Collation Dependency File (ucadata.icu)
    # The source file varies by the configured Han ordering (e.g. unihan/implicithan).
    input_file = InFile("in/coll/ucadata-%s.icu" % config.coll_han_type)
    output_file = OutFile("coll/ucadata.icu")
    return [
        SingleExecutionRequest(
            # Name "coll_ucadata" is referenced as a DepTarget by the coll tree.
            name = "coll_ucadata",
            category = "coll_ucadata",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
def generate_full_unicore_data(config, io, common_vars):
    # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
    # are hardcoded in the common DLL and therefore not included in the data package any more.
    # They are not built by default but need to be built for ICU4J data,
    # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
    if not config.include_uni_core_data:
        return []

    # NOTE(review): this basenames list was not visible in the corrupted
    # source; restored from upstream ICU — verify against the repository.
    basenames = [
        "pnames.icu",
        "uprops.icu",
        "ucase.icu",
        "ubidi.icu",
        "nfc.nrm"
    ]
    input_files = [InFile("in/%s" % bn) for bn in basenames]
    output_files = [OutFile(bn) for bn in basenames]
    return [
        RepeatedExecutionRequest(
            name = "unicore",
            category = "unicore",
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
        )
    ]
def generate_unames(config, io, common_vars):
    # Unicode Character Names: repackage the pre-built unames.icu with icupkg.
    input_file = InFile("in/unames.icu")
    output_file = OutFile("unames.icu")
    return [
        SingleExecutionRequest(
            name = "unames",
            category = "unames",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
def generate_ulayout(config, io, common_vars):
    # Unicode text layout properties
    # The target name "ulayout" is referenced as a DepTarget by brkitr rules.
    basename = "ulayout"
    input_file = InFile("in/%s.icu" % basename)
    output_file = OutFile("%s.icu" % basename)
    return [
        SingleExecutionRequest(
            name = basename,
            category = basename,
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("icupkg"),
            args = "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
            format_with = {}
        )
    ]
def generate_misc(config, io, common_vars):
    # Misc Data Res Files
    input_files = [InFile(filename) for filename in io.glob("misc/*.txt")]
    # filename[5:] strips the "misc/" prefix.
    input_basenames = [v.filename[5:] for v in input_files]
    output_files = [OutFile("%s.res" % v[:-4]) for v in input_basenames]
    return [
        RepeatedExecutionRequest(
            # Name "misc_res" is referenced as a DepTarget by the coll tree.
            name = "misc_res",
            category = "misc",
            # NOTE(review): dep_targets and the "-k -q" args line were not
            # visible in the corrupted source; restored from upstream — verify.
            dep_targets = [],
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
                "-k -q "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": input_basenames
            }
        )
    ]
def generate_curr_supplemental(config, io, common_vars):
    # Currency Supplemental Res File (built separately from the curr tree,
    # which excludes supplementalData.txt).
    input_file = InFile("curr/supplementalData.txt")
    input_basename = "supplementalData.txt"
    output_file = OutFile("curr/supplementalData.res")
    return [
        SingleExecutionRequest(
            name = "curr_supplemental_res",
            category = "curr_supplemental",
            dep_targets = [],
            input_files = [input_file],
            output_files = [output_file],
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {
                "INPUT_BASENAME": input_basename
            }
        )
    ]
def generate_translit(config, io, common_vars):
    # Transliteration res files. Only the explicit entry points are built as
    # resources; every other translit/*.txt is a dependency (pulled in via
    # genrb includes), not a direct input.
    input_files = [
        InFile("translit/root.txt"),
        InFile("translit/en.txt"),
        InFile("translit/el.txt")
    ]
    dep_files = set(InFile(filename) for filename in io.glob("translit/*.txt"))
    dep_files -= set(input_files)
    # Sort for deterministic build output.
    dep_files = list(sorted(dep_files))
    # filename[9:] strips the "translit/" prefix.
    input_basenames = [v.filename[9:] for v in input_files]
    output_files = [
        OutFile("translit/%s.res" % v[:-4])
        for v in input_basenames
    ]
    return [
        RepeatedOrSingleExecutionRequest(
            name = "translit_res",
            category = "translit",
            dep_targets = dep_files,
            input_files = input_files,
            output_files = output_files,
            tool = IcuTool("genrb"),
            args = "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
                "-k "
                "{INPUT_BASENAME}",
            format_with = {},
            repeat_with = {
                "INPUT_BASENAME": utils.SpaceSeparatedList(input_basenames)
            }
        )
    ]
445 category
= "%s_tree" % sub_dir
446 out_prefix
= "%s/" % out_sub_dir
if out_sub_dir
else ""
447 # TODO: Clean this up for curr
448 input_files
= [InFile(filename
) for filename
in io
.glob("%s/*.txt" % sub_dir
)]
449 if sub_dir
== "curr":
450 input_files
.remove(InFile("curr/supplementalData.txt"))
451 input_basenames
= [v
.filename
[len(sub_dir
)+1:] for v
in input_files
]
453 OutFile("%s%s.res" % (out_prefix
, v
[:-4]))
454 for v
in input_basenames
457 # Generate Pool Bundle
459 input_pool_files
= [OutFile("%spool.res" % out_prefix
)]
460 pool_target_name
= "%s_pool_write" % sub_dir
461 use_pool_bundle_option
= "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
462 OUT_PREFIX
= out_prefix
,
466 SingleExecutionRequest(
467 name
= pool_target_name
,
469 dep_targets
= dep_targets
,
470 input_files
= input_files
,
471 output_files
= input_pool_files
,
472 tool
= IcuTool("genrb"),
473 args
= "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
474 "--writePoolBundle -k "
475 "{INPUT_BASENAMES_SPACED}",
477 "IN_SUB_DIR": sub_dir
,
478 "OUT_PREFIX": out_prefix
,
479 "INPUT_BASENAMES_SPACED": utils
.SpaceSeparatedList(input_basenames
)
483 dep_targets
= dep_targets
+ [DepTarget(pool_target_name
)]
485 use_pool_bundle_option
= ""
487 # Generate Res File Tree
489 RepeatedOrSingleExecutionRequest(
490 name
= "%s_res" % sub_dir
,
492 dep_targets
= dep_targets
,
493 input_files
= input_files
,
494 output_files
= output_files
,
495 tool
= IcuTool("genrb"),
496 args
= "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
500 "IN_SUB_DIR": sub_dir
,
501 "OUT_PREFIX": out_prefix
,
502 "EXTRA_OPTION": use_pool_bundle_option
505 "INPUT_BASENAME": utils
.SpaceSeparatedList(input_basenames
)
510 # Generate res_index file
511 # Exclude the deprecated locale variants and root; see ICU-20628. This
512 # could be data-driven, but we do not want to perform I/O in this script
513 # (for example, we do not want to read from an XML file).
514 excluded_locales
= set([
523 # Put alias locales in a separate structure; see ICU-20627
524 dependency_data
= io
.read_locale_deps(sub_dir
)
525 if "aliases" in dependency_data
:
526 alias_locales
= set(dependency_data
["aliases"].keys())
528 alias_locales
= set()
531 for f
in input_files
:
532 file_stem
= IndexRequest
.locale_file_stem(f
)
533 if file_stem
in excluded_locales
:
535 destination
= alias_files
if file_stem
in alias_locales
else installed_files
536 destination
.append(f
)
537 cldr_version
= dependency_data
["cldrVersion"] if sub_dir
== "locales" else None
538 index_file_txt
= TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
539 IN_SUB_DIR
= sub_dir
,
542 index_res_file
= OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
543 OUT_PREFIX
= out_prefix
,
546 index_file_target_name
= "%s_index_txt" % sub_dir
549 name
= index_file_target_name
,
551 installed_files
= installed_files
,
552 alias_files
= alias_files
,
553 txt_file
= index_file_txt
,
554 output_file
= index_res_file
,
555 cldr_version
= cldr_version
,
556 args
= "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
560 "IN_SUB_DIR": sub_dir
,
561 "OUT_PREFIX": out_prefix