]>
git.saurik.com Git - apple/icu.git/blob - icuSources/data/BUILDRULES.py
1 # Copyright (C) 2018 and later: Unicode, Inc. and others.
2 # License & terms of use: http://www.unicode.org/copyright.html
4 # Python 2/3 Compatibility (ICU-20299)
5 # TODO(ICU-20301): Remove this.
6 from __future__
import print_function
8 from buildtool
import *
9 from buildtool
import locale_dependencies
10 from buildtool
import utils
11 from buildtool
.request_types
import *
15 import xml
.etree
.ElementTree
as ET
18 def generate(config
, glob
, common_vars
):
21 if len(glob("misc/*")) == 0:
22 print("Error: Cannot find data directory; please specify --src_dir", file=sys
.stderr
)
25 requests
+= generate_cnvalias(config
, glob
, common_vars
)
26 requests
+= generate_confusables(config
, glob
, common_vars
)
27 requests
+= generate_conversion_mappings(config
, glob
, common_vars
)
28 requests
+= generate_brkitr_brk(config
, glob
, common_vars
)
29 requests
+= generate_stringprep(config
, glob
, common_vars
)
30 requests
+= generate_brkitr_dictionaries(config
, glob
, common_vars
)
31 requests
+= generate_normalization(config
, glob
, common_vars
)
32 requests
+= generate_coll_ucadata(config
, glob
, common_vars
)
33 requests
+= generate_full_unicore_data(config
, glob
, common_vars
)
34 requests
+= generate_unames(config
, glob
, common_vars
)
35 requests
+= generate_ulayout(config
, glob
, common_vars
)
36 requests
+= generate_misc(config
, glob
, common_vars
)
37 requests
+= generate_curr_supplemental(config
, glob
, common_vars
)
38 requests
+= generate_translit(config
, glob
, common_vars
)
41 # (input dirname, output dirname, resfiles.mk path, mk version var, mk source var, use pool file, dep files)
42 requests
+= generate_tree(config
, glob
, common_vars
,
45 "icu-locale-deprecates.xml",
49 requests
+= generate_tree(config
, glob
, common_vars
,
52 "icu-locale-deprecates.xml",
56 requests
+= generate_tree(config
, glob
, common_vars
,
59 "icu-locale-deprecates.xml",
63 requests
+= generate_tree(config
, glob
, common_vars
,
66 "icu-locale-deprecates.xml",
70 requests
+= generate_tree(config
, glob
, common_vars
,
73 "icu-locale-deprecates.xml",
77 requests
+= generate_tree(config
, glob
, common_vars
,
80 "icu-locale-deprecates.xml",
84 requests
+= generate_tree(config
, glob
, common_vars
,
87 "icu-coll-deprecates.xml",
89 # Depends on timezoneTypes.res and keyTypeData.res.
90 # TODO: We should not need this dependency to build collation.
91 # TODO: Bake keyTypeData.res into the common library?
92 [DepTarget("coll_ucadata"), DepTarget("misc_res"), InFile("unidata/UCARules.txt")])
94 requests
+= generate_tree(config
, glob
, common_vars
,
97 "icu-locale-deprecates.xml",
99 [DepTarget("brkitr_brk"), DepTarget("dictionaries")])
101 requests
+= generate_tree(config
, glob
, common_vars
,
104 "icu-rbnf-deprecates.xml",
110 name
= "icudata_list",
111 variable_name
= "icudata_all_output_files",
112 output_file
= TmpFile("icudata.lst"),
120 def generate_cnvalias(config
, glob
, common_vars
):
122 input_file
= InFile("mappings/convrtrs.txt")
123 output_file
= OutFile("cnvalias.icu")
125 SingleExecutionRequest(
127 category
= "cnvalias",
129 input_files
= [input_file
],
130 output_files
= [output_file
],
131 tool
= IcuTool("gencnval"),
132 args
= "-s {IN_DIR} -d {OUT_DIR} "
139 def generate_confusables(config
, glob
, common_vars
):
141 txt1
= InFile("unidata/confusables.txt")
142 txt2
= InFile("unidata/confusablesWholeScript.txt")
143 cfu
= OutFile("confusables.cfu")
145 SingleExecutionRequest(
146 name
= "confusables",
147 category
= "confusables",
148 dep_targets
= [DepTarget("cnvalias")],
149 input_files
= [txt1
, txt2
],
150 output_files
= [cfu
],
151 tool
= IcuTool("gencfu"),
152 args
= "-d {OUT_DIR} -i {OUT_DIR} "
153 "-c -r {IN_DIR}/{INPUT_FILES[0]} -w {IN_DIR}/{INPUT_FILES[1]} "
154 "-o {OUTPUT_FILES[0]}",
160 def generate_conversion_mappings(config
, glob
, common_vars
):
161 # UConv Conversion Table Files
162 input_files
= [InFile(filename
) for filename
in glob("mappings/*.ucm")]
163 output_files
= [OutFile("%s.cnv" % v
.filename
[9:-4]) for v
in input_files
]
164 # TODO: handle BUILD_SPECIAL_CNV_FILES? Means to add --ignore-siso-check flag to makeconv
166 RepeatedOrSingleExecutionRequest(
167 name
= "conversion_mappings",
168 category
= "conversion_mappings",
170 input_files
= input_files
,
171 output_files
= output_files
,
172 tool
= IcuTool("makeconv"),
173 args
= "-s {IN_DIR} -d {OUT_DIR} -c {INPUT_FILE_PLACEHOLDER}",
176 "INPUT_FILE_PLACEHOLDER": utils
.SpaceSeparatedList(file.filename
for file in input_files
)
182 def generate_brkitr_brk(config
, glob
, common_vars
):
184 input_files
= [InFile(filename
) for filename
in glob("brkitr/rules/*.txt")]
185 output_files
= [OutFile("brkitr/%s.brk" % v
.filename
[13:-4]) for v
in input_files
]
187 RepeatedExecutionRequest(
189 category
= "brkitr_rules",
190 dep_targets
= [DepTarget("cnvalias")],
191 input_files
= input_files
,
192 output_files
= output_files
,
193 tool
= IcuTool("genbrk"),
194 args
= "-d {OUT_DIR} -i {OUT_DIR} "
195 "-c -r {IN_DIR}/{INPUT_FILE} "
203 def generate_stringprep(config
, glob
, common_vars
):
205 input_files
= [InFile(filename
) for filename
in glob("sprep/*.txt")]
206 output_files
= [OutFile("%s.spp" % v
.filename
[6:-4]) for v
in input_files
]
207 bundle_names
= [v
.filename
[6:-4] for v
in input_files
]
209 RepeatedExecutionRequest(
211 category
= "stringprep",
212 dep_targets
= [InFile("unidata/NormalizationCorrections.txt")],
213 input_files
= input_files
,
214 output_files
= output_files
,
215 tool
= IcuTool("gensprep"),
216 args
= "-s {IN_DIR}/sprep -d {OUT_DIR} -i {OUT_DIR} "
217 "-b {BUNDLE_NAME} -m {IN_DIR}/unidata -u 3.2.0 {BUNDLE_NAME}.txt",
220 "BUNDLE_NAME": bundle_names
226 def generate_brkitr_dictionaries(config
, glob
, common_vars
):
228 input_files
= [InFile(filename
) for filename
in glob("brkitr/dictionaries/*.txt")]
229 output_files
= [OutFile("brkitr/%s.dict" % v
.filename
[20:-4]) for v
in input_files
]
230 extra_options_map
= {
231 "brkitr/dictionaries/burmesedict.txt": "--bytes --transform offset-0x1000",
232 "brkitr/dictionaries/cjdict.txt": "--uchars",
233 "brkitr/dictionaries/khmerdict.txt": "--bytes --transform offset-0x1780",
234 "brkitr/dictionaries/laodict.txt": "--bytes --transform offset-0x0e80",
235 "brkitr/dictionaries/thaidict.txt": "--bytes --transform offset-0x0e00"
237 extra_optionses
= [extra_options_map
[v
.filename
] for v
in input_files
]
239 RepeatedExecutionRequest(
240 name
= "dictionaries",
241 category
= "brkitr_dictionaries",
243 input_files
= input_files
,
244 output_files
= output_files
,
245 tool
= IcuTool("gendict"),
246 args
= "-i {OUT_DIR} "
247 "-c {EXTRA_OPTIONS} "
248 "{IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
251 "EXTRA_OPTIONS": extra_optionses
257 def generate_normalization(config
, glob
, common_vars
):
259 input_files
= [InFile(filename
) for filename
in glob("in/*.nrm")]
260 # nfc.nrm is pre-compiled into C++; see generate_full_unicore_data
261 input_files
.remove(InFile("in/nfc.nrm"))
262 output_files
= [OutFile(v
.filename
[3:]) for v
in input_files
]
264 RepeatedExecutionRequest(
265 name
= "normalization",
266 category
= "normalization",
268 input_files
= input_files
,
269 output_files
= output_files
,
270 tool
= IcuTool("icupkg"),
271 args
= "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}",
278 def generate_coll_ucadata(config
, glob
, common_vars
):
279 # Collation Dependency File (ucadata.icu)
280 input_file
= InFile("in/coll/ucadata-%s.icu" % config
.coll_han_type
)
281 output_file
= OutFile("coll/ucadata.icu")
283 SingleExecutionRequest(
284 name
= "coll_ucadata",
285 category
= "coll_ucadata",
287 input_files
= [input_file
],
288 output_files
= [output_file
],
289 tool
= IcuTool("icupkg"),
290 args
= "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
296 def generate_full_unicore_data(config
, glob
, common_vars
):
297 # The core Unicode properties files (pnames.icu, uprops.icu, ucase.icu, ubidi.icu)
298 # are hardcoded in the common DLL and therefore not included in the data package any more.
299 # They are not built by default but need to be built for ICU4J data,
300 # both in the .jar and in the .dat file (if ICU4J uses the .dat file).
302 if not config
.include_uni_core_data
:
312 input_files
= [InFile("in/%s" % bn
) for bn
in basenames
]
313 output_files
= [OutFile(bn
) for bn
in basenames
]
315 RepeatedExecutionRequest(
317 category
= "unicore",
318 input_files
= input_files
,
319 output_files
= output_files
,
320 tool
= IcuTool("icupkg"),
321 args
= "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILE} {OUT_DIR}/{OUTPUT_FILE}"
326 def generate_unames(config
, glob
, common_vars
):
327 # Unicode Character Names
328 input_file
= InFile("in/unames.icu")
329 output_file
= OutFile("unames.icu")
331 SingleExecutionRequest(
335 input_files
= [input_file
],
336 output_files
= [output_file
],
337 tool
= IcuTool("icupkg"),
338 args
= "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
344 def generate_ulayout(config
, glob
, common_vars
):
345 # Unicode text layout properties
347 input_file
= InFile("in/%s.icu" % basename
)
348 output_file
= OutFile("%s.icu" % basename
)
350 SingleExecutionRequest(
354 input_files
= [input_file
],
355 output_files
= [output_file
],
356 tool
= IcuTool("icupkg"),
357 args
= "-t{ICUDATA_CHAR} {IN_DIR}/{INPUT_FILES[0]} {OUT_DIR}/{OUTPUT_FILES[0]}",
363 def generate_misc(config
, glob
, common_vars
):
364 # Misc Data Res Files
365 input_files
= [InFile(filename
) for filename
in glob("misc/*.txt")]
366 input_basenames
= [v
.filename
[5:] for v
in input_files
]
367 output_files
= [OutFile("%s.res" % v
[:-4]) for v
in input_basenames
]
369 RepeatedExecutionRequest(
373 input_files
= input_files
,
374 output_files
= output_files
,
375 tool
= IcuTool("genrb"),
376 args
= "-s {IN_DIR}/misc -d {OUT_DIR} -i {OUT_DIR} "
381 "INPUT_BASENAME": input_basenames
387 def generate_curr_supplemental(config
, glob
, common_vars
):
388 # Currency Supplemental Res File
389 input_file
= InFile("curr/supplementalData.txt")
390 input_basename
= "supplementalData.txt"
391 output_file
= OutFile("curr/supplementalData.res")
393 SingleExecutionRequest(
394 name
= "curr_supplemental_res",
395 category
= "curr_supplemental",
397 input_files
= [input_file
],
398 output_files
= [output_file
],
399 tool
= IcuTool("genrb"),
400 args
= "-s {IN_DIR}/curr -d {OUT_DIR}/curr -i {OUT_DIR} "
404 "INPUT_BASENAME": input_basename
410 def generate_translit(config
, glob
, common_vars
):
412 InFile("translit/root.txt"),
413 InFile("translit/en.txt"),
414 InFile("translit/el.txt")
416 dep_files
= set(InFile(filename
) for filename
in glob("translit/*.txt"))
417 dep_files
-= set(input_files
)
418 dep_files
= list(sorted(dep_files
))
419 input_basenames
= [v
.filename
[9:] for v
in input_files
]
421 OutFile("translit/%s.res" % v
[:-4])
422 for v
in input_basenames
425 RepeatedOrSingleExecutionRequest(
426 name
= "translit_res",
427 category
= "translit",
428 dep_targets
= dep_files
,
429 input_files
= input_files
,
430 output_files
= output_files
,
431 tool
= IcuTool("genrb"),
432 args
= "-s {IN_DIR}/translit -d {OUT_DIR}/translit -i {OUT_DIR} "
438 "INPUT_BASENAME": utils
.SpaceSeparatedList(input_basenames
)
454 category
= "%s_tree" % sub_dir
455 out_prefix
= "%s/" % out_sub_dir
if out_sub_dir
else ""
456 # TODO: Clean this up for curr
457 input_files
= [InFile(filename
) for filename
in glob("%s/*.txt" % sub_dir
)]
458 if sub_dir
== "curr":
459 input_files
.remove(InFile("curr/supplementalData.txt"))
460 input_basenames
= [v
.filename
[len(sub_dir
)+1:] for v
in input_files
]
462 OutFile("%s%s.res" % (out_prefix
, v
[:-4]))
463 for v
in input_basenames
466 # Generate Pool Bundle
468 input_pool_files
= [OutFile("%spool.res" % out_prefix
)]
469 pool_target_name
= "%s_pool_write" % sub_dir
470 use_pool_bundle_option
= "--usePoolBundle {OUT_DIR}/{OUT_PREFIX}".format(
471 OUT_PREFIX
= out_prefix
,
475 SingleExecutionRequest(
476 name
= pool_target_name
,
478 dep_targets
= dep_targets
,
479 input_files
= input_files
,
480 output_files
= input_pool_files
,
481 tool
= IcuTool("genrb"),
482 args
= "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
483 "--writePoolBundle -k "
484 "{INPUT_BASENAMES_SPACED}",
486 "IN_SUB_DIR": sub_dir
,
487 "OUT_PREFIX": out_prefix
,
488 "INPUT_BASENAMES_SPACED": utils
.SpaceSeparatedList(input_basenames
)
492 dep_targets
= dep_targets
+ [DepTarget(pool_target_name
)]
494 use_pool_bundle_option
= ""
496 # Generate Res File Tree
498 RepeatedOrSingleExecutionRequest(
499 name
= "%s_res" % sub_dir
,
501 dep_targets
= dep_targets
,
502 input_files
= input_files
,
503 output_files
= output_files
,
504 tool
= IcuTool("genrb"),
505 args
= "-s {IN_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
509 "IN_SUB_DIR": sub_dir
,
510 "OUT_PREFIX": out_prefix
,
511 "EXTRA_OPTION": use_pool_bundle_option
514 "INPUT_BASENAME": utils
.SpaceSeparatedList(input_basenames
)
519 # Generate index txt file
520 synthetic_locales
= set()
521 deprecates_xml_path
= os
.path
.join(os
.path
.dirname(__file__
), xml_filename
)
522 deprecates_xml
= ET
.parse(deprecates_xml_path
)
523 for child
in deprecates_xml
.getroot():
524 if child
.tag
== "alias":
525 synthetic_locales
.add(child
.attrib
["from"])
526 elif child
.tag
== "emptyLocale":
527 synthetic_locales
.add(child
.attrib
["locale"])
529 raise ValueError("Unknown tag in deprecates XML: %s" % child
.tag
)
530 index_input_files
= []
531 for f
in input_files
:
532 file_stem
= f
.filename
[f
.filename
.rfind("/")+1:-4]
533 if file_stem
== "root":
535 if file_stem
in synthetic_locales
:
537 index_input_files
.append(f
)
538 cldr_version
= locale_dependencies
.data
["cldrVersion"] if sub_dir
== "locales" else None
539 index_file_txt
= TmpFile("{IN_SUB_DIR}/{INDEX_NAME}.txt".format(
540 IN_SUB_DIR
= sub_dir
,
543 index_file_target_name
= "%s_index_txt" % sub_dir
546 name
= index_file_target_name
,
548 input_files
= index_input_files
,
549 output_file
= index_file_txt
,
550 cldr_version
= cldr_version
554 # Generate index res file
555 index_res_file
= OutFile("{OUT_PREFIX}{INDEX_NAME}.res".format(
556 OUT_PREFIX
= out_prefix
,
560 SingleExecutionRequest(
561 name
= "%s_index_res" % sub_dir
,
563 dep_targets
= [DepTarget(index_file_target_name
)],
565 output_files
= [index_res_file
],
566 tool
= IcuTool("genrb"),
567 args
= "-s {TMP_DIR}/{IN_SUB_DIR} -d {OUT_DIR}/{OUT_PREFIX} -i {OUT_DIR} "
571 "IN_SUB_DIR": sub_dir
,
572 "OUT_PREFIX": out_prefix