]>
Commit | Line | Data |
---|---|---|
73c04bcf A |
/*
*******************************************************************************
*
*   Copyright (C) 2005-2010, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  icupkg.cpp
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2005jul29
*   created by: Markus W. Scherer
*
*   This tool operates on ICU data (.dat package) files.
*   It takes one as input, or creates an empty one, and can remove, add, and
*   extract data pieces according to command-line options.
*   At the same time, it swaps each piece to a consistent set of platform
*   properties as desired.
*   Useful as an install-time tool for shipping only one flavor of ICU data
*   and preparing data files for the target platform.
*   Also for customizing ICU data (pruning, augmenting, replacing) and for
*   taking it apart.
*   Subsumes functionality and implementation code from
*   gencmn, decmn, and icuswap tools.
*   Will not work with data DLLs (shared libraries).
*/
29 | ||
30 | #include "unicode/utypes.h" | |
31 | #include "unicode/putil.h" | |
32 | #include "cstring.h" | |
33 | #include "toolutil.h" | |
34 | #include "uoptions.h" | |
35 | #include "uparse.h" | |
729e4ab9 | 36 | #include "filestrm.h" |
73c04bcf | 37 | #include "package.h" |
729e4ab9 | 38 | #include "pkg_icu.h" |
73c04bcf A |
39 | |
40 | #include <stdio.h> | |
41 | #include <stdlib.h> | |
42 | #include <string.h> | |
43 | ||
46f4442e A |
44 | U_NAMESPACE_USE |
45 | ||
73c04bcf A |
46 | // TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching? |
47 | ||
48 | // general definitions ----------------------------------------------------- *** | |
49 | ||
/*
 * Number of elements in a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast cannot combine
 * with operators at the point of use.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
51 | ||
73c04bcf A |
52 | // main() ------------------------------------------------------------------ *** |
53 | ||
54 | static void | |
55 | printUsage(const char *pname, UBool isHelp) { | |
56 | FILE *where=isHelp ? stdout : stderr; | |
57 | ||
58 | fprintf(where, | |
59 | "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n" | |
729e4ab9 | 60 | "\t[-a list] [-r list] [-x list] [-l [-o outputListFileName]]\n" |
73c04bcf A |
61 | "\t[-s path] [-d path] [-w] [-m mode]\n" |
62 | "\tinfilename [outfilename]\n", | |
63 | isHelp ? 'U' : 'u', pname); | |
64 | if(isHelp) { | |
65 | fprintf(where, | |
66 | "\n" | |
67 | "Read the input ICU .dat package file, modify it according to the options,\n" | |
68 | "swap it to the desired platform properties (charset & endianness),\n" | |
69 | "and optionally write the resulting ICU .dat package to the output file.\n" | |
70 | "Items are removed, then added, then extracted and listed.\n" | |
71 | "An ICU .dat package is written if items are removed or added,\n" | |
72 | "or if the input and output filenames differ,\n" | |
73 | "or if the --writepkg (-w) option is set.\n"); | |
74 | fprintf(where, | |
75 | "\n" | |
76 | "If the input filename is \"new\" then an empty package is created.\n" | |
77 | "If the output filename is missing, then it is automatically generated\n" | |
78 | "from the input filename: If the input filename ends with an l, b, or e\n" | |
79 | "matching its platform properties, then the output filename will\n" | |
80 | "contain the letter from the -t (--type) option.\n"); | |
81 | fprintf(where, | |
82 | "\n" | |
83 | "This tool can also be used to just swap a single ICU data file, replacing the\n" | |
84 | "former icuswap tool. For this mode, provide the infilename (and optional\n" | |
85 | "outfilename) for a non-package ICU data file.\n" | |
86 | "Allowed options include -t, -w, -s and -d.\n" | |
87 | "The filenames can be absolute, or relative to the source/dest dir paths.\n" | |
88 | "Other options are not allowed in this mode.\n"); | |
89 | fprintf(where, | |
90 | "\n" | |
91 | "Options:\n" | |
92 | "\t(Only the last occurrence of an option is used.)\n" | |
93 | "\n" | |
94 | "\t-h or -? or --help print this message and exit\n"); | |
95 | fprintf(where, | |
96 | "\n" | |
97 | "\t-tl or --type l output for little-endian/ASCII charset family\n" | |
98 | "\t-tb or --type b output for big-endian/ASCII charset family\n" | |
99 | "\t-te or --type e output for big-endian/EBCDIC charset family\n" | |
100 | "\t The output type defaults to the input type.\n" | |
101 | "\n" | |
102 | "\t-c or --copyright include the ICU copyright notice\n" | |
103 | "\t-C comment or --comment comment include a comment string\n"); | |
104 | fprintf(where, | |
105 | "\n" | |
106 | "\t-a list or --add list add items to the package\n" | |
107 | "\t-r list or --remove list remove items from the package\n" | |
108 | "\t-x list or --extract list extract items from the package\n" | |
109 | "\tThe list can be a single item's filename,\n" | |
110 | "\tor a .txt filename with a list of item filenames,\n" | |
111 | "\tor an ICU .dat package filename.\n"); | |
112 | fprintf(where, | |
113 | "\n" | |
114 | "\t-w or --writepkg write the output package even if no items are removed\n" | |
115 | "\t or added (e.g., for only swapping the data)\n"); | |
116 | fprintf(where, | |
117 | "\n" | |
118 | "\t-m mode or --matchmode mode set the matching mode for item names with\n" | |
119 | "\t wildcards\n" | |
120 | "\t noslash: the '*' wildcard does not match the '/' tree separator\n"); | |
121 | /* | |
122 | * Usage text columns, starting after the initial TAB. | |
123 | * 1 2 3 4 5 6 7 8 | |
124 | * 901234567890123456789012345678901234567890123456789012345678901234567890 | |
125 | */ | |
126 | fprintf(where, | |
127 | "\n" | |
128 | "\tList file syntax: Items are listed on one or more lines and separated\n" | |
129 | "\tby whitespace (space+tab).\n" | |
130 | "\tComments begin with # and are ignored. Empty lines are ignored.\n" | |
131 | "\tLines where the first non-whitespace character is one of %s\n" | |
132 | "\tare also ignored, to reserve for future syntax.\n", | |
729e4ab9 | 133 | U_PKG_RESERVED_CHARS); |
73c04bcf A |
134 | fprintf(where, |
135 | "\tItems for removal or extraction may contain a single '*' wildcard\n" | |
136 | "\tcharacter. The '*' matches zero or more characters.\n" | |
137 | "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n" | |
138 | "\tdoes not match '/'.\n"); | |
139 | fprintf(where, | |
140 | "\n" | |
141 | "\tItems must be listed relative to the package, and the --sourcedir or\n" | |
142 | "\tthe --destdir path will be prepended.\n" | |
143 | "\tThe paths are only prepended to item filenames while adding or\n" | |
144 | "\textracting items, not to ICU .dat package or list filenames.\n" | |
145 | "\t\n" | |
146 | "\tPaths may contain '/' instead of the platform's\n" | |
147 | "\tfile separator character, and are converted as appropriate.\n"); | |
148 | fprintf(where, | |
149 | "\n" | |
150 | "\t-s path or --sourcedir path directory for the --add items\n" | |
151 | "\t-d path or --destdir path directory for the --extract items\n" | |
152 | "\n" | |
729e4ab9 | 153 | "\t-l or --list list the package items to stdout or to output list file\n" |
73c04bcf A |
154 | "\t (after modifying the package)\n"); |
155 | } | |
156 | } | |
157 | ||
158 | static UOption options[]={ | |
159 | UOPTION_HELP_H, | |
160 | UOPTION_HELP_QUESTION_MARK, | |
161 | UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG), | |
162 | ||
163 | UOPTION_COPYRIGHT, | |
164 | UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG), | |
165 | ||
166 | UOPTION_SOURCEDIR, | |
167 | UOPTION_DESTDIR, | |
168 | ||
169 | UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG), | |
170 | ||
171 | UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG), | |
172 | ||
173 | UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG), | |
174 | UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG), | |
175 | UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG), | |
176 | ||
729e4ab9 A |
177 | UOPTION_DEF("list", 'l', UOPT_NO_ARG), |
178 | ||
179 | UOPTION_DEF("outlist", 'o', UOPT_REQUIRES_ARG) | |
73c04bcf A |
180 | }; |
181 | ||
/*
 * Indexes into options[]; the enumerators are listed in the same order as
 * the entries of that table. OPT_COUNT is the number of options.
 */
enum {
    OPT_HELP_H,             /* -h, --help */
    OPT_HELP_QUESTION_MARK, /* -? */
    OPT_OUT_TYPE,           /* -t, --type */
    OPT_COPYRIGHT,          /* -c, --copyright */
    OPT_COMMENT,            /* -C, --comment */
    OPT_SOURCEDIR,          /* -s, --sourcedir */
    OPT_DESTDIR,            /* -d, --destdir */
    OPT_WRITEPKG,           /* -w, --writepkg */
    OPT_MATCHMODE,          /* -m, --matchmode */
    OPT_ADD_LIST,           /* -a, --add */
    OPT_REMOVE_LIST,        /* -r, --remove */
    OPT_EXTRACT_LIST,       /* -x, --extract */
    OPT_LIST_ITEMS,         /* -l, --list */
    OPT_LIST_FILE,          /* -o, --outlist */
    OPT_COUNT
};
207 | ||
208 | static UBool | |
209 | isPackageName(const char *filename) { | |
210 | int32_t len; | |
211 | ||
212 | len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */ | |
213 | return (UBool)(len>0 && 0==strcmp(filename+len, ".dat")); | |
214 | } | |
46f4442e A |
/*
 * MinGW expands wildcard arguments on its own, so an escaped \* would
 * arrive as a list of file names rather than as a literal "*".
 * Defining _CRT_glob as 0 is required to turn that globbing off.
 */
int _CRT_glob = 0;
220 | ||
73c04bcf A |
221 | extern int |
222 | main(int argc, char *argv[]) { | |
223 | const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment; | |
224 | char outType; | |
225 | UBool isHelp, isModified, isPackage; | |
729e4ab9 | 226 | int result = 0; |
73c04bcf | 227 | |
46f4442e | 228 | Package *pkg, *listPkg, *addListPkg; |
73c04bcf A |
229 | |
230 | U_MAIN_INIT_ARGS(argc, argv); | |
231 | ||
232 | /* get the program basename */ | |
233 | pname=findBasename(argv[0]); | |
234 | ||
235 | argc=u_parseArgs(argc, argv, LENGTHOF(options), options); | |
236 | isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; | |
237 | if(isHelp) { | |
238 | printUsage(pname, TRUE); | |
239 | return U_ZERO_ERROR; | |
240 | } | |
241 | if(argc<2 || 3<argc) { | |
242 | printUsage(pname, FALSE); | |
243 | return U_ILLEGAL_ARGUMENT_ERROR; | |
244 | } | |
245 | ||
246 | pkg=new Package; | |
247 | if(pkg==NULL) { | |
248 | fprintf(stderr, "icupkg: not enough memory\n"); | |
249 | return U_MEMORY_ALLOCATION_ERROR; | |
250 | } | |
251 | isModified=FALSE; | |
252 | ||
253 | if(options[OPT_SOURCEDIR].doesOccur) { | |
254 | sourcePath=options[OPT_SOURCEDIR].value; | |
255 | } else { | |
256 | // work relative to the current working directory | |
257 | sourcePath=NULL; | |
258 | } | |
259 | if(options[OPT_DESTDIR].doesOccur) { | |
260 | destPath=options[OPT_DESTDIR].value; | |
261 | } else { | |
262 | // work relative to the current working directory | |
263 | destPath=NULL; | |
264 | } | |
265 | ||
266 | if(0==strcmp(argv[1], "new")) { | |
267 | inFilename=NULL; | |
268 | isPackage=TRUE; | |
269 | } else { | |
270 | inFilename=argv[1]; | |
271 | if(isPackageName(inFilename)) { | |
272 | pkg->readPackage(inFilename); | |
273 | isPackage=TRUE; | |
274 | } else { | |
275 | /* swap a single file (icuswap replacement) rather than work on a package */ | |
276 | pkg->addFile(sourcePath, inFilename); | |
277 | isPackage=FALSE; | |
278 | } | |
279 | } | |
280 | ||
281 | if(argc>=3) { | |
282 | outFilename=argv[2]; | |
283 | if(0!=strcmp(argv[1], argv[2])) { | |
284 | isModified=TRUE; | |
285 | } | |
286 | } else if(isPackage) { | |
287 | outFilename=NULL; | |
288 | } else /* !isPackage */ { | |
289 | outFilename=inFilename; | |
290 | isModified=(UBool)(sourcePath!=destPath); | |
291 | } | |
292 | ||
293 | /* parse the output type option */ | |
294 | if(options[OPT_OUT_TYPE].doesOccur) { | |
295 | const char *type=options[OPT_OUT_TYPE].value; | |
296 | if(type[0]==0 || type[1]!=0) { | |
297 | /* the type must be exactly one letter */ | |
298 | printUsage(pname, FALSE); | |
299 | return U_ILLEGAL_ARGUMENT_ERROR; | |
300 | } | |
301 | outType=type[0]; | |
302 | switch(outType) { | |
303 | case 'l': | |
304 | case 'b': | |
305 | case 'e': | |
306 | break; | |
307 | default: | |
308 | printUsage(pname, FALSE); | |
309 | return U_ILLEGAL_ARGUMENT_ERROR; | |
310 | } | |
311 | ||
312 | /* | |
313 | * Set the isModified flag if the output type differs from the | |
314 | * input package type. | |
315 | * If we swap a single file, just assume that we are modifying it. | |
316 | * The Package class does not give us access to the item and its type. | |
317 | */ | |
729e4ab9 | 318 | isModified|=(UBool)(!isPackage || outType!=pkg->getInType()); |
73c04bcf A |
319 | } else if(isPackage) { |
320 | outType=pkg->getInType(); // default to input type | |
321 | } else /* !isPackage: swap single file */ { | |
322 | outType=0; /* tells extractItem() to not swap */ | |
323 | } | |
324 | ||
325 | if(options[OPT_WRITEPKG].doesOccur) { | |
326 | isModified=TRUE; | |
327 | } | |
328 | ||
329 | if(!isPackage) { | |
330 | /* | |
331 | * icuswap tool replacement: Only swap a single file. | |
332 | * Check that irrelevant options are not set. | |
333 | */ | |
334 | if( options[OPT_COMMENT].doesOccur || | |
335 | options[OPT_COPYRIGHT].doesOccur || | |
336 | options[OPT_MATCHMODE].doesOccur || | |
337 | options[OPT_REMOVE_LIST].doesOccur || | |
338 | options[OPT_ADD_LIST].doesOccur || | |
339 | options[OPT_EXTRACT_LIST].doesOccur || | |
340 | options[OPT_LIST_ITEMS].doesOccur | |
341 | ) { | |
342 | printUsage(pname, FALSE); | |
343 | return U_ILLEGAL_ARGUMENT_ERROR; | |
344 | } | |
345 | if(isModified) { | |
346 | pkg->extractItem(destPath, outFilename, 0, outType); | |
347 | } | |
348 | ||
349 | delete pkg; | |
729e4ab9 | 350 | return result; |
73c04bcf A |
351 | } |
352 | ||
353 | /* Work with a package. */ | |
354 | ||
355 | if(options[OPT_COMMENT].doesOccur) { | |
356 | outComment=options[OPT_COMMENT].value; | |
357 | } else if(options[OPT_COPYRIGHT].doesOccur) { | |
358 | outComment=U_COPYRIGHT_STRING; | |
359 | } else { | |
360 | outComment=NULL; | |
361 | } | |
362 | ||
363 | if(options[OPT_MATCHMODE].doesOccur) { | |
364 | if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) { | |
365 | pkg->setMatchMode(Package::MATCH_NOSLASH); | |
366 | } else { | |
367 | printUsage(pname, FALSE); | |
368 | return U_ILLEGAL_ARGUMENT_ERROR; | |
369 | } | |
370 | } | |
371 | ||
372 | /* remove items */ | |
373 | if(options[OPT_REMOVE_LIST].doesOccur) { | |
374 | listPkg=readList(NULL, options[OPT_REMOVE_LIST].value, FALSE); | |
375 | if(listPkg!=NULL) { | |
376 | pkg->removeItems(*listPkg); | |
377 | delete listPkg; | |
378 | isModified=TRUE; | |
379 | } else { | |
380 | printUsage(pname, FALSE); | |
381 | return U_ILLEGAL_ARGUMENT_ERROR; | |
382 | } | |
383 | } | |
384 | ||
385 | /* | |
386 | * add items | |
387 | * use a separate Package so that its memory and items stay around | |
388 | * as long as the main Package | |
389 | */ | |
46f4442e | 390 | addListPkg=NULL; |
73c04bcf | 391 | if(options[OPT_ADD_LIST].doesOccur) { |
46f4442e A |
392 | addListPkg=readList(sourcePath, options[OPT_ADD_LIST].value, TRUE); |
393 | if(addListPkg!=NULL) { | |
394 | pkg->addItems(*addListPkg); | |
395 | // delete addListPkg; deferred until after writePackage() | |
73c04bcf A |
396 | isModified=TRUE; |
397 | } else { | |
398 | printUsage(pname, FALSE); | |
399 | return U_ILLEGAL_ARGUMENT_ERROR; | |
400 | } | |
401 | } | |
402 | ||
403 | /* extract items */ | |
404 | if(options[OPT_EXTRACT_LIST].doesOccur) { | |
405 | listPkg=readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE); | |
406 | if(listPkg!=NULL) { | |
407 | pkg->extractItems(destPath, *listPkg, outType); | |
408 | delete listPkg; | |
409 | } else { | |
410 | printUsage(pname, FALSE); | |
411 | return U_ILLEGAL_ARGUMENT_ERROR; | |
412 | } | |
413 | } | |
414 | ||
415 | /* list items */ | |
416 | if(options[OPT_LIST_ITEMS].doesOccur) { | |
46f4442e | 417 | int32_t i; |
729e4ab9 A |
418 | if (options[OPT_LIST_FILE].doesOccur) { |
419 | FileStream *out; | |
420 | out = T_FileStream_open(options[OPT_LIST_FILE].value, "w"); | |
421 | if (out != NULL) { | |
422 | for(i=0; i<pkg->getItemCount(); ++i) { | |
423 | T_FileStream_writeLine(out, pkg->getItem(i)->name); | |
424 | T_FileStream_writeLine(out, "\n"); | |
425 | } | |
426 | T_FileStream_close(out); | |
427 | } else { | |
428 | return U_ILLEGAL_ARGUMENT_ERROR; | |
429 | } | |
430 | } else { | |
431 | for(i=0; i<pkg->getItemCount(); ++i) { | |
432 | fprintf(stdout, "%s\n", pkg->getItem(i)->name); | |
433 | } | |
46f4442e | 434 | } |
73c04bcf A |
435 | } |
436 | ||
437 | /* check dependencies between items */ | |
438 | if(!pkg->checkDependencies()) { | |
439 | /* some dependencies are not fulfilled */ | |
440 | return U_MISSING_RESOURCE_ERROR; | |
441 | } | |
442 | ||
443 | /* write the output .dat package if there are any modifications */ | |
444 | if(isModified) { | |
445 | char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary | |
446 | ||
447 | if(outFilename==NULL || outFilename[0]==0) { | |
448 | if(inFilename==NULL || inFilename[0]==0) { | |
449 | fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n"); | |
450 | exit(U_ILLEGAL_ARGUMENT_ERROR); | |
451 | } | |
452 | ||
453 | /* | |
454 | * auto-generate a filename: | |
455 | * copy the inFilename, | |
456 | * and if the last basename character matches the input file's type, | |
457 | * then replace it with the output file's type | |
458 | */ | |
459 | char suffix[6]="?.dat"; | |
460 | char *s; | |
461 | ||
462 | suffix[0]=pkg->getInType(); | |
463 | strcpy(outFilenameBuffer, inFilename); | |
464 | s=strchr(outFilenameBuffer, 0); | |
465 | if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) { | |
466 | *(s-5)=outType; | |
467 | } | |
468 | outFilename=outFilenameBuffer; | |
469 | } | |
729e4ab9 | 470 | result = writePackageDatFile(outFilename, outComment, NULL, NULL, pkg, outType); |
73c04bcf A |
471 | } |
472 | ||
46f4442e | 473 | delete addListPkg; |
73c04bcf | 474 | delete pkg; |
729e4ab9 | 475 | return result; |
73c04bcf A |
476 | } |
477 | ||
/*
 * Hey, Emacs, please set the following:
 *
 * Local Variables:
 * indent-tabs-mode: nil
 * End:
 *
 */