]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | ******************************************************************************* | |
3 | * | |
4 | * Copyright (C) 2005-2006, International Business Machines | |
5 | * Corporation and others. All Rights Reserved. | |
6 | * | |
7 | ******************************************************************************* | |
8 | * file name: icupkg.cpp | |
9 | * encoding: US-ASCII | |
10 | * tab size: 8 (not used) | |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2005jul29 | |
14 | * created by: Markus W. Scherer | |
15 | * | |
16 | * This tool operates on ICU data (.dat package) files. | |
17 | * It takes one as input, or creates an empty one, and can remove, add, and | |
18 | * extract data pieces according to command-line options. | |
19 | * At the same time, it swaps each piece to a consistent set of platform | |
20 | * properties as desired. | |
21 | * Useful as an install-time tool for shipping only one flavor of ICU data | |
22 | * and preparing data files for the target platform. | |
23 | * Also for customizing ICU data (pruning, augmenting, replacing) and for | |
24 | * taking it apart. | |
25 | * Subsumes functionality and implementation code from | |
26 | * gencmn, decmn, and icuswap tools. | |
27 | * Will not work with data DLLs (shared libraries). | |
28 | */ | |
29 | ||
30 | #include "unicode/utypes.h" | |
31 | #include "unicode/putil.h" | |
32 | #include "cstring.h" | |
33 | #include "toolutil.h" | |
34 | #include "uoptions.h" | |
35 | #include "uparse.h" | |
36 | #include "package.h" | |
37 | ||
38 | #include <stdio.h> | |
39 | #include <stdlib.h> | |
40 | #include <string.h> | |
41 | ||
42 | // TODO: add --matchmode=regex for using the ICU regex engine for item name pattern matching? | |
43 | ||
44 | // general definitions ----------------------------------------------------- *** | |
45 | ||
/*
 * Number of elements of a true array (not of a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves like a single
 * primary expression wherever it is used (e.g., after sizeof or before a
 * postfix operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
47 | ||
48 | // read a file list -------------------------------------------------------- *** | |
49 | ||
// A list-file line is skipped if its first non-whitespace character
// is one of these characters.
static const char *reservedChars="\"%&'()*+,-./:;<=>?_";

// File name suffixes that mark a list name as a text list file.
// Each length is the number of characters of the suffix, without the NUL.
static const struct {
    const char *suffix;
    int32_t length;
} listFileSuffixes[]={
    { ".txt", (int32_t)(sizeof(".txt")-1) },
    { ".lst", (int32_t)(sizeof(".lst")-1) },
    { ".tmp", (int32_t)(sizeof(".tmp")-1) }
};
60 | ||
61 | /* check for multiple text file suffixes to see if this list name is a text file name */ | |
62 | static UBool | |
63 | isListTextFile(const char *listname) { | |
64 | const char *listNameEnd=strchr(listname, 0); | |
65 | const char *suffix; | |
66 | int32_t i, length; | |
67 | for(i=0; i<LENGTHOF(listFileSuffixes); ++i) { | |
68 | suffix=listFileSuffixes[i].suffix; | |
69 | length=listFileSuffixes[i].length; | |
70 | if((listNameEnd-listname)>length && 0==memcmp(listNameEnd-length, suffix, length)) { | |
71 | return TRUE; | |
72 | } | |
73 | } | |
74 | return FALSE; | |
75 | } | |
76 | ||
77 | /* | |
78 | * Read a file list. | |
79 | * If the listname ends with ".txt", then read the list file | |
80 | * (in the system/ invariant charset). | |
81 | * If the listname ends with ".dat", then read the ICU .dat package file. | |
82 | * Otherwise, read the file itself as a single-item list. | |
83 | */ | |
84 | static Package * | |
85 | readList(const char *filesPath, const char *listname, UBool readContents) { | |
86 | Package *listPkg; | |
87 | FILE *file; | |
88 | const char *listNameEnd; | |
89 | ||
90 | if(listname==NULL || listname[0]==0) { | |
91 | fprintf(stderr, "missing list file\n"); | |
92 | return NULL; | |
93 | } | |
94 | ||
95 | listPkg=new Package(); | |
96 | if(listPkg==NULL) { | |
97 | fprintf(stderr, "icupkg: not enough memory\n"); | |
98 | exit(U_MEMORY_ALLOCATION_ERROR); | |
99 | } | |
100 | ||
101 | listNameEnd=strchr(listname, 0); | |
102 | if(isListTextFile(listname)) { | |
103 | // read the list file | |
104 | char line[1024]; | |
105 | char *end; | |
106 | const char *start; | |
107 | ||
108 | file=fopen(listname, "r"); | |
109 | if(file==NULL) { | |
110 | fprintf(stderr, "icupkg: unable to open list file \"%s\"\n", listname); | |
111 | delete listPkg; | |
112 | exit(U_FILE_ACCESS_ERROR); | |
113 | } | |
114 | ||
115 | while(fgets(line, sizeof(line), file)) { | |
116 | // remove comments | |
117 | end=strchr(line, '#'); | |
118 | if(end!=NULL) { | |
119 | *end=0; | |
120 | } else { | |
121 | // remove trailing CR LF | |
122 | end=strchr(line, 0); | |
123 | while(line<end && (*(end-1)=='\r' || *(end-1)=='\n')) { | |
124 | *--end=0; | |
125 | } | |
126 | } | |
127 | ||
128 | // check first non-whitespace character and | |
129 | // skip empty lines and | |
130 | // skip lines starting with reserved characters | |
131 | start=u_skipWhitespace(line); | |
132 | if(*start==0 || NULL!=strchr(reservedChars, *start)) { | |
133 | continue; | |
134 | } | |
135 | ||
136 | // take whitespace-separated items from the line | |
137 | for(;;) { | |
138 | // find whitespace after the item or the end of the line | |
139 | for(end=(char *)start; *end!=0 && *end!=' ' && *end!='\t'; ++end) {} | |
140 | if(*end==0) { | |
141 | // this item is the last one on the line | |
142 | end=NULL; | |
143 | } else { | |
144 | // the item is terminated by whitespace, terminate it with NUL | |
145 | *end=0; | |
146 | } | |
147 | if(readContents) { | |
148 | listPkg->addFile(filesPath, start); | |
149 | } else { | |
150 | listPkg->addItem(start); | |
151 | } | |
152 | ||
153 | // find the start of the next item or exit the loop | |
154 | if(end==NULL || *(start=u_skipWhitespace(end+1))==0) { | |
155 | break; | |
156 | } | |
157 | } | |
158 | } | |
159 | fclose(file); | |
160 | } else if((listNameEnd-listname)>4 && 0==memcmp(listNameEnd-4, ".dat", 4)) { | |
161 | // read the ICU .dat package | |
162 | listPkg->readPackage(listname); | |
163 | } else { | |
164 | // list the single file itself | |
165 | if(readContents) { | |
166 | listPkg->addFile(filesPath, listname); | |
167 | } else { | |
168 | listPkg->addItem(listname); | |
169 | } | |
170 | } | |
171 | ||
172 | return listPkg; | |
173 | } | |
174 | ||
175 | // main() ------------------------------------------------------------------ *** | |
176 | ||
177 | static void | |
178 | printUsage(const char *pname, UBool isHelp) { | |
179 | FILE *where=isHelp ? stdout : stderr; | |
180 | ||
181 | fprintf(where, | |
182 | "%csage: %s [-h|-?|--help ] [-tl|-tb|-te] [-c] [-C comment]\n" | |
183 | "\t[-a list] [-r list] [-x list] [-l]\n" | |
184 | "\t[-s path] [-d path] [-w] [-m mode]\n" | |
185 | "\tinfilename [outfilename]\n", | |
186 | isHelp ? 'U' : 'u', pname); | |
187 | if(isHelp) { | |
188 | fprintf(where, | |
189 | "\n" | |
190 | "Read the input ICU .dat package file, modify it according to the options,\n" | |
191 | "swap it to the desired platform properties (charset & endianness),\n" | |
192 | "and optionally write the resulting ICU .dat package to the output file.\n" | |
193 | "Items are removed, then added, then extracted and listed.\n" | |
194 | "An ICU .dat package is written if items are removed or added,\n" | |
195 | "or if the input and output filenames differ,\n" | |
196 | "or if the --writepkg (-w) option is set.\n"); | |
197 | fprintf(where, | |
198 | "\n" | |
199 | "If the input filename is \"new\" then an empty package is created.\n" | |
200 | "If the output filename is missing, then it is automatically generated\n" | |
201 | "from the input filename: If the input filename ends with an l, b, or e\n" | |
202 | "matching its platform properties, then the output filename will\n" | |
203 | "contain the letter from the -t (--type) option.\n"); | |
204 | fprintf(where, | |
205 | "\n" | |
206 | "This tool can also be used to just swap a single ICU data file, replacing the\n" | |
207 | "former icuswap tool. For this mode, provide the infilename (and optional\n" | |
208 | "outfilename) for a non-package ICU data file.\n" | |
209 | "Allowed options include -t, -w, -s and -d.\n" | |
210 | "The filenames can be absolute, or relative to the source/dest dir paths.\n" | |
211 | "Other options are not allowed in this mode.\n"); | |
212 | fprintf(where, | |
213 | "\n" | |
214 | "Options:\n" | |
215 | "\t(Only the last occurrence of an option is used.)\n" | |
216 | "\n" | |
217 | "\t-h or -? or --help print this message and exit\n"); | |
218 | fprintf(where, | |
219 | "\n" | |
220 | "\t-tl or --type l output for little-endian/ASCII charset family\n" | |
221 | "\t-tb or --type b output for big-endian/ASCII charset family\n" | |
222 | "\t-te or --type e output for big-endian/EBCDIC charset family\n" | |
223 | "\t The output type defaults to the input type.\n" | |
224 | "\n" | |
225 | "\t-c or --copyright include the ICU copyright notice\n" | |
226 | "\t-C comment or --comment comment include a comment string\n"); | |
227 | fprintf(where, | |
228 | "\n" | |
229 | "\t-a list or --add list add items to the package\n" | |
230 | "\t-r list or --remove list remove items from the package\n" | |
231 | "\t-x list or --extract list extract items from the package\n" | |
232 | "\tThe list can be a single item's filename,\n" | |
233 | "\tor a .txt filename with a list of item filenames,\n" | |
234 | "\tor an ICU .dat package filename.\n"); | |
235 | fprintf(where, | |
236 | "\n" | |
237 | "\t-w or --writepkg write the output package even if no items are removed\n" | |
238 | "\t or added (e.g., for only swapping the data)\n"); | |
239 | fprintf(where, | |
240 | "\n" | |
241 | "\t-m mode or --matchmode mode set the matching mode for item names with\n" | |
242 | "\t wildcards\n" | |
243 | "\t noslash: the '*' wildcard does not match the '/' tree separator\n"); | |
244 | /* | |
245 | * Usage text columns, starting after the initial TAB. | |
246 | * 1 2 3 4 5 6 7 8 | |
247 | * 901234567890123456789012345678901234567890123456789012345678901234567890 | |
248 | */ | |
249 | fprintf(where, | |
250 | "\n" | |
251 | "\tList file syntax: Items are listed on one or more lines and separated\n" | |
252 | "\tby whitespace (space+tab).\n" | |
253 | "\tComments begin with # and are ignored. Empty lines are ignored.\n" | |
254 | "\tLines where the first non-whitespace character is one of %s\n" | |
255 | "\tare also ignored, to reserve for future syntax.\n", | |
256 | reservedChars); | |
257 | fprintf(where, | |
258 | "\tItems for removal or extraction may contain a single '*' wildcard\n" | |
259 | "\tcharacter. The '*' matches zero or more characters.\n" | |
260 | "\tIf --matchmode noslash (-m noslash) is set, then the '*'\n" | |
261 | "\tdoes not match '/'.\n"); | |
262 | fprintf(where, | |
263 | "\n" | |
264 | "\tItems must be listed relative to the package, and the --sourcedir or\n" | |
265 | "\tthe --destdir path will be prepended.\n" | |
266 | "\tThe paths are only prepended to item filenames while adding or\n" | |
267 | "\textracting items, not to ICU .dat package or list filenames.\n" | |
268 | "\t\n" | |
269 | "\tPaths may contain '/' instead of the platform's\n" | |
270 | "\tfile separator character, and are converted as appropriate.\n"); | |
271 | fprintf(where, | |
272 | "\n" | |
273 | "\t-s path or --sourcedir path directory for the --add items\n" | |
274 | "\t-d path or --destdir path directory for the --extract items\n" | |
275 | "\n" | |
276 | "\t-l or --list list the package items to stdout\n" | |
277 | "\t (after modifying the package)\n"); | |
278 | } | |
279 | } | |
280 | ||
281 | static UOption options[]={ | |
282 | UOPTION_HELP_H, | |
283 | UOPTION_HELP_QUESTION_MARK, | |
284 | UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG), | |
285 | ||
286 | UOPTION_COPYRIGHT, | |
287 | UOPTION_DEF("comment", 'C', UOPT_REQUIRES_ARG), | |
288 | ||
289 | UOPTION_SOURCEDIR, | |
290 | UOPTION_DESTDIR, | |
291 | ||
292 | UOPTION_DEF("writepkg", 'w', UOPT_NO_ARG), | |
293 | ||
294 | UOPTION_DEF("matchmode", 'm', UOPT_REQUIRES_ARG), | |
295 | ||
296 | UOPTION_DEF("add", 'a', UOPT_REQUIRES_ARG), | |
297 | UOPTION_DEF("remove", 'r', UOPT_REQUIRES_ARG), | |
298 | UOPTION_DEF("extract", 'x', UOPT_REQUIRES_ARG), | |
299 | ||
300 | UOPTION_DEF("list", 'l', UOPT_NO_ARG) | |
301 | }; | |
302 | ||
303 | enum { | |
304 | OPT_HELP_H, | |
305 | OPT_HELP_QUESTION_MARK, | |
306 | OPT_OUT_TYPE, | |
307 | ||
308 | OPT_COPYRIGHT, | |
309 | OPT_COMMENT, | |
310 | ||
311 | OPT_SOURCEDIR, | |
312 | OPT_DESTDIR, | |
313 | ||
314 | OPT_WRITEPKG, | |
315 | ||
316 | OPT_MATCHMODE, | |
317 | ||
318 | OPT_ADD_LIST, | |
319 | OPT_REMOVE_LIST, | |
320 | OPT_EXTRACT_LIST, | |
321 | ||
322 | OPT_LIST_ITEMS, | |
323 | ||
324 | OPT_COUNT | |
325 | }; | |
326 | ||
327 | static UBool | |
328 | isPackageName(const char *filename) { | |
329 | int32_t len; | |
330 | ||
331 | len=(int32_t)strlen(filename)-4; /* -4: subtract the length of ".dat" */ | |
332 | return (UBool)(len>0 && 0==strcmp(filename+len, ".dat")); | |
333 | } | |
334 | ||
335 | extern int | |
336 | main(int argc, char *argv[]) { | |
337 | const char *pname, *sourcePath, *destPath, *inFilename, *outFilename, *outComment; | |
338 | char outType; | |
339 | UBool isHelp, isModified, isPackage; | |
340 | ||
341 | Package *pkg, *listPkg; | |
342 | ||
343 | U_MAIN_INIT_ARGS(argc, argv); | |
344 | ||
345 | /* get the program basename */ | |
346 | pname=findBasename(argv[0]); | |
347 | ||
348 | argc=u_parseArgs(argc, argv, LENGTHOF(options), options); | |
349 | isHelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; | |
350 | if(isHelp) { | |
351 | printUsage(pname, TRUE); | |
352 | return U_ZERO_ERROR; | |
353 | } | |
354 | if(argc<2 || 3<argc) { | |
355 | printUsage(pname, FALSE); | |
356 | return U_ILLEGAL_ARGUMENT_ERROR; | |
357 | } | |
358 | ||
359 | pkg=new Package; | |
360 | if(pkg==NULL) { | |
361 | fprintf(stderr, "icupkg: not enough memory\n"); | |
362 | return U_MEMORY_ALLOCATION_ERROR; | |
363 | } | |
364 | isModified=FALSE; | |
365 | ||
366 | if(options[OPT_SOURCEDIR].doesOccur) { | |
367 | sourcePath=options[OPT_SOURCEDIR].value; | |
368 | } else { | |
369 | // work relative to the current working directory | |
370 | sourcePath=NULL; | |
371 | } | |
372 | if(options[OPT_DESTDIR].doesOccur) { | |
373 | destPath=options[OPT_DESTDIR].value; | |
374 | } else { | |
375 | // work relative to the current working directory | |
376 | destPath=NULL; | |
377 | } | |
378 | ||
379 | if(0==strcmp(argv[1], "new")) { | |
380 | inFilename=NULL; | |
381 | isPackage=TRUE; | |
382 | } else { | |
383 | inFilename=argv[1]; | |
384 | if(isPackageName(inFilename)) { | |
385 | pkg->readPackage(inFilename); | |
386 | isPackage=TRUE; | |
387 | } else { | |
388 | /* swap a single file (icuswap replacement) rather than work on a package */ | |
389 | pkg->addFile(sourcePath, inFilename); | |
390 | isPackage=FALSE; | |
391 | } | |
392 | } | |
393 | ||
394 | if(argc>=3) { | |
395 | outFilename=argv[2]; | |
396 | if(0!=strcmp(argv[1], argv[2])) { | |
397 | isModified=TRUE; | |
398 | } | |
399 | } else if(isPackage) { | |
400 | outFilename=NULL; | |
401 | } else /* !isPackage */ { | |
402 | outFilename=inFilename; | |
403 | isModified=(UBool)(sourcePath!=destPath); | |
404 | } | |
405 | ||
406 | /* parse the output type option */ | |
407 | if(options[OPT_OUT_TYPE].doesOccur) { | |
408 | const char *type=options[OPT_OUT_TYPE].value; | |
409 | if(type[0]==0 || type[1]!=0) { | |
410 | /* the type must be exactly one letter */ | |
411 | printUsage(pname, FALSE); | |
412 | return U_ILLEGAL_ARGUMENT_ERROR; | |
413 | } | |
414 | outType=type[0]; | |
415 | switch(outType) { | |
416 | case 'l': | |
417 | case 'b': | |
418 | case 'e': | |
419 | break; | |
420 | default: | |
421 | printUsage(pname, FALSE); | |
422 | return U_ILLEGAL_ARGUMENT_ERROR; | |
423 | } | |
424 | ||
425 | /* | |
426 | * Set the isModified flag if the output type differs from the | |
427 | * input package type. | |
428 | * If we swap a single file, just assume that we are modifying it. | |
429 | * The Package class does not give us access to the item and its type. | |
430 | */ | |
431 | isModified=(UBool)(!isPackage || outType!=pkg->getInType()); | |
432 | } else if(isPackage) { | |
433 | outType=pkg->getInType(); // default to input type | |
434 | } else /* !isPackage: swap single file */ { | |
435 | outType=0; /* tells extractItem() to not swap */ | |
436 | } | |
437 | ||
438 | if(options[OPT_WRITEPKG].doesOccur) { | |
439 | isModified=TRUE; | |
440 | } | |
441 | ||
442 | if(!isPackage) { | |
443 | /* | |
444 | * icuswap tool replacement: Only swap a single file. | |
445 | * Check that irrelevant options are not set. | |
446 | */ | |
447 | if( options[OPT_COMMENT].doesOccur || | |
448 | options[OPT_COPYRIGHT].doesOccur || | |
449 | options[OPT_MATCHMODE].doesOccur || | |
450 | options[OPT_REMOVE_LIST].doesOccur || | |
451 | options[OPT_ADD_LIST].doesOccur || | |
452 | options[OPT_EXTRACT_LIST].doesOccur || | |
453 | options[OPT_LIST_ITEMS].doesOccur | |
454 | ) { | |
455 | printUsage(pname, FALSE); | |
456 | return U_ILLEGAL_ARGUMENT_ERROR; | |
457 | } | |
458 | if(isModified) { | |
459 | pkg->extractItem(destPath, outFilename, 0, outType); | |
460 | } | |
461 | ||
462 | delete pkg; | |
463 | return 0; | |
464 | } | |
465 | ||
466 | /* Work with a package. */ | |
467 | ||
468 | if(options[OPT_COMMENT].doesOccur) { | |
469 | outComment=options[OPT_COMMENT].value; | |
470 | } else if(options[OPT_COPYRIGHT].doesOccur) { | |
471 | outComment=U_COPYRIGHT_STRING; | |
472 | } else { | |
473 | outComment=NULL; | |
474 | } | |
475 | ||
476 | if(options[OPT_MATCHMODE].doesOccur) { | |
477 | if(0==strcmp(options[OPT_MATCHMODE].value, "noslash")) { | |
478 | pkg->setMatchMode(Package::MATCH_NOSLASH); | |
479 | } else { | |
480 | printUsage(pname, FALSE); | |
481 | return U_ILLEGAL_ARGUMENT_ERROR; | |
482 | } | |
483 | } | |
484 | ||
485 | /* remove items */ | |
486 | if(options[OPT_REMOVE_LIST].doesOccur) { | |
487 | listPkg=readList(NULL, options[OPT_REMOVE_LIST].value, FALSE); | |
488 | if(listPkg!=NULL) { | |
489 | pkg->removeItems(*listPkg); | |
490 | delete listPkg; | |
491 | isModified=TRUE; | |
492 | } else { | |
493 | printUsage(pname, FALSE); | |
494 | return U_ILLEGAL_ARGUMENT_ERROR; | |
495 | } | |
496 | } | |
497 | ||
498 | /* | |
499 | * add items | |
500 | * use a separate Package so that its memory and items stay around | |
501 | * as long as the main Package | |
502 | */ | |
503 | if(options[OPT_ADD_LIST].doesOccur) { | |
504 | listPkg=readList(sourcePath, options[OPT_ADD_LIST].value, TRUE); | |
505 | if(listPkg!=NULL) { | |
506 | pkg->addItems(*listPkg); | |
507 | delete listPkg; | |
508 | isModified=TRUE; | |
509 | } else { | |
510 | printUsage(pname, FALSE); | |
511 | return U_ILLEGAL_ARGUMENT_ERROR; | |
512 | } | |
513 | } | |
514 | ||
515 | /* extract items */ | |
516 | if(options[OPT_EXTRACT_LIST].doesOccur) { | |
517 | listPkg=readList(NULL, options[OPT_EXTRACT_LIST].value, FALSE); | |
518 | if(listPkg!=NULL) { | |
519 | pkg->extractItems(destPath, *listPkg, outType); | |
520 | delete listPkg; | |
521 | } else { | |
522 | printUsage(pname, FALSE); | |
523 | return U_ILLEGAL_ARGUMENT_ERROR; | |
524 | } | |
525 | } | |
526 | ||
527 | /* list items */ | |
528 | if(options[OPT_LIST_ITEMS].doesOccur) { | |
529 | pkg->listItems(stdout); | |
530 | } | |
531 | ||
532 | /* check dependencies between items */ | |
533 | if(!pkg->checkDependencies()) { | |
534 | /* some dependencies are not fulfilled */ | |
535 | return U_MISSING_RESOURCE_ERROR; | |
536 | } | |
537 | ||
538 | /* write the output .dat package if there are any modifications */ | |
539 | if(isModified) { | |
540 | char outFilenameBuffer[1024]; // for auto-generated output filename, if necessary | |
541 | ||
542 | if(outFilename==NULL || outFilename[0]==0) { | |
543 | if(inFilename==NULL || inFilename[0]==0) { | |
544 | fprintf(stderr, "icupkg: unable to auto-generate an output filename if there is no input filename\n"); | |
545 | exit(U_ILLEGAL_ARGUMENT_ERROR); | |
546 | } | |
547 | ||
548 | /* | |
549 | * auto-generate a filename: | |
550 | * copy the inFilename, | |
551 | * and if the last basename character matches the input file's type, | |
552 | * then replace it with the output file's type | |
553 | */ | |
554 | char suffix[6]="?.dat"; | |
555 | char *s; | |
556 | ||
557 | suffix[0]=pkg->getInType(); | |
558 | strcpy(outFilenameBuffer, inFilename); | |
559 | s=strchr(outFilenameBuffer, 0); | |
560 | if((s-outFilenameBuffer)>5 && 0==memcmp(s-5, suffix, 5)) { | |
561 | *(s-5)=outType; | |
562 | } | |
563 | outFilename=outFilenameBuffer; | |
564 | } | |
565 | pkg->writePackage(outFilename, outType, outComment); | |
566 | } | |
567 | ||
568 | delete pkg; | |
569 | return 0; | |
570 | } | |
571 | ||
572 | /* | |
573 | * Hey, Emacs, please set the following: | |
574 | * | |
575 | * Local Variables: | |
576 | * indent-tabs-mode: nil | |
577 | * End: | |
578 | * | |
579 | */ |