]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/wrtxml.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / tools / genrb / wrtxml.cpp
CommitLineData
46f4442e
A
1/*
2*******************************************************************************
3*
4388f060 4* Copyright (C) 2002-2012, International Business Machines
46f4442e
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
729e4ab9 9* File wrtxml.cpp
46f4442e
A
10*
11* Modification History:
12*
13* Date Name Description
14* 10/01/02 Ram Creation.
15* 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform
16*
17*******************************************************************************
18*/
19#include "reslist.h"
20#include "unewdata.h"
21#include "unicode/ures.h"
22#include "errmsg.h"
23#include "filestrm.h"
24#include "cstring.h"
25#include "unicode/ucnv.h"
26#include "genrb.h"
27#include "rle.h"
28#include "ucol_tok.h"
29#include "uhash.h"
30#include "uresimp.h"
31#include "unicode/ustring.h"
32#include "unicode/uchar.h"
33#include "ustr.h"
34#include "prscmnts.h"
35#include "unicode/unistr.h"
51004dcb
A
36#include "unicode/utf8.h"
37#include "unicode/utf16.h"
46f4442e
A
38#include <time.h>
39
40U_NAMESPACE_USE
41
/* Current nesting depth; write_tabs() emits tabCount + 1 indentation units. */
static int tabCount = 0;

/* Stream that the writer functions in this file emit their output to. */
static FileStream* out=NULL;
/* Bundle being written; assigned by bundle_write_xml(). */
static struct SRBRoot* srBundle ;
/* Output directory; assigned by bundle_write_xml(). */
static const char* outDir = NULL;
/* NOTE(review): enc and conv are not referenced in the visible part of this file - confirm they are still needed. */
static const char* enc ="";
static UConverter* conv = NULL;

const char* const* ISOLanguages;
const char* const* ISOCountries;
/* Extension expected on the input resource file name and extension given to the generated XLIFF file. */
const char* textExt = ".txt";
const char* xliffExt = ".xlf";
54
729e4ab9
A
55static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
56{
57 UErrorCode status = U_ZERO_ERROR;
46f4442e 58 int32_t len = 0;
46f4442e 59
729e4ab9
A
60 // preflight to get the destination buffer size
61 u_strToUTF8(NULL,
62 0,
63 &len,
64 outString.getBuffer(),
65 outString.length(),
66 &status);
67
68 // allocate the buffer
69 char* dest = (char*)uprv_malloc(len);
70 status = U_ZERO_ERROR;
71
72 // convert the data
73 u_strToUTF8(dest,
74 len,
75 &len,
76 outString.getBuffer(),
77 outString.length(),
78 &status);
79
80 // write data to out file
81 int32_t ret = T_FileStream_write(fileStream, dest, len);
46f4442e 82 uprv_free(dest);
729e4ab9 83 return (ret);
46f4442e
A
84}
85
46f4442e 86/*write indentation for formatting*/
729e4ab9 87static void write_tabs(FileStream* os){
46f4442e
A
88 int i=0;
89 for(;i<=tabCount;i++){
729e4ab9 90 write_utf8_file(os,UnicodeString(" "));
46f4442e
A
91 }
92}
93
94/*get ID for each element. ID is globally unique.*/
729e4ab9 95static char* getID(const char* id, const char* curKey, char* result) {
46f4442e
A
96 if(curKey == NULL) {
97 result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
98 uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
99 uprv_strcpy(result, id);
100 } else {
101 result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
102 uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
103 if(id[0]!='\0'){
104 uprv_strcpy(result, id);
105 uprv_strcat(result, "_");
106 }
107 uprv_strcat(result, curKey);
108 }
109 return result;
110}
111
112/*compute CRC for binary code*/
113/* The code is from http://www.theorem.com/java/CRC32.java
114 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
115 * <P> This check is used in numerous systems to verify the integrity
116 * of information. It's also used as a hashing function. Unlike a regular
117 * checksum, it's sensitive to the order of the characters.
118 * It produces a 32 bit
119 *
120 * @author Michael Lecuyer (mjl@theorem.com)
121 * @version 1.1 August 11, 1998
122 */
123
124/* ICU is not endian portable, because ICU data generated on big endian machines can be
125 * ported to big endian machines but not to little endian machines and vice versa. The
126 * conversion is not portable across platforms with different endianess.
127 */
128
#define CRC32_POLYNOMIAL 0xEDB88320

/*
 * Fold len bytes starting at ptr into a running CRC-32 (reflected polynomial
 * 0xEDB88320) and return the updated value.
 *
 * lastcrc is the running value from a previous call, so a buffer can be fed
 * in pieces. No initial or final inversion is applied here; the caller picks
 * the seed. The update is done bit by bit, which yields the same value as a
 * 256-entry table lookup without rebuilding the table on every call.
 */
uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc){
    uint32_t crc = lastcrc;

    while(len-- != 0) {
        int bit;

        /* go through unsigned char so a negative char cannot sign-extend into the CRC */
        crc ^= (uint32_t)(unsigned char)*ptr;
        ptr++;

        for (bit = 0; bit < 8; bit++) {
            if ((crc & 1) != 0) {
                crc = (crc >> 1) ^ CRC32_POLYNOMIAL;
            } else {
                crc >>= 1;
            }
        }
    }
    return crc;
}
163
/* Replace, in place, every occurrence of s with r among the first srcLen chars of src. */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    for(; srcLen > 0; srcLen--, src++){
        if(*src == s){
            *src = r;
        }
    }
}
172/* Parse the filename, and get its language information.
173 * If it fails to get the language information from the filename,
174 * use "en" as the default value for language
175 */
729e4ab9 176static char* parseFilename(const char* id, char* /*lang*/) {
46f4442e
A
177 int idLen = (int) uprv_strlen(id);
178 char* localeID = (char*) uprv_malloc(idLen);
179 int pos = 0;
180 int canonCapacity = 0;
181 char* canon = NULL;
182 int canonLen = 0;
183 /*int i;*/
184 UErrorCode status = U_ZERO_ERROR;
185 const char *ext = uprv_strchr(id, '.');
186
187 if(ext != NULL){
188 pos = (int) (ext - id);
189 } else {
190 pos = idLen;
191 }
192 uprv_memcpy(localeID, id, pos);
193 localeID[pos]=0; /* NUL terminate the string */
729e4ab9 194
46f4442e
A
195 canonCapacity =pos*3;
196 canon = (char*) uprv_malloc(canonCapacity);
197 canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
198
199 if(U_FAILURE(status)){
200 fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
201 exit(status);
202 }
203 strnrepchr(canon, canonLen, '_', '-');
204 return canon;
205}
206
/* XML declaration; the literal declares the encoding as utf-8. */
static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
#if 0
/* XLIFF 1.2 root element; compiled out in favor of the 1.1 variant below. */
static const char* bundleStart = "<xliff version = \"1.2\" "
                                 "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
                                 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
#else
/* XLIFF 1.1 root element; this is the variant that is compiled in. */
static const char* bundleStart = "<xliff version = \"1.1\" "
                                 "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
                                 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
                                 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
#endif
/* Closing tag matching bundleStart. */
static const char* bundleEnd = "</xliff>\n";

/* Forward declaration: the array and table writers below call back into this dispatcher. */
void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
46f4442e
A
222
223static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
224 const UChar* src, int32_t srcLen, UErrorCode* status){
225 int32_t srcIndex=0;
226 char* dest=NULL;
227 char* temp=NULL;
228 int32_t destLen=0;
229 UChar32 c = 0;
230
231 if(status==NULL || U_FAILURE(*status) || pDest==NULL || srcLen==0 || src == NULL){
232 return NULL;
233 }
234 dest =*pDest;
235 if(dest==NULL || destCap <=0){
236 destCap = srcLen * 8;
237 dest = (char*) uprv_malloc(sizeof(char) * destCap);
238 if(dest==NULL){
239 *status=U_MEMORY_ALLOCATION_ERROR;
240 return NULL;
241 }
242 }
243
244 dest[0]=0;
245
246 while(srcIndex<srcLen){
247 U16_NEXT(src, srcIndex, srcLen, c);
248
249 if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
250 *status = U_ILLEGAL_CHAR_FOUND;
251 fprintf(stderr, "Illegal Surrogate! \n");
252 uprv_free(dest);
253 return NULL;
254 }
255
51004dcb 256 if((destLen+U8_LENGTH(c)) < destCap){
46f4442e
A
257
258 /* ASCII Range */
259 if(c <=0x007F){
260 switch(c) {
261 case '\x26':
262 uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
263 destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
264 break;
265 case '\x3c':
266 uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
267 destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
268 break;
269 case '\x3e':
270 uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
271 destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
272 break;
273 case '\x22':
274 uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
275 destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
276 break;
277 case '\x27':
278 uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
279 destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
280 break;
281
282 /* Disallow C0 controls except TAB, CR, LF*/
283 case 0x00:
284 case 0x01:
285 case 0x02:
286 case 0x03:
287 case 0x04:
288 case 0x05:
289 case 0x06:
290 case 0x07:
291 case 0x08:
292 /*case 0x09:*/
293 /*case 0x0A: */
294 case 0x0B:
295 case 0x0C:
296 /*case 0x0D:*/
297 case 0x0E:
298 case 0x0F:
299 case 0x10:
300 case 0x11:
301 case 0x12:
302 case 0x13:
303 case 0x14:
304 case 0x15:
305 case 0x16:
306 case 0x17:
307 case 0x18:
308 case 0x19:
309 case 0x1A:
310 case 0x1B:
311 case 0x1C:
312 case 0x1D:
313 case 0x1E:
314 case 0x1F:
315 *status = U_ILLEGAL_CHAR_FOUND;
316 fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
317 uprv_free(dest);
318 return NULL;
319 default:
320 dest[destLen++]=(char)c;
321 }
322 }else{
323 UBool isError = FALSE;
324 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
325 if(isError){
326 *status = U_ILLEGAL_CHAR_FOUND;
327 fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
328 uprv_free(dest);
329 return NULL;
330 }
331 }
332 }else{
333 destCap += destLen;
334
335 temp = (char*) uprv_malloc(sizeof(char)*destCap);
336 if(temp==NULL){
337 *status=U_MEMORY_ALLOCATION_ERROR;
338 uprv_free(dest);
339 return NULL;
340 }
341 uprv_memmove(temp,dest,destLen);
342 destLen=0;
343 uprv_free(dest);
344 dest=temp;
345 temp=NULL;
346 }
347
348 }
349 *destLength = destLen;
350 return dest;
351}
352
#define ASTERISK 0x002A
#define SPACE 0x0020
#define CR 0x000D /* carriage return */
#define LF 0x000A /* line feed */
#define AT_SIGN 0x0040

/*
 * Strip trailing asterisks, spaces, carriage returns and line feeds.
 *
 * Every stripped position in *src is overwritten with 0 and *len is updated
 * to the new length. Nothing happens when src, *src or len is NULL.
 */
static void
trim(char **src, int32_t *len){
    char *s = NULL;
    int32_t i = 0;

    if(src == NULL || *src == NULL || len == NULL){
        return;
    }
    s = *src;

    /* trim from the end */
    i = *len - 1;
    while(i >= 0 && (s[i] == ASTERISK || s[i] == SPACE || s[i] == CR || s[i] == LF)){
        s[i] = 0;
        i--;
    }
    *len = i+1;
}
385
386static void
729e4ab9 387print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd, UErrorCode *status){
46f4442e
A
388 int32_t bufCapacity = srcLen*4;
389 char *buf = NULL;
390 int32_t bufLen = 0;
391
392 if(U_FAILURE(*status)){
393 return;
394 }
395
396 buf = (char*) (uprv_malloc(bufCapacity));
397 if(buf==0){
398 fprintf(stderr, "Could not allocate memory!!");
399 exit(U_MEMORY_ALLOCATION_ERROR);
400 }
401 buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
402 if(U_SUCCESS(*status)){
403 trim(&buf,&bufLen);
729e4ab9
A
404 write_utf8_file(out,UnicodeString(tagStart));
405 write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
406 write_utf8_file(out,UnicodeString(tagEnd));
407 write_utf8_file(out,UnicodeString("\n"));
46f4442e
A
408
409 }
410}
411static void
729e4ab9 412printNoteElements(struct UString *src, UErrorCode *status){
46f4442e
A
413
414#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
415
416 int32_t capacity = 0;
417 UChar* note = NULL;
418 int32_t noteLen = 0;
419 int32_t count = 0,i;
420
421 if(src == NULL){
422 return;
423 }
424
425 capacity = src->fLength;
426 note = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
427
428 count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
429 if(U_FAILURE(*status)){
430 uprv_free(note);
431 return;
432 }
433 for(i=0; i < count; i++){
434 noteLen = getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
435 if(U_FAILURE(*status)){
436 uprv_free(note);
437 return;
438 }
439 if(noteLen > 0){
729e4ab9
A
440 write_tabs(out);
441 print(note, noteLen,"<note>", "</note>", status);
46f4442e
A
442 }
443 }
444 uprv_free(note);
445#else
729e4ab9 446
46f4442e
A
447 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
448
449#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
450
451}
452
729e4ab9 453static void printAttribute(const char *name, const char *value, int32_t /*len*/)
46f4442e 454{
729e4ab9
A
455 write_utf8_file(out, UnicodeString(" "));
456 write_utf8_file(out, UnicodeString(name));
457 write_utf8_file(out, UnicodeString(" = \""));
458 write_utf8_file(out, UnicodeString(value));
459 write_utf8_file(out, UnicodeString("\""));
46f4442e
A
460}
461
729e4ab9 462static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
46f4442e 463{
729e4ab9
A
464 write_utf8_file(out, UnicodeString(" "));
465 write_utf8_file(out, UnicodeString(name));
466 write_utf8_file(out, UnicodeString(" = \""));
467 write_utf8_file(out, value);
468 write_utf8_file(out, UnicodeString("\""));
46f4442e
A
469}
470
471static void
729e4ab9 472printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
46f4442e
A
473
474#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
475
476 if(status==NULL || U_FAILURE(*status)){
477 return;
478 }
479
480 int32_t capacity = src->fLength + 1;
481 char* buf = NULL;
482 int32_t bufLen = 0;
483 UChar* desc = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
484 UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
485
486 int32_t descLen = 0, transLen=0;
487 if(desc==NULL || trans==NULL){
488 *status = U_MEMORY_ALLOCATION_ERROR;
489 uprv_free(desc);
490 uprv_free(trans);
491 return;
492 }
493 src->fLength = removeCmtText(src->fChars, src->fLength, status);
494 descLen = getDescription(src->fChars,src->fLength, &desc, capacity, status);
495 transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
496
497 /* first print translate attribute */
498 if(transLen > 0){
499 if(printTranslate){
500 /* print translate attribute */
501 buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
502 if(U_SUCCESS(*status)){
729e4ab9
A
503 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
504 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
505 }
506 }else if(getShowWarning()){
507 fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
508 /* no translate attribute .. just close the tag */
729e4ab9 509 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
510 }
511 }else{
512 /* no translate attribute .. just close the tag */
729e4ab9 513 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
514 }
515
516 if(descLen > 0){
729e4ab9
A
517 write_tabs(out);
518 print(desc, descLen, "<!--", "-->", status);
46f4442e
A
519 }
520
521 uprv_free(desc);
522 uprv_free(trans);
523#else
524
525 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
526
527#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
528
529}
530
531/*
532 * Print out a containing element, like:
533 * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
534 * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
535 */
729e4ab9 536static char *printContainer(struct SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
46f4442e 537{
729e4ab9
A
538 char resKeyBuffer[8];
539 const char *resname = NULL;
46f4442e
A
540 char *sid = NULL;
541
729e4ab9 542 write_tabs(out);
46f4442e 543
729e4ab9
A
544 resname = res_getKeyString(srBundle, res, resKeyBuffer);
545 if (resname != NULL && *resname != 0) {
46f4442e
A
546 sid = getID(id, resname, sid);
547 } else {
548 sid = getID(id, NULL, sid);
549 }
550
729e4ab9
A
551 write_utf8_file(out, UnicodeString("<"));
552 write_utf8_file(out, UnicodeString(container));
553 printAttribute("id", sid, (int32_t) uprv_strlen(sid));
46f4442e
A
554
555 if (resname != NULL) {
729e4ab9 556 printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
46f4442e
A
557 }
558
559 if (mimetype != NULL) {
729e4ab9 560 printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
46f4442e
A
561 }
562
563 if (restype != NULL) {
729e4ab9 564 printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
46f4442e
A
565 }
566
567 tabCount += 1;
568 if (res->fComment.fLength > 0) {
569 /* printComments will print the closing ">\n" */
729e4ab9 570 printComments(&res->fComment, resname, TRUE, status);
46f4442e 571 } else {
729e4ab9 572 write_utf8_file(out, UnicodeString(">\n"));
46f4442e
A
573 }
574
575 return sid;
576}
577
578/* Writing Functions */
579
/* Element names are passed to printContainer(), which adds the '<' itself;
 * the close_* strings are complete closing tags including the newline. */
static const char *trans_unit = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source = "<source>";
static const char *close_source = "</source>\n";
static const char *group = "group";
static const char *close_group = "</group>\n";

static const char *bin_unit = "bin-unit";
static const char *close_bin_unit = "</bin-unit>\n";
static const char *bin_source = "<bin-source>\n";
static const char *close_bin_source = "</bin-source>\n";
/* Left open on purpose: the writers append attributes before closing the tag. */
static const char *external_file = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

/* Default mime-type for binary resources. */
static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per ICU resource type. */
static const char *alias_restype = "x-icu-alias";
static const char *array_restype = "x-icu-array";
static const char *binary_restype = "x-icu-binary";
static const char *integer_restype = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype = "x-icu-table";
604
605static void
729e4ab9 606string_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
607
608 char *sid = NULL;
609 char* buf = NULL;
610 int32_t bufLen = 0;
611
612 if(status==NULL || U_FAILURE(*status)){
613 return;
614 }
615
729e4ab9 616 sid = printContainer(res, trans_unit, NULL, NULL, id, status);
46f4442e 617
729e4ab9 618 write_tabs(out);
46f4442e 619
729e4ab9 620 write_utf8_file(out, UnicodeString(source));
46f4442e
A
621
622 buf = convertAndEscape(&buf, 0, &bufLen, res->u.fString.fChars, res->u.fString.fLength, status);
623
624 if (U_FAILURE(*status)) {
625 return;
626 }
627
729e4ab9
A
628 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
629 write_utf8_file(out, UnicodeString(close_source));
46f4442e 630
729e4ab9 631 printNoteElements(&res->fComment, status);
46f4442e
A
632
633 tabCount -= 1;
729e4ab9 634 write_tabs(out);
46f4442e 635
729e4ab9 636 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
637
638 uprv_free(buf);
639 uprv_free(sid);
640}
641
642static void
729e4ab9 643alias_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
644 char *sid = NULL;
645 char* buf = NULL;
646 int32_t bufLen=0;
647
729e4ab9 648 sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
46f4442e 649
729e4ab9 650 write_tabs(out);
46f4442e 651
729e4ab9 652 write_utf8_file(out, UnicodeString(source));
46f4442e
A
653
654 buf = convertAndEscape(&buf, 0, &bufLen, res->u.fString.fChars, res->u.fString.fLength, status);
729e4ab9 655
46f4442e
A
656 if(U_FAILURE(*status)){
657 return;
658 }
729e4ab9
A
659 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
660 write_utf8_file(out, UnicodeString(close_source));
46f4442e 661
729e4ab9 662 printNoteElements(&res->fComment, status);
46f4442e
A
663
664 tabCount -= 1;
729e4ab9 665 write_tabs(out);
46f4442e 666
729e4ab9 667 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
668
669 uprv_free(buf);
670 uprv_free(sid);
671}
672
673static void
729e4ab9 674array_write_xml(struct SResource *res, const char* id, const char* language, UErrorCode *status) {
46f4442e
A
675 char* sid = NULL;
676 int index = 0;
677
678 struct SResource *current = NULL;
46f4442e 679
729e4ab9 680 sid = printContainer(res, group, array_restype, NULL, id, status);
46f4442e
A
681
682 current = res->u.fArray.fFirst;
46f4442e
A
683
684 while (current != NULL) {
685 char c[256] = {0};
686 char* subId = NULL;
687
688 itostr(c, index, 10, 0);
689 index += 1;
690 subId = getID(sid, c, subId);
691
729e4ab9 692 res_write_xml(current, subId, language, FALSE, status);
46f4442e
A
693 uprv_free(subId);
694 subId = NULL;
695
696 if(U_FAILURE(*status)){
697 return;
698 }
699
700 current = current->fNext;
701 }
702
703 tabCount -= 1;
729e4ab9
A
704 write_tabs(out);
705 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
706
707 uprv_free(sid);
708}
709
710static void
729e4ab9 711intvector_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
712 char* sid = NULL;
713 char* ivd = NULL;
714 uint32_t i=0;
715 uint32_t len=0;
716 char buf[256] = {'0'};
717
729e4ab9 718 sid = printContainer(res, group, intvector_restype, NULL, id, status);
46f4442e
A
719
720 for(i = 0; i < res->u.fIntVector.fCount; i += 1) {
721 char c[256] = {0};
722
723 itostr(c, i, 10, 0);
724 ivd = getID(sid, c, ivd);
725 len = itostr(buf, res->u.fIntVector.fArray[i], 10, 0);
726
729e4ab9
A
727 write_tabs(out);
728 write_utf8_file(out, UnicodeString("<"));
729 write_utf8_file(out, UnicodeString(trans_unit));
46f4442e 730
729e4ab9
A
731 printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
732 printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
46f4442e 733
729e4ab9 734 write_utf8_file(out, UnicodeString(">\n"));
46f4442e
A
735
736 tabCount += 1;
729e4ab9
A
737 write_tabs(out);
738 write_utf8_file(out, UnicodeString(source));
46f4442e 739
729e4ab9 740 write_utf8_file(out, UnicodeString(buf, len));
46f4442e 741
729e4ab9 742 write_utf8_file(out, UnicodeString(close_source));
46f4442e 743 tabCount -= 1;
729e4ab9
A
744 write_tabs(out);
745 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
746
747 uprv_free(ivd);
748 ivd = NULL;
749 }
750
751 tabCount -= 1;
729e4ab9 752 write_tabs(out);
46f4442e 753
729e4ab9 754 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
755 uprv_free(sid);
756 sid = NULL;
757}
758
759static void
729e4ab9 760int_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
761 char* sid = NULL;
762 char buf[256] = {0};
763 uint32_t len = 0;
764
729e4ab9 765 sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
46f4442e 766
729e4ab9 767 write_tabs(out);
46f4442e 768
729e4ab9 769 write_utf8_file(out, UnicodeString(source));
46f4442e
A
770
771 len = itostr(buf, res->u.fIntValue.fValue, 10, 0);
729e4ab9 772 write_utf8_file(out, UnicodeString(buf, len));
46f4442e 773
729e4ab9 774 write_utf8_file(out, UnicodeString(close_source));
46f4442e 775
729e4ab9 776 printNoteElements(&res->fComment, status);
46f4442e
A
777
778 tabCount -= 1;
729e4ab9 779 write_tabs(out);
46f4442e 780
729e4ab9 781 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
782
783 uprv_free(sid);
784 sid = NULL;
785}
786
787static void
729e4ab9 788bin_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
789 const char* m_type = application_mimetype;
790 char* sid = NULL;
791 uint32_t crc = 0xFFFFFFFF;
792
793 char fileName[1024] ={0};
794 int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
795 char* fn = (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
796 (res->u.fBinaryValue.fFileName !=NULL ?
797 uprv_strlen(res->u.fBinaryValue.fFileName) :0)));
798 const char* ext = NULL;
799
800 char* f = NULL;
801
802 fn[0]=0;
803
804 if(res->u.fBinaryValue.fFileName != NULL){
805 uprv_strcpy(fileName, res->u.fBinaryValue.fFileName);
806 f = uprv_strrchr(fileName, '\\');
807
808 if (f != NULL) {
809 f++;
810 } else {
811 f = fileName;
812 }
813
814 ext = uprv_strrchr(fileName, '.');
815
816 if (ext == NULL) {
817 fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
818 exit(U_ILLEGAL_ARGUMENT_ERROR);
819 }
820
821 if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
822 m_type = "image";
823 } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
824 m_type = "audio";
825 } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
826 m_type = "video";
827 } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
828 m_type = "text";
829 }
830
729e4ab9 831 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
46f4442e 832
729e4ab9 833 write_tabs(out);
46f4442e 834
729e4ab9 835 write_utf8_file(out, UnicodeString(bin_source));
46f4442e
A
836
837 tabCount+= 1;
729e4ab9 838 write_tabs(out);
46f4442e 839
729e4ab9
A
840 write_utf8_file(out, UnicodeString(external_file));
841 printAttribute("href", f, (int32_t)uprv_strlen(f));
842 write_utf8_file(out, UnicodeString("/>\n"));
46f4442e 843 tabCount -= 1;
729e4ab9 844 write_tabs(out);
46f4442e 845
729e4ab9 846 write_utf8_file(out, UnicodeString(close_bin_source));
46f4442e 847
729e4ab9 848 printNoteElements(&res->fComment, status);
46f4442e 849 tabCount -= 1;
729e4ab9
A
850 write_tabs(out);
851 write_utf8_file(out, UnicodeString(close_bin_unit));
46f4442e
A
852 } else {
853 char temp[256] = {0};
854 uint32_t i = 0;
855 int32_t len=0;
856
729e4ab9 857 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
46f4442e 858
729e4ab9
A
859 write_tabs(out);
860 write_utf8_file(out, UnicodeString(bin_source));
46f4442e
A
861
862 tabCount += 1;
729e4ab9 863 write_tabs(out);
46f4442e 864
729e4ab9
A
865 write_utf8_file(out, UnicodeString(internal_file));
866 printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
46f4442e
A
867
868 while(i <res->u.fBinaryValue.fLength){
869 len = itostr(temp, res->u.fBinaryValue.fData[i], 16, 2);
870 crc = computeCRC(temp, len, crc);
871 i++;
872 }
873
874 len = itostr(temp, crc, 10, 0);
729e4ab9 875 printAttribute("crc", temp, len);
46f4442e 876
729e4ab9 877 write_utf8_file(out, UnicodeString(">"));
46f4442e
A
878
879 i = 0;
880 while(i <res->u.fBinaryValue.fLength){
881 len = itostr(temp, res->u.fBinaryValue.fData[i], 16, 2);
729e4ab9 882 write_utf8_file(out, UnicodeString(temp));
46f4442e
A
883 i += 1;
884 }
885
729e4ab9 886 write_utf8_file(out, UnicodeString(close_internal_file));
46f4442e
A
887
888 tabCount -= 2;
729e4ab9 889 write_tabs(out);
46f4442e 890
729e4ab9
A
891 write_utf8_file(out, UnicodeString(close_bin_source));
892 printNoteElements(&res->fComment, status);
46f4442e
A
893
894 tabCount -= 1;
729e4ab9
A
895 write_tabs(out);
896 write_utf8_file(out, UnicodeString(close_bin_unit));
46f4442e
A
897
898 uprv_free(sid);
899 sid = NULL;
900 }
901
902 uprv_free(fn);
903}
904
905
906
907static void
729e4ab9 908table_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
46f4442e
A
909
910 uint32_t i = 0;
911
912 struct SResource *current = NULL;
46f4442e
A
913 char* sid = NULL;
914
915 if (U_FAILURE(*status)) {
916 return ;
917 }
918
729e4ab9 919 sid = printContainer(res, group, table_restype, NULL, id, status);
46f4442e
A
920
921 if(isTopLevel) {
922 sid[0] = '\0';
923 }
924
4388f060 925 current = res->u.fTable.fFirst;
46f4442e
A
926 i = 0;
927
928 while (current != NULL) {
729e4ab9 929 res_write_xml(current, sid, language, FALSE, status);
46f4442e
A
930
931 if(U_FAILURE(*status)){
932 return;
933 }
934
935 i += 1;
936 current = current->fNext;
937 }
938
939 tabCount -= 1;
729e4ab9 940 write_tabs(out);
46f4442e 941
729e4ab9 942 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
943
944 uprv_free(sid);
945 sid = NULL;
946}
947
948void
729e4ab9 949res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
46f4442e
A
950
951 if (U_FAILURE(*status)) {
952 return ;
953 }
954
955 if (res != NULL) {
956 switch (res->fType) {
957 case URES_STRING:
729e4ab9 958 string_write_xml (res, id, language, status);
46f4442e
A
959 return;
960
961 case URES_ALIAS:
729e4ab9 962 alias_write_xml (res, id, language, status);
46f4442e
A
963 return;
964
965 case URES_INT_VECTOR:
729e4ab9 966 intvector_write_xml (res, id, language, status);
46f4442e
A
967 return;
968
969 case URES_BINARY:
729e4ab9 970 bin_write_xml (res, id, language, status);
46f4442e
A
971 return;
972
973 case URES_INT:
729e4ab9 974 int_write_xml (res, id, language, status);
46f4442e
A
975 return;
976
977 case URES_ARRAY:
729e4ab9 978 array_write_xml (res, id, language, status);
46f4442e
A
979 return;
980
981 case URES_TABLE:
729e4ab9 982 table_write_xml (res, id, language, isTopLevel, status);
46f4442e
A
983 return;
984
985 default:
986 break;
987 }
988 }
989
990 *status = U_INTERNAL_PROGRAM_ERROR;
991}
992
993void
994bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
995 char *writtenFilename, int writtenFilenameLen,
996 const char* language, const char* outFileName, UErrorCode *status) {
997
46f4442e
A
998 char* xmlfileName = NULL;
999 char* outputFileName = NULL;
1000 char* originalFileName = NULL;
1001 const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
1002 const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
1003 const char* file2 = "original = \"";
1004 const char* file4 = "\" date = \"";
1005 const char* fileEnd = "</file>\n";
1006 const char* headerStart = "<header>\n";
1007 const char* headerEnd = "</header>\n";
1008 const char* bodyStart = "<body>\n";
1009 const char* bodyEnd = "</body>\n";
1010
1011 const char *tool_start = "<tool";
1012 const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
1013 const char *tool_name = "genrb";
1014
1015 char* temp = NULL;
1016 char* lang = NULL;
1017 const char* pos = NULL;
1018 int32_t first, index;
1019 time_t currTime;
1020 char timeBuf[128];
1021
1022 outDir = outputDir;
1023
1024 srBundle = bundle;
1025
1026 pos = uprv_strrchr(filename, '\\');
1027 if(pos != NULL) {
1028 first = (int32_t)(pos - filename + 1);
1029 } else {
1030 first = 0;
1031 }
1032 index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
1033 originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
1034 uprv_memset(originalFileName, 0, sizeof(char)*index+1);
1035 uprv_strncpy(originalFileName, filename + first, index);
1036
1037 if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
1038 fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
1039 }
1040
1041 temp = originalFileName;
1042 originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1043 uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1044 uprv_strcat(originalFileName, temp);
1045 uprv_strcat(originalFileName, textExt);
1046 uprv_free(temp);
1047 temp = NULL;
1048
1049
1050 if (language == NULL) {
1051/* lang = parseFilename(filename, lang);
1052 if (lang == NULL) {*/
1053 /* now check if locale name is valid or not
1054 * this is to cater for situation where
1055 * pegasusServer.txt contains
1056 *
1057 * en{
1058 * ..
1059 * }
1060 */
1061 lang = parseFilename(srBundle->fLocale, lang);
1062 /*
1063 * If neither the file name nor the table name inside the
1064 * txt file contains valid country and language codes,
1065 * report an error. For example:
1066 * pegasusServer.txt contains
1067 *
1068 * testelements{
1069 * ....
1070 * }
1071 */
1072 if(lang==NULL){
1073 fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
1074 exit(U_ILLEGAL_ARGUMENT_ERROR);
1075 }
1076 /* }*/
1077 } else {
1078 lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
1079 uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
1080 uprv_strcpy(lang, language);
1081 }
1082
1083 if(outFileName) {
1084 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
1085 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
1086 uprv_strcpy(outputFileName,outFileName);
1087 } else {
1088 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1089 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1090 uprv_strcpy(outputFileName,srBundle->fLocale);
1091 }
1092
1093 if(outputDir) {
1094 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1095 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1096 } else {
1097 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1098 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1099 }
1100
1101 if(outputDir){
1102 uprv_strcpy(xmlfileName, outputDir);
1103 if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
1104 uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
1105 }
1106 }
1107 uprv_strcat(xmlfileName,outputFileName);
1108 uprv_strcat(xmlfileName,xliffExt);
1109
1110 if (writtenFilename) {
1111 uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
1112 }
1113
1114 if (U_FAILURE(*status)) {
1115 goto cleanup_bundle_write_xml;
1116 }
1117
1118 out= T_FileStream_open(xmlfileName,"w");
1119
1120 if(out==NULL){
1121 *status = U_FILE_ACCESS_ERROR;
1122 goto cleanup_bundle_write_xml;
1123 }
729e4ab9 1124 write_utf8_file(out, xmlHeader);
46f4442e
A
1125
1126 if(outputEnc && *outputEnc!='\0'){
1127 /* store the output encoding */
1128 enc = outputEnc;
1129 conv=ucnv_open(enc,status);
1130 if(U_FAILURE(*status)){
1131 goto cleanup_bundle_write_xml;
1132 }
1133 }
729e4ab9
A
1134 write_utf8_file(out, bundleStart);
1135 write_tabs(out);
1136 write_utf8_file(out, fileStart);
46f4442e
A
1137 /* warn if a language was explicitly specified and it differs from the bundle's top-level locale tag */
1138 if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
1139 fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
1140 }
729e4ab9
A
1141 write_utf8_file(out, UnicodeString(lang));
1142 write_utf8_file(out, UnicodeString(file1));
1143 write_utf8_file(out, UnicodeString(file2));
1144 write_utf8_file(out, UnicodeString(originalFileName));
1145 write_utf8_file(out, UnicodeString(file4));
46f4442e
A
1146
1147 time(&currTime);
1148 strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
729e4ab9
A
1149 write_utf8_file(out, UnicodeString(timeBuf));
1150 write_utf8_file(out, UnicodeString("\">\n"));
46f4442e
A
1151
1152 tabCount += 1;
729e4ab9
A
1153 write_tabs(out);
1154 write_utf8_file(out, headerStart);
46f4442e
A
1155
1156 tabCount += 1;
729e4ab9 1157 write_tabs(out);
46f4442e 1158
729e4ab9
A
1159 write_utf8_file(out, tool_start);
1160 printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
1161 printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
1162 write_utf8_file(out, UnicodeString("/>\n"));
46f4442e
A
1163
1164 tabCount -= 1;
729e4ab9 1165 write_tabs(out);
46f4442e 1166
729e4ab9 1167 write_utf8_file(out, UnicodeString(headerEnd));
46f4442e 1168
729e4ab9 1169 write_tabs(out);
46f4442e
A
1170 tabCount += 1;
1171
729e4ab9 1172 write_utf8_file(out, UnicodeString(bodyStart));
46f4442e
A
1173
1174
729e4ab9 1175 res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
46f4442e
A
1176
1177 tabCount -= 1;
729e4ab9 1178 write_tabs(out);
46f4442e 1179
729e4ab9 1180 write_utf8_file(out, UnicodeString(bodyEnd));
46f4442e 1181 tabCount--;
729e4ab9
A
1182 write_tabs(out);
1183 write_utf8_file(out, UnicodeString(fileEnd));
46f4442e 1184 tabCount--;
729e4ab9
A
1185 write_tabs(out);
1186 write_utf8_file(out, UnicodeString(bundleEnd));
46f4442e
A
1187 T_FileStream_close(out);
1188
1189 ucnv_close(conv);
1190
1191cleanup_bundle_write_xml:
1192 uprv_free(originalFileName);
1193 uprv_free(lang);
1194 if(xmlfileName != NULL) {
1195 uprv_free(xmlfileName);
1196 }
1197 if(outputFileName != NULL){
1198 uprv_free(outputFileName);
1199 }
1200}