]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/wrtxml.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / tools / genrb / wrtxml.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
46f4442e
A
3/*
4*******************************************************************************
5*
2ca993e8 6* Copyright (C) 2002-2015, International Business Machines
46f4442e
A
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10*
729e4ab9 11* File wrtxml.cpp
46f4442e
A
12*
13* Modification History:
14*
15* Date Name Description
16* 10/01/02 Ram Creation.
17* 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform
18*
19*******************************************************************************
20*/
57a6839d
A
21
22// Safer use of UnicodeString.
23#ifndef UNISTR_FROM_CHAR_EXPLICIT
24# define UNISTR_FROM_CHAR_EXPLICIT explicit
25#endif
26
27// Less important, but still a good idea.
28#ifndef UNISTR_FROM_STRING_EXPLICIT
29# define UNISTR_FROM_STRING_EXPLICIT explicit
30#endif
31
46f4442e
A
32#include "reslist.h"
33#include "unewdata.h"
34#include "unicode/ures.h"
35#include "errmsg.h"
36#include "filestrm.h"
37#include "cstring.h"
38#include "unicode/ucnv.h"
39#include "genrb.h"
40#include "rle.h"
46f4442e
A
41#include "uhash.h"
42#include "uresimp.h"
43#include "unicode/ustring.h"
44#include "unicode/uchar.h"
45#include "ustr.h"
46#include "prscmnts.h"
47#include "unicode/unistr.h"
51004dcb
A
48#include "unicode/utf8.h"
49#include "unicode/utf16.h"
46f4442e
A
50#include <time.h>
51
52U_NAMESPACE_USE
53
54static int tabCount = 0;
55
56static FileStream* out=NULL;
57static struct SRBRoot* srBundle ;
58static const char* outDir = NULL;
59static const char* enc ="";
60static UConverter* conv = NULL;
61
62const char* const* ISOLanguages;
63const char* const* ISOCountries;
64const char* textExt = ".txt";
65const char* xliffExt = ".xlf";
66
729e4ab9
A
67static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
68{
69 UErrorCode status = U_ZERO_ERROR;
46f4442e 70 int32_t len = 0;
46f4442e 71
729e4ab9
A
72 // preflight to get the destination buffer size
73 u_strToUTF8(NULL,
74 0,
75 &len,
f3c0d7a5 76 toUCharPtr(outString.getBuffer()),
729e4ab9
A
77 outString.length(),
78 &status);
79
80 // allocate the buffer
81 char* dest = (char*)uprv_malloc(len);
82 status = U_ZERO_ERROR;
83
84 // convert the data
85 u_strToUTF8(dest,
86 len,
87 &len,
f3c0d7a5 88 toUCharPtr(outString.getBuffer()),
729e4ab9
A
89 outString.length(),
90 &status);
91
92 // write data to out file
93 int32_t ret = T_FileStream_write(fileStream, dest, len);
46f4442e 94 uprv_free(dest);
729e4ab9 95 return (ret);
46f4442e
A
96}
97
46f4442e 98/*write indentation for formatting*/
729e4ab9 99static void write_tabs(FileStream* os){
46f4442e
A
100 int i=0;
101 for(;i<=tabCount;i++){
729e4ab9 102 write_utf8_file(os,UnicodeString(" "));
46f4442e
A
103 }
104}
105
106/*get ID for each element. ID is globally unique.*/
729e4ab9 107static char* getID(const char* id, const char* curKey, char* result) {
46f4442e
A
108 if(curKey == NULL) {
109 result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
110 uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
111 uprv_strcpy(result, id);
112 } else {
113 result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
114 uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
115 if(id[0]!='\0'){
116 uprv_strcpy(result, id);
117 uprv_strcat(result, "_");
118 }
119 uprv_strcat(result, curKey);
120 }
121 return result;
122}
123
124/*compute CRC for binary code*/
125/* The code is from http://www.theorem.com/java/CRC32.java
126 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
127 * <P> This check is used in numerous systems to verify the integrity
128 * of information. It's also used as a hashing function. Unlike a regular
129 * checksum, it's sensitive to the order of the characters.
130 * It produces a 32 bit
131 *
132 * @author Michael Lecuyer (mjl@theorem.com)
133 * @version 1.1 August 11, 1998
134 */
135
136/* ICU is not endian portable, because ICU data generated on big endian machines can be
137 * ported to big endian machines but not to little endian machines and vice versa. The
138 * conversion is not portable across platforms with different endianess.
139 */
140
/*
 * Fold len bytes starting at ptr into a running CRC-32 value.
 *
 * The reflected polynomial 0xEDB88320 is used with a byte-wise lookup table.
 * No initial or final inversion is applied here: the caller supplies the
 * starting value in lastcrc and receives the raw register value back, so a
 * result can be fed into a following call to continue over more data.
 * With len == 0 the function returns lastcrc unchanged.
 *
 * All arithmetic is done on unsigned 32-bit values so that shifts and
 * conversions are fully defined.
 */
uint32_t computeCRC(const char *ptr, uint32_t len, uint32_t lastcrc){
    uint32_t table[256];
    uint32_t crc = lastcrc;
    uint32_t entry = 0;
    uint32_t n = 0;
    int bit = 0;

#define CRC32_POLYNOMIAL 0xEDB88320

    /*build crc table*/
    for (n = 0; n < 256; n++) {
        entry = n;
        for (bit = 0; bit < 8; bit++) {
            if ((entry & 1) == 1) {
                entry = (entry >> 1) ^ CRC32_POLYNOMIAL;
            } else {
                entry >>= 1;
            }
        }
        table[n] = entry;
    }

    while(len-- != 0) {
        /* only the low 8 bits of the byte take part, whatever the signedness of char */
        crc = (crc >> 8) ^ table[(crc ^ (uint32_t)(unsigned char)*ptr) & 0xFF];
        ptr++;
    }
    return crc;
}
175
/*
 * Replace every occurrence of the character s with r in the first srcLen
 * characters of src. The buffer is modified in place.
 */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    char* cursor = src;
    int32_t remaining = srcLen;
    while(remaining > 0){
        if(*cursor == s){
            *cursor = r;
        }
        ++cursor;
        --remaining;
    }
}
184/* Parse the filename, and get its language information.
185 * If it fails to get the language information from the filename,
186 * use "en" as the default value for language
187 */
729e4ab9 188static char* parseFilename(const char* id, char* /*lang*/) {
46f4442e
A
189 int idLen = (int) uprv_strlen(id);
190 char* localeID = (char*) uprv_malloc(idLen);
191 int pos = 0;
192 int canonCapacity = 0;
193 char* canon = NULL;
194 int canonLen = 0;
195 /*int i;*/
196 UErrorCode status = U_ZERO_ERROR;
197 const char *ext = uprv_strchr(id, '.');
198
199 if(ext != NULL){
200 pos = (int) (ext - id);
201 } else {
202 pos = idLen;
203 }
204 uprv_memcpy(localeID, id, pos);
205 localeID[pos]=0; /* NUL terminate the string */
729e4ab9 206
46f4442e
A
207 canonCapacity =pos*3;
208 canon = (char*) uprv_malloc(canonCapacity);
209 canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
210
211 if(U_FAILURE(status)){
212 fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
213 exit(status);
214 }
215 strnrepchr(canon, canonLen, '_', '-');
216 return canon;
217}
218
219static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
220#if 0
221static const char* bundleStart = "<xliff version = \"1.2\" "
222 "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
223 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
224 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
225#else
226static const char* bundleStart = "<xliff version = \"1.1\" "
227 "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
228 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
229 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
230#endif
231static const char* bundleEnd = "</xliff>\n";
232
729e4ab9 233void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
46f4442e
A
234
235static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
236 const UChar* src, int32_t srcLen, UErrorCode* status){
237 int32_t srcIndex=0;
238 char* dest=NULL;
239 char* temp=NULL;
240 int32_t destLen=0;
241 UChar32 c = 0;
242
243 if(status==NULL || U_FAILURE(*status) || pDest==NULL || srcLen==0 || src == NULL){
244 return NULL;
245 }
246 dest =*pDest;
247 if(dest==NULL || destCap <=0){
248 destCap = srcLen * 8;
249 dest = (char*) uprv_malloc(sizeof(char) * destCap);
250 if(dest==NULL){
251 *status=U_MEMORY_ALLOCATION_ERROR;
252 return NULL;
253 }
254 }
255
256 dest[0]=0;
257
258 while(srcIndex<srcLen){
259 U16_NEXT(src, srcIndex, srcLen, c);
260
261 if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
262 *status = U_ILLEGAL_CHAR_FOUND;
263 fprintf(stderr, "Illegal Surrogate! \n");
264 uprv_free(dest);
265 return NULL;
266 }
267
51004dcb 268 if((destLen+U8_LENGTH(c)) < destCap){
46f4442e
A
269
270 /* ASCII Range */
271 if(c <=0x007F){
272 switch(c) {
273 case '\x26':
274 uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
275 destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
276 break;
277 case '\x3c':
278 uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
279 destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
280 break;
281 case '\x3e':
282 uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
283 destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
284 break;
285 case '\x22':
286 uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
287 destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
288 break;
289 case '\x27':
290 uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
291 destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
292 break;
293
294 /* Disallow C0 controls except TAB, CR, LF*/
295 case 0x00:
296 case 0x01:
297 case 0x02:
298 case 0x03:
299 case 0x04:
300 case 0x05:
301 case 0x06:
302 case 0x07:
303 case 0x08:
304 /*case 0x09:*/
305 /*case 0x0A: */
306 case 0x0B:
307 case 0x0C:
308 /*case 0x0D:*/
309 case 0x0E:
310 case 0x0F:
311 case 0x10:
312 case 0x11:
313 case 0x12:
314 case 0x13:
315 case 0x14:
316 case 0x15:
317 case 0x16:
318 case 0x17:
319 case 0x18:
320 case 0x19:
321 case 0x1A:
322 case 0x1B:
323 case 0x1C:
324 case 0x1D:
325 case 0x1E:
326 case 0x1F:
327 *status = U_ILLEGAL_CHAR_FOUND;
328 fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
329 uprv_free(dest);
330 return NULL;
331 default:
332 dest[destLen++]=(char)c;
333 }
334 }else{
335 UBool isError = FALSE;
336 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
337 if(isError){
338 *status = U_ILLEGAL_CHAR_FOUND;
339 fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
340 uprv_free(dest);
341 return NULL;
342 }
343 }
344 }else{
345 destCap += destLen;
346
347 temp = (char*) uprv_malloc(sizeof(char)*destCap);
348 if(temp==NULL){
349 *status=U_MEMORY_ALLOCATION_ERROR;
350 uprv_free(dest);
351 return NULL;
352 }
353 uprv_memmove(temp,dest,destLen);
354 destLen=0;
355 uprv_free(dest);
356 dest=temp;
357 temp=NULL;
358 }
359
360 }
361 *destLength = destLen;
362 return dest;
363}
364
/*
 * Characters stripped from the end of comment text by trim().
 * NOTE(review): the names CR and LF are swapped relative to ASCII (0x0A is
 * line feed, 0x0D is carriage return). The values are kept as they are
 * because other code may depend on them.
 */
#define ASTERISK 0x002A
#define SPACE    0x0020
#define CR       0x000A
#define LF       0x000D
#define AT_SIGN  0x0040
370
0f5d89e8 371#if UCONFIG_NO_REGULAR_EXPRESSIONS==0
46f4442e
A
372static void
373trim(char **src, int32_t *len){
374
375 char *s = NULL;
376 int32_t i = 0;
377 if(src == NULL || *src == NULL){
378 return;
379 }
380 s = *src;
381 /* trim from the end */
382 for( i=(*len-1); i>= 0; i--){
383 switch(s[i]){
384 case ASTERISK:
385 case SPACE:
386 case CR:
387 case LF:
388 s[i] = 0;
389 continue;
390 default:
391 break;
392 }
393 break;
394
395 }
396 *len = i+1;
397}
398
399static void
729e4ab9 400print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd, UErrorCode *status){
46f4442e
A
401 int32_t bufCapacity = srcLen*4;
402 char *buf = NULL;
403 int32_t bufLen = 0;
404
405 if(U_FAILURE(*status)){
406 return;
407 }
408
409 buf = (char*) (uprv_malloc(bufCapacity));
410 if(buf==0){
411 fprintf(stderr, "Could not allocate memory!!");
412 exit(U_MEMORY_ALLOCATION_ERROR);
413 }
414 buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
415 if(U_SUCCESS(*status)){
416 trim(&buf,&bufLen);
729e4ab9
A
417 write_utf8_file(out,UnicodeString(tagStart));
418 write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
419 write_utf8_file(out,UnicodeString(tagEnd));
420 write_utf8_file(out,UnicodeString("\n"));
46f4442e
A
421
422 }
423}
0f5d89e8
A
424#endif
425
46f4442e 426static void
2ca993e8 427printNoteElements(const UString *src, UErrorCode *status){
46f4442e
A
428
429#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
430
431 int32_t capacity = 0;
432 UChar* note = NULL;
433 int32_t noteLen = 0;
434 int32_t count = 0,i;
435
436 if(src == NULL){
437 return;
438 }
439
440 capacity = src->fLength;
441 note = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
442
443 count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
444 if(U_FAILURE(*status)){
445 uprv_free(note);
446 return;
447 }
448 for(i=0; i < count; i++){
449 noteLen = getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
450 if(U_FAILURE(*status)){
451 uprv_free(note);
452 return;
453 }
454 if(noteLen > 0){
729e4ab9
A
455 write_tabs(out);
456 print(note, noteLen,"<note>", "</note>", status);
46f4442e
A
457 }
458 }
459 uprv_free(note);
460#else
729e4ab9 461
46f4442e
A
462 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
463
464#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
465
466}
467
729e4ab9 468static void printAttribute(const char *name, const char *value, int32_t /*len*/)
46f4442e 469{
729e4ab9
A
470 write_utf8_file(out, UnicodeString(" "));
471 write_utf8_file(out, UnicodeString(name));
472 write_utf8_file(out, UnicodeString(" = \""));
473 write_utf8_file(out, UnicodeString(value));
474 write_utf8_file(out, UnicodeString("\""));
46f4442e
A
475}
476
0f5d89e8 477#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
729e4ab9 478static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
46f4442e 479{
729e4ab9
A
480 write_utf8_file(out, UnicodeString(" "));
481 write_utf8_file(out, UnicodeString(name));
482 write_utf8_file(out, UnicodeString(" = \""));
483 write_utf8_file(out, value);
484 write_utf8_file(out, UnicodeString("\""));
46f4442e 485}
0f5d89e8 486#endif
46f4442e
A
487
488static void
729e4ab9 489printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
46f4442e
A
490
491#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
492
493 if(status==NULL || U_FAILURE(*status)){
494 return;
495 }
496
497 int32_t capacity = src->fLength + 1;
498 char* buf = NULL;
499 int32_t bufLen = 0;
500 UChar* desc = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
501 UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
502
503 int32_t descLen = 0, transLen=0;
504 if(desc==NULL || trans==NULL){
505 *status = U_MEMORY_ALLOCATION_ERROR;
506 uprv_free(desc);
507 uprv_free(trans);
508 return;
509 }
2ca993e8 510 // TODO: make src const, stop modifying it in-place, make printContainer() take const resource, etc.
46f4442e
A
511 src->fLength = removeCmtText(src->fChars, src->fLength, status);
512 descLen = getDescription(src->fChars,src->fLength, &desc, capacity, status);
513 transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
514
515 /* first print translate attribute */
516 if(transLen > 0){
517 if(printTranslate){
518 /* print translate attribute */
519 buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
520 if(U_SUCCESS(*status)){
729e4ab9
A
521 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
522 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
523 }
524 }else if(getShowWarning()){
525 fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
526 /* no translate attribute .. just close the tag */
729e4ab9 527 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
528 }
529 }else{
530 /* no translate attribute .. just close the tag */
729e4ab9 531 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
532 }
533
534 if(descLen > 0){
729e4ab9
A
535 write_tabs(out);
536 print(desc, descLen, "<!--", "-->", status);
46f4442e
A
537 }
538
539 uprv_free(desc);
540 uprv_free(trans);
541#else
542
543 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
544
545#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
546
547}
548
549/*
550 * Print out a containing element, like:
551 * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
552 * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
553 */
2ca993e8 554static char *printContainer(SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
46f4442e 555{
729e4ab9 556 const char *resname = NULL;
46f4442e
A
557 char *sid = NULL;
558
729e4ab9 559 write_tabs(out);
46f4442e 560
2ca993e8 561 resname = res->getKeyString(srBundle);
729e4ab9 562 if (resname != NULL && *resname != 0) {
46f4442e
A
563 sid = getID(id, resname, sid);
564 } else {
565 sid = getID(id, NULL, sid);
566 }
567
729e4ab9
A
568 write_utf8_file(out, UnicodeString("<"));
569 write_utf8_file(out, UnicodeString(container));
570 printAttribute("id", sid, (int32_t) uprv_strlen(sid));
46f4442e
A
571
572 if (resname != NULL) {
729e4ab9 573 printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
46f4442e
A
574 }
575
576 if (mimetype != NULL) {
729e4ab9 577 printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
46f4442e
A
578 }
579
580 if (restype != NULL) {
729e4ab9 581 printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
46f4442e
A
582 }
583
584 tabCount += 1;
585 if (res->fComment.fLength > 0) {
586 /* printComments will print the closing ">\n" */
729e4ab9 587 printComments(&res->fComment, resname, TRUE, status);
46f4442e 588 } else {
729e4ab9 589 write_utf8_file(out, UnicodeString(">\n"));
46f4442e
A
590 }
591
592 return sid;
593}
594
/* Writing Functions */

/* Element names and closing tags for translatable units and groups. */
static const char *trans_unit = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source = "<source>";
static const char *close_source = "</source>\n";
static const char *group = "group";
static const char *close_group = "</group>\n";

/* Element names and tags used for binary resources. */
static const char *bin_unit = "bin-unit";
static const char *close_bin_unit = "</bin-unit>\n";
static const char *bin_source = "<bin-source>\n";
static const char *close_bin_source = "</bin-source>\n";
static const char *external_file = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

/* Default mime-type of a binary resource. */
static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* Values of the restype attribute, one per ICU resource type. */
static const char *alias_restype = "x-icu-alias";
static const char *array_restype = "x-icu-array";
static const char *binary_restype = "x-icu-binary";
static const char *integer_restype = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype = "x-icu-table";
621
622static void
2ca993e8 623string_write_xml(StringResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
624
625 char *sid = NULL;
626 char* buf = NULL;
627 int32_t bufLen = 0;
628
629 if(status==NULL || U_FAILURE(*status)){
630 return;
631 }
632
729e4ab9 633 sid = printContainer(res, trans_unit, NULL, NULL, id, status);
46f4442e 634
729e4ab9 635 write_tabs(out);
46f4442e 636
729e4ab9 637 write_utf8_file(out, UnicodeString(source));
46f4442e 638
2ca993e8 639 buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
46f4442e
A
640
641 if (U_FAILURE(*status)) {
642 return;
643 }
644
729e4ab9
A
645 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
646 write_utf8_file(out, UnicodeString(close_source));
46f4442e 647
729e4ab9 648 printNoteElements(&res->fComment, status);
46f4442e
A
649
650 tabCount -= 1;
729e4ab9 651 write_tabs(out);
46f4442e 652
729e4ab9 653 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
654
655 uprv_free(buf);
656 uprv_free(sid);
657}
658
659static void
2ca993e8 660alias_write_xml(AliasResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
661 char *sid = NULL;
662 char* buf = NULL;
663 int32_t bufLen=0;
664
729e4ab9 665 sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
46f4442e 666
729e4ab9 667 write_tabs(out);
46f4442e 668
729e4ab9 669 write_utf8_file(out, UnicodeString(source));
46f4442e 670
2ca993e8 671 buf = convertAndEscape(&buf, 0, &bufLen, res->getBuffer(), res->length(), status);
729e4ab9 672
46f4442e
A
673 if(U_FAILURE(*status)){
674 return;
675 }
729e4ab9
A
676 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
677 write_utf8_file(out, UnicodeString(close_source));
46f4442e 678
729e4ab9 679 printNoteElements(&res->fComment, status);
46f4442e
A
680
681 tabCount -= 1;
729e4ab9 682 write_tabs(out);
46f4442e 683
729e4ab9 684 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
685
686 uprv_free(buf);
687 uprv_free(sid);
688}
689
690static void
2ca993e8 691array_write_xml(ArrayResource *res, const char* id, const char* language, UErrorCode *status) {
46f4442e
A
692 char* sid = NULL;
693 int index = 0;
694
695 struct SResource *current = NULL;
46f4442e 696
729e4ab9 697 sid = printContainer(res, group, array_restype, NULL, id, status);
46f4442e 698
2ca993e8 699 current = res->fFirst;
46f4442e
A
700
701 while (current != NULL) {
702 char c[256] = {0};
703 char* subId = NULL;
704
705 itostr(c, index, 10, 0);
706 index += 1;
707 subId = getID(sid, c, subId);
708
729e4ab9 709 res_write_xml(current, subId, language, FALSE, status);
46f4442e
A
710 uprv_free(subId);
711 subId = NULL;
712
713 if(U_FAILURE(*status)){
714 return;
715 }
716
717 current = current->fNext;
718 }
719
720 tabCount -= 1;
729e4ab9
A
721 write_tabs(out);
722 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
723
724 uprv_free(sid);
725}
726
727static void
2ca993e8 728intvector_write_xml(IntVectorResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
729 char* sid = NULL;
730 char* ivd = NULL;
731 uint32_t i=0;
732 uint32_t len=0;
733 char buf[256] = {'0'};
734
729e4ab9 735 sid = printContainer(res, group, intvector_restype, NULL, id, status);
46f4442e 736
2ca993e8 737 for(i = 0; i < res->fCount; i += 1) {
46f4442e
A
738 char c[256] = {0};
739
740 itostr(c, i, 10, 0);
741 ivd = getID(sid, c, ivd);
2ca993e8 742 len = itostr(buf, res->fArray[i], 10, 0);
46f4442e 743
729e4ab9
A
744 write_tabs(out);
745 write_utf8_file(out, UnicodeString("<"));
746 write_utf8_file(out, UnicodeString(trans_unit));
46f4442e 747
729e4ab9
A
748 printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
749 printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
46f4442e 750
729e4ab9 751 write_utf8_file(out, UnicodeString(">\n"));
46f4442e
A
752
753 tabCount += 1;
729e4ab9
A
754 write_tabs(out);
755 write_utf8_file(out, UnicodeString(source));
46f4442e 756
729e4ab9 757 write_utf8_file(out, UnicodeString(buf, len));
46f4442e 758
729e4ab9 759 write_utf8_file(out, UnicodeString(close_source));
46f4442e 760 tabCount -= 1;
729e4ab9
A
761 write_tabs(out);
762 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
763
764 uprv_free(ivd);
765 ivd = NULL;
766 }
767
768 tabCount -= 1;
729e4ab9 769 write_tabs(out);
46f4442e 770
729e4ab9 771 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
772 uprv_free(sid);
773 sid = NULL;
774}
775
776static void
2ca993e8 777int_write_xml(IntResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
778 char* sid = NULL;
779 char buf[256] = {0};
780 uint32_t len = 0;
781
729e4ab9 782 sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
46f4442e 783
729e4ab9 784 write_tabs(out);
46f4442e 785
729e4ab9 786 write_utf8_file(out, UnicodeString(source));
46f4442e 787
2ca993e8 788 len = itostr(buf, res->fValue, 10, 0);
729e4ab9 789 write_utf8_file(out, UnicodeString(buf, len));
46f4442e 790
729e4ab9 791 write_utf8_file(out, UnicodeString(close_source));
46f4442e 792
729e4ab9 793 printNoteElements(&res->fComment, status);
46f4442e
A
794
795 tabCount -= 1;
729e4ab9 796 write_tabs(out);
46f4442e 797
729e4ab9 798 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
799
800 uprv_free(sid);
801 sid = NULL;
802}
803
804static void
2ca993e8 805bin_write_xml(BinaryResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
806 const char* m_type = application_mimetype;
807 char* sid = NULL;
808 uint32_t crc = 0xFFFFFFFF;
809
810 char fileName[1024] ={0};
811 int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
812 char* fn = (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
2ca993e8
A
813 (res->fFileName !=NULL ?
814 uprv_strlen(res->fFileName) :0)));
46f4442e
A
815 const char* ext = NULL;
816
817 char* f = NULL;
818
819 fn[0]=0;
820
2ca993e8
A
821 if(res->fFileName != NULL){
822 uprv_strcpy(fileName, res->fFileName);
46f4442e
A
823 f = uprv_strrchr(fileName, '\\');
824
825 if (f != NULL) {
826 f++;
827 } else {
828 f = fileName;
829 }
830
831 ext = uprv_strrchr(fileName, '.');
832
833 if (ext == NULL) {
834 fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
835 exit(U_ILLEGAL_ARGUMENT_ERROR);
836 }
837
838 if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
839 m_type = "image";
840 } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
841 m_type = "audio";
842 } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
843 m_type = "video";
844 } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
845 m_type = "text";
846 }
847
729e4ab9 848 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
46f4442e 849
729e4ab9 850 write_tabs(out);
46f4442e 851
729e4ab9 852 write_utf8_file(out, UnicodeString(bin_source));
46f4442e
A
853
854 tabCount+= 1;
729e4ab9 855 write_tabs(out);
46f4442e 856
729e4ab9
A
857 write_utf8_file(out, UnicodeString(external_file));
858 printAttribute("href", f, (int32_t)uprv_strlen(f));
859 write_utf8_file(out, UnicodeString("/>\n"));
46f4442e 860 tabCount -= 1;
729e4ab9 861 write_tabs(out);
46f4442e 862
729e4ab9 863 write_utf8_file(out, UnicodeString(close_bin_source));
46f4442e 864
729e4ab9 865 printNoteElements(&res->fComment, status);
46f4442e 866 tabCount -= 1;
729e4ab9
A
867 write_tabs(out);
868 write_utf8_file(out, UnicodeString(close_bin_unit));
46f4442e
A
869 } else {
870 char temp[256] = {0};
871 uint32_t i = 0;
872 int32_t len=0;
873
729e4ab9 874 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
46f4442e 875
729e4ab9
A
876 write_tabs(out);
877 write_utf8_file(out, UnicodeString(bin_source));
46f4442e
A
878
879 tabCount += 1;
729e4ab9 880 write_tabs(out);
46f4442e 881
729e4ab9
A
882 write_utf8_file(out, UnicodeString(internal_file));
883 printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
46f4442e 884
2ca993e8
A
885 while(i <res->fLength){
886 len = itostr(temp, res->fData[i], 16, 2);
46f4442e
A
887 crc = computeCRC(temp, len, crc);
888 i++;
889 }
890
891 len = itostr(temp, crc, 10, 0);
729e4ab9 892 printAttribute("crc", temp, len);
46f4442e 893
729e4ab9 894 write_utf8_file(out, UnicodeString(">"));
46f4442e
A
895
896 i = 0;
2ca993e8
A
897 while(i <res->fLength){
898 len = itostr(temp, res->fData[i], 16, 2);
729e4ab9 899 write_utf8_file(out, UnicodeString(temp));
46f4442e
A
900 i += 1;
901 }
902
729e4ab9 903 write_utf8_file(out, UnicodeString(close_internal_file));
46f4442e
A
904
905 tabCount -= 2;
729e4ab9 906 write_tabs(out);
46f4442e 907
729e4ab9
A
908 write_utf8_file(out, UnicodeString(close_bin_source));
909 printNoteElements(&res->fComment, status);
46f4442e
A
910
911 tabCount -= 1;
729e4ab9
A
912 write_tabs(out);
913 write_utf8_file(out, UnicodeString(close_bin_unit));
46f4442e
A
914
915 uprv_free(sid);
916 sid = NULL;
917 }
918
919 uprv_free(fn);
920}
921
922
923
924static void
2ca993e8 925table_write_xml(TableResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
46f4442e
A
926
927 uint32_t i = 0;
928
929 struct SResource *current = NULL;
46f4442e
A
930 char* sid = NULL;
931
932 if (U_FAILURE(*status)) {
933 return ;
934 }
935
729e4ab9 936 sid = printContainer(res, group, table_restype, NULL, id, status);
46f4442e
A
937
938 if(isTopLevel) {
939 sid[0] = '\0';
940 }
941
2ca993e8 942 current = res->fFirst;
46f4442e
A
943 i = 0;
944
945 while (current != NULL) {
729e4ab9 946 res_write_xml(current, sid, language, FALSE, status);
46f4442e
A
947
948 if(U_FAILURE(*status)){
949 return;
950 }
951
952 i += 1;
953 current = current->fNext;
954 }
955
956 tabCount -= 1;
729e4ab9 957 write_tabs(out);
46f4442e 958
729e4ab9 959 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
960
961 uprv_free(sid);
962 sid = NULL;
963}
964
965void
729e4ab9 966res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
46f4442e
A
967
968 if (U_FAILURE(*status)) {
969 return ;
970 }
971
972 if (res != NULL) {
973 switch (res->fType) {
974 case URES_STRING:
2ca993e8 975 string_write_xml (static_cast<StringResource *>(res), id, language, status);
46f4442e
A
976 return;
977
978 case URES_ALIAS:
2ca993e8 979 alias_write_xml (static_cast<AliasResource *>(res), id, language, status);
46f4442e
A
980 return;
981
982 case URES_INT_VECTOR:
2ca993e8 983 intvector_write_xml (static_cast<IntVectorResource *>(res), id, language, status);
46f4442e
A
984 return;
985
986 case URES_BINARY:
2ca993e8 987 bin_write_xml (static_cast<BinaryResource *>(res), id, language, status);
46f4442e
A
988 return;
989
990 case URES_INT:
2ca993e8 991 int_write_xml (static_cast<IntResource *>(res), id, language, status);
46f4442e
A
992 return;
993
994 case URES_ARRAY:
2ca993e8 995 array_write_xml (static_cast<ArrayResource *>(res), id, language, status);
46f4442e
A
996 return;
997
998 case URES_TABLE:
2ca993e8 999 table_write_xml (static_cast<TableResource *>(res), id, language, isTopLevel, status);
46f4442e
A
1000 return;
1001
1002 default:
1003 break;
1004 }
1005 }
1006
1007 *status = U_INTERNAL_PROGRAM_ERROR;
1008}
1009
1010void
1011bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
1012 char *writtenFilename, int writtenFilenameLen,
1013 const char* language, const char* outFileName, UErrorCode *status) {
1014
46f4442e
A
1015 char* xmlfileName = NULL;
1016 char* outputFileName = NULL;
1017 char* originalFileName = NULL;
1018 const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
1019 const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
1020 const char* file2 = "original = \"";
1021 const char* file4 = "\" date = \"";
1022 const char* fileEnd = "</file>\n";
1023 const char* headerStart = "<header>\n";
1024 const char* headerEnd = "</header>\n";
1025 const char* bodyStart = "<body>\n";
1026 const char* bodyEnd = "</body>\n";
1027
1028 const char *tool_start = "<tool";
1029 const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
1030 const char *tool_name = "genrb";
1031
1032 char* temp = NULL;
1033 char* lang = NULL;
1034 const char* pos = NULL;
1035 int32_t first, index;
1036 time_t currTime;
1037 char timeBuf[128];
1038
1039 outDir = outputDir;
1040
1041 srBundle = bundle;
1042
1043 pos = uprv_strrchr(filename, '\\');
1044 if(pos != NULL) {
1045 first = (int32_t)(pos - filename + 1);
1046 } else {
1047 first = 0;
1048 }
1049 index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
1050 originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
1051 uprv_memset(originalFileName, 0, sizeof(char)*index+1);
1052 uprv_strncpy(originalFileName, filename + first, index);
1053
1054 if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
1055 fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
1056 }
1057
1058 temp = originalFileName;
1059 originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1060 uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1061 uprv_strcat(originalFileName, temp);
1062 uprv_strcat(originalFileName, textExt);
1063 uprv_free(temp);
1064 temp = NULL;
1065
1066
1067 if (language == NULL) {
1068/* lang = parseFilename(filename, lang);
1069 if (lang == NULL) {*/
1070 /* now check if locale name is valid or not
1071 * this is to cater for situation where
1072 * pegasusServer.txt contains
1073 *
1074 * en{
1075 * ..
1076 * }
1077 */
1078 lang = parseFilename(srBundle->fLocale, lang);
1079 /*
1080 * Neither the file name nor the table name inside the
1081 * txt file contain a valid country and language codes
1082 * throw an error.
1083 * pegasusServer.txt contains
1084 *
1085 * testelements{
1086 * ....
1087 * }
1088 */
1089 if(lang==NULL){
1090 fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
1091 exit(U_ILLEGAL_ARGUMENT_ERROR);
1092 }
1093 /* }*/
1094 } else {
1095 lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
1096 uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
1097 uprv_strcpy(lang, language);
1098 }
1099
1100 if(outFileName) {
1101 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
1102 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
1103 uprv_strcpy(outputFileName,outFileName);
1104 } else {
1105 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1106 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1107 uprv_strcpy(outputFileName,srBundle->fLocale);
1108 }
1109
1110 if(outputDir) {
1111 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1112 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1113 } else {
1114 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1115 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1116 }
1117
1118 if(outputDir){
1119 uprv_strcpy(xmlfileName, outputDir);
1120 if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
1121 uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
1122 }
1123 }
1124 uprv_strcat(xmlfileName,outputFileName);
1125 uprv_strcat(xmlfileName,xliffExt);
1126
1127 if (writtenFilename) {
1128 uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
1129 }
1130
1131 if (U_FAILURE(*status)) {
1132 goto cleanup_bundle_write_xml;
1133 }
1134
1135 out= T_FileStream_open(xmlfileName,"w");
1136
1137 if(out==NULL){
1138 *status = U_FILE_ACCESS_ERROR;
1139 goto cleanup_bundle_write_xml;
1140 }
57a6839d 1141 write_utf8_file(out, UnicodeString(xmlHeader));
46f4442e
A
1142
1143 if(outputEnc && *outputEnc!='\0'){
1144 /* store the output encoding */
1145 enc = outputEnc;
1146 conv=ucnv_open(enc,status);
1147 if(U_FAILURE(*status)){
1148 goto cleanup_bundle_write_xml;
1149 }
1150 }
57a6839d 1151 write_utf8_file(out, UnicodeString(bundleStart));
729e4ab9 1152 write_tabs(out);
57a6839d 1153 write_utf8_file(out, UnicodeString(fileStart));
46f4442e
A
1154 /* check if lang and language are the same */
1155 if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
1156 fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
1157 }
729e4ab9
A
1158 write_utf8_file(out, UnicodeString(lang));
1159 write_utf8_file(out, UnicodeString(file1));
1160 write_utf8_file(out, UnicodeString(file2));
1161 write_utf8_file(out, UnicodeString(originalFileName));
1162 write_utf8_file(out, UnicodeString(file4));
46f4442e
A
1163
1164 time(&currTime);
1165 strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
729e4ab9
A
1166 write_utf8_file(out, UnicodeString(timeBuf));
1167 write_utf8_file(out, UnicodeString("\">\n"));
46f4442e
A
1168
1169 tabCount += 1;
729e4ab9 1170 write_tabs(out);
57a6839d 1171 write_utf8_file(out, UnicodeString(headerStart));
46f4442e
A
1172
1173 tabCount += 1;
729e4ab9 1174 write_tabs(out);
46f4442e 1175
57a6839d 1176 write_utf8_file(out, UnicodeString(tool_start));
729e4ab9
A
1177 printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
1178 printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
1179 write_utf8_file(out, UnicodeString("/>\n"));
46f4442e
A
1180
1181 tabCount -= 1;
729e4ab9 1182 write_tabs(out);
46f4442e 1183
729e4ab9 1184 write_utf8_file(out, UnicodeString(headerEnd));
46f4442e 1185
729e4ab9 1186 write_tabs(out);
46f4442e
A
1187 tabCount += 1;
1188
729e4ab9 1189 write_utf8_file(out, UnicodeString(bodyStart));
46f4442e
A
1190
1191
729e4ab9 1192 res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
46f4442e
A
1193
1194 tabCount -= 1;
729e4ab9 1195 write_tabs(out);
46f4442e 1196
729e4ab9 1197 write_utf8_file(out, UnicodeString(bodyEnd));
46f4442e 1198 tabCount--;
729e4ab9
A
1199 write_tabs(out);
1200 write_utf8_file(out, UnicodeString(fileEnd));
46f4442e 1201 tabCount--;
729e4ab9
A
1202 write_tabs(out);
1203 write_utf8_file(out, UnicodeString(bundleEnd));
46f4442e
A
1204 T_FileStream_close(out);
1205
1206 ucnv_close(conv);
1207
1208cleanup_bundle_write_xml:
1209 uprv_free(originalFileName);
1210 uprv_free(lang);
1211 if(xmlfileName != NULL) {
1212 uprv_free(xmlfileName);
1213 }
1214 if(outputFileName != NULL){
1215 uprv_free(outputFileName);
1216 }
1217}