]> git.saurik.com Git - apple/icu.git/blame - icuSources/tools/genrb/wrtxml.cpp
ICU-551.24.tar.gz
[apple/icu.git] / icuSources / tools / genrb / wrtxml.cpp
CommitLineData
46f4442e
A
1/*
2*******************************************************************************
3*
57a6839d 4* Copyright (C) 2002-2014, International Business Machines
46f4442e
A
5* Corporation and others. All Rights Reserved.
6*
7*******************************************************************************
8*
729e4ab9 9* File wrtxml.cpp
46f4442e
A
10*
11* Modification History:
12*
13* Date Name Description
14* 10/01/02 Ram Creation.
15* 02/07/08 Spieth Correct XLIFF generation on EBCDIC platform
16*
17*******************************************************************************
18*/
57a6839d
A
19
20// Safer use of UnicodeString.
21#ifndef UNISTR_FROM_CHAR_EXPLICIT
22# define UNISTR_FROM_CHAR_EXPLICIT explicit
23#endif
24
25// Less important, but still a good idea.
26#ifndef UNISTR_FROM_STRING_EXPLICIT
27# define UNISTR_FROM_STRING_EXPLICIT explicit
28#endif
29
46f4442e
A
30#include "reslist.h"
31#include "unewdata.h"
32#include "unicode/ures.h"
33#include "errmsg.h"
34#include "filestrm.h"
35#include "cstring.h"
36#include "unicode/ucnv.h"
37#include "genrb.h"
38#include "rle.h"
46f4442e
A
39#include "uhash.h"
40#include "uresimp.h"
41#include "unicode/ustring.h"
42#include "unicode/uchar.h"
43#include "ustr.h"
44#include "prscmnts.h"
45#include "unicode/unistr.h"
51004dcb
A
46#include "unicode/utf8.h"
47#include "unicode/utf16.h"
46f4442e
A
48#include <time.h>
49
50U_NAMESPACE_USE
51
52static int tabCount = 0;
53
54static FileStream* out=NULL;
55static struct SRBRoot* srBundle ;
56static const char* outDir = NULL;
57static const char* enc ="";
58static UConverter* conv = NULL;
59
60const char* const* ISOLanguages;
61const char* const* ISOCountries;
62const char* textExt = ".txt";
63const char* xliffExt = ".xlf";
64
729e4ab9
A
65static int32_t write_utf8_file(FileStream* fileStream, UnicodeString outString)
66{
67 UErrorCode status = U_ZERO_ERROR;
46f4442e 68 int32_t len = 0;
46f4442e 69
729e4ab9
A
70 // preflight to get the destination buffer size
71 u_strToUTF8(NULL,
72 0,
73 &len,
74 outString.getBuffer(),
75 outString.length(),
76 &status);
77
78 // allocate the buffer
79 char* dest = (char*)uprv_malloc(len);
80 status = U_ZERO_ERROR;
81
82 // convert the data
83 u_strToUTF8(dest,
84 len,
85 &len,
86 outString.getBuffer(),
87 outString.length(),
88 &status);
89
90 // write data to out file
91 int32_t ret = T_FileStream_write(fileStream, dest, len);
46f4442e 92 uprv_free(dest);
729e4ab9 93 return (ret);
46f4442e
A
94}
95
46f4442e 96/*write indentation for formatting*/
729e4ab9 97static void write_tabs(FileStream* os){
46f4442e
A
98 int i=0;
99 for(;i<=tabCount;i++){
729e4ab9 100 write_utf8_file(os,UnicodeString(" "));
46f4442e
A
101 }
102}
103
104/*get ID for each element. ID is globally unique.*/
729e4ab9 105static char* getID(const char* id, const char* curKey, char* result) {
46f4442e
A
106 if(curKey == NULL) {
107 result = (char *)uprv_malloc(sizeof(char)*uprv_strlen(id) + 1);
108 uprv_memset(result, 0, sizeof(char)*uprv_strlen(id) + 1);
109 uprv_strcpy(result, id);
110 } else {
111 result = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
112 uprv_memset(result, 0, sizeof(char)*(uprv_strlen(id) + 1 + uprv_strlen(curKey)) + 1);
113 if(id[0]!='\0'){
114 uprv_strcpy(result, id);
115 uprv_strcat(result, "_");
116 }
117 uprv_strcat(result, curKey);
118 }
119 return result;
120}
121
/* compute CRC for binary code */
/* The code is from http://www.theorem.com/java/CRC32.java
 * Calculates the CRC32 - 32 bit Cyclical Redundancy Check
 * <P> This check is used in numerous systems to verify the integrity
 * of information. It's also used as a hashing function. Unlike a regular
 * checksum, it's sensitive to the order of the characters.
 * It produces a 32-bit value.
 *
 * @author Michael Lecuyer (mjl@theorem.com)
 * @version 1.1 August 11, 1998
 */

/* ICU is not endian portable, because ICU data generated on big endian machines can be
 * ported to big endian machines but not to little endian machines and vice versa. The
 * conversion is not portable across platforms with different endianness.
 */

#define CRC32_POLYNOMIAL 0xEDB88320

/*
 * Update a running CRC-32 with len bytes starting at ptr.
 *
 * lastcrc is the value to continue from; no final inversion is applied, so
 * the result can be fed back in as lastcrc for the next chunk. With len == 0
 * the function returns lastcrc unchanged.
 *
 * All arithmetic is done on uint32_t so that no step depends on how the
 * platform converts out-of-range values to a signed type. The lookup table
 * is built on the first call and reused afterwards (no locking is done
 * around that one-time initialisation).
 */
uint32_t computeCRC(char *ptr, uint32_t len, uint32_t lastcrc){
    static uint32_t crcTable[256];
    static int tableReady = 0;
    uint32_t crc = lastcrc;

    if (!tableReady) {
        uint32_t n;
        /* build crc table */
        for (n = 0; n < 256; n++) {
            uint32_t entry = n;
            int bit;
            for (bit = 0; bit < 8; bit++) {
                if ((entry & 1) != 0) {
                    entry = (entry >> 1) ^ CRC32_POLYNOMIAL;
                } else {
                    entry >>= 1;
                }
            }
            crcTable[n] = entry;
        }
        tableReady = 1;
    }

    for (; len != 0; --len, ++ptr) {
        /* go through unsigned char so a negative char cannot sign-extend */
        uint32_t byte = (uint32_t)(unsigned char)*ptr;
        crc = (crc >> 8) ^ crcTable[(crc ^ byte) & 0xFF];
    }
    return crc;
}
173
/* Replace every occurrence of s with r among the first srcLen chars of src, in place. */
static void strnrepchr(char* src, int32_t srcLen, char s, char r){
    char* cursor = src;
    int32_t remaining = srcLen;

    while(remaining > 0){
        if(*cursor == s){
            *cursor = r;
        }
        ++cursor;
        --remaining;
    }
}
182/* Parse the filename, and get its language information.
183 * If it fails to get the language information from the filename,
184 * use "en" as the default value for language
185 */
729e4ab9 186static char* parseFilename(const char* id, char* /*lang*/) {
46f4442e
A
187 int idLen = (int) uprv_strlen(id);
188 char* localeID = (char*) uprv_malloc(idLen);
189 int pos = 0;
190 int canonCapacity = 0;
191 char* canon = NULL;
192 int canonLen = 0;
193 /*int i;*/
194 UErrorCode status = U_ZERO_ERROR;
195 const char *ext = uprv_strchr(id, '.');
196
197 if(ext != NULL){
198 pos = (int) (ext - id);
199 } else {
200 pos = idLen;
201 }
202 uprv_memcpy(localeID, id, pos);
203 localeID[pos]=0; /* NUL terminate the string */
729e4ab9 204
46f4442e
A
205 canonCapacity =pos*3;
206 canon = (char*) uprv_malloc(canonCapacity);
207 canonLen = uloc_canonicalize(localeID, canon, canonCapacity, &status);
208
209 if(U_FAILURE(status)){
210 fprintf(stderr, "Could not canonicalize the locale ID: %s. Error: %s\n", localeID, u_errorName(status));
211 exit(status);
212 }
213 strnrepchr(canon, canonLen, '_', '-');
214 return canon;
215}
216
217static const char* xmlHeader = "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n";
218#if 0
219static const char* bundleStart = "<xliff version = \"1.2\" "
220 "xmlns='urn:oasis:names:tc:xliff:document:1.2' "
221 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
222 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.2 xliff-core-1.2-transitional.xsd'>\n";
223#else
224static const char* bundleStart = "<xliff version = \"1.1\" "
225 "xmlns='urn:oasis:names:tc:xliff:document:1.1' "
226 "xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' "
227 "xsi:schemaLocation='urn:oasis:names:tc:xliff:document:1.1 http://www.oasis-open.org/committees/xliff/documents/xliff-core-1.1.xsd'>\n";
228#endif
229static const char* bundleEnd = "</xliff>\n";
230
729e4ab9 231void res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status);
46f4442e
A
232
233static char* convertAndEscape(char** pDest, int32_t destCap, int32_t* destLength,
234 const UChar* src, int32_t srcLen, UErrorCode* status){
235 int32_t srcIndex=0;
236 char* dest=NULL;
237 char* temp=NULL;
238 int32_t destLen=0;
239 UChar32 c = 0;
240
241 if(status==NULL || U_FAILURE(*status) || pDest==NULL || srcLen==0 || src == NULL){
242 return NULL;
243 }
244 dest =*pDest;
245 if(dest==NULL || destCap <=0){
246 destCap = srcLen * 8;
247 dest = (char*) uprv_malloc(sizeof(char) * destCap);
248 if(dest==NULL){
249 *status=U_MEMORY_ALLOCATION_ERROR;
250 return NULL;
251 }
252 }
253
254 dest[0]=0;
255
256 while(srcIndex<srcLen){
257 U16_NEXT(src, srcIndex, srcLen, c);
258
259 if (U16_IS_LEAD(c) || U16_IS_TRAIL(c)) {
260 *status = U_ILLEGAL_CHAR_FOUND;
261 fprintf(stderr, "Illegal Surrogate! \n");
262 uprv_free(dest);
263 return NULL;
264 }
265
51004dcb 266 if((destLen+U8_LENGTH(c)) < destCap){
46f4442e
A
267
268 /* ASCII Range */
269 if(c <=0x007F){
270 switch(c) {
271 case '\x26':
272 uprv_strcpy(dest+( destLen),"\x26\x61\x6d\x70\x3b"); /* &amp;*/
273 destLen+=(int32_t)uprv_strlen("\x26\x61\x6d\x70\x3b");
274 break;
275 case '\x3c':
276 uprv_strcpy(dest+(destLen),"\x26\x6c\x74\x3b"); /* &lt;*/
277 destLen+=(int32_t)uprv_strlen("\x26\x6c\x74\x3b");
278 break;
279 case '\x3e':
280 uprv_strcpy(dest+(destLen),"\x26\x67\x74\x3b"); /* &gt;*/
281 destLen+=(int32_t)uprv_strlen("\x26\x67\x74\x3b");
282 break;
283 case '\x22':
284 uprv_strcpy(dest+(destLen),"\x26\x71\x75\x6f\x74\x3b"); /* &quot;*/
285 destLen+=(int32_t)uprv_strlen("\x26\x71\x75\x6f\x74\x3b");
286 break;
287 case '\x27':
288 uprv_strcpy(dest+(destLen),"\x26\x61\x70\x6f\x73\x3b"); /* &apos; */
289 destLen+=(int32_t)uprv_strlen("\x26\x61\x70\x6f\x73\x3b");
290 break;
291
292 /* Disallow C0 controls except TAB, CR, LF*/
293 case 0x00:
294 case 0x01:
295 case 0x02:
296 case 0x03:
297 case 0x04:
298 case 0x05:
299 case 0x06:
300 case 0x07:
301 case 0x08:
302 /*case 0x09:*/
303 /*case 0x0A: */
304 case 0x0B:
305 case 0x0C:
306 /*case 0x0D:*/
307 case 0x0E:
308 case 0x0F:
309 case 0x10:
310 case 0x11:
311 case 0x12:
312 case 0x13:
313 case 0x14:
314 case 0x15:
315 case 0x16:
316 case 0x17:
317 case 0x18:
318 case 0x19:
319 case 0x1A:
320 case 0x1B:
321 case 0x1C:
322 case 0x1D:
323 case 0x1E:
324 case 0x1F:
325 *status = U_ILLEGAL_CHAR_FOUND;
326 fprintf(stderr, "Illegal Character \\u%04X!\n",(int)c);
327 uprv_free(dest);
328 return NULL;
329 default:
330 dest[destLen++]=(char)c;
331 }
332 }else{
333 UBool isError = FALSE;
334 U8_APPEND((unsigned char*)dest,destLen,destCap,c,isError);
335 if(isError){
336 *status = U_ILLEGAL_CHAR_FOUND;
337 fprintf(stderr, "Illegal Character \\U%08X!\n",(int)c);
338 uprv_free(dest);
339 return NULL;
340 }
341 }
342 }else{
343 destCap += destLen;
344
345 temp = (char*) uprv_malloc(sizeof(char)*destCap);
346 if(temp==NULL){
347 *status=U_MEMORY_ALLOCATION_ERROR;
348 uprv_free(dest);
349 return NULL;
350 }
351 uprv_memmove(temp,dest,destLen);
352 destLen=0;
353 uprv_free(dest);
354 dest=temp;
355 temp=NULL;
356 }
357
358 }
359 *destLength = destLen;
360 return dest;
361}
362
/* Character values used when trimming comment text. */
#define ASTERISK 0x002A
#define SPACE 0x0020
/* NOTE(review): these two names look swapped relative to ASCII (0x0A is line feed,
 * 0x0D is carriage return); trim() below treats both values the same way. */
#define CR 0x000A
#define LF 0x000D
#define AT_SIGN 0x0040

/*
 * Strip trailing '*', space, 0x0A and 0x0D characters from the buffer *src.
 * Each stripped position is overwritten with 0 and *len is updated to the
 * new length. Nothing happens when src or *src is NULL.
 */
static void
trim(char **src, int32_t *len){
    char *text = NULL;
    int32_t end = 0;

    if(src == NULL || *src == NULL){
        return;
    }
    text = *src;
    end = *len;

    /* walk back from the end while the last character is one to strip */
    while(end > 0){
        char last = text[end - 1];
        if(last != ASTERISK && last != SPACE && last != CR && last != LF){
            break;
        }
        text[--end] = 0;
    }
    *len = end;
}
395
396static void
729e4ab9 397print(UChar* src, int32_t srcLen,const char *tagStart,const char *tagEnd, UErrorCode *status){
46f4442e
A
398 int32_t bufCapacity = srcLen*4;
399 char *buf = NULL;
400 int32_t bufLen = 0;
401
402 if(U_FAILURE(*status)){
403 return;
404 }
405
406 buf = (char*) (uprv_malloc(bufCapacity));
407 if(buf==0){
408 fprintf(stderr, "Could not allocate memory!!");
409 exit(U_MEMORY_ALLOCATION_ERROR);
410 }
411 buf = convertAndEscape(&buf, bufCapacity, &bufLen, src, srcLen,status);
412 if(U_SUCCESS(*status)){
413 trim(&buf,&bufLen);
729e4ab9
A
414 write_utf8_file(out,UnicodeString(tagStart));
415 write_utf8_file(out,UnicodeString(buf, bufLen, "UTF-8"));
416 write_utf8_file(out,UnicodeString(tagEnd));
417 write_utf8_file(out,UnicodeString("\n"));
46f4442e
A
418
419 }
420}
421static void
729e4ab9 422printNoteElements(struct UString *src, UErrorCode *status){
46f4442e
A
423
424#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
425
426 int32_t capacity = 0;
427 UChar* note = NULL;
428 int32_t noteLen = 0;
429 int32_t count = 0,i;
430
431 if(src == NULL){
432 return;
433 }
434
435 capacity = src->fLength;
436 note = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
437
438 count = getCount(src->fChars,src->fLength, UPC_NOTE, status);
439 if(U_FAILURE(*status)){
440 uprv_free(note);
441 return;
442 }
443 for(i=0; i < count; i++){
444 noteLen = getAt(src->fChars,src->fLength, &note, capacity, i, UPC_NOTE, status);
445 if(U_FAILURE(*status)){
446 uprv_free(note);
447 return;
448 }
449 if(noteLen > 0){
729e4ab9
A
450 write_tabs(out);
451 print(note, noteLen,"<note>", "</note>", status);
46f4442e
A
452 }
453 }
454 uprv_free(note);
455#else
729e4ab9 456
46f4442e
A
457 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
458
459#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
460
461}
462
729e4ab9 463static void printAttribute(const char *name, const char *value, int32_t /*len*/)
46f4442e 464{
729e4ab9
A
465 write_utf8_file(out, UnicodeString(" "));
466 write_utf8_file(out, UnicodeString(name));
467 write_utf8_file(out, UnicodeString(" = \""));
468 write_utf8_file(out, UnicodeString(value));
469 write_utf8_file(out, UnicodeString("\""));
46f4442e
A
470}
471
729e4ab9 472static void printAttribute(const char *name, const UnicodeString value, int32_t /*len*/)
46f4442e 473{
729e4ab9
A
474 write_utf8_file(out, UnicodeString(" "));
475 write_utf8_file(out, UnicodeString(name));
476 write_utf8_file(out, UnicodeString(" = \""));
477 write_utf8_file(out, value);
478 write_utf8_file(out, UnicodeString("\""));
46f4442e
A
479}
480
481static void
729e4ab9 482printComments(struct UString *src, const char *resName, UBool printTranslate, UErrorCode *status){
46f4442e
A
483
484#if UCONFIG_NO_REGULAR_EXPRESSIONS==0 /* donot compile when no RegularExpressions are available */
485
486 if(status==NULL || U_FAILURE(*status)){
487 return;
488 }
489
490 int32_t capacity = src->fLength + 1;
491 char* buf = NULL;
492 int32_t bufLen = 0;
493 UChar* desc = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
494 UChar* trans = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * capacity);
495
496 int32_t descLen = 0, transLen=0;
497 if(desc==NULL || trans==NULL){
498 *status = U_MEMORY_ALLOCATION_ERROR;
499 uprv_free(desc);
500 uprv_free(trans);
501 return;
502 }
503 src->fLength = removeCmtText(src->fChars, src->fLength, status);
504 descLen = getDescription(src->fChars,src->fLength, &desc, capacity, status);
505 transLen = getTranslate(src->fChars,src->fLength, &trans, capacity, status);
506
507 /* first print translate attribute */
508 if(transLen > 0){
509 if(printTranslate){
510 /* print translate attribute */
511 buf = convertAndEscape(&buf, 0, &bufLen, trans, transLen, status);
512 if(U_SUCCESS(*status)){
729e4ab9
A
513 printAttribute("translate", UnicodeString(buf, bufLen, "UTF-8"), bufLen);
514 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
515 }
516 }else if(getShowWarning()){
517 fprintf(stderr, "Warning: Tranlate attribute for resource %s cannot be set. XLIFF prohibits it.\n", resName);
518 /* no translate attribute .. just close the tag */
729e4ab9 519 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
520 }
521 }else{
522 /* no translate attribute .. just close the tag */
729e4ab9 523 write_utf8_file(out,UnicodeString(">\n"));
46f4442e
A
524 }
525
526 if(descLen > 0){
729e4ab9
A
527 write_tabs(out);
528 print(desc, descLen, "<!--", "-->", status);
46f4442e
A
529 }
530
531 uprv_free(desc);
532 uprv_free(trans);
533#else
534
535 fprintf(stderr, "Warning: Could not output comments to XLIFF file. ICU has been built without RegularExpression support.\n");
536
537#endif /* UCONFIG_NO_REGULAR_EXPRESSIONS */
538
539}
540
541/*
542 * Print out a containing element, like:
543 * <trans-unit id = "blah" resname = "blah" restype = "x-id-alias" translate = "no">
544 * <group id "calendar_gregorian" resname = "gregorian" restype = "x-icu-array">
545 */
729e4ab9 546static char *printContainer(struct SResource *res, const char *container, const char *restype, const char *mimetype, const char *id, UErrorCode *status)
46f4442e 547{
729e4ab9
A
548 char resKeyBuffer[8];
549 const char *resname = NULL;
46f4442e
A
550 char *sid = NULL;
551
729e4ab9 552 write_tabs(out);
46f4442e 553
729e4ab9
A
554 resname = res_getKeyString(srBundle, res, resKeyBuffer);
555 if (resname != NULL && *resname != 0) {
46f4442e
A
556 sid = getID(id, resname, sid);
557 } else {
558 sid = getID(id, NULL, sid);
559 }
560
729e4ab9
A
561 write_utf8_file(out, UnicodeString("<"));
562 write_utf8_file(out, UnicodeString(container));
563 printAttribute("id", sid, (int32_t) uprv_strlen(sid));
46f4442e
A
564
565 if (resname != NULL) {
729e4ab9 566 printAttribute("resname", resname, (int32_t) uprv_strlen(resname));
46f4442e
A
567 }
568
569 if (mimetype != NULL) {
729e4ab9 570 printAttribute("mime-type", mimetype, (int32_t) uprv_strlen(mimetype));
46f4442e
A
571 }
572
573 if (restype != NULL) {
729e4ab9 574 printAttribute("restype", restype, (int32_t) uprv_strlen(restype));
46f4442e
A
575 }
576
577 tabCount += 1;
578 if (res->fComment.fLength > 0) {
579 /* printComments will print the closing ">\n" */
729e4ab9 580 printComments(&res->fComment, resname, TRUE, status);
46f4442e 581 } else {
729e4ab9 582 write_utf8_file(out, UnicodeString(">\n"));
46f4442e
A
583 }
584
585 return sid;
586}
587
/* Writing Functions */

/* Element names and tag text for translatable units and groups. */
static const char *trans_unit       = "trans-unit";
static const char *close_trans_unit = "</trans-unit>\n";
static const char *source           = "<source>";
static const char *close_source     = "</source>\n";
static const char *group            = "group";
static const char *close_group      = "</group>\n";

/* Element names and tag text for binary units. */
static const char *bin_unit            = "bin-unit";
static const char *close_bin_unit      = "</bin-unit>\n";
static const char *bin_source          = "<bin-source>\n";
static const char *close_bin_source    = "</bin-source>\n";
static const char *external_file       = "<external-file";
/*static const char *close_external_file = "</external-file>\n";*/
static const char *internal_file       = "<internal-file";
static const char *close_internal_file = "</internal-file>\n";

static const char *application_mimetype = "application"; /* add "/octet-stream"? */

/* restype attribute values, one per resource type. */
static const char *alias_restype     = "x-icu-alias";
static const char *array_restype     = "x-icu-array";
static const char *binary_restype    = "x-icu-binary";
static const char *integer_restype   = "x-icu-integer";
static const char *intvector_restype = "x-icu-intvector";
static const char *table_restype     = "x-icu-table";
614
615static void
729e4ab9 616string_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
617
618 char *sid = NULL;
619 char* buf = NULL;
620 int32_t bufLen = 0;
621
622 if(status==NULL || U_FAILURE(*status)){
623 return;
624 }
625
729e4ab9 626 sid = printContainer(res, trans_unit, NULL, NULL, id, status);
46f4442e 627
729e4ab9 628 write_tabs(out);
46f4442e 629
729e4ab9 630 write_utf8_file(out, UnicodeString(source));
46f4442e
A
631
632 buf = convertAndEscape(&buf, 0, &bufLen, res->u.fString.fChars, res->u.fString.fLength, status);
633
634 if (U_FAILURE(*status)) {
635 return;
636 }
637
729e4ab9
A
638 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
639 write_utf8_file(out, UnicodeString(close_source));
46f4442e 640
729e4ab9 641 printNoteElements(&res->fComment, status);
46f4442e
A
642
643 tabCount -= 1;
729e4ab9 644 write_tabs(out);
46f4442e 645
729e4ab9 646 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
647
648 uprv_free(buf);
649 uprv_free(sid);
650}
651
652static void
729e4ab9 653alias_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
654 char *sid = NULL;
655 char* buf = NULL;
656 int32_t bufLen=0;
657
729e4ab9 658 sid = printContainer(res, trans_unit, alias_restype, NULL, id, status);
46f4442e 659
729e4ab9 660 write_tabs(out);
46f4442e 661
729e4ab9 662 write_utf8_file(out, UnicodeString(source));
46f4442e
A
663
664 buf = convertAndEscape(&buf, 0, &bufLen, res->u.fString.fChars, res->u.fString.fLength, status);
729e4ab9 665
46f4442e
A
666 if(U_FAILURE(*status)){
667 return;
668 }
729e4ab9
A
669 write_utf8_file(out, UnicodeString(buf, bufLen, "UTF-8"));
670 write_utf8_file(out, UnicodeString(close_source));
46f4442e 671
729e4ab9 672 printNoteElements(&res->fComment, status);
46f4442e
A
673
674 tabCount -= 1;
729e4ab9 675 write_tabs(out);
46f4442e 676
729e4ab9 677 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
678
679 uprv_free(buf);
680 uprv_free(sid);
681}
682
683static void
729e4ab9 684array_write_xml(struct SResource *res, const char* id, const char* language, UErrorCode *status) {
46f4442e
A
685 char* sid = NULL;
686 int index = 0;
687
688 struct SResource *current = NULL;
46f4442e 689
729e4ab9 690 sid = printContainer(res, group, array_restype, NULL, id, status);
46f4442e
A
691
692 current = res->u.fArray.fFirst;
46f4442e
A
693
694 while (current != NULL) {
695 char c[256] = {0};
696 char* subId = NULL;
697
698 itostr(c, index, 10, 0);
699 index += 1;
700 subId = getID(sid, c, subId);
701
729e4ab9 702 res_write_xml(current, subId, language, FALSE, status);
46f4442e
A
703 uprv_free(subId);
704 subId = NULL;
705
706 if(U_FAILURE(*status)){
707 return;
708 }
709
710 current = current->fNext;
711 }
712
713 tabCount -= 1;
729e4ab9
A
714 write_tabs(out);
715 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
716
717 uprv_free(sid);
718}
719
720static void
729e4ab9 721intvector_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
722 char* sid = NULL;
723 char* ivd = NULL;
724 uint32_t i=0;
725 uint32_t len=0;
726 char buf[256] = {'0'};
727
729e4ab9 728 sid = printContainer(res, group, intvector_restype, NULL, id, status);
46f4442e
A
729
730 for(i = 0; i < res->u.fIntVector.fCount; i += 1) {
731 char c[256] = {0};
732
733 itostr(c, i, 10, 0);
734 ivd = getID(sid, c, ivd);
735 len = itostr(buf, res->u.fIntVector.fArray[i], 10, 0);
736
729e4ab9
A
737 write_tabs(out);
738 write_utf8_file(out, UnicodeString("<"));
739 write_utf8_file(out, UnicodeString(trans_unit));
46f4442e 740
729e4ab9
A
741 printAttribute("id", ivd, (int32_t)uprv_strlen(ivd));
742 printAttribute("restype", integer_restype, (int32_t) strlen(integer_restype));
46f4442e 743
729e4ab9 744 write_utf8_file(out, UnicodeString(">\n"));
46f4442e
A
745
746 tabCount += 1;
729e4ab9
A
747 write_tabs(out);
748 write_utf8_file(out, UnicodeString(source));
46f4442e 749
729e4ab9 750 write_utf8_file(out, UnicodeString(buf, len));
46f4442e 751
729e4ab9 752 write_utf8_file(out, UnicodeString(close_source));
46f4442e 753 tabCount -= 1;
729e4ab9
A
754 write_tabs(out);
755 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
756
757 uprv_free(ivd);
758 ivd = NULL;
759 }
760
761 tabCount -= 1;
729e4ab9 762 write_tabs(out);
46f4442e 763
729e4ab9 764 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
765 uprv_free(sid);
766 sid = NULL;
767}
768
769static void
729e4ab9 770int_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
771 char* sid = NULL;
772 char buf[256] = {0};
773 uint32_t len = 0;
774
729e4ab9 775 sid = printContainer(res, trans_unit, integer_restype, NULL, id, status);
46f4442e 776
729e4ab9 777 write_tabs(out);
46f4442e 778
729e4ab9 779 write_utf8_file(out, UnicodeString(source));
46f4442e
A
780
781 len = itostr(buf, res->u.fIntValue.fValue, 10, 0);
729e4ab9 782 write_utf8_file(out, UnicodeString(buf, len));
46f4442e 783
729e4ab9 784 write_utf8_file(out, UnicodeString(close_source));
46f4442e 785
729e4ab9 786 printNoteElements(&res->fComment, status);
46f4442e
A
787
788 tabCount -= 1;
729e4ab9 789 write_tabs(out);
46f4442e 790
729e4ab9 791 write_utf8_file(out, UnicodeString(close_trans_unit));
46f4442e
A
792
793 uprv_free(sid);
794 sid = NULL;
795}
796
797static void
729e4ab9 798bin_write_xml(struct SResource *res, const char* id, const char* /*language*/, UErrorCode *status) {
46f4442e
A
799 const char* m_type = application_mimetype;
800 char* sid = NULL;
801 uint32_t crc = 0xFFFFFFFF;
802
803 char fileName[1024] ={0};
804 int32_t tLen = ( outDir == NULL) ? 0 :(int32_t)uprv_strlen(outDir);
805 char* fn = (char*) uprv_malloc(sizeof(char) * (tLen+1024 +
806 (res->u.fBinaryValue.fFileName !=NULL ?
807 uprv_strlen(res->u.fBinaryValue.fFileName) :0)));
808 const char* ext = NULL;
809
810 char* f = NULL;
811
812 fn[0]=0;
813
814 if(res->u.fBinaryValue.fFileName != NULL){
815 uprv_strcpy(fileName, res->u.fBinaryValue.fFileName);
816 f = uprv_strrchr(fileName, '\\');
817
818 if (f != NULL) {
819 f++;
820 } else {
821 f = fileName;
822 }
823
824 ext = uprv_strrchr(fileName, '.');
825
826 if (ext == NULL) {
827 fprintf(stderr, "Error: %s is an unknown binary filename type.\n", fileName);
828 exit(U_ILLEGAL_ARGUMENT_ERROR);
829 }
830
831 if(uprv_strcmp(ext, ".jpg")==0 || uprv_strcmp(ext, ".jpeg")==0 || uprv_strcmp(ext, ".gif")==0 ){
832 m_type = "image";
833 } else if(uprv_strcmp(ext, ".wav")==0 || uprv_strcmp(ext, ".au")==0 ){
834 m_type = "audio";
835 } else if(uprv_strcmp(ext, ".avi")==0 || uprv_strcmp(ext, ".mpg")==0 || uprv_strcmp(ext, ".mpeg")==0){
836 m_type = "video";
837 } else if(uprv_strcmp(ext, ".txt")==0 || uprv_strcmp(ext, ".text")==0){
838 m_type = "text";
839 }
840
729e4ab9 841 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
46f4442e 842
729e4ab9 843 write_tabs(out);
46f4442e 844
729e4ab9 845 write_utf8_file(out, UnicodeString(bin_source));
46f4442e
A
846
847 tabCount+= 1;
729e4ab9 848 write_tabs(out);
46f4442e 849
729e4ab9
A
850 write_utf8_file(out, UnicodeString(external_file));
851 printAttribute("href", f, (int32_t)uprv_strlen(f));
852 write_utf8_file(out, UnicodeString("/>\n"));
46f4442e 853 tabCount -= 1;
729e4ab9 854 write_tabs(out);
46f4442e 855
729e4ab9 856 write_utf8_file(out, UnicodeString(close_bin_source));
46f4442e 857
729e4ab9 858 printNoteElements(&res->fComment, status);
46f4442e 859 tabCount -= 1;
729e4ab9
A
860 write_tabs(out);
861 write_utf8_file(out, UnicodeString(close_bin_unit));
46f4442e
A
862 } else {
863 char temp[256] = {0};
864 uint32_t i = 0;
865 int32_t len=0;
866
729e4ab9 867 sid = printContainer(res, bin_unit, binary_restype, m_type, id, status);
46f4442e 868
729e4ab9
A
869 write_tabs(out);
870 write_utf8_file(out, UnicodeString(bin_source));
46f4442e
A
871
872 tabCount += 1;
729e4ab9 873 write_tabs(out);
46f4442e 874
729e4ab9
A
875 write_utf8_file(out, UnicodeString(internal_file));
876 printAttribute("form", application_mimetype, (int32_t) uprv_strlen(application_mimetype));
46f4442e
A
877
878 while(i <res->u.fBinaryValue.fLength){
879 len = itostr(temp, res->u.fBinaryValue.fData[i], 16, 2);
880 crc = computeCRC(temp, len, crc);
881 i++;
882 }
883
884 len = itostr(temp, crc, 10, 0);
729e4ab9 885 printAttribute("crc", temp, len);
46f4442e 886
729e4ab9 887 write_utf8_file(out, UnicodeString(">"));
46f4442e
A
888
889 i = 0;
890 while(i <res->u.fBinaryValue.fLength){
891 len = itostr(temp, res->u.fBinaryValue.fData[i], 16, 2);
729e4ab9 892 write_utf8_file(out, UnicodeString(temp));
46f4442e
A
893 i += 1;
894 }
895
729e4ab9 896 write_utf8_file(out, UnicodeString(close_internal_file));
46f4442e
A
897
898 tabCount -= 2;
729e4ab9 899 write_tabs(out);
46f4442e 900
729e4ab9
A
901 write_utf8_file(out, UnicodeString(close_bin_source));
902 printNoteElements(&res->fComment, status);
46f4442e
A
903
904 tabCount -= 1;
729e4ab9
A
905 write_tabs(out);
906 write_utf8_file(out, UnicodeString(close_bin_unit));
46f4442e
A
907
908 uprv_free(sid);
909 sid = NULL;
910 }
911
912 uprv_free(fn);
913}
914
915
916
917static void
729e4ab9 918table_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
46f4442e
A
919
920 uint32_t i = 0;
921
922 struct SResource *current = NULL;
46f4442e
A
923 char* sid = NULL;
924
925 if (U_FAILURE(*status)) {
926 return ;
927 }
928
729e4ab9 929 sid = printContainer(res, group, table_restype, NULL, id, status);
46f4442e
A
930
931 if(isTopLevel) {
932 sid[0] = '\0';
933 }
934
4388f060 935 current = res->u.fTable.fFirst;
46f4442e
A
936 i = 0;
937
938 while (current != NULL) {
729e4ab9 939 res_write_xml(current, sid, language, FALSE, status);
46f4442e
A
940
941 if(U_FAILURE(*status)){
942 return;
943 }
944
945 i += 1;
946 current = current->fNext;
947 }
948
949 tabCount -= 1;
729e4ab9 950 write_tabs(out);
46f4442e 951
729e4ab9 952 write_utf8_file(out, UnicodeString(close_group));
46f4442e
A
953
954 uprv_free(sid);
955 sid = NULL;
956}
957
958void
729e4ab9 959res_write_xml(struct SResource *res, const char* id, const char* language, UBool isTopLevel, UErrorCode *status) {
46f4442e
A
960
961 if (U_FAILURE(*status)) {
962 return ;
963 }
964
965 if (res != NULL) {
966 switch (res->fType) {
967 case URES_STRING:
729e4ab9 968 string_write_xml (res, id, language, status);
46f4442e
A
969 return;
970
971 case URES_ALIAS:
729e4ab9 972 alias_write_xml (res, id, language, status);
46f4442e
A
973 return;
974
975 case URES_INT_VECTOR:
729e4ab9 976 intvector_write_xml (res, id, language, status);
46f4442e
A
977 return;
978
979 case URES_BINARY:
729e4ab9 980 bin_write_xml (res, id, language, status);
46f4442e
A
981 return;
982
983 case URES_INT:
729e4ab9 984 int_write_xml (res, id, language, status);
46f4442e
A
985 return;
986
987 case URES_ARRAY:
729e4ab9 988 array_write_xml (res, id, language, status);
46f4442e
A
989 return;
990
991 case URES_TABLE:
729e4ab9 992 table_write_xml (res, id, language, isTopLevel, status);
46f4442e
A
993 return;
994
995 default:
996 break;
997 }
998 }
999
1000 *status = U_INTERNAL_PROGRAM_ERROR;
1001}
1002
1003void
1004bundle_write_xml(struct SRBRoot *bundle, const char *outputDir,const char* outputEnc, const char* filename,
1005 char *writtenFilename, int writtenFilenameLen,
1006 const char* language, const char* outFileName, UErrorCode *status) {
1007
46f4442e
A
1008 char* xmlfileName = NULL;
1009 char* outputFileName = NULL;
1010 char* originalFileName = NULL;
1011 const char* fileStart = "<file xml:space = \"preserve\" source-language = \"";
1012 const char* file1 = "\" datatype = \"x-icu-resource-bundle\" ";
1013 const char* file2 = "original = \"";
1014 const char* file4 = "\" date = \"";
1015 const char* fileEnd = "</file>\n";
1016 const char* headerStart = "<header>\n";
1017 const char* headerEnd = "</header>\n";
1018 const char* bodyStart = "<body>\n";
1019 const char* bodyEnd = "</body>\n";
1020
1021 const char *tool_start = "<tool";
1022 const char *tool_id = "genrb-" GENRB_VERSION "-icu-" U_ICU_VERSION;
1023 const char *tool_name = "genrb";
1024
1025 char* temp = NULL;
1026 char* lang = NULL;
1027 const char* pos = NULL;
1028 int32_t first, index;
1029 time_t currTime;
1030 char timeBuf[128];
1031
1032 outDir = outputDir;
1033
1034 srBundle = bundle;
1035
1036 pos = uprv_strrchr(filename, '\\');
1037 if(pos != NULL) {
1038 first = (int32_t)(pos - filename + 1);
1039 } else {
1040 first = 0;
1041 }
1042 index = (int32_t)(uprv_strlen(filename) - uprv_strlen(textExt) - first);
1043 originalFileName = (char *)uprv_malloc(sizeof(char)*index+1);
1044 uprv_memset(originalFileName, 0, sizeof(char)*index+1);
1045 uprv_strncpy(originalFileName, filename + first, index);
1046
1047 if(uprv_strcmp(originalFileName, srBundle->fLocale) != 0) {
1048 fprintf(stdout, "Warning: The file name is not same as the resource name!\n");
1049 }
1050
1051 temp = originalFileName;
1052 originalFileName = (char *)uprv_malloc(sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1053 uprv_memset(originalFileName, 0, sizeof(char)* (uprv_strlen(temp)+uprv_strlen(textExt)) + 1);
1054 uprv_strcat(originalFileName, temp);
1055 uprv_strcat(originalFileName, textExt);
1056 uprv_free(temp);
1057 temp = NULL;
1058
1059
1060 if (language == NULL) {
1061/* lang = parseFilename(filename, lang);
1062 if (lang == NULL) {*/
1063 /* now check if locale name is valid or not
1064 * this is to cater for situation where
1065 * pegasusServer.txt contains
1066 *
1067 * en{
1068 * ..
1069 * }
1070 */
1071 lang = parseFilename(srBundle->fLocale, lang);
1072 /*
1073 * Neither the file name nor the table name inside the
1074 * txt file contain a valid country and language codes
1075 * throw an error.
1076 * pegasusServer.txt contains
1077 *
1078 * testelements{
1079 * ....
1080 * }
1081 */
1082 if(lang==NULL){
1083 fprintf(stderr, "Error: The file name and table name do not contain a valid language code. Please use -l option to specify it.\n");
1084 exit(U_ILLEGAL_ARGUMENT_ERROR);
1085 }
1086 /* }*/
1087 } else {
1088 lang = (char *)uprv_malloc(sizeof(char)*uprv_strlen(language) +1);
1089 uprv_memset(lang, 0, sizeof(char)*uprv_strlen(language) +1);
1090 uprv_strcpy(lang, language);
1091 }
1092
1093 if(outFileName) {
1094 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(outFileName) + 1);
1095 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(outFileName) + 1);
1096 uprv_strcpy(outputFileName,outFileName);
1097 } else {
1098 outputFileName = (char *)uprv_malloc(sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1099 uprv_memset(outputFileName, 0, sizeof(char)*uprv_strlen(srBundle->fLocale) + 1);
1100 uprv_strcpy(outputFileName,srBundle->fLocale);
1101 }
1102
1103 if(outputDir) {
1104 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputDir) + uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1105 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputDir)+ uprv_strlen(outputFileName) + uprv_strlen(xliffExt) + 1) +1);
1106 } else {
1107 xmlfileName = (char *)uprv_malloc(sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1108 uprv_memset(xmlfileName, 0, sizeof(char)*(uprv_strlen(outputFileName) + uprv_strlen(xliffExt)) +1);
1109 }
1110
1111 if(outputDir){
1112 uprv_strcpy(xmlfileName, outputDir);
1113 if(outputDir[uprv_strlen(outputDir)-1] !=U_FILE_SEP_CHAR){
1114 uprv_strcat(xmlfileName,U_FILE_SEP_STRING);
1115 }
1116 }
1117 uprv_strcat(xmlfileName,outputFileName);
1118 uprv_strcat(xmlfileName,xliffExt);
1119
1120 if (writtenFilename) {
1121 uprv_strncpy(writtenFilename, xmlfileName, writtenFilenameLen);
1122 }
1123
1124 if (U_FAILURE(*status)) {
1125 goto cleanup_bundle_write_xml;
1126 }
1127
1128 out= T_FileStream_open(xmlfileName,"w");
1129
1130 if(out==NULL){
1131 *status = U_FILE_ACCESS_ERROR;
1132 goto cleanup_bundle_write_xml;
1133 }
57a6839d 1134 write_utf8_file(out, UnicodeString(xmlHeader));
46f4442e
A
1135
1136 if(outputEnc && *outputEnc!='\0'){
1137 /* store the output encoding */
1138 enc = outputEnc;
1139 conv=ucnv_open(enc,status);
1140 if(U_FAILURE(*status)){
1141 goto cleanup_bundle_write_xml;
1142 }
1143 }
57a6839d 1144 write_utf8_file(out, UnicodeString(bundleStart));
729e4ab9 1145 write_tabs(out);
57a6839d 1146 write_utf8_file(out, UnicodeString(fileStart));
46f4442e
A
1147 /* check if lang and language are the same */
1148 if(language != NULL && uprv_strcmp(lang, srBundle->fLocale)!=0){
1149 fprintf(stderr,"Warning: The top level tag in the resource and language specified are not the same. Please check the input.\n");
1150 }
729e4ab9
A
1151 write_utf8_file(out, UnicodeString(lang));
1152 write_utf8_file(out, UnicodeString(file1));
1153 write_utf8_file(out, UnicodeString(file2));
1154 write_utf8_file(out, UnicodeString(originalFileName));
1155 write_utf8_file(out, UnicodeString(file4));
46f4442e
A
1156
1157 time(&currTime);
1158 strftime(timeBuf, sizeof(timeBuf), "%Y-%m-%dT%H:%M:%SZ", gmtime(&currTime));
729e4ab9
A
1159 write_utf8_file(out, UnicodeString(timeBuf));
1160 write_utf8_file(out, UnicodeString("\">\n"));
46f4442e
A
1161
1162 tabCount += 1;
729e4ab9 1163 write_tabs(out);
57a6839d 1164 write_utf8_file(out, UnicodeString(headerStart));
46f4442e
A
1165
1166 tabCount += 1;
729e4ab9 1167 write_tabs(out);
46f4442e 1168
57a6839d 1169 write_utf8_file(out, UnicodeString(tool_start));
729e4ab9
A
1170 printAttribute("tool-id", tool_id, (int32_t) uprv_strlen(tool_id));
1171 printAttribute("tool-name", tool_name, (int32_t) uprv_strlen(tool_name));
1172 write_utf8_file(out, UnicodeString("/>\n"));
46f4442e
A
1173
1174 tabCount -= 1;
729e4ab9 1175 write_tabs(out);
46f4442e 1176
729e4ab9 1177 write_utf8_file(out, UnicodeString(headerEnd));
46f4442e 1178
729e4ab9 1179 write_tabs(out);
46f4442e
A
1180 tabCount += 1;
1181
729e4ab9 1182 write_utf8_file(out, UnicodeString(bodyStart));
46f4442e
A
1183
1184
729e4ab9 1185 res_write_xml(bundle->fRoot, bundle->fLocale, lang, TRUE, status);
46f4442e
A
1186
1187 tabCount -= 1;
729e4ab9 1188 write_tabs(out);
46f4442e 1189
729e4ab9 1190 write_utf8_file(out, UnicodeString(bodyEnd));
46f4442e 1191 tabCount--;
729e4ab9
A
1192 write_tabs(out);
1193 write_utf8_file(out, UnicodeString(fileEnd));
46f4442e 1194 tabCount--;
729e4ab9
A
1195 write_tabs(out);
1196 write_utf8_file(out, UnicodeString(bundleEnd));
46f4442e
A
1197 T_FileStream_close(out);
1198
1199 ucnv_close(conv);
1200
1201cleanup_bundle_write_xml:
1202 uprv_free(originalFileName);
1203 uprv_free(lang);
1204 if(xmlfileName != NULL) {
1205 uprv_free(xmlfileName);
1206 }
1207 if(outputFileName != NULL){
1208 uprv_free(outputFileName);
1209 }
1210}