]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkg_genc.c
ICU-491.11.2.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkg_genc.c
1 /******************************************************************************
2 * Copyright (C) 2009-2011, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #if U_PLATFORM_HAS_WIN32_API
9 # define VC_EXTRALEAN
10 # define WIN32_LEAN_AND_MEAN
11 # define NOUSER
12 # define NOSERVICE
13 # define NOIME
14 # define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 # ifdef __GNUC__
18 # define WINDOWS_WITH_GNUC
19 # endif
20 #endif
21
22 #if U_PLATFORM_IS_LINUX_BASED
23 # define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 # include <elf.h>
28 # if defined(ELFCLASS64)
29 # define U_ELF64
30 # endif
31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 # ifndef EM_X86_64
33 # define EM_X86_64 62
34 # endif
35 # define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
49 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51 #define HEX_0X 0 /* 0x1234 */
52 #define HEX_0H 1 /* 01234h */
53
54
55 /* prototypes --------------------------------------------------------------- */
56 static void
57 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
58
59 static uint32_t
60 write8(FileStream *out, uint8_t byte, uint32_t column);
61
62 static uint32_t
63 write32(FileStream *out, uint32_t byte, uint32_t column);
64
65 #if U_PLATFORM == U_PF_OS400
66 static uint32_t
67 write8str(FileStream *out, uint8_t byte, uint32_t column);
68 #endif
69 /* -------------------------------------------------------------------------- */
70
71 /*
72 Creating Template Files for New Platforms
73
74 Let the cc compiler help you get started.
75 Compile this program
76 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
77 with the -S option to produce assembly output.
78
79 For example, this will generate array.s:
80 gcc -S array.c
81
82 This will produce a .s file that may look like this:
83
84 .file "array.c"
85 .version "01.01"
86 gcc2_compiled.:
87 .globl x
88 .section .rodata
89 .align 4
90 .type x,@object
91 .size x,20
92 x:
93 .long 1
94 .long 2
95 .long -559038737
96 .long -1
97 .long 16
98 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
99
100 which gives a starting point that will compile, and can be transformed
101 to become the template, generally with some consulting of as docs and
102 some experimentation.
103
104 If you want ICU to automatically use this assembly, you should
105 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
106 where the name is the compiler or platform that you used in this
107 assemblyHeader data structure.
108 */
109 static const struct AssemblyType {
110 const char *name;
111 const char *header;
112 const char *beginLine;
113 const char *footer;
114 int8_t hexType; /* HEX_0X or HEX_0h */
115 } assemblyHeader[] = {
116 {"gcc",
117 ".globl %s\n"
118 "\t.section .note.GNU-stack,\"\",%%progbits\n"
119 "\t.section .rodata\n"
120 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
121 "\t.type %s,%%object\n"
122 "%s:\n\n",
123
124 ".long ","",HEX_0X
125 },
126 {"gcc-darwin",
127 /*"\t.section __TEXT,__text,regular,pure_instructions\n"
128 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
129 ".globl _%s\n"
130 "\t.data\n"
131 "\t.const\n"
132 "\t.align 4\n" /* 1<<4 = 16 */
133 "_%s:\n\n",
134
135 ".long ","",HEX_0X
136 },
137 {"gcc-cygwin",
138 ".globl _%s\n"
139 "\t.section .rodata\n"
140 "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
141 "_%s:\n\n",
142
143 ".long ","",HEX_0X
144 },
145 {"sun",
146 "\t.section \".rodata\"\n"
147 "\t.align 8\n"
148 ".globl %s\n"
149 "%s:\n",
150
151 ".word ","",HEX_0X
152 },
153 {"sun-x86",
154 "Drodata.rodata:\n"
155 "\t.type Drodata.rodata,@object\n"
156 "\t.size Drodata.rodata,0\n"
157 "\t.globl %s\n"
158 "\t.align 8\n"
159 "%s:\n",
160
161 ".4byte ","",HEX_0X
162 },
163 {"xlc",
164 ".globl %s{RO}\n"
165 "\t.toc\n"
166 "%s:\n"
167 "\t.csect %s{RO}, 4\n",
168
169 ".long ","",HEX_0X
170 },
171 {"aCC-ia64",
172 "\t.file \"%s.s\"\n"
173 "\t.type %s,@object\n"
174 "\t.global %s\n"
175 "\t.secalias .abe$0.rodata, \".rodata\"\n"
176 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
177 "\t.align 16\n"
178 "%s::\t",
179
180 "data4 ","",HEX_0X
181 },
182 {"aCC-parisc",
183 "\t.SPACE $TEXT$\n"
184 "\t.SUBSPA $LIT$\n"
185 "%s\n"
186 "\t.EXPORT %s\n"
187 "\t.ALIGN 16\n",
188
189 ".WORD ","",HEX_0X
190 },
191 { "masm",
192 "\tTITLE %s\n"
193 "; generated by genccode\n"
194 ".386\n"
195 ".model flat\n"
196 "\tPUBLIC _%s\n"
197 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
198 "\tALIGN 16\n"
199 "_%s\tLABEL DWORD\n",
200 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
201 }
202 };
203
204 static int32_t assemblyHeaderIndex = -1;
205 static int32_t hexType = HEX_0X;
206
207 U_CAPI UBool U_EXPORT2
208 checkAssemblyHeaderName(const char* optAssembly) {
209 int32_t idx;
210 assemblyHeaderIndex = -1;
211 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
212 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
213 assemblyHeaderIndex = idx;
214 hexType = assemblyHeader[idx].hexType; /* set the hex type */
215 return TRUE;
216 }
217 }
218
219 return FALSE;
220 }
221
222
223 U_CAPI void U_EXPORT2
224 printAssemblyHeadersToStdErr(void) {
225 int32_t idx;
226 fprintf(stderr, "%s", assemblyHeader[0].name);
227 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
228 fprintf(stderr, ", %s", assemblyHeader[idx].name);
229 }
230 fprintf(stderr,
231 ")\n");
232 }
233
234 U_CAPI void U_EXPORT2
235 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
236 uint32_t column = MAX_COLUMN;
237 char entry[64];
238 uint32_t buffer[1024];
239 char *bufferStr = (char *)buffer;
240 FileStream *in, *out;
241 size_t i, length;
242
243 in=T_FileStream_open(filename, "rb");
244 if(in==NULL) {
245 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
246 exit(U_FILE_ACCESS_ERROR);
247 }
248
249 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
250 out=T_FileStream_open(bufferStr, "w");
251 if(out==NULL) {
252 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
253 exit(U_FILE_ACCESS_ERROR);
254 }
255
256 if (outFilePath != NULL) {
257 uprv_strcpy(outFilePath, bufferStr);
258 }
259
260 #ifdef WINDOWS_WITH_GNUC
261 /* Need to fix the file seperator character when using MinGW. */
262 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
263 #endif
264
265 if(optEntryPoint != NULL) {
266 uprv_strcpy(entry, optEntryPoint);
267 uprv_strcat(entry, "_dat");
268 }
269
270 /* turn dashes or dots in the entry name into underscores */
271 length=uprv_strlen(entry);
272 for(i=0; i<length; ++i) {
273 if(entry[i]=='-' || entry[i]=='.') {
274 entry[i]='_';
275 }
276 }
277
278 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
279 entry, entry, entry, entry,
280 entry, entry, entry, entry);
281 T_FileStream_writeLine(out, bufferStr);
282 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
283
284 for(;;) {
285 length=T_FileStream_read(in, buffer, sizeof(buffer));
286 if(length==0) {
287 break;
288 }
289 if (length != sizeof(buffer)) {
290 /* pad with extra 0's when at the end of the file */
291 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
292 buffer[length+i] = 0;
293 }
294 }
295 for(i=0; i<(length/sizeof(buffer[0])); i++) {
296 column = write32(out, buffer[i], column);
297 }
298 }
299
300 T_FileStream_writeLine(out, "\n");
301
302 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
303 entry, entry, entry, entry,
304 entry, entry, entry, entry);
305 T_FileStream_writeLine(out, bufferStr);
306
307 if(T_FileStream_error(in)) {
308 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
309 exit(U_FILE_ACCESS_ERROR);
310 }
311
312 if(T_FileStream_error(out)) {
313 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
314 exit(U_FILE_ACCESS_ERROR);
315 }
316
317 T_FileStream_close(out);
318 T_FileStream_close(in);
319 }
320
321 U_CAPI void U_EXPORT2
322 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
323 uint32_t column = MAX_COLUMN;
324 char buffer[4096], entry[64];
325 FileStream *in, *out;
326 size_t i, length;
327
328 in=T_FileStream_open(filename, "rb");
329 if(in==NULL) {
330 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
331 exit(U_FILE_ACCESS_ERROR);
332 }
333
334 if(optName != NULL) { /* prepend 'icudt28_' */
335 strcpy(entry, optName);
336 strcat(entry, "_");
337 } else {
338 entry[0] = 0;
339 }
340
341 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
342 if (outFilePath != NULL) {
343 uprv_strcpy(outFilePath, buffer);
344 }
345 out=T_FileStream_open(buffer, "w");
346 if(out==NULL) {
347 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
348 exit(U_FILE_ACCESS_ERROR);
349 }
350
351 /* turn dashes or dots in the entry name into underscores */
352 length=uprv_strlen(entry);
353 for(i=0; i<length; ++i) {
354 if(entry[i]=='-' || entry[i]=='.') {
355 entry[i]='_';
356 }
357 }
358
359 #if U_PLATFORM == U_PF_OS400
360 /*
361 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
362
363 This is here because this platform can't currently put
364 const data into the read-only pages of an object or
365 shared library (service program). Only strings are allowed in read-only
366 pages, so we use char * strings to store the data.
367
368 In order to prevent the beginning of the data from ever matching the
369 magic numbers we must still use the initial double.
370 [grhoten 4/24/2003]
371 */
372 sprintf(buffer,
373 "#ifndef IN_GENERATED_CCODE\n"
374 "#define IN_GENERATED_CCODE\n"
375 "#define U_DISABLE_RENAMING 1\n"
376 "#include \"unicode/umachine.h\"\n"
377 "#endif\n"
378 "U_CDECL_BEGIN\n"
379 "const struct {\n"
380 " double bogus;\n"
381 " const char *bytes; \n"
382 "} %s={ 0.0, \n",
383 entry);
384 T_FileStream_writeLine(out, buffer);
385
386 for(;;) {
387 length=T_FileStream_read(in, buffer, sizeof(buffer));
388 if(length==0) {
389 break;
390 }
391 for(i=0; i<length; ++i) {
392 column = write8str(out, (uint8_t)buffer[i], column);
393 }
394 }
395
396 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
397 #else
398 /* Function renaming shouldn't be done in data */
399 sprintf(buffer,
400 "#ifndef IN_GENERATED_CCODE\n"
401 "#define IN_GENERATED_CCODE\n"
402 "#define U_DISABLE_RENAMING 1\n"
403 "#include \"unicode/umachine.h\"\n"
404 "#endif\n"
405 "U_CDECL_BEGIN\n"
406 "const struct {\n"
407 " double bogus;\n"
408 " uint8_t bytes[%ld]; \n"
409 "} %s={ 0.0, {\n",
410 (long)T_FileStream_size(in), entry);
411 T_FileStream_writeLine(out, buffer);
412
413 for(;;) {
414 length=T_FileStream_read(in, buffer, sizeof(buffer));
415 if(length==0) {
416 break;
417 }
418 for(i=0; i<length; ++i) {
419 column = write8(out, (uint8_t)buffer[i], column);
420 }
421 }
422
423 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
424 #endif
425
426 if(T_FileStream_error(in)) {
427 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
428 exit(U_FILE_ACCESS_ERROR);
429 }
430
431 if(T_FileStream_error(out)) {
432 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
433 exit(U_FILE_ACCESS_ERROR);
434 }
435
436 T_FileStream_close(out);
437 T_FileStream_close(in);
438 }
439
440 static uint32_t
441 write32(FileStream *out, uint32_t bitField, uint32_t column) {
442 int32_t i;
443 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
444 char *s = bitFieldStr;
445 uint8_t *ptrIdx = (uint8_t *)&bitField;
446 static const char hexToStr[16] = {
447 '0','1','2','3',
448 '4','5','6','7',
449 '8','9','A','B',
450 'C','D','E','F'
451 };
452
453 /* write the value, possibly with comma and newline */
454 if(column==MAX_COLUMN) {
455 /* first byte */
456 column=1;
457 } else if(column<32) {
458 *(s++)=',';
459 ++column;
460 } else {
461 *(s++)='\n';
462 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
463 s+=uprv_strlen(s);
464 column=1;
465 }
466
467 if (bitField < 10) {
468 /* It's a small number. Don't waste the space for 0x */
469 *(s++)=hexToStr[bitField];
470 }
471 else {
472 int seenNonZero = 0; /* This is used to remove leading zeros */
473
474 if(hexType==HEX_0X) {
475 *(s++)='0';
476 *(s++)='x';
477 } else if(hexType==HEX_0H) {
478 *(s++)='0';
479 }
480
481 /* This creates a 32-bit field */
482 #if U_IS_BIG_ENDIAN
483 for (i = 0; i < sizeof(uint32_t); i++)
484 #else
485 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
486 #endif
487 {
488 uint8_t value = ptrIdx[i];
489 if (value || seenNonZero) {
490 *(s++)=hexToStr[value>>4];
491 *(s++)=hexToStr[value&0xF];
492 seenNonZero = 1;
493 }
494 }
495 if(hexType==HEX_0H) {
496 *(s++)='h';
497 }
498 }
499
500 *(s++)=0;
501 T_FileStream_writeLine(out, bitFieldStr);
502 return column;
503 }
504
505 static uint32_t
506 write8(FileStream *out, uint8_t byte, uint32_t column) {
507 char s[4];
508 int i=0;
509
510 /* convert the byte value to a string */
511 if(byte>=100) {
512 s[i++]=(char)('0'+byte/100);
513 byte%=100;
514 }
515 if(i>0 || byte>=10) {
516 s[i++]=(char)('0'+byte/10);
517 byte%=10;
518 }
519 s[i++]=(char)('0'+byte);
520 s[i]=0;
521
522 /* write the value, possibly with comma and newline */
523 if(column==MAX_COLUMN) {
524 /* first byte */
525 column=1;
526 } else if(column<16) {
527 T_FileStream_writeLine(out, ",");
528 ++column;
529 } else {
530 T_FileStream_writeLine(out, ",\n");
531 column=1;
532 }
533 T_FileStream_writeLine(out, s);
534 return column;
535 }
536
537 #if U_PLATFORM == U_PF_OS400
538 static uint32_t
539 write8str(FileStream *out, uint8_t byte, uint32_t column) {
540 char s[8];
541
542 if (byte > 7)
543 sprintf(s, "\\x%X", byte);
544 else
545 sprintf(s, "\\%X", byte);
546
547 /* write the value, possibly with comma and newline */
548 if(column==MAX_COLUMN) {
549 /* first byte */
550 column=1;
551 T_FileStream_writeLine(out, "\"");
552 } else if(column<24) {
553 ++column;
554 } else {
555 T_FileStream_writeLine(out, "\"\n\"");
556 column=1;
557 }
558 T_FileStream_writeLine(out, s);
559 return column;
560 }
561 #endif
562
563 static void
564 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
565 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
566
567 /* copy path */
568 if(destdir!=NULL && *destdir!=0) {
569 do {
570 *outFilename++=*destdir++;
571 } while(*destdir!=0);
572 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
573 *outFilename++=U_FILE_SEP_CHAR;
574 }
575 inFilename=basename;
576 } else {
577 while(inFilename<basename) {
578 *outFilename++=*inFilename++;
579 }
580 }
581
582 if(suffix==NULL) {
583 /* the filename does not have a suffix */
584 uprv_strcpy(entryName, inFilename);
585 if(optFilename != NULL) {
586 uprv_strcpy(outFilename, optFilename);
587 } else {
588 uprv_strcpy(outFilename, inFilename);
589 }
590 uprv_strcat(outFilename, newSuffix);
591 } else {
592 char *saveOutFilename = outFilename;
593 /* copy basename */
594 while(inFilename<suffix) {
595 if(*inFilename=='-') {
596 /* iSeries cannot have '-' in the .o objects. */
597 *outFilename++=*entryName++='_';
598 inFilename++;
599 }
600 else {
601 *outFilename++=*entryName++=*inFilename++;
602 }
603 }
604
605 /* replace '.' by '_' */
606 *outFilename++=*entryName++='_';
607 ++inFilename;
608
609 /* copy suffix */
610 while(*inFilename!=0) {
611 *outFilename++=*entryName++=*inFilename++;
612 }
613
614 *entryName=0;
615
616 if(optFilename != NULL) {
617 uprv_strcpy(saveOutFilename, optFilename);
618 uprv_strcat(saveOutFilename, newSuffix);
619 } else {
620 /* add ".c" */
621 uprv_strcpy(outFilename, newSuffix);
622 }
623 }
624 }
625
626 #ifdef CAN_GENERATE_OBJECTS
627 static void
628 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
629 union {
630 char bytes[2048];
631 #ifdef U_ELF
632 Elf32_Ehdr header32;
633 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
634 #elif U_PLATFORM_HAS_WIN32_API
635 IMAGE_FILE_HEADER header;
636 #endif
637 } buffer;
638
639 const char *filename;
640 FileStream *in;
641 int32_t length;
642
643 #ifdef U_ELF
644
645 #elif U_PLATFORM_HAS_WIN32_API
646 const IMAGE_FILE_HEADER *pHeader;
647 #else
648 # error "Unknown platform for CAN_GENERATE_OBJECTS."
649 #endif
650
651 if(optMatchArch != NULL) {
652 filename=optMatchArch;
653 } else {
654 /* set defaults */
655 #ifdef U_ELF
656 /* set EM_386 because elf.h does not provide better defaults */
657 *pCPU=EM_386;
658 *pBits=32;
659 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
660 #elif U_PLATFORM_HAS_WIN32_API
661 /* _M_IA64 should be defined in windows.h */
662 # if defined(_M_IA64)
663 *pCPU=IMAGE_FILE_MACHINE_IA64;
664 # elif defined(_M_AMD64)
665 *pCPU=IMAGE_FILE_MACHINE_AMD64;
666 # else
667 *pCPU=IMAGE_FILE_MACHINE_I386;
668 # endif
669 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
670 *pIsBigEndian=FALSE;
671 #else
672 # error "Unknown platform for CAN_GENERATE_OBJECTS."
673 #endif
674 return;
675 }
676
677 in=T_FileStream_open(filename, "rb");
678 if(in==NULL) {
679 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
680 exit(U_FILE_ACCESS_ERROR);
681 }
682 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
683
684 #ifdef U_ELF
685 if(length<sizeof(Elf32_Ehdr)) {
686 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
687 exit(U_UNSUPPORTED_ERROR);
688 }
689 if(
690 buffer.header32.e_ident[0]!=ELFMAG0 ||
691 buffer.header32.e_ident[1]!=ELFMAG1 ||
692 buffer.header32.e_ident[2]!=ELFMAG2 ||
693 buffer.header32.e_ident[3]!=ELFMAG3 ||
694 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
695 ) {
696 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
697 exit(U_UNSUPPORTED_ERROR);
698 }
699
700 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
701 #ifdef U_ELF64
702 if(*pBits!=32 && *pBits!=64) {
703 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
704 exit(U_UNSUPPORTED_ERROR);
705 }
706 #else
707 if(*pBits!=32) {
708 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
709 exit(U_UNSUPPORTED_ERROR);
710 }
711 #endif
712
713 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
714 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
715 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
716 exit(U_UNSUPPORTED_ERROR);
717 }
718 /* TODO: Support byte swapping */
719
720 *pCPU=buffer.header32.e_machine;
721 #elif U_PLATFORM_HAS_WIN32_API
722 if(length<sizeof(IMAGE_FILE_HEADER)) {
723 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
724 exit(U_UNSUPPORTED_ERROR);
725 }
726 /* TODO: Use buffer.header. Keep aliasing legal. */
727 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
728 *pCPU=pHeader->Machine;
729 /*
730 * The number of bits is implicit with the Machine value.
731 * *pBits is ignored in the calling code, so this need not be precise.
732 */
733 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
734 /* Windows always runs on little-endian CPUs. */
735 *pIsBigEndian=FALSE;
736 #else
737 # error "Unknown platform for CAN_GENERATE_OBJECTS."
738 #endif
739
740 T_FileStream_close(in);
741 }
742
743 U_CAPI void U_EXPORT2
744 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
745 /* common variables */
746 char buffer[4096], entry[40]={ 0 };
747 FileStream *in, *out;
748 const char *newSuffix;
749 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
750
751 uint16_t cpu, bits;
752 UBool makeBigEndian;
753
754 /* platform-specific variables and initialization code */
755 #ifdef U_ELF
756 /* 32-bit Elf file header */
757 static Elf32_Ehdr header32={
758 {
759 /* e_ident[] */
760 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
761 ELFCLASS32,
762 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
763 EV_CURRENT /* EI_VERSION */
764 },
765 ET_REL,
766 EM_386,
767 EV_CURRENT, /* e_version */
768 0, /* e_entry */
769 0, /* e_phoff */
770 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
771 0, /* e_flags */
772 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
773 0, /* e_phentsize */
774 0, /* e_phnum */
775 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
776 5, /* e_shnum */
777 2 /* e_shstrndx */
778 };
779
780 /* 32-bit Elf section header table */
781 static Elf32_Shdr sectionHeaders32[5]={
782 { /* SHN_UNDEF */
783 0
784 },
785 { /* .symtab */
786 1, /* sh_name */
787 SHT_SYMTAB,
788 0, /* sh_flags */
789 0, /* sh_addr */
790 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
791 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
792 3, /* sh_link=sect hdr index of .strtab */
793 1, /* sh_info=One greater than the symbol table index of the last
794 * local symbol (with STB_LOCAL). */
795 4, /* sh_addralign */
796 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
797 },
798 { /* .shstrtab */
799 9, /* sh_name */
800 SHT_STRTAB,
801 0, /* sh_flags */
802 0, /* sh_addr */
803 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
804 40, /* sh_size */
805 0, /* sh_link */
806 0, /* sh_info */
807 1, /* sh_addralign */
808 0 /* sh_entsize */
809 },
810 { /* .strtab */
811 19, /* sh_name */
812 SHT_STRTAB,
813 0, /* sh_flags */
814 0, /* sh_addr */
815 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
816 (Elf32_Word)sizeof(entry), /* sh_size */
817 0, /* sh_link */
818 0, /* sh_info */
819 1, /* sh_addralign */
820 0 /* sh_entsize */
821 },
822 { /* .rodata */
823 27, /* sh_name */
824 SHT_PROGBITS,
825 SHF_ALLOC, /* sh_flags */
826 0, /* sh_addr */
827 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
828 0, /* sh_size */
829 0, /* sh_link */
830 0, /* sh_info */
831 16, /* sh_addralign */
832 0 /* sh_entsize */
833 }
834 };
835
836 /* symbol table */
837 static Elf32_Sym symbols32[2]={
838 { /* STN_UNDEF */
839 0
840 },
841 { /* data entry point */
842 1, /* st_name */
843 0, /* st_value */
844 0, /* st_size */
845 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
846 0, /* st_other */
847 4 /* st_shndx=index of related section table entry */
848 }
849 };
850
851 /* section header string table, with decimal string offsets */
852 static const char sectionStrings[40]=
853 /* 0 */ "\0"
854 /* 1 */ ".symtab\0"
855 /* 9 */ ".shstrtab\0"
856 /* 19 */ ".strtab\0"
857 /* 27 */ ".rodata\0"
858 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
859 /* 40: padded to multiple of 8 bytes */
860
861 /*
862 * Use entry[] for the string table which will contain only the
863 * entry point name.
864 * entry[0] must be 0 (NUL)
865 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
866 */
867
868 /* 16-align .rodata in the .o file, just in case */
869 static const char padding[16]={ 0 };
870 int32_t paddingSize;
871
872 #ifdef U_ELF64
873 /* 64-bit Elf file header */
874 static Elf64_Ehdr header64={
875 {
876 /* e_ident[] */
877 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
878 ELFCLASS64,
879 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
880 EV_CURRENT /* EI_VERSION */
881 },
882 ET_REL,
883 EM_X86_64,
884 EV_CURRENT, /* e_version */
885 0, /* e_entry */
886 0, /* e_phoff */
887 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
888 0, /* e_flags */
889 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
890 0, /* e_phentsize */
891 0, /* e_phnum */
892 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
893 5, /* e_shnum */
894 2 /* e_shstrndx */
895 };
896
897 /* 64-bit Elf section header table */
898 static Elf64_Shdr sectionHeaders64[5]={
899 { /* SHN_UNDEF */
900 0
901 },
902 { /* .symtab */
903 1, /* sh_name */
904 SHT_SYMTAB,
905 0, /* sh_flags */
906 0, /* sh_addr */
907 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
908 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
909 3, /* sh_link=sect hdr index of .strtab */
910 1, /* sh_info=One greater than the symbol table index of the last
911 * local symbol (with STB_LOCAL). */
912 4, /* sh_addralign */
913 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
914 },
915 { /* .shstrtab */
916 9, /* sh_name */
917 SHT_STRTAB,
918 0, /* sh_flags */
919 0, /* sh_addr */
920 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
921 40, /* sh_size */
922 0, /* sh_link */
923 0, /* sh_info */
924 1, /* sh_addralign */
925 0 /* sh_entsize */
926 },
927 { /* .strtab */
928 19, /* sh_name */
929 SHT_STRTAB,
930 0, /* sh_flags */
931 0, /* sh_addr */
932 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
933 (Elf64_Xword)sizeof(entry), /* sh_size */
934 0, /* sh_link */
935 0, /* sh_info */
936 1, /* sh_addralign */
937 0 /* sh_entsize */
938 },
939 { /* .rodata */
940 27, /* sh_name */
941 SHT_PROGBITS,
942 SHF_ALLOC, /* sh_flags */
943 0, /* sh_addr */
944 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
945 0, /* sh_size */
946 0, /* sh_link */
947 0, /* sh_info */
948 16, /* sh_addralign */
949 0 /* sh_entsize */
950 }
951 };
952
953 /*
954 * 64-bit symbol table
955 * careful: different order of items compared with Elf32_sym!
956 */
957 static Elf64_Sym symbols64[2]={
958 { /* STN_UNDEF */
959 0
960 },
961 { /* data entry point */
962 1, /* st_name */
963 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
964 0, /* st_other */
965 4, /* st_shndx=index of related section table entry */
966 0, /* st_value */
967 0 /* st_size */
968 }
969 };
970
971 #endif /* U_ELF64 */
972
973 /* entry[] have a leading NUL */
974 entryOffset=1;
975
976 /* in the common code, count entryLength from after the NUL */
977 entryLengthOffset=1;
978
979 newSuffix=".o";
980
981 #elif U_PLATFORM_HAS_WIN32_API
982 struct {
983 IMAGE_FILE_HEADER fileHeader;
984 IMAGE_SECTION_HEADER sections[2];
985 char linkerOptions[100];
986 } objHeader;
987 IMAGE_SYMBOL symbols[1];
988 struct {
989 DWORD sizeofLongNames;
990 char longNames[100];
991 } symbolNames;
992
993 /*
994 * entry sometimes have a leading '_'
995 * overwritten if entryOffset==0 depending on the target platform
996 * see check for cpu below
997 */
998 entry[0]='_';
999
1000 newSuffix=".obj";
1001 #else
1002 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1003 #endif
1004
1005 /* deal with options, files and the entry point name */
1006 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1007 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
1008 #if U_PLATFORM_HAS_WIN32_API
1009 if(cpu==IMAGE_FILE_MACHINE_I386) {
1010 entryOffset=1;
1011 }
1012 #endif
1013
1014 in=T_FileStream_open(filename, "rb");
1015 if(in==NULL) {
1016 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1017 exit(U_FILE_ACCESS_ERROR);
1018 }
1019 size=T_FileStream_size(in);
1020
1021 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1022 if (outFilePath != NULL) {
1023 uprv_strcpy(outFilePath, buffer);
1024 }
1025
1026 if(optEntryPoint != NULL) {
1027 uprv_strcpy(entry+entryOffset, optEntryPoint);
1028 uprv_strcat(entry+entryOffset, "_dat");
1029 }
1030 /* turn dashes in the entry name into underscores */
1031 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1032 for(i=0; i<entryLength; ++i) {
1033 if(entry[entryLengthOffset+i]=='-') {
1034 entry[entryLengthOffset+i]='_';
1035 }
1036 }
1037
1038 /* open the output file */
1039 out=T_FileStream_open(buffer, "wb");
1040 if(out==NULL) {
1041 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1042 exit(U_FILE_ACCESS_ERROR);
1043 }
1044
1045 #ifdef U_ELF
1046 if(bits==32) {
1047 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1048 header32.e_machine=cpu;
1049
1050 /* 16-align .rodata in the .o file, just in case */
1051 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1052 if(paddingSize!=0) {
1053 paddingSize=0x10-paddingSize;
1054 sectionHeaders32[4].sh_offset+=paddingSize;
1055 }
1056
1057 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1058
1059 symbols32[1].st_size=(Elf32_Word)size;
1060
1061 /* write .o headers */
1062 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1063 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1064 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1065 } else /* bits==64 */ {
1066 #ifdef U_ELF64
1067 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1068 header64.e_machine=cpu;
1069
1070 /* 16-align .rodata in the .o file, just in case */
1071 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1072 if(paddingSize!=0) {
1073 paddingSize=0x10-paddingSize;
1074 sectionHeaders64[4].sh_offset+=paddingSize;
1075 }
1076
1077 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1078
1079 symbols64[1].st_size=(Elf64_Xword)size;
1080
1081 /* write .o headers */
1082 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1083 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1084 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1085 #endif
1086 }
1087
1088 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1089 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1090 if(paddingSize!=0) {
1091 T_FileStream_write(out, padding, paddingSize);
1092 }
1093 #elif U_PLATFORM_HAS_WIN32_API
1094 /* populate the .obj headers */
1095 uprv_memset(&objHeader, 0, sizeof(objHeader));
1096 uprv_memset(&symbols, 0, sizeof(symbols));
1097 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1098
1099 /* write the linker export directive */
1100 uprv_strcpy(objHeader.linkerOptions, "-export:");
1101 length=8;
1102 uprv_strcpy(objHeader.linkerOptions+length, entry);
1103 length+=entryLength;
1104 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1105 length+=6;
1106
1107 /* set the file header */
1108 objHeader.fileHeader.Machine=cpu;
1109 objHeader.fileHeader.NumberOfSections=2;
1110 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1111 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1112 objHeader.fileHeader.NumberOfSymbols=1;
1113
1114 /* set the section for the linker options */
1115 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1116 objHeader.sections[0].SizeOfRawData=length;
1117 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1118 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1119
1120 /* set the data section */
1121 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1122 objHeader.sections[1].SizeOfRawData=size;
1123 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1124 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1125
1126 /* set the symbol table */
1127 if(entryLength<=8) {
1128 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1129 symbolNames.sizeofLongNames=4;
1130 } else {
1131 symbols[0].N.Name.Short=0;
1132 symbols[0].N.Name.Long=4;
1133 symbolNames.sizeofLongNames=4+entryLength+1;
1134 uprv_strcpy(symbolNames.longNames, entry);
1135 }
1136 symbols[0].SectionNumber=2;
1137 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1138
1139 /* write the file header and the linker options section */
1140 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1141 #else
1142 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1143 #endif
1144
1145 /* copy the data file into section 2 */
1146 for(;;) {
1147 length=T_FileStream_read(in, buffer, sizeof(buffer));
1148 if(length==0) {
1149 break;
1150 }
1151 T_FileStream_write(out, buffer, (int32_t)length);
1152 }
1153
1154 #if U_PLATFORM_HAS_WIN32_API
1155 /* write the symbol table */
1156 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1157 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1158 #endif
1159
1160 if(T_FileStream_error(in)) {
1161 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1162 exit(U_FILE_ACCESS_ERROR);
1163 }
1164
1165 if(T_FileStream_error(out)) {
1166 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1167 exit(U_FILE_ACCESS_ERROR);
1168 }
1169
1170 T_FileStream_close(out);
1171 T_FileStream_close(in);
1172 }
1173 #endif