]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkg_genc.c
ICU-531.48.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkg_genc.c
1 /******************************************************************************
2 * Copyright (C) 2009-2013, International Business Machines
3 * Corporation and others. All Rights Reserved.
4 *******************************************************************************
5 */
6 #include "unicode/utypes.h"
7
8 #if U_PLATFORM_HAS_WIN32_API
9 # define VC_EXTRALEAN
10 # define WIN32_LEAN_AND_MEAN
11 # define NOUSER
12 # define NOSERVICE
13 # define NOIME
14 # define NOMCX
15 #include <windows.h>
16 #include <time.h>
17 # ifdef __GNUC__
18 # define WINDOWS_WITH_GNUC
19 # endif
20 #endif
21
22 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23 # define U_ELF
24 #endif
25
26 #ifdef U_ELF
27 # include <elf.h>
28 # if defined(ELFCLASS64)
29 # define U_ELF64
30 # endif
31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32 # ifndef EM_X86_64
33 # define EM_X86_64 62
34 # endif
35 # define ICU_ENTRY_OFFSET 0
36 #endif
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include "unicode/putil.h"
41 #include "cmemory.h"
42 #include "cstring.h"
43 #include "filestrm.h"
44 #include "toolutil.h"
45 #include "unicode/uclean.h"
46 #include "uoptions.h"
47 #include "pkg_genc.h"
48
/*
 * Sentinel for the running column counter used by the write*() helpers:
 * it marks that no value has been written yet, so no separator is needed.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Numeric literal syntax used when emitting 32-bit words into assembly. */
#define HEX_0X 0 /* 0x1234 */
#define HEX_0H 1 /* 01234h */

/* prototypes --------------------------------------------------------------- */
/* Builds the output file path and the entry point name from the input path. */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit word for the selected assembler; returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape sequence inside a C string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
68 /* -------------------------------------------------------------------------- */
69
70 /*
71 Creating Template Files for New Platforms
72
73 Let the cc compiler help you get started.
74 Compile this program
75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76 with the -S option to produce assembly output.
77
78 For example, this will generate array.s:
79 gcc -S array.c
80
81 This will produce a .s file that may look like this:
82
83 .file "array.c"
84 .version "01.01"
85 gcc2_compiled.:
86 .globl x
87 .section .rodata
88 .align 4
89 .type x,@object
90 .size x,20
91 x:
92 .long 1
93 .long 2
94 .long -559038737
95 .long -1
96 .long 16
97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99 which gives a starting point that will compile, and can be transformed
100 to become the template, generally with some consulting of as docs and
101 some experimentation.
102
103 If you want ICU to automatically use this assembly, you should
104 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105 where the name is the compiler or platform that you used in this
106 assemblyHeader data structure.
107 */
/*
 * Assembly templates, one per supported assembler/platform. An entry is
 * selected by name through checkAssemblyHeaderName().
 *
 * name      - value compared against the requested assembly type
 * header    - printf-style format written once before the data; each %s is
 *             filled in with the entry point name
 * beginLine - data directive written before the first value and again after
 *             every line break inserted by write32()
 * footer    - printf-style format written once after the data; each %s is
 *             filled in with the entry point name
 * hexType   - numeric literal syntax understood by the assembler
 */
static const struct AssemblyType {
    const char *name;
    const char *header;
    const char *beginLine;
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    // For gcc assemblers, the meaning of .align changes depending on the
    // hardware, so we use .balign 16 which always means 16 bytes.
    // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    // 16 bytes alignment.
    // http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align 16\n"
        ".globl %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    // 16 bytes alignment for sun-x86.
    // http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type Drodata.rodata,@object\n"
        "\t.size Drodata.rodata,0\n"
        "\t.globl %s\n"
        "\t.align 16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    // 1<<4 bit alignment for aix.
    // http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file \"%s.s\"\n"
        "\t.type %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align 16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN 16\n",

        ".WORD ","",HEX_0X
    },
    // align 16 bytes
    // http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx
    { "masm",
        "\tTITLE %s\n"
        "; generated by genccode\n"
        ".386\n"
        ".model flat\n"
        "\tPUBLIC _%s\n"
        "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
        "\tALIGN 16\n"
        "_%s\tLABEL DWORD\n",
        "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};

/* Index into assemblyHeader[] chosen by checkAssemblyHeaderName(); -1 while nothing is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Literal syntax of the selected template; read by write32(). */
static int32_t hexType = HEX_0X;
224
225 U_CAPI UBool U_EXPORT2
226 checkAssemblyHeaderName(const char* optAssembly) {
227 int32_t idx;
228 assemblyHeaderIndex = -1;
229 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
230 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
231 assemblyHeaderIndex = idx;
232 hexType = assemblyHeader[idx].hexType; /* set the hex type */
233 return TRUE;
234 }
235 }
236
237 return FALSE;
238 }
239
240
241 U_CAPI void U_EXPORT2
242 printAssemblyHeadersToStdErr(void) {
243 int32_t idx;
244 fprintf(stderr, "%s", assemblyHeader[0].name);
245 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
246 fprintf(stderr, ", %s", assemblyHeader[idx].name);
247 }
248 fprintf(stderr,
249 ")\n");
250 }
251
252 U_CAPI void U_EXPORT2
253 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
254 uint32_t column = MAX_COLUMN;
255 char entry[64];
256 uint32_t buffer[1024];
257 char *bufferStr = (char *)buffer;
258 FileStream *in, *out;
259 size_t i, length;
260
261 in=T_FileStream_open(filename, "rb");
262 if(in==NULL) {
263 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
264 exit(U_FILE_ACCESS_ERROR);
265 }
266
267 getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
268 out=T_FileStream_open(bufferStr, "w");
269 if(out==NULL) {
270 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
271 exit(U_FILE_ACCESS_ERROR);
272 }
273
274 if (outFilePath != NULL) {
275 uprv_strcpy(outFilePath, bufferStr);
276 }
277
278 #ifdef WINDOWS_WITH_GNUC
279 /* Need to fix the file seperator character when using MinGW. */
280 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
281 #endif
282
283 if(optEntryPoint != NULL) {
284 uprv_strcpy(entry, optEntryPoint);
285 uprv_strcat(entry, "_dat");
286 }
287
288 /* turn dashes or dots in the entry name into underscores */
289 length=uprv_strlen(entry);
290 for(i=0; i<length; ++i) {
291 if(entry[i]=='-' || entry[i]=='.') {
292 entry[i]='_';
293 }
294 }
295
296 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
297 entry, entry, entry, entry,
298 entry, entry, entry, entry);
299 T_FileStream_writeLine(out, bufferStr);
300 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
301
302 for(;;) {
303 length=T_FileStream_read(in, buffer, sizeof(buffer));
304 if(length==0) {
305 break;
306 }
307 if (length != sizeof(buffer)) {
308 /* pad with extra 0's when at the end of the file */
309 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
310 buffer[length+i] = 0;
311 }
312 }
313 for(i=0; i<(length/sizeof(buffer[0])); i++) {
314 column = write32(out, buffer[i], column);
315 }
316 }
317
318 T_FileStream_writeLine(out, "\n");
319
320 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
321 entry, entry, entry, entry,
322 entry, entry, entry, entry);
323 T_FileStream_writeLine(out, bufferStr);
324
325 if(T_FileStream_error(in)) {
326 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
327 exit(U_FILE_ACCESS_ERROR);
328 }
329
330 if(T_FileStream_error(out)) {
331 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
332 exit(U_FILE_ACCESS_ERROR);
333 }
334
335 T_FileStream_close(out);
336 T_FileStream_close(in);
337 }
338
339 U_CAPI void U_EXPORT2
340 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
341 uint32_t column = MAX_COLUMN;
342 char buffer[4096], entry[64];
343 FileStream *in, *out;
344 size_t i, length;
345
346 in=T_FileStream_open(filename, "rb");
347 if(in==NULL) {
348 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
349 exit(U_FILE_ACCESS_ERROR);
350 }
351
352 if(optName != NULL) { /* prepend 'icudt28_' */
353 strcpy(entry, optName);
354 strcat(entry, "_");
355 } else {
356 entry[0] = 0;
357 }
358
359 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
360 if (outFilePath != NULL) {
361 uprv_strcpy(outFilePath, buffer);
362 }
363 out=T_FileStream_open(buffer, "w");
364 if(out==NULL) {
365 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
366 exit(U_FILE_ACCESS_ERROR);
367 }
368
369 /* turn dashes or dots in the entry name into underscores */
370 length=uprv_strlen(entry);
371 for(i=0; i<length; ++i) {
372 if(entry[i]=='-' || entry[i]=='.') {
373 entry[i]='_';
374 }
375 }
376
377 #if U_PLATFORM == U_PF_OS400
378 /*
379 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
380
381 This is here because this platform can't currently put
382 const data into the read-only pages of an object or
383 shared library (service program). Only strings are allowed in read-only
384 pages, so we use char * strings to store the data.
385
386 In order to prevent the beginning of the data from ever matching the
387 magic numbers we must still use the initial double.
388 [grhoten 4/24/2003]
389 */
390 sprintf(buffer,
391 "#ifndef IN_GENERATED_CCODE\n"
392 "#define IN_GENERATED_CCODE\n"
393 "#define U_DISABLE_RENAMING 1\n"
394 "#include \"unicode/umachine.h\"\n"
395 "#endif\n"
396 "U_CDECL_BEGIN\n"
397 "const struct {\n"
398 " double bogus;\n"
399 " const char *bytes; \n"
400 "} %s={ 0.0, \n",
401 entry);
402 T_FileStream_writeLine(out, buffer);
403
404 for(;;) {
405 length=T_FileStream_read(in, buffer, sizeof(buffer));
406 if(length==0) {
407 break;
408 }
409 for(i=0; i<length; ++i) {
410 column = write8str(out, (uint8_t)buffer[i], column);
411 }
412 }
413
414 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
415 #else
416 /* Function renaming shouldn't be done in data */
417 sprintf(buffer,
418 "#ifndef IN_GENERATED_CCODE\n"
419 "#define IN_GENERATED_CCODE\n"
420 "#define U_DISABLE_RENAMING 1\n"
421 "#include \"unicode/umachine.h\"\n"
422 "#endif\n"
423 "U_CDECL_BEGIN\n"
424 "const struct {\n"
425 " double bogus;\n"
426 " uint8_t bytes[%ld]; \n"
427 "} %s={ 0.0, {\n",
428 (long)T_FileStream_size(in), entry);
429 T_FileStream_writeLine(out, buffer);
430
431 for(;;) {
432 length=T_FileStream_read(in, buffer, sizeof(buffer));
433 if(length==0) {
434 break;
435 }
436 for(i=0; i<length; ++i) {
437 column = write8(out, (uint8_t)buffer[i], column);
438 }
439 }
440
441 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
442 #endif
443
444 if(T_FileStream_error(in)) {
445 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
446 exit(U_FILE_ACCESS_ERROR);
447 }
448
449 if(T_FileStream_error(out)) {
450 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
451 exit(U_FILE_ACCESS_ERROR);
452 }
453
454 T_FileStream_close(out);
455 T_FileStream_close(in);
456 }
457
458 static uint32_t
459 write32(FileStream *out, uint32_t bitField, uint32_t column) {
460 int32_t i;
461 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
462 char *s = bitFieldStr;
463 uint8_t *ptrIdx = (uint8_t *)&bitField;
464 static const char hexToStr[16] = {
465 '0','1','2','3',
466 '4','5','6','7',
467 '8','9','A','B',
468 'C','D','E','F'
469 };
470
471 /* write the value, possibly with comma and newline */
472 if(column==MAX_COLUMN) {
473 /* first byte */
474 column=1;
475 } else if(column<32) {
476 *(s++)=',';
477 ++column;
478 } else {
479 *(s++)='\n';
480 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
481 s+=uprv_strlen(s);
482 column=1;
483 }
484
485 if (bitField < 10) {
486 /* It's a small number. Don't waste the space for 0x */
487 *(s++)=hexToStr[bitField];
488 }
489 else {
490 int seenNonZero = 0; /* This is used to remove leading zeros */
491
492 if(hexType==HEX_0X) {
493 *(s++)='0';
494 *(s++)='x';
495 } else if(hexType==HEX_0H) {
496 *(s++)='0';
497 }
498
499 /* This creates a 32-bit field */
500 #if U_IS_BIG_ENDIAN
501 for (i = 0; i < sizeof(uint32_t); i++)
502 #else
503 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
504 #endif
505 {
506 uint8_t value = ptrIdx[i];
507 if (value || seenNonZero) {
508 *(s++)=hexToStr[value>>4];
509 *(s++)=hexToStr[value&0xF];
510 seenNonZero = 1;
511 }
512 }
513 if(hexType==HEX_0H) {
514 *(s++)='h';
515 }
516 }
517
518 *(s++)=0;
519 T_FileStream_writeLine(out, bitFieldStr);
520 return column;
521 }
522
523 static uint32_t
524 write8(FileStream *out, uint8_t byte, uint32_t column) {
525 char s[4];
526 int i=0;
527
528 /* convert the byte value to a string */
529 if(byte>=100) {
530 s[i++]=(char)('0'+byte/100);
531 byte%=100;
532 }
533 if(i>0 || byte>=10) {
534 s[i++]=(char)('0'+byte/10);
535 byte%=10;
536 }
537 s[i++]=(char)('0'+byte);
538 s[i]=0;
539
540 /* write the value, possibly with comma and newline */
541 if(column==MAX_COLUMN) {
542 /* first byte */
543 column=1;
544 } else if(column<16) {
545 T_FileStream_writeLine(out, ",");
546 ++column;
547 } else {
548 T_FileStream_writeLine(out, ",\n");
549 column=1;
550 }
551 T_FileStream_writeLine(out, s);
552 return column;
553 }
554
555 #if U_PLATFORM == U_PF_OS400
556 static uint32_t
557 write8str(FileStream *out, uint8_t byte, uint32_t column) {
558 char s[8];
559
560 if (byte > 7)
561 sprintf(s, "\\x%X", byte);
562 else
563 sprintf(s, "\\%X", byte);
564
565 /* write the value, possibly with comma and newline */
566 if(column==MAX_COLUMN) {
567 /* first byte */
568 column=1;
569 T_FileStream_writeLine(out, "\"");
570 } else if(column<24) {
571 ++column;
572 } else {
573 T_FileStream_writeLine(out, "\"\n\"");
574 column=1;
575 }
576 T_FileStream_writeLine(out, s);
577 return column;
578 }
579 #endif
580
581 static void
582 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
583 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
584
585 /* copy path */
586 if(destdir!=NULL && *destdir!=0) {
587 do {
588 *outFilename++=*destdir++;
589 } while(*destdir!=0);
590 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
591 *outFilename++=U_FILE_SEP_CHAR;
592 }
593 inFilename=basename;
594 } else {
595 while(inFilename<basename) {
596 *outFilename++=*inFilename++;
597 }
598 }
599
600 if(suffix==NULL) {
601 /* the filename does not have a suffix */
602 uprv_strcpy(entryName, inFilename);
603 if(optFilename != NULL) {
604 uprv_strcpy(outFilename, optFilename);
605 } else {
606 uprv_strcpy(outFilename, inFilename);
607 }
608 uprv_strcat(outFilename, newSuffix);
609 } else {
610 char *saveOutFilename = outFilename;
611 /* copy basename */
612 while(inFilename<suffix) {
613 if(*inFilename=='-') {
614 /* iSeries cannot have '-' in the .o objects. */
615 *outFilename++=*entryName++='_';
616 inFilename++;
617 }
618 else {
619 *outFilename++=*entryName++=*inFilename++;
620 }
621 }
622
623 /* replace '.' by '_' */
624 *outFilename++=*entryName++='_';
625 ++inFilename;
626
627 /* copy suffix */
628 while(*inFilename!=0) {
629 *outFilename++=*entryName++=*inFilename++;
630 }
631
632 *entryName=0;
633
634 if(optFilename != NULL) {
635 uprv_strcpy(saveOutFilename, optFilename);
636 uprv_strcat(saveOutFilename, newSuffix);
637 } else {
638 /* add ".c" */
639 uprv_strcpy(outFilename, newSuffix);
640 }
641 }
642 }
643
644 #ifdef CAN_GENERATE_OBJECTS
645 static void
646 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
647 union {
648 char bytes[2048];
649 #ifdef U_ELF
650 Elf32_Ehdr header32;
651 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
652 #elif U_PLATFORM_HAS_WIN32_API
653 IMAGE_FILE_HEADER header;
654 #endif
655 } buffer;
656
657 const char *filename;
658 FileStream *in;
659 int32_t length;
660
661 #ifdef U_ELF
662
663 #elif U_PLATFORM_HAS_WIN32_API
664 const IMAGE_FILE_HEADER *pHeader;
665 #else
666 # error "Unknown platform for CAN_GENERATE_OBJECTS."
667 #endif
668
669 if(optMatchArch != NULL) {
670 filename=optMatchArch;
671 } else {
672 /* set defaults */
673 #ifdef U_ELF
674 /* set EM_386 because elf.h does not provide better defaults */
675 *pCPU=EM_386;
676 *pBits=32;
677 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
678 #elif U_PLATFORM_HAS_WIN32_API
679 /* _M_IA64 should be defined in windows.h */
680 # if defined(_M_IA64)
681 *pCPU=IMAGE_FILE_MACHINE_IA64;
682 # elif defined(_M_AMD64)
683 *pCPU=IMAGE_FILE_MACHINE_AMD64;
684 # else
685 *pCPU=IMAGE_FILE_MACHINE_I386;
686 # endif
687 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
688 *pIsBigEndian=FALSE;
689 #else
690 # error "Unknown platform for CAN_GENERATE_OBJECTS."
691 #endif
692 return;
693 }
694
695 in=T_FileStream_open(filename, "rb");
696 if(in==NULL) {
697 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
698 exit(U_FILE_ACCESS_ERROR);
699 }
700 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
701
702 #ifdef U_ELF
703 if(length<sizeof(Elf32_Ehdr)) {
704 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
705 exit(U_UNSUPPORTED_ERROR);
706 }
707 if(
708 buffer.header32.e_ident[0]!=ELFMAG0 ||
709 buffer.header32.e_ident[1]!=ELFMAG1 ||
710 buffer.header32.e_ident[2]!=ELFMAG2 ||
711 buffer.header32.e_ident[3]!=ELFMAG3 ||
712 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
713 ) {
714 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
715 exit(U_UNSUPPORTED_ERROR);
716 }
717
718 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
719 #ifdef U_ELF64
720 if(*pBits!=32 && *pBits!=64) {
721 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
722 exit(U_UNSUPPORTED_ERROR);
723 }
724 #else
725 if(*pBits!=32) {
726 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
727 exit(U_UNSUPPORTED_ERROR);
728 }
729 #endif
730
731 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
732 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
733 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
734 exit(U_UNSUPPORTED_ERROR);
735 }
736 /* TODO: Support byte swapping */
737
738 *pCPU=buffer.header32.e_machine;
739 #elif U_PLATFORM_HAS_WIN32_API
740 if(length<sizeof(IMAGE_FILE_HEADER)) {
741 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
742 exit(U_UNSUPPORTED_ERROR);
743 }
744 /* TODO: Use buffer.header. Keep aliasing legal. */
745 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
746 *pCPU=pHeader->Machine;
747 /*
748 * The number of bits is implicit with the Machine value.
749 * *pBits is ignored in the calling code, so this need not be precise.
750 */
751 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
752 /* Windows always runs on little-endian CPUs. */
753 *pIsBigEndian=FALSE;
754 #else
755 # error "Unknown platform for CAN_GENERATE_OBJECTS."
756 #endif
757
758 T_FileStream_close(in);
759 }
760
761 U_CAPI void U_EXPORT2
762 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
763 /* common variables */
764 char buffer[4096], entry[40]={ 0 };
765 FileStream *in, *out;
766 const char *newSuffix;
767 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
768
769 uint16_t cpu, bits;
770 UBool makeBigEndian;
771
772 /* platform-specific variables and initialization code */
773 #ifdef U_ELF
774 /* 32-bit Elf file header */
775 static Elf32_Ehdr header32={
776 {
777 /* e_ident[] */
778 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
779 ELFCLASS32,
780 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
781 EV_CURRENT /* EI_VERSION */
782 },
783 ET_REL,
784 EM_386,
785 EV_CURRENT, /* e_version */
786 0, /* e_entry */
787 0, /* e_phoff */
788 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
789 0, /* e_flags */
790 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
791 0, /* e_phentsize */
792 0, /* e_phnum */
793 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
794 5, /* e_shnum */
795 2 /* e_shstrndx */
796 };
797
798 /* 32-bit Elf section header table */
799 static Elf32_Shdr sectionHeaders32[5]={
800 { /* SHN_UNDEF */
801 0
802 },
803 { /* .symtab */
804 1, /* sh_name */
805 SHT_SYMTAB,
806 0, /* sh_flags */
807 0, /* sh_addr */
808 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
809 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
810 3, /* sh_link=sect hdr index of .strtab */
811 1, /* sh_info=One greater than the symbol table index of the last
812 * local symbol (with STB_LOCAL). */
813 4, /* sh_addralign */
814 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
815 },
816 { /* .shstrtab */
817 9, /* sh_name */
818 SHT_STRTAB,
819 0, /* sh_flags */
820 0, /* sh_addr */
821 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
822 40, /* sh_size */
823 0, /* sh_link */
824 0, /* sh_info */
825 1, /* sh_addralign */
826 0 /* sh_entsize */
827 },
828 { /* .strtab */
829 19, /* sh_name */
830 SHT_STRTAB,
831 0, /* sh_flags */
832 0, /* sh_addr */
833 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
834 (Elf32_Word)sizeof(entry), /* sh_size */
835 0, /* sh_link */
836 0, /* sh_info */
837 1, /* sh_addralign */
838 0 /* sh_entsize */
839 },
840 { /* .rodata */
841 27, /* sh_name */
842 SHT_PROGBITS,
843 SHF_ALLOC, /* sh_flags */
844 0, /* sh_addr */
845 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
846 0, /* sh_size */
847 0, /* sh_link */
848 0, /* sh_info */
849 16, /* sh_addralign */
850 0 /* sh_entsize */
851 }
852 };
853
854 /* symbol table */
855 static Elf32_Sym symbols32[2]={
856 { /* STN_UNDEF */
857 0
858 },
859 { /* data entry point */
860 1, /* st_name */
861 0, /* st_value */
862 0, /* st_size */
863 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
864 0, /* st_other */
865 4 /* st_shndx=index of related section table entry */
866 }
867 };
868
869 /* section header string table, with decimal string offsets */
870 static const char sectionStrings[40]=
871 /* 0 */ "\0"
872 /* 1 */ ".symtab\0"
873 /* 9 */ ".shstrtab\0"
874 /* 19 */ ".strtab\0"
875 /* 27 */ ".rodata\0"
876 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
877 /* 40: padded to multiple of 8 bytes */
878
879 /*
880 * Use entry[] for the string table which will contain only the
881 * entry point name.
882 * entry[0] must be 0 (NUL)
883 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
884 */
885
886 /* 16-align .rodata in the .o file, just in case */
887 static const char padding[16]={ 0 };
888 int32_t paddingSize;
889
890 #ifdef U_ELF64
891 /* 64-bit Elf file header */
892 static Elf64_Ehdr header64={
893 {
894 /* e_ident[] */
895 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
896 ELFCLASS64,
897 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
898 EV_CURRENT /* EI_VERSION */
899 },
900 ET_REL,
901 EM_X86_64,
902 EV_CURRENT, /* e_version */
903 0, /* e_entry */
904 0, /* e_phoff */
905 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
906 0, /* e_flags */
907 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
908 0, /* e_phentsize */
909 0, /* e_phnum */
910 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
911 5, /* e_shnum */
912 2 /* e_shstrndx */
913 };
914
915 /* 64-bit Elf section header table */
916 static Elf64_Shdr sectionHeaders64[5]={
917 { /* SHN_UNDEF */
918 0
919 },
920 { /* .symtab */
921 1, /* sh_name */
922 SHT_SYMTAB,
923 0, /* sh_flags */
924 0, /* sh_addr */
925 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
926 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
927 3, /* sh_link=sect hdr index of .strtab */
928 1, /* sh_info=One greater than the symbol table index of the last
929 * local symbol (with STB_LOCAL). */
930 4, /* sh_addralign */
931 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
932 },
933 { /* .shstrtab */
934 9, /* sh_name */
935 SHT_STRTAB,
936 0, /* sh_flags */
937 0, /* sh_addr */
938 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
939 40, /* sh_size */
940 0, /* sh_link */
941 0, /* sh_info */
942 1, /* sh_addralign */
943 0 /* sh_entsize */
944 },
945 { /* .strtab */
946 19, /* sh_name */
947 SHT_STRTAB,
948 0, /* sh_flags */
949 0, /* sh_addr */
950 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
951 (Elf64_Xword)sizeof(entry), /* sh_size */
952 0, /* sh_link */
953 0, /* sh_info */
954 1, /* sh_addralign */
955 0 /* sh_entsize */
956 },
957 { /* .rodata */
958 27, /* sh_name */
959 SHT_PROGBITS,
960 SHF_ALLOC, /* sh_flags */
961 0, /* sh_addr */
962 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
963 0, /* sh_size */
964 0, /* sh_link */
965 0, /* sh_info */
966 16, /* sh_addralign */
967 0 /* sh_entsize */
968 }
969 };
970
971 /*
972 * 64-bit symbol table
973 * careful: different order of items compared with Elf32_sym!
974 */
975 static Elf64_Sym symbols64[2]={
976 { /* STN_UNDEF */
977 0
978 },
979 { /* data entry point */
980 1, /* st_name */
981 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
982 0, /* st_other */
983 4, /* st_shndx=index of related section table entry */
984 0, /* st_value */
985 0 /* st_size */
986 }
987 };
988
989 #endif /* U_ELF64 */
990
991 /* entry[] have a leading NUL */
992 entryOffset=1;
993
994 /* in the common code, count entryLength from after the NUL */
995 entryLengthOffset=1;
996
997 newSuffix=".o";
998
999 #elif U_PLATFORM_HAS_WIN32_API
1000 struct {
1001 IMAGE_FILE_HEADER fileHeader;
1002 IMAGE_SECTION_HEADER sections[2];
1003 char linkerOptions[100];
1004 } objHeader;
1005 IMAGE_SYMBOL symbols[1];
1006 struct {
1007 DWORD sizeofLongNames;
1008 char longNames[100];
1009 } symbolNames;
1010
1011 /*
1012 * entry sometimes have a leading '_'
1013 * overwritten if entryOffset==0 depending on the target platform
1014 * see check for cpu below
1015 */
1016 entry[0]='_';
1017
1018 newSuffix=".obj";
1019 #else
1020 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1021 #endif
1022
1023 /* deal with options, files and the entry point name */
1024 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1025 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1026 #if U_PLATFORM_HAS_WIN32_API
1027 if(cpu==IMAGE_FILE_MACHINE_I386) {
1028 entryOffset=1;
1029 }
1030 #endif
1031
1032 in=T_FileStream_open(filename, "rb");
1033 if(in==NULL) {
1034 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1035 exit(U_FILE_ACCESS_ERROR);
1036 }
1037 size=T_FileStream_size(in);
1038
1039 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1040 if (outFilePath != NULL) {
1041 uprv_strcpy(outFilePath, buffer);
1042 }
1043
1044 if(optEntryPoint != NULL) {
1045 uprv_strcpy(entry+entryOffset, optEntryPoint);
1046 uprv_strcat(entry+entryOffset, "_dat");
1047 }
1048 /* turn dashes in the entry name into underscores */
1049 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1050 for(i=0; i<entryLength; ++i) {
1051 if(entry[entryLengthOffset+i]=='-') {
1052 entry[entryLengthOffset+i]='_';
1053 }
1054 }
1055
1056 /* open the output file */
1057 out=T_FileStream_open(buffer, "wb");
1058 if(out==NULL) {
1059 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1060 exit(U_FILE_ACCESS_ERROR);
1061 }
1062
1063 #ifdef U_ELF
1064 if(bits==32) {
1065 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1066 header32.e_machine=cpu;
1067
1068 /* 16-align .rodata in the .o file, just in case */
1069 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1070 if(paddingSize!=0) {
1071 paddingSize=0x10-paddingSize;
1072 sectionHeaders32[4].sh_offset+=paddingSize;
1073 }
1074
1075 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1076
1077 symbols32[1].st_size=(Elf32_Word)size;
1078
1079 /* write .o headers */
1080 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1081 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1082 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1083 } else /* bits==64 */ {
1084 #ifdef U_ELF64
1085 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1086 header64.e_machine=cpu;
1087
1088 /* 16-align .rodata in the .o file, just in case */
1089 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1090 if(paddingSize!=0) {
1091 paddingSize=0x10-paddingSize;
1092 sectionHeaders64[4].sh_offset+=paddingSize;
1093 }
1094
1095 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1096
1097 symbols64[1].st_size=(Elf64_Xword)size;
1098
1099 /* write .o headers */
1100 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1101 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1102 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1103 #endif
1104 }
1105
1106 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1107 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1108 if(paddingSize!=0) {
1109 T_FileStream_write(out, padding, paddingSize);
1110 }
1111 #elif U_PLATFORM_HAS_WIN32_API
1112 /* populate the .obj headers */
1113 uprv_memset(&objHeader, 0, sizeof(objHeader));
1114 uprv_memset(&symbols, 0, sizeof(symbols));
1115 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1116
1117 /* write the linker export directive */
1118 uprv_strcpy(objHeader.linkerOptions, "-export:");
1119 length=8;
1120 uprv_strcpy(objHeader.linkerOptions+length, entry);
1121 length+=entryLength;
1122 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1123 length+=6;
1124
1125 /* set the file header */
1126 objHeader.fileHeader.Machine=cpu;
1127 objHeader.fileHeader.NumberOfSections=2;
1128 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1129 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1130 objHeader.fileHeader.NumberOfSymbols=1;
1131
1132 /* set the section for the linker options */
1133 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1134 objHeader.sections[0].SizeOfRawData=length;
1135 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1136 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1137
1138 /* set the data section */
1139 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1140 objHeader.sections[1].SizeOfRawData=size;
1141 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1142 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1143
1144 /* set the symbol table */
1145 if(entryLength<=8) {
1146 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1147 symbolNames.sizeofLongNames=4;
1148 } else {
1149 symbols[0].N.Name.Short=0;
1150 symbols[0].N.Name.Long=4;
1151 symbolNames.sizeofLongNames=4+entryLength+1;
1152 uprv_strcpy(symbolNames.longNames, entry);
1153 }
1154 symbols[0].SectionNumber=2;
1155 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1156
1157 /* write the file header and the linker options section */
1158 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1159 #else
1160 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1161 #endif
1162
1163 /* copy the data file into section 2 */
1164 for(;;) {
1165 length=T_FileStream_read(in, buffer, sizeof(buffer));
1166 if(length==0) {
1167 break;
1168 }
1169 T_FileStream_write(out, buffer, (int32_t)length);
1170 }
1171
1172 #if U_PLATFORM_HAS_WIN32_API
1173 /* write the symbol table */
1174 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1175 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1176 #endif
1177
1178 if(T_FileStream_error(in)) {
1179 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1180 exit(U_FILE_ACCESS_ERROR);
1181 }
1182
1183 if(T_FileStream_error(out)) {
1184 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1185 exit(U_FILE_ACCESS_ERROR);
1186 }
1187
1188 T_FileStream_close(out);
1189 T_FileStream_close(in);
1190 }
1191 #endif