]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkg_genc.cpp
ICU-59152.0.1.tar.gz
[apple/icu.git] / icuSources / tools / toolutil / pkg_genc.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4 * Copyright (C) 2009-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *******************************************************************************
7 */
8 #include "unicode/utypes.h"
9
10 #if U_PLATFORM_HAS_WIN32_API
11 # define VC_EXTRALEAN
12 # define WIN32_LEAN_AND_MEAN
13 # define NOUSER
14 # define NOSERVICE
15 # define NOIME
16 # define NOMCX
17 #include <windows.h>
18 #include <time.h>
19 # ifdef __GNUC__
20 # define WINDOWS_WITH_GNUC
21 # endif
22 #endif
23
24 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
25 # define U_ELF
26 #endif
27
28 #ifdef U_ELF
29 # include <elf.h>
30 # if defined(ELFCLASS64)
31 # define U_ELF64
32 # endif
33 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
34 # ifndef EM_X86_64
35 # define EM_X86_64 62
36 # endif
37 # define ICU_ENTRY_OFFSET 0
38 #endif
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include "unicode/putil.h"
43 #include "cmemory.h"
44 #include "cstring.h"
45 #include "filestrm.h"
46 #include "toolutil.h"
47 #include "unicode/uclean.h"
48 #include "uoptions.h"
49 #include "pkg_genc.h"
50
51 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
52
53 #define HEX_0X 0 /* 0x1234 */
54 #define HEX_0H 1 /* 01234h */
55
56 /* prototypes --------------------------------------------------------------- */
57 static void
58 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
59
60 static uint32_t
61 write8(FileStream *out, uint8_t byte, uint32_t column);
62
63 static uint32_t
64 write32(FileStream *out, uint32_t byte, uint32_t column);
65
66 #if U_PLATFORM == U_PF_OS400
67 static uint32_t
68 write8str(FileStream *out, uint8_t byte, uint32_t column);
69 #endif
70 /* -------------------------------------------------------------------------- */
71
72 /*
73 Creating Template Files for New Platforms
74
75 Let the cc compiler help you get started.
76 Compile this program
77 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
78 with the -S option to produce assembly output.
79
80 For example, this will generate array.s:
81 gcc -S array.c
82
83 This will produce a .s file that may look like this:
84
85 .file "array.c"
86 .version "01.01"
87 gcc2_compiled.:
88 .globl x
89 .section .rodata
90 .align 4
91 .type x,@object
92 .size x,20
93 x:
94 .long 1
95 .long 2
96 .long -559038737
97 .long -1
98 .long 16
99 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
100
101 which gives a starting point that will compile, and can be transformed
102 to become the template, generally with some consulting of as docs and
103 some experimentation.
104
105 If you want ICU to automatically use this assembly, you should
106 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
107 where the name is the compiler or platform that you used in this
108 assemblyHeader data structure.
109 */
110 static const struct AssemblyType {
111 const char *name;
112 const char *header;
113 const char *beginLine;
114 const char *footer;
115 int8_t hexType; /* HEX_0X or HEX_0h */
116 } assemblyHeader[] = {
117 /* For gcc assemblers, the meaning of .align changes depending on the */
118 /* hardware, so we use .balign 16 which always means 16 bytes. */
119 /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
120 {"gcc",
121 ".globl %s\n"
122 "\t.section .note.GNU-stack,\"\",%%progbits\n"
123 "\t.section .rodata\n"
124 "\t.balign 16\n"
125 "#ifdef U_HIDE_DATA_SYMBOL\n"
126 "\t.hidden %s\n"
127 "#endif\n"
128 "\t.type %s,%%object\n"
129 "%s:\n\n",
130
131 ".long ",".size %s, .-%s\n",HEX_0X
132 },
133 {"gcc-darwin",
134 /*"\t.section __TEXT,__text,regular,pure_instructions\n"
135 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
136 ".globl _%s\n"
137 "#ifdef U_HIDE_DATA_SYMBOL\n"
138 "\t.private_extern _%s\n"
139 "#endif\n"
140 "\t.data\n"
141 "\t.const\n"
142 "\t.balign 16\n"
143 "_%s:\n\n",
144
145 ".long ","",HEX_0X
146 },
147 {"gcc-cygwin",
148 ".globl _%s\n"
149 "\t.section .rodata\n"
150 "\t.balign 16\n"
151 "_%s:\n\n",
152
153 ".long ","",HEX_0X
154 },
155 {"gcc-mingw64",
156 ".globl %s\n"
157 "\t.section .rodata\n"
158 "\t.balign 16\n"
159 "%s:\n\n",
160
161 ".long ","",HEX_0X
162 },
163 /* 16 bytes alignment. */
164 /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
165 {"sun",
166 "\t.section \".rodata\"\n"
167 "\t.align 16\n"
168 ".globl %s\n"
169 "%s:\n",
170
171 ".word ","",HEX_0X
172 },
173 /* 16 bytes alignment for sun-x86. */
174 /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
175 {"sun-x86",
176 "Drodata.rodata:\n"
177 "\t.type Drodata.rodata,@object\n"
178 "\t.size Drodata.rodata,0\n"
179 "\t.globl %s\n"
180 "\t.align 16\n"
181 "%s:\n",
182
183 ".4byte ","",HEX_0X
184 },
185 /* 1<<4 bit alignment for aix. */
186 /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
187 {"xlc",
188 ".globl %s{RO}\n"
189 "\t.toc\n"
190 "%s:\n"
191 "\t.csect %s{RO}, 4\n",
192
193 ".long ","",HEX_0X
194 },
195 {"aCC-ia64",
196 "\t.file \"%s.s\"\n"
197 "\t.type %s,@object\n"
198 "\t.global %s\n"
199 "\t.secalias .abe$0.rodata, \".rodata\"\n"
200 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
201 "\t.align 16\n"
202 "%s::\t",
203
204 "data4 ","",HEX_0X
205 },
206 {"aCC-parisc",
207 "\t.SPACE $TEXT$\n"
208 "\t.SUBSPA $LIT$\n"
209 "%s\n"
210 "\t.EXPORT %s\n"
211 "\t.ALIGN 16\n",
212
213 ".WORD ","",HEX_0X
214 },
215 /* align 16 bytes */
216 /* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
217 { "masm",
218 "\tTITLE %s\n"
219 "; generated by genccode\n"
220 ".386\n"
221 ".model flat\n"
222 "\tPUBLIC _%s\n"
223 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
224 "\tALIGN 16\n"
225 "_%s\tLABEL DWORD\n",
226 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
227 }
228 };
229
230 static int32_t assemblyHeaderIndex = -1;
231 static int32_t hexType = HEX_0X;
232
233 U_CAPI UBool U_EXPORT2
234 checkAssemblyHeaderName(const char* optAssembly) {
235 int32_t idx;
236 assemblyHeaderIndex = -1;
237 for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
238 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
239 assemblyHeaderIndex = idx;
240 hexType = assemblyHeader[idx].hexType; /* set the hex type */
241 return TRUE;
242 }
243 }
244
245 return FALSE;
246 }
247
248
249 U_CAPI void U_EXPORT2
250 printAssemblyHeadersToStdErr(void) {
251 int32_t idx;
252 fprintf(stderr, "%s", assemblyHeader[0].name);
253 for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
254 fprintf(stderr, ", %s", assemblyHeader[idx].name);
255 }
256 fprintf(stderr,
257 ")\n");
258 }
259
260 U_CAPI void U_EXPORT2
261 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
262 uint32_t column = MAX_COLUMN;
263 char entry[64];
264 uint32_t buffer[1024];
265 char *bufferStr = (char *)buffer;
266 FileStream *in, *out;
267 size_t i, length;
268
269 in=T_FileStream_open(filename, "rb");
270 if(in==NULL) {
271 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
272 exit(U_FILE_ACCESS_ERROR);
273 }
274
275 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
276 out=T_FileStream_open(bufferStr, "w");
277 if(out==NULL) {
278 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
279 exit(U_FILE_ACCESS_ERROR);
280 }
281
282 if (outFilePath != NULL) {
283 uprv_strcpy(outFilePath, bufferStr);
284 }
285
286 #if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN
287 /* Need to fix the file seperator character when using MinGW. */
288 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
289 #endif
290
291 if(optEntryPoint != NULL) {
292 uprv_strcpy(entry, optEntryPoint);
293 uprv_strcat(entry, "_dat");
294 }
295
296 /* turn dashes or dots in the entry name into underscores */
297 length=uprv_strlen(entry);
298 for(i=0; i<length; ++i) {
299 if(entry[i]=='-' || entry[i]=='.') {
300 entry[i]='_';
301 }
302 }
303
304 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
305 entry, entry, entry, entry,
306 entry, entry, entry, entry);
307 T_FileStream_writeLine(out, bufferStr);
308 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
309
310 for(;;) {
311 length=T_FileStream_read(in, buffer, sizeof(buffer));
312 if(length==0) {
313 break;
314 }
315 if (length != sizeof(buffer)) {
316 /* pad with extra 0's when at the end of the file */
317 for(i=0; i < (length % sizeof(uint32_t)); ++i) {
318 buffer[length+i] = 0;
319 }
320 }
321 for(i=0; i<(length/sizeof(buffer[0])); i++) {
322 column = write32(out, buffer[i], column);
323 }
324 }
325
326 T_FileStream_writeLine(out, "\n");
327
328 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
329 entry, entry, entry, entry,
330 entry, entry, entry, entry);
331 T_FileStream_writeLine(out, bufferStr);
332
333 if(T_FileStream_error(in)) {
334 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
335 exit(U_FILE_ACCESS_ERROR);
336 }
337
338 if(T_FileStream_error(out)) {
339 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
340 exit(U_FILE_ACCESS_ERROR);
341 }
342
343 T_FileStream_close(out);
344 T_FileStream_close(in);
345 }
346
347 U_CAPI void U_EXPORT2
348 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
349 uint32_t column = MAX_COLUMN;
350 char buffer[4096], entry[64];
351 FileStream *in, *out;
352 size_t i, length;
353
354 in=T_FileStream_open(filename, "rb");
355 if(in==NULL) {
356 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
357 exit(U_FILE_ACCESS_ERROR);
358 }
359
360 if(optName != NULL) { /* prepend 'icudt28_' */
361 strcpy(entry, optName);
362 strcat(entry, "_");
363 } else {
364 entry[0] = 0;
365 }
366
367 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
368 if (outFilePath != NULL) {
369 uprv_strcpy(outFilePath, buffer);
370 }
371 out=T_FileStream_open(buffer, "w");
372 if(out==NULL) {
373 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
374 exit(U_FILE_ACCESS_ERROR);
375 }
376
377 /* turn dashes or dots in the entry name into underscores */
378 length=uprv_strlen(entry);
379 for(i=0; i<length; ++i) {
380 if(entry[i]=='-' || entry[i]=='.') {
381 entry[i]='_';
382 }
383 }
384
385 #if U_PLATFORM == U_PF_OS400
386 /*
387 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
388
389 This is here because this platform can't currently put
390 const data into the read-only pages of an object or
391 shared library (service program). Only strings are allowed in read-only
392 pages, so we use char * strings to store the data.
393
394 In order to prevent the beginning of the data from ever matching the
395 magic numbers we must still use the initial double.
396 [grhoten 4/24/2003]
397 */
398 sprintf(buffer,
399 "#ifndef IN_GENERATED_CCODE\n"
400 "#define IN_GENERATED_CCODE\n"
401 "#define U_DISABLE_RENAMING 1\n"
402 "#include \"unicode/umachine.h\"\n"
403 "#endif\n"
404 "U_CDECL_BEGIN\n"
405 "const struct {\n"
406 " double bogus;\n"
407 " const char *bytes; \n"
408 "} %s={ 0.0, \n",
409 entry);
410 T_FileStream_writeLine(out, buffer);
411
412 for(;;) {
413 length=T_FileStream_read(in, buffer, sizeof(buffer));
414 if(length==0) {
415 break;
416 }
417 for(i=0; i<length; ++i) {
418 column = write8str(out, (uint8_t)buffer[i], column);
419 }
420 }
421
422 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
423 #else
424 /* Function renaming shouldn't be done in data */
425 sprintf(buffer,
426 "#ifndef IN_GENERATED_CCODE\n"
427 "#define IN_GENERATED_CCODE\n"
428 "#define U_DISABLE_RENAMING 1\n"
429 "#include \"unicode/umachine.h\"\n"
430 "#endif\n"
431 "U_CDECL_BEGIN\n"
432 "const struct {\n"
433 " double bogus;\n"
434 " uint8_t bytes[%ld]; \n"
435 "} %s={ 0.0, {\n",
436 (long)T_FileStream_size(in), entry);
437 T_FileStream_writeLine(out, buffer);
438
439 for(;;) {
440 length=T_FileStream_read(in, buffer, sizeof(buffer));
441 if(length==0) {
442 break;
443 }
444 for(i=0; i<length; ++i) {
445 column = write8(out, (uint8_t)buffer[i], column);
446 }
447 }
448
449 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
450 #endif
451
452 if(T_FileStream_error(in)) {
453 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
454 exit(U_FILE_ACCESS_ERROR);
455 }
456
457 if(T_FileStream_error(out)) {
458 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
459 exit(U_FILE_ACCESS_ERROR);
460 }
461
462 T_FileStream_close(out);
463 T_FileStream_close(in);
464 }
465
466 static uint32_t
467 write32(FileStream *out, uint32_t bitField, uint32_t column) {
468 int32_t i;
469 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
470 char *s = bitFieldStr;
471 uint8_t *ptrIdx = (uint8_t *)&bitField;
472 static const char hexToStr[16] = {
473 '0','1','2','3',
474 '4','5','6','7',
475 '8','9','A','B',
476 'C','D','E','F'
477 };
478
479 /* write the value, possibly with comma and newline */
480 if(column==MAX_COLUMN) {
481 /* first byte */
482 column=1;
483 } else if(column<32) {
484 *(s++)=',';
485 ++column;
486 } else {
487 *(s++)='\n';
488 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
489 s+=uprv_strlen(s);
490 column=1;
491 }
492
493 if (bitField < 10) {
494 /* It's a small number. Don't waste the space for 0x */
495 *(s++)=hexToStr[bitField];
496 }
497 else {
498 int seenNonZero = 0; /* This is used to remove leading zeros */
499
500 if(hexType==HEX_0X) {
501 *(s++)='0';
502 *(s++)='x';
503 } else if(hexType==HEX_0H) {
504 *(s++)='0';
505 }
506
507 /* This creates a 32-bit field */
508 #if U_IS_BIG_ENDIAN
509 for (i = 0; i < sizeof(uint32_t); i++)
510 #else
511 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
512 #endif
513 {
514 uint8_t value = ptrIdx[i];
515 if (value || seenNonZero) {
516 *(s++)=hexToStr[value>>4];
517 *(s++)=hexToStr[value&0xF];
518 seenNonZero = 1;
519 }
520 }
521 if(hexType==HEX_0H) {
522 *(s++)='h';
523 }
524 }
525
526 *(s++)=0;
527 T_FileStream_writeLine(out, bitFieldStr);
528 return column;
529 }
530
531 static uint32_t
532 write8(FileStream *out, uint8_t byte, uint32_t column) {
533 char s[4];
534 int i=0;
535
536 /* convert the byte value to a string */
537 if(byte>=100) {
538 s[i++]=(char)('0'+byte/100);
539 byte%=100;
540 }
541 if(i>0 || byte>=10) {
542 s[i++]=(char)('0'+byte/10);
543 byte%=10;
544 }
545 s[i++]=(char)('0'+byte);
546 s[i]=0;
547
548 /* write the value, possibly with comma and newline */
549 if(column==MAX_COLUMN) {
550 /* first byte */
551 column=1;
552 } else if(column<16) {
553 T_FileStream_writeLine(out, ",");
554 ++column;
555 } else {
556 T_FileStream_writeLine(out, ",\n");
557 column=1;
558 }
559 T_FileStream_writeLine(out, s);
560 return column;
561 }
562
563 #if U_PLATFORM == U_PF_OS400
564 static uint32_t
565 write8str(FileStream *out, uint8_t byte, uint32_t column) {
566 char s[8];
567
568 if (byte > 7)
569 sprintf(s, "\\x%X", byte);
570 else
571 sprintf(s, "\\%X", byte);
572
573 /* write the value, possibly with comma and newline */
574 if(column==MAX_COLUMN) {
575 /* first byte */
576 column=1;
577 T_FileStream_writeLine(out, "\"");
578 } else if(column<24) {
579 ++column;
580 } else {
581 T_FileStream_writeLine(out, "\"\n\"");
582 column=1;
583 }
584 T_FileStream_writeLine(out, s);
585 return column;
586 }
587 #endif
588
589 static void
590 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
591 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
592
593 /* copy path */
594 if(destdir!=NULL && *destdir!=0) {
595 do {
596 *outFilename++=*destdir++;
597 } while(*destdir!=0);
598 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
599 *outFilename++=U_FILE_SEP_CHAR;
600 }
601 inFilename=basename;
602 } else {
603 while(inFilename<basename) {
604 *outFilename++=*inFilename++;
605 }
606 }
607
608 if(suffix==NULL) {
609 /* the filename does not have a suffix */
610 uprv_strcpy(entryName, inFilename);
611 if(optFilename != NULL) {
612 uprv_strcpy(outFilename, optFilename);
613 } else {
614 uprv_strcpy(outFilename, inFilename);
615 }
616 uprv_strcat(outFilename, newSuffix);
617 } else {
618 char *saveOutFilename = outFilename;
619 /* copy basename */
620 while(inFilename<suffix) {
621 if(*inFilename=='-') {
622 /* iSeries cannot have '-' in the .o objects. */
623 *outFilename++=*entryName++='_';
624 inFilename++;
625 }
626 else {
627 *outFilename++=*entryName++=*inFilename++;
628 }
629 }
630
631 /* replace '.' by '_' */
632 *outFilename++=*entryName++='_';
633 ++inFilename;
634
635 /* copy suffix */
636 while(*inFilename!=0) {
637 *outFilename++=*entryName++=*inFilename++;
638 }
639
640 *entryName=0;
641
642 if(optFilename != NULL) {
643 uprv_strcpy(saveOutFilename, optFilename);
644 uprv_strcat(saveOutFilename, newSuffix);
645 } else {
646 /* add ".c" */
647 uprv_strcpy(outFilename, newSuffix);
648 }
649 }
650 }
651
652 #ifdef CAN_GENERATE_OBJECTS
653 static void
654 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
655 union {
656 char bytes[2048];
657 #ifdef U_ELF
658 Elf32_Ehdr header32;
659 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
660 #elif U_PLATFORM_HAS_WIN32_API
661 IMAGE_FILE_HEADER header;
662 #endif
663 } buffer;
664
665 const char *filename;
666 FileStream *in;
667 int32_t length;
668
669 #ifdef U_ELF
670
671 #elif U_PLATFORM_HAS_WIN32_API
672 const IMAGE_FILE_HEADER *pHeader;
673 #else
674 # error "Unknown platform for CAN_GENERATE_OBJECTS."
675 #endif
676
677 if(optMatchArch != NULL) {
678 filename=optMatchArch;
679 } else {
680 /* set defaults */
681 #ifdef U_ELF
682 /* set EM_386 because elf.h does not provide better defaults */
683 *pCPU=EM_386;
684 *pBits=32;
685 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
686 #elif U_PLATFORM_HAS_WIN32_API
687 /* _M_IA64 should be defined in windows.h */
688 # if defined(_M_IA64)
689 *pCPU=IMAGE_FILE_MACHINE_IA64;
690 *pBits = 64;
691 # elif defined(_M_AMD64)
692 // link.exe does not really care about the .obj machine type and this will
693 // allow us to build a dll for both ARM & x64 with an amd64 built tool
694 // ARM is same as x64 except for first 2 bytes of object file
695 *pCPU = IMAGE_FILE_MACHINE_UNKNOWN;
696 // *pCPU = IMAGE_FILE_MACHINE_ARMNT; // If we wanted to be explicit
697 // *pCPU = IMAGE_FILE_MACHINE_AMD64; // We would use one of these names
698 *pBits = 64; // Doesn't seem to be used for anything interesting?
699 # else
700 *pCPU=IMAGE_FILE_MACHINE_I386; // We would use one of these names
701 *pBits = 32;
702 # endif
703 *pIsBigEndian=FALSE;
704 #else
705 # error "Unknown platform for CAN_GENERATE_OBJECTS."
706 #endif
707 return;
708 }
709
710 in=T_FileStream_open(filename, "rb");
711 if(in==NULL) {
712 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
713 exit(U_FILE_ACCESS_ERROR);
714 }
715 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
716
717 #ifdef U_ELF
718 if(length<(int32_t)sizeof(Elf32_Ehdr)) {
719 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
720 exit(U_UNSUPPORTED_ERROR);
721 }
722 if(
723 buffer.header32.e_ident[0]!=ELFMAG0 ||
724 buffer.header32.e_ident[1]!=ELFMAG1 ||
725 buffer.header32.e_ident[2]!=ELFMAG2 ||
726 buffer.header32.e_ident[3]!=ELFMAG3 ||
727 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
728 ) {
729 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
730 exit(U_UNSUPPORTED_ERROR);
731 }
732
733 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
734 #ifdef U_ELF64
735 if(*pBits!=32 && *pBits!=64) {
736 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
737 exit(U_UNSUPPORTED_ERROR);
738 }
739 #else
740 if(*pBits!=32) {
741 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
742 exit(U_UNSUPPORTED_ERROR);
743 }
744 #endif
745
746 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
747 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
748 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
749 exit(U_UNSUPPORTED_ERROR);
750 }
751 /* TODO: Support byte swapping */
752
753 *pCPU=buffer.header32.e_machine;
754 #elif U_PLATFORM_HAS_WIN32_API
755 if(length<sizeof(IMAGE_FILE_HEADER)) {
756 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
757 exit(U_UNSUPPORTED_ERROR);
758 }
759 /* TODO: Use buffer.header. Keep aliasing legal. */
760 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
761 *pCPU=pHeader->Machine;
762 /*
763 * The number of bits is implicit with the Machine value.
764 * *pBits is ignored in the calling code, so this need not be precise.
765 */
766 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
767 /* Windows always runs on little-endian CPUs. */
768 *pIsBigEndian=FALSE;
769 #else
770 # error "Unknown platform for CAN_GENERATE_OBJECTS."
771 #endif
772
773 T_FileStream_close(in);
774 }
775
776 U_CAPI void U_EXPORT2
777 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
778 /* common variables */
779 char buffer[4096], entry[96]={ 0 };
780 FileStream *in, *out;
781 const char *newSuffix;
782 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
783
784 uint16_t cpu, bits;
785 UBool makeBigEndian;
786
787 /* platform-specific variables and initialization code */
788 #ifdef U_ELF
789 /* 32-bit Elf file header */
790 static Elf32_Ehdr header32={
791 {
792 /* e_ident[] */
793 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
794 ELFCLASS32,
795 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
796 EV_CURRENT /* EI_VERSION */
797 },
798 ET_REL,
799 EM_386,
800 EV_CURRENT, /* e_version */
801 0, /* e_entry */
802 0, /* e_phoff */
803 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
804 0, /* e_flags */
805 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
806 0, /* e_phentsize */
807 0, /* e_phnum */
808 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
809 5, /* e_shnum */
810 2 /* e_shstrndx */
811 };
812
813 /* 32-bit Elf section header table */
814 static Elf32_Shdr sectionHeaders32[5]={
815 { /* SHN_UNDEF */
816 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
817 },
818 { /* .symtab */
819 1, /* sh_name */
820 SHT_SYMTAB,
821 0, /* sh_flags */
822 0, /* sh_addr */
823 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
824 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
825 3, /* sh_link=sect hdr index of .strtab */
826 1, /* sh_info=One greater than the symbol table index of the last
827 * local symbol (with STB_LOCAL). */
828 4, /* sh_addralign */
829 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
830 },
831 { /* .shstrtab */
832 9, /* sh_name */
833 SHT_STRTAB,
834 0, /* sh_flags */
835 0, /* sh_addr */
836 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
837 40, /* sh_size */
838 0, /* sh_link */
839 0, /* sh_info */
840 1, /* sh_addralign */
841 0 /* sh_entsize */
842 },
843 { /* .strtab */
844 19, /* sh_name */
845 SHT_STRTAB,
846 0, /* sh_flags */
847 0, /* sh_addr */
848 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
849 (Elf32_Word)sizeof(entry), /* sh_size */
850 0, /* sh_link */
851 0, /* sh_info */
852 1, /* sh_addralign */
853 0 /* sh_entsize */
854 },
855 { /* .rodata */
856 27, /* sh_name */
857 SHT_PROGBITS,
858 SHF_ALLOC, /* sh_flags */
859 0, /* sh_addr */
860 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
861 0, /* sh_size */
862 0, /* sh_link */
863 0, /* sh_info */
864 16, /* sh_addralign */
865 0 /* sh_entsize */
866 }
867 };
868
869 /* symbol table */
870 static Elf32_Sym symbols32[2]={
871 { /* STN_UNDEF */
872 0, 0, 0, 0, 0, 0
873 },
874 { /* data entry point */
875 1, /* st_name */
876 0, /* st_value */
877 0, /* st_size */
878 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
879 0, /* st_other */
880 4 /* st_shndx=index of related section table entry */
881 }
882 };
883
884 /* section header string table, with decimal string offsets */
885 static const char sectionStrings[40]=
886 /* 0 */ "\0"
887 /* 1 */ ".symtab\0"
888 /* 9 */ ".shstrtab\0"
889 /* 19 */ ".strtab\0"
890 /* 27 */ ".rodata\0"
891 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
892 /* 40: padded to multiple of 8 bytes */
893
894 /*
895 * Use entry[] for the string table which will contain only the
896 * entry point name.
897 * entry[0] must be 0 (NUL)
898 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
899 */
900
901 /* 16-align .rodata in the .o file, just in case */
902 static const char padding[16]={ 0 };
903 int32_t paddingSize;
904
905 #ifdef U_ELF64
906 /* 64-bit Elf file header */
907 static Elf64_Ehdr header64={
908 {
909 /* e_ident[] */
910 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
911 ELFCLASS64,
912 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
913 EV_CURRENT /* EI_VERSION */
914 },
915 ET_REL,
916 EM_X86_64,
917 EV_CURRENT, /* e_version */
918 0, /* e_entry */
919 0, /* e_phoff */
920 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
921 0, /* e_flags */
922 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
923 0, /* e_phentsize */
924 0, /* e_phnum */
925 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
926 5, /* e_shnum */
927 2 /* e_shstrndx */
928 };
929
930 /* 64-bit Elf section header table */
931 static Elf64_Shdr sectionHeaders64[5]={
932 { /* SHN_UNDEF */
933 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
934 },
935 { /* .symtab */
936 1, /* sh_name */
937 SHT_SYMTAB,
938 0, /* sh_flags */
939 0, /* sh_addr */
940 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
941 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
942 3, /* sh_link=sect hdr index of .strtab */
943 1, /* sh_info=One greater than the symbol table index of the last
944 * local symbol (with STB_LOCAL). */
945 4, /* sh_addralign */
946 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
947 },
948 { /* .shstrtab */
949 9, /* sh_name */
950 SHT_STRTAB,
951 0, /* sh_flags */
952 0, /* sh_addr */
953 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
954 40, /* sh_size */
955 0, /* sh_link */
956 0, /* sh_info */
957 1, /* sh_addralign */
958 0 /* sh_entsize */
959 },
960 { /* .strtab */
961 19, /* sh_name */
962 SHT_STRTAB,
963 0, /* sh_flags */
964 0, /* sh_addr */
965 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
966 (Elf64_Xword)sizeof(entry), /* sh_size */
967 0, /* sh_link */
968 0, /* sh_info */
969 1, /* sh_addralign */
970 0 /* sh_entsize */
971 },
972 { /* .rodata */
973 27, /* sh_name */
974 SHT_PROGBITS,
975 SHF_ALLOC, /* sh_flags */
976 0, /* sh_addr */
977 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
978 0, /* sh_size */
979 0, /* sh_link */
980 0, /* sh_info */
981 16, /* sh_addralign */
982 0 /* sh_entsize */
983 }
984 };
985
986 /*
987 * 64-bit symbol table
988 * careful: different order of items compared with Elf32_sym!
989 */
990 static Elf64_Sym symbols64[2]={
991 { /* STN_UNDEF */
992 0, 0, 0, 0, 0, 0
993 },
994 { /* data entry point */
995 1, /* st_name */
996 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
997 0, /* st_other */
998 4, /* st_shndx=index of related section table entry */
999 0, /* st_value */
1000 0 /* st_size */
1001 }
1002 };
1003
1004 #endif /* U_ELF64 */
1005
1006 /* entry[] have a leading NUL */
1007 entryOffset=1;
1008
1009 /* in the common code, count entryLength from after the NUL */
1010 entryLengthOffset=1;
1011
1012 newSuffix=".o";
1013
1014 #elif U_PLATFORM_HAS_WIN32_API
1015 struct {
1016 IMAGE_FILE_HEADER fileHeader;
1017 IMAGE_SECTION_HEADER sections[2];
1018 char linkerOptions[100];
1019 } objHeader;
1020 IMAGE_SYMBOL symbols[1];
1021 struct {
1022 DWORD sizeofLongNames;
1023 char longNames[100];
1024 } symbolNames;
1025
1026 /*
1027 * entry sometimes have a leading '_'
1028 * overwritten if entryOffset==0 depending on the target platform
1029 * see check for cpu below
1030 */
1031 entry[0]='_';
1032
1033 newSuffix=".obj";
1034 #else
1035 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1036 #endif
1037
1038 /* deal with options, files and the entry point name */
1039 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1040 if (optMatchArch)
1041 {
1042 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1043 }
1044 else
1045 {
1046 printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1047 }
1048 #if U_PLATFORM_HAS_WIN32_API
1049 if(cpu==IMAGE_FILE_MACHINE_I386) {
1050 entryOffset=1;
1051 }
1052 #endif
1053
1054 in=T_FileStream_open(filename, "rb");
1055 if(in==NULL) {
1056 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1057 exit(U_FILE_ACCESS_ERROR);
1058 }
1059 size=T_FileStream_size(in);
1060
1061 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1062 if (outFilePath != NULL) {
1063 uprv_strcpy(outFilePath, buffer);
1064 }
1065
1066 if(optEntryPoint != NULL) {
1067 uprv_strcpy(entry+entryOffset, optEntryPoint);
1068 uprv_strcat(entry+entryOffset, "_dat");
1069 }
1070 /* turn dashes in the entry name into underscores */
1071 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1072 for(i=0; i<entryLength; ++i) {
1073 if(entry[entryLengthOffset+i]=='-') {
1074 entry[entryLengthOffset+i]='_';
1075 }
1076 }
1077
1078 /* open the output file */
1079 out=T_FileStream_open(buffer, "wb");
1080 if(out==NULL) {
1081 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1082 exit(U_FILE_ACCESS_ERROR);
1083 }
1084
1085 #ifdef U_ELF
1086 if(bits==32) {
1087 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1088 header32.e_machine=cpu;
1089
1090 /* 16-align .rodata in the .o file, just in case */
1091 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1092 if(paddingSize!=0) {
1093 paddingSize=0x10-paddingSize;
1094 sectionHeaders32[4].sh_offset+=paddingSize;
1095 }
1096
1097 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1098
1099 symbols32[1].st_size=(Elf32_Word)size;
1100
1101 /* write .o headers */
1102 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1103 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1104 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1105 } else /* bits==64 */ {
1106 #ifdef U_ELF64
1107 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1108 header64.e_machine=cpu;
1109
1110 /* 16-align .rodata in the .o file, just in case */
1111 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1112 if(paddingSize!=0) {
1113 paddingSize=0x10-paddingSize;
1114 sectionHeaders64[4].sh_offset+=paddingSize;
1115 }
1116
1117 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1118
1119 symbols64[1].st_size=(Elf64_Xword)size;
1120
1121 /* write .o headers */
1122 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1123 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1124 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1125 #endif
1126 }
1127
1128 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1129 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1130 if(paddingSize!=0) {
1131 T_FileStream_write(out, padding, paddingSize);
1132 }
1133 #elif U_PLATFORM_HAS_WIN32_API
1134 /* populate the .obj headers */
1135 uprv_memset(&objHeader, 0, sizeof(objHeader));
1136 uprv_memset(&symbols, 0, sizeof(symbols));
1137 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1138
1139 /* write the linker export directive */
1140 uprv_strcpy(objHeader.linkerOptions, "-export:");
1141 length=8;
1142 uprv_strcpy(objHeader.linkerOptions+length, entry);
1143 length+=entryLength;
1144 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1145 length+=6;
1146
1147 /* set the file header */
1148 objHeader.fileHeader.Machine=cpu;
1149 objHeader.fileHeader.NumberOfSections=2;
1150 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1151 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1152 objHeader.fileHeader.NumberOfSymbols=1;
1153
1154 /* set the section for the linker options */
1155 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1156 objHeader.sections[0].SizeOfRawData=length;
1157 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1158 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1159
1160 /* set the data section */
1161 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1162 objHeader.sections[1].SizeOfRawData=size;
1163 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1164 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1165
1166 /* set the symbol table */
1167 if(entryLength<=8) {
1168 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1169 symbolNames.sizeofLongNames=4;
1170 } else {
1171 symbols[0].N.Name.Short=0;
1172 symbols[0].N.Name.Long=4;
1173 symbolNames.sizeofLongNames=4+entryLength+1;
1174 uprv_strcpy(symbolNames.longNames, entry);
1175 }
1176 symbols[0].SectionNumber=2;
1177 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1178
1179 /* write the file header and the linker options section */
1180 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1181 #else
1182 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1183 #endif
1184
1185 /* copy the data file into section 2 */
1186 for(;;) {
1187 length=T_FileStream_read(in, buffer, sizeof(buffer));
1188 if(length==0) {
1189 break;
1190 }
1191 T_FileStream_write(out, buffer, (int32_t)length);
1192 }
1193
1194 #if U_PLATFORM_HAS_WIN32_API
1195 /* write the symbol table */
1196 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1197 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1198 #endif
1199
1200 if(T_FileStream_error(in)) {
1201 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1202 exit(U_FILE_ACCESS_ERROR);
1203 }
1204
1205 if(T_FileStream_error(out)) {
1206 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1207 exit(U_FILE_ACCESS_ERROR);
1208 }
1209
1210 T_FileStream_close(out);
1211 T_FileStream_close(in);
1212 }
1213 #endif