]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/toolutil/pkg_genc.cpp
ae8b3ece973071a9a776100c091d271d74aa64e2
[apple/icu.git] / icuSources / tools / toolutil / pkg_genc.cpp
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /******************************************************************************
4 * Copyright (C) 2009-2016, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *******************************************************************************
7 */
8 #include "unicode/utypes.h"
9
10 #if U_PLATFORM_HAS_WIN32_API
11 # define VC_EXTRALEAN
12 # define WIN32_LEAN_AND_MEAN
13 # define NOUSER
14 # define NOSERVICE
15 # define NOIME
16 # define NOMCX
17 #include <windows.h>
18 #include <time.h>
19 # ifdef __GNUC__
20 # define WINDOWS_WITH_GNUC
21 # endif
22 #endif
23
24 #if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
25 # define U_ELF
26 #endif
27
28 #ifdef U_ELF
29 # include <elf.h>
30 # if defined(ELFCLASS64)
31 # define U_ELF64
32 # endif
33 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
34 # ifndef EM_X86_64
35 # define EM_X86_64 62
36 # endif
37 # define ICU_ENTRY_OFFSET 0
38 #endif
39
40 #include <stdio.h>
41 #include <stdlib.h>
42 #include "unicode/putil.h"
43 #include "cmemory.h"
44 #include "cstring.h"
45 #include "filestrm.h"
46 #include "toolutil.h"
47 #include "unicode/uclean.h"
48 #include "uoptions.h"
49 #include "pkg_genc.h"
50 #include "filetools.h"
51
52 #define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
53
54 #define HEX_0X 0 /* 0x1234 */
55 #define HEX_0H 1 /* 01234h */
56
57 /* prototypes --------------------------------------------------------------- */
58 static void
59 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);
60
61 static uint32_t
62 write8(FileStream *out, uint8_t byte, uint32_t column);
63
64 static uint32_t
65 write32(FileStream *out, uint32_t byte, uint32_t column);
66
67 #if U_PLATFORM == U_PF_OS400
68 static uint32_t
69 write8str(FileStream *out, uint8_t byte, uint32_t column);
70 #endif
71 /* -------------------------------------------------------------------------- */
72
73 /*
74 Creating Template Files for New Platforms
75
76 Let the cc compiler help you get started.
77 Compile this program
78 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
79 with the -S option to produce assembly output.
80
81 For example, this will generate array.s:
82 gcc -S array.c
83
84 This will produce a .s file that may look like this:
85
86 .file "array.c"
87 .version "01.01"
88 gcc2_compiled.:
89 .globl x
90 .section .rodata
91 .align 4
92 .type x,@object
93 .size x,20
94 x:
95 .long 1
96 .long 2
97 .long -559038737
98 .long -1
99 .long 16
100 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
101
102 which gives a starting point that will compile, and can be transformed
103 to become the template, generally with some consulting of as docs and
104 some experimentation.
105
106 If you want ICU to automatically use this assembly, you should
107 specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
108 where the name is the compiler or platform that you used in this
109 assemblyHeader data structure.
110 */
111 static const struct AssemblyType {
112 const char *name;
113 const char *header;
114 const char *beginLine;
115 const char *footer;
116 int8_t hexType; /* HEX_0X or HEX_0h */
117 } assemblyHeader[] = {
118 /* For gcc assemblers, the meaning of .align changes depending on the */
119 /* hardware, so we use .balign 16 which always means 16 bytes. */
120 /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
121 {"gcc",
122 ".globl %s\n"
123 "\t.section .note.GNU-stack,\"\",%%progbits\n"
124 "\t.section .rodata\n"
125 "\t.balign 16\n"
126 "#ifdef U_HIDE_DATA_SYMBOL\n"
127 "\t.hidden %s\n"
128 "#endif\n"
129 "\t.type %s,%%object\n"
130 "%s:\n\n",
131
132 ".long ",".size %s, .-%s\n",HEX_0X
133 },
134 {"gcc-darwin",
135 /*"\t.section __TEXT,__text,regular,pure_instructions\n"
136 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
137 ".globl _%s\n"
138 "#ifdef U_HIDE_DATA_SYMBOL\n"
139 "\t.private_extern _%s\n"
140 "#endif\n"
141 "\t.data\n"
142 "\t.const\n"
143 "\t.balign 16\n"
144 "_%s:\n\n",
145
146 ".long ","",HEX_0X
147 },
148 {"gcc-cygwin",
149 ".globl _%s\n"
150 "\t.section .rodata\n"
151 "\t.balign 16\n"
152 "_%s:\n\n",
153
154 ".long ","",HEX_0X
155 },
156 {"gcc-mingw64",
157 ".globl %s\n"
158 "\t.section .rodata\n"
159 "\t.balign 16\n"
160 "%s:\n\n",
161
162 ".long ","",HEX_0X
163 },
164 /* 16 bytes alignment. */
165 /* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
166 {"sun",
167 "\t.section \".rodata\"\n"
168 "\t.align 16\n"
169 ".globl %s\n"
170 "%s:\n",
171
172 ".word ","",HEX_0X
173 },
174 /* 16 bytes alignment for sun-x86. */
175 /* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
176 {"sun-x86",
177 "Drodata.rodata:\n"
178 "\t.type Drodata.rodata,@object\n"
179 "\t.size Drodata.rodata,0\n"
180 "\t.globl %s\n"
181 "\t.align 16\n"
182 "%s:\n",
183
184 ".4byte ","",HEX_0X
185 },
186 /* 1<<4 bit alignment for aix. */
187 /* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
188 {"xlc",
189 ".globl %s{RO}\n"
190 "\t.toc\n"
191 "%s:\n"
192 "\t.csect %s{RO}, 4\n",
193
194 ".long ","",HEX_0X
195 },
196 {"aCC-ia64",
197 "\t.file \"%s.s\"\n"
198 "\t.type %s,@object\n"
199 "\t.global %s\n"
200 "\t.secalias .abe$0.rodata, \".rodata\"\n"
201 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
202 "\t.align 16\n"
203 "%s::\t",
204
205 "data4 ","",HEX_0X
206 },
207 {"aCC-parisc",
208 "\t.SPACE $TEXT$\n"
209 "\t.SUBSPA $LIT$\n"
210 "%s\n"
211 "\t.EXPORT %s\n"
212 "\t.ALIGN 16\n",
213
214 ".WORD ","",HEX_0X
215 },
216 /* align 16 bytes */
217 /* http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
218 { "masm",
219 "\tTITLE %s\n"
220 "; generated by genccode\n"
221 ".386\n"
222 ".model flat\n"
223 "\tPUBLIC _%s\n"
224 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
225 "\tALIGN 16\n"
226 "_%s\tLABEL DWORD\n",
227 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
228 }
229 };
230
231 static int32_t assemblyHeaderIndex = -1;
232 static int32_t hexType = HEX_0X;
233
234 U_CAPI UBool U_EXPORT2
235 checkAssemblyHeaderName(const char* optAssembly) {
236 int32_t idx;
237 assemblyHeaderIndex = -1;
238 for (idx = 0; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
239 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
240 assemblyHeaderIndex = idx;
241 hexType = assemblyHeader[idx].hexType; /* set the hex type */
242 return TRUE;
243 }
244 }
245
246 return FALSE;
247 }
248
249
250 U_CAPI void U_EXPORT2
251 printAssemblyHeadersToStdErr(void) {
252 int32_t idx;
253 fprintf(stderr, "%s", assemblyHeader[0].name);
254 for (idx = 1; idx < UPRV_LENGTHOF(assemblyHeader); idx++) {
255 fprintf(stderr, ", %s", assemblyHeader[idx].name);
256 }
257 fprintf(stderr,
258 ")\n");
259 }
260
261 U_CAPI void U_EXPORT2
262 writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
263 uint32_t column = MAX_COLUMN;
264 char entry[64];
265 uint32_t buffer[1024];
266 char *bufferStr = (char *)buffer;
267 FileStream *in, *out;
268 size_t i, length;
269
270 in=T_FileStream_open(filename, "rb");
271 if(in==NULL) {
272 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
273 exit(U_FILE_ACCESS_ERROR);
274 }
275
276 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
277 out=T_FileStream_open(bufferStr, "w");
278 if(out==NULL) {
279 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
280 exit(U_FILE_ACCESS_ERROR);
281 }
282
283 if (outFilePath != NULL) {
284 uprv_strcpy(outFilePath, bufferStr);
285 }
286
287 #if defined (WINDOWS_WITH_GNUC) && U_PLATFORM != U_PF_CYGWIN
288 /* Need to fix the file separator character when using MinGW. */
289 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
290 #endif
291
292 if(optEntryPoint != NULL) {
293 uprv_strcpy(entry, optEntryPoint);
294 uprv_strcat(entry, "_dat");
295 }
296
297 /* turn dashes or dots in the entry name into underscores */
298 length=uprv_strlen(entry);
299 for(i=0; i<length; ++i) {
300 if(entry[i]=='-' || entry[i]=='.') {
301 entry[i]='_';
302 }
303 }
304
305 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
306 entry, entry, entry, entry,
307 entry, entry, entry, entry);
308 T_FileStream_writeLine(out, bufferStr);
309 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
310
311 for(;;) {
312 memset(buffer, 0, sizeof(buffer));
313 length=T_FileStream_read(in, buffer, sizeof(buffer));
314 if(length==0) {
315 break;
316 }
317 for(i=0; i<(length/sizeof(buffer[0])); i++) {
318 column = write32(out, buffer[i], column);
319 }
320 }
321
322 T_FileStream_writeLine(out, "\n");
323
324 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
325 entry, entry, entry, entry,
326 entry, entry, entry, entry);
327 T_FileStream_writeLine(out, bufferStr);
328
329 if(T_FileStream_error(in)) {
330 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
331 exit(U_FILE_ACCESS_ERROR);
332 }
333
334 if(T_FileStream_error(out)) {
335 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
336 exit(U_FILE_ACCESS_ERROR);
337 }
338
339 T_FileStream_close(out);
340 T_FileStream_close(in);
341 }
342
343 U_CAPI void U_EXPORT2
344 writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
345 uint32_t column = MAX_COLUMN;
346 char buffer[4096], entry[64];
347 FileStream *in, *out;
348 size_t i, length;
349
350 in=T_FileStream_open(filename, "rb");
351 if(in==NULL) {
352 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
353 exit(U_FILE_ACCESS_ERROR);
354 }
355
356 if(optName != NULL) { /* prepend 'icudt28_' */
357 strcpy(entry, optName);
358 strcat(entry, "_");
359 } else {
360 entry[0] = 0;
361 }
362
363 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
364 if (outFilePath != NULL) {
365 uprv_strcpy(outFilePath, buffer);
366 }
367 out=T_FileStream_open(buffer, "w");
368 if(out==NULL) {
369 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
370 exit(U_FILE_ACCESS_ERROR);
371 }
372
373 /* turn dashes or dots in the entry name into underscores */
374 length=uprv_strlen(entry);
375 for(i=0; i<length; ++i) {
376 if(entry[i]=='-' || entry[i]=='.') {
377 entry[i]='_';
378 }
379 }
380
381 #if U_PLATFORM == U_PF_OS400
382 /*
383 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
384
385 This is here because this platform can't currently put
386 const data into the read-only pages of an object or
387 shared library (service program). Only strings are allowed in read-only
388 pages, so we use char * strings to store the data.
389
390 In order to prevent the beginning of the data from ever matching the
391 magic numbers we must still use the initial double.
392 [grhoten 4/24/2003]
393 */
394 sprintf(buffer,
395 "#ifndef IN_GENERATED_CCODE\n"
396 "#define IN_GENERATED_CCODE\n"
397 "#define U_DISABLE_RENAMING 1\n"
398 "#include \"unicode/umachine.h\"\n"
399 "#endif\n"
400 "U_CDECL_BEGIN\n"
401 "const struct {\n"
402 " double bogus;\n"
403 " const char *bytes; \n"
404 "} %s={ 0.0, \n",
405 entry);
406 T_FileStream_writeLine(out, buffer);
407
408 for(;;) {
409 length=T_FileStream_read(in, buffer, sizeof(buffer));
410 if(length==0) {
411 break;
412 }
413 for(i=0; i<length; ++i) {
414 column = write8str(out, (uint8_t)buffer[i], column);
415 }
416 }
417
418 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
419 #else
420 /* Function renaming shouldn't be done in data */
421 sprintf(buffer,
422 "#ifndef IN_GENERATED_CCODE\n"
423 "#define IN_GENERATED_CCODE\n"
424 "#define U_DISABLE_RENAMING 1\n"
425 "#include \"unicode/umachine.h\"\n"
426 "#endif\n"
427 "U_CDECL_BEGIN\n"
428 "const struct {\n"
429 " double bogus;\n"
430 " uint8_t bytes[%ld]; \n"
431 "} %s={ 0.0, {\n",
432 (long)T_FileStream_size(in), entry);
433 T_FileStream_writeLine(out, buffer);
434
435 for(;;) {
436 length=T_FileStream_read(in, buffer, sizeof(buffer));
437 if(length==0) {
438 break;
439 }
440 for(i=0; i<length; ++i) {
441 column = write8(out, (uint8_t)buffer[i], column);
442 }
443 }
444
445 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
446 #endif
447
448 if(T_FileStream_error(in)) {
449 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
450 exit(U_FILE_ACCESS_ERROR);
451 }
452
453 if(T_FileStream_error(out)) {
454 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
455 exit(U_FILE_ACCESS_ERROR);
456 }
457
458 T_FileStream_close(out);
459 T_FileStream_close(in);
460 }
461
462 static uint32_t
463 write32(FileStream *out, uint32_t bitField, uint32_t column) {
464 int32_t i;
465 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
466 char *s = bitFieldStr;
467 uint8_t *ptrIdx = (uint8_t *)&bitField;
468 static const char hexToStr[16] = {
469 '0','1','2','3',
470 '4','5','6','7',
471 '8','9','A','B',
472 'C','D','E','F'
473 };
474
475 /* write the value, possibly with comma and newline */
476 if(column==MAX_COLUMN) {
477 /* first byte */
478 column=1;
479 } else if(column<32) {
480 *(s++)=',';
481 ++column;
482 } else {
483 *(s++)='\n';
484 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
485 s+=uprv_strlen(s);
486 column=1;
487 }
488
489 if (bitField < 10) {
490 /* It's a small number. Don't waste the space for 0x */
491 *(s++)=hexToStr[bitField];
492 }
493 else {
494 int seenNonZero = 0; /* This is used to remove leading zeros */
495
496 if(hexType==HEX_0X) {
497 *(s++)='0';
498 *(s++)='x';
499 } else if(hexType==HEX_0H) {
500 *(s++)='0';
501 }
502
503 /* This creates a 32-bit field */
504 #if U_IS_BIG_ENDIAN
505 for (i = 0; i < sizeof(uint32_t); i++)
506 #else
507 for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
508 #endif
509 {
510 uint8_t value = ptrIdx[i];
511 if (value || seenNonZero) {
512 *(s++)=hexToStr[value>>4];
513 *(s++)=hexToStr[value&0xF];
514 seenNonZero = 1;
515 }
516 }
517 if(hexType==HEX_0H) {
518 *(s++)='h';
519 }
520 }
521
522 *(s++)=0;
523 T_FileStream_writeLine(out, bitFieldStr);
524 return column;
525 }
526
527 static uint32_t
528 write8(FileStream *out, uint8_t byte, uint32_t column) {
529 char s[4];
530 int i=0;
531
532 /* convert the byte value to a string */
533 if(byte>=100) {
534 s[i++]=(char)('0'+byte/100);
535 byte%=100;
536 }
537 if(i>0 || byte>=10) {
538 s[i++]=(char)('0'+byte/10);
539 byte%=10;
540 }
541 s[i++]=(char)('0'+byte);
542 s[i]=0;
543
544 /* write the value, possibly with comma and newline */
545 if(column==MAX_COLUMN) {
546 /* first byte */
547 column=1;
548 } else if(column<16) {
549 T_FileStream_writeLine(out, ",");
550 ++column;
551 } else {
552 T_FileStream_writeLine(out, ",\n");
553 column=1;
554 }
555 T_FileStream_writeLine(out, s);
556 return column;
557 }
558
559 #if U_PLATFORM == U_PF_OS400
560 static uint32_t
561 write8str(FileStream *out, uint8_t byte, uint32_t column) {
562 char s[8];
563
564 if (byte > 7)
565 sprintf(s, "\\x%X", byte);
566 else
567 sprintf(s, "\\%X", byte);
568
569 /* write the value, possibly with comma and newline */
570 if(column==MAX_COLUMN) {
571 /* first byte */
572 column=1;
573 T_FileStream_writeLine(out, "\"");
574 } else if(column<24) {
575 ++column;
576 } else {
577 T_FileStream_writeLine(out, "\"\n\"");
578 column=1;
579 }
580 T_FileStream_writeLine(out, s);
581 return column;
582 }
583 #endif
584
585 static void
586 getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
587 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
588
589 /* copy path */
590 if(destdir!=NULL && *destdir!=0) {
591 do {
592 *outFilename++=*destdir++;
593 } while(*destdir!=0);
594 if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
595 *outFilename++=U_FILE_SEP_CHAR;
596 }
597 inFilename=basename;
598 } else {
599 while(inFilename<basename) {
600 *outFilename++=*inFilename++;
601 }
602 }
603
604 if(suffix==NULL) {
605 /* the filename does not have a suffix */
606 uprv_strcpy(entryName, inFilename);
607 if(optFilename != NULL) {
608 uprv_strcpy(outFilename, optFilename);
609 } else {
610 uprv_strcpy(outFilename, inFilename);
611 }
612 uprv_strcat(outFilename, newSuffix);
613 } else {
614 char *saveOutFilename = outFilename;
615 /* copy basename */
616 while(inFilename<suffix) {
617 if(*inFilename=='-') {
618 /* iSeries cannot have '-' in the .o objects. */
619 *outFilename++=*entryName++='_';
620 inFilename++;
621 }
622 else {
623 *outFilename++=*entryName++=*inFilename++;
624 }
625 }
626
627 /* replace '.' by '_' */
628 *outFilename++=*entryName++='_';
629 ++inFilename;
630
631 /* copy suffix */
632 while(*inFilename!=0) {
633 *outFilename++=*entryName++=*inFilename++;
634 }
635
636 *entryName=0;
637
638 if(optFilename != NULL) {
639 uprv_strcpy(saveOutFilename, optFilename);
640 uprv_strcat(saveOutFilename, newSuffix);
641 } else {
642 /* add ".c" */
643 uprv_strcpy(outFilename, newSuffix);
644 }
645 }
646 }
647
648 #ifdef CAN_GENERATE_OBJECTS
649 static void
650 getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
651 union {
652 char bytes[2048];
653 #ifdef U_ELF
654 Elf32_Ehdr header32;
655 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
656 #elif U_PLATFORM_HAS_WIN32_API
657 IMAGE_FILE_HEADER header;
658 #endif
659 } buffer;
660
661 const char *filename;
662 FileStream *in;
663 int32_t length;
664
665 #ifdef U_ELF
666
667 #elif U_PLATFORM_HAS_WIN32_API
668 const IMAGE_FILE_HEADER *pHeader;
669 #else
670 # error "Unknown platform for CAN_GENERATE_OBJECTS."
671 #endif
672
673 if(optMatchArch != NULL) {
674 filename=optMatchArch;
675 } else {
676 /* set defaults */
677 #ifdef U_ELF
678 /* set EM_386 because elf.h does not provide better defaults */
679 *pCPU=EM_386;
680 *pBits=32;
681 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
682 #elif U_PLATFORM_HAS_WIN32_API
683 // Windows always runs in little-endian mode.
684 *pIsBigEndian = FALSE;
685
686 // Note: The various _M_<arch> macros are predefined by the MSVC compiler based
687 // on the target compilation architecture.
688 // https://docs.microsoft.com/cpp/preprocessor/predefined-macros
689
690 // link.exe will link an IMAGE_FILE_MACHINE_UNKNOWN data-only .obj file
691 // no matter what architecture it is targeting (though other values are
692 // required to match). Unfortunately, the variable name decoration/mangling
693 // is slightly different on x86, which means we can't use the UNKNOWN type
694 // for all architectures though.
695 # if defined(_M_IX86)
696 *pCPU = IMAGE_FILE_MACHINE_I386;
697 # else
698 *pCPU = IMAGE_FILE_MACHINE_UNKNOWN;
699 # endif
700 # if defined(_M_IA64) || defined(_M_AMD64) || defined (_M_ARM64)
701 *pBits = 64; // Doesn't seem to be used for anything interesting though?
702 # elif defined(_M_IX86) || defined(_M_ARM)
703 *pBits = 32;
704 # else
705 # error "Unknown platform for CAN_GENERATE_OBJECTS."
706 # endif
707 #else
708 # error "Unknown platform for CAN_GENERATE_OBJECTS."
709 #endif
710 return;
711 }
712
713 in=T_FileStream_open(filename, "rb");
714 if(in==NULL) {
715 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
716 exit(U_FILE_ACCESS_ERROR);
717 }
718 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
719
720 #ifdef U_ELF
721 if(length<(int32_t)sizeof(Elf32_Ehdr)) {
722 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
723 exit(U_UNSUPPORTED_ERROR);
724 }
725 if(
726 buffer.header32.e_ident[0]!=ELFMAG0 ||
727 buffer.header32.e_ident[1]!=ELFMAG1 ||
728 buffer.header32.e_ident[2]!=ELFMAG2 ||
729 buffer.header32.e_ident[3]!=ELFMAG3 ||
730 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
731 ) {
732 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
733 exit(U_UNSUPPORTED_ERROR);
734 }
735
736 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
737 #ifdef U_ELF64
738 if(*pBits!=32 && *pBits!=64) {
739 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
740 exit(U_UNSUPPORTED_ERROR);
741 }
742 #else
743 if(*pBits!=32) {
744 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
745 exit(U_UNSUPPORTED_ERROR);
746 }
747 #endif
748
749 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
750 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
751 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
752 exit(U_UNSUPPORTED_ERROR);
753 }
754 /* TODO: Support byte swapping */
755
756 *pCPU=buffer.header32.e_machine;
757 #elif U_PLATFORM_HAS_WIN32_API
758 if(length<sizeof(IMAGE_FILE_HEADER)) {
759 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
760 exit(U_UNSUPPORTED_ERROR);
761 }
762 /* TODO: Use buffer.header. Keep aliasing legal. */
763 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
764 *pCPU=pHeader->Machine;
765 /*
766 * The number of bits is implicit with the Machine value.
767 * *pBits is ignored in the calling code, so this need not be precise.
768 */
769 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
770 /* Windows always runs on little-endian CPUs. */
771 *pIsBigEndian=FALSE;
772 #else
773 # error "Unknown platform for CAN_GENERATE_OBJECTS."
774 #endif
775
776 T_FileStream_close(in);
777 }
778
779 U_CAPI void U_EXPORT2
780 writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
781 /* common variables */
782 char buffer[4096], entry[96]={ 0 };
783 FileStream *in, *out;
784 const char *newSuffix;
785 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
786
787 uint16_t cpu, bits;
788 UBool makeBigEndian;
789
790 /* platform-specific variables and initialization code */
791 #ifdef U_ELF
792 /* 32-bit Elf file header */
793 static Elf32_Ehdr header32={
794 {
795 /* e_ident[] */
796 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
797 ELFCLASS32,
798 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
799 EV_CURRENT /* EI_VERSION */
800 },
801 ET_REL,
802 EM_386,
803 EV_CURRENT, /* e_version */
804 0, /* e_entry */
805 0, /* e_phoff */
806 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
807 0, /* e_flags */
808 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
809 0, /* e_phentsize */
810 0, /* e_phnum */
811 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
812 5, /* e_shnum */
813 2 /* e_shstrndx */
814 };
815
816 /* 32-bit Elf section header table */
817 static Elf32_Shdr sectionHeaders32[5]={
818 { /* SHN_UNDEF */
819 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
820 },
821 { /* .symtab */
822 1, /* sh_name */
823 SHT_SYMTAB,
824 0, /* sh_flags */
825 0, /* sh_addr */
826 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
827 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
828 3, /* sh_link=sect hdr index of .strtab */
829 1, /* sh_info=One greater than the symbol table index of the last
830 * local symbol (with STB_LOCAL). */
831 4, /* sh_addralign */
832 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
833 },
834 { /* .shstrtab */
835 9, /* sh_name */
836 SHT_STRTAB,
837 0, /* sh_flags */
838 0, /* sh_addr */
839 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
840 40, /* sh_size */
841 0, /* sh_link */
842 0, /* sh_info */
843 1, /* sh_addralign */
844 0 /* sh_entsize */
845 },
846 { /* .strtab */
847 19, /* sh_name */
848 SHT_STRTAB,
849 0, /* sh_flags */
850 0, /* sh_addr */
851 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
852 (Elf32_Word)sizeof(entry), /* sh_size */
853 0, /* sh_link */
854 0, /* sh_info */
855 1, /* sh_addralign */
856 0 /* sh_entsize */
857 },
858 { /* .rodata */
859 27, /* sh_name */
860 SHT_PROGBITS,
861 SHF_ALLOC, /* sh_flags */
862 0, /* sh_addr */
863 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
864 0, /* sh_size */
865 0, /* sh_link */
866 0, /* sh_info */
867 16, /* sh_addralign */
868 0 /* sh_entsize */
869 }
870 };
871
872 /* symbol table */
873 static Elf32_Sym symbols32[2]={
874 { /* STN_UNDEF */
875 0, 0, 0, 0, 0, 0
876 },
877 { /* data entry point */
878 1, /* st_name */
879 0, /* st_value */
880 0, /* st_size */
881 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
882 0, /* st_other */
883 4 /* st_shndx=index of related section table entry */
884 }
885 };
886
887 /* section header string table, with decimal string offsets */
888 static const char sectionStrings[40]=
889 /* 0 */ "\0"
890 /* 1 */ ".symtab\0"
891 /* 9 */ ".shstrtab\0"
892 /* 19 */ ".strtab\0"
893 /* 27 */ ".rodata\0"
894 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
895 /* 40: padded to multiple of 8 bytes */
896
897 /*
898 * Use entry[] for the string table which will contain only the
899 * entry point name.
900 * entry[0] must be 0 (NUL)
901 * The entry point name can be up to 38 characters long (sizeof(entry)-2).
902 */
903
904 /* 16-align .rodata in the .o file, just in case */
905 static const char padding[16]={ 0 };
906 int32_t paddingSize;
907
908 #ifdef U_ELF64
909 /* 64-bit Elf file header */
910 static Elf64_Ehdr header64={
911 {
912 /* e_ident[] */
913 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
914 ELFCLASS64,
915 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
916 EV_CURRENT /* EI_VERSION */
917 },
918 ET_REL,
919 EM_X86_64,
920 EV_CURRENT, /* e_version */
921 0, /* e_entry */
922 0, /* e_phoff */
923 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
924 0, /* e_flags */
925 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
926 0, /* e_phentsize */
927 0, /* e_phnum */
928 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
929 5, /* e_shnum */
930 2 /* e_shstrndx */
931 };
932
933 /* 64-bit Elf section header table */
934 static Elf64_Shdr sectionHeaders64[5]={
935 { /* SHN_UNDEF */
936 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
937 },
938 { /* .symtab */
939 1, /* sh_name */
940 SHT_SYMTAB,
941 0, /* sh_flags */
942 0, /* sh_addr */
943 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
944 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
945 3, /* sh_link=sect hdr index of .strtab */
946 1, /* sh_info=One greater than the symbol table index of the last
947 * local symbol (with STB_LOCAL). */
948 4, /* sh_addralign */
949 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
950 },
951 { /* .shstrtab */
952 9, /* sh_name */
953 SHT_STRTAB,
954 0, /* sh_flags */
955 0, /* sh_addr */
956 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
957 40, /* sh_size */
958 0, /* sh_link */
959 0, /* sh_info */
960 1, /* sh_addralign */
961 0 /* sh_entsize */
962 },
963 { /* .strtab */
964 19, /* sh_name */
965 SHT_STRTAB,
966 0, /* sh_flags */
967 0, /* sh_addr */
968 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
969 (Elf64_Xword)sizeof(entry), /* sh_size */
970 0, /* sh_link */
971 0, /* sh_info */
972 1, /* sh_addralign */
973 0 /* sh_entsize */
974 },
975 { /* .rodata */
976 27, /* sh_name */
977 SHT_PROGBITS,
978 SHF_ALLOC, /* sh_flags */
979 0, /* sh_addr */
980 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
981 0, /* sh_size */
982 0, /* sh_link */
983 0, /* sh_info */
984 16, /* sh_addralign */
985 0 /* sh_entsize */
986 }
987 };
988
989 /*
990 * 64-bit symbol table
991 * careful: different order of items compared with Elf32_sym!
992 */
993 static Elf64_Sym symbols64[2]={
994 { /* STN_UNDEF */
995 0, 0, 0, 0, 0, 0
996 },
997 { /* data entry point */
998 1, /* st_name */
999 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
1000 0, /* st_other */
1001 4, /* st_shndx=index of related section table entry */
1002 0, /* st_value */
1003 0 /* st_size */
1004 }
1005 };
1006
1007 #endif /* U_ELF64 */
1008
1009 /* entry[] have a leading NUL */
1010 entryOffset=1;
1011
1012 /* in the common code, count entryLength from after the NUL */
1013 entryLengthOffset=1;
1014
1015 newSuffix=".o";
1016
1017 #elif U_PLATFORM_HAS_WIN32_API
1018 struct {
1019 IMAGE_FILE_HEADER fileHeader;
1020 IMAGE_SECTION_HEADER sections[2];
1021 char linkerOptions[100];
1022 } objHeader;
1023 IMAGE_SYMBOL symbols[1];
1024 struct {
1025 DWORD sizeofLongNames;
1026 char longNames[100];
1027 } symbolNames;
1028
1029 /*
1030 * entry sometimes have a leading '_'
1031 * overwritten if entryOffset==0 depending on the target platform
1032 * see check for cpu below
1033 */
1034 entry[0]='_';
1035
1036 newSuffix=".obj";
1037 #else
1038 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1039 #endif
1040
1041 /* deal with options, files and the entry point name */
1042 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1043 if (optMatchArch)
1044 {
1045 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1046 }
1047 else
1048 {
1049 printf("genccode: using architecture cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1050 }
1051 #if U_PLATFORM_HAS_WIN32_API
1052 if(cpu==IMAGE_FILE_MACHINE_I386) {
1053 entryOffset=1;
1054 }
1055 #endif
1056
1057 in=T_FileStream_open(filename, "rb");
1058 if(in==NULL) {
1059 fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1060 exit(U_FILE_ACCESS_ERROR);
1061 }
1062 size=T_FileStream_size(in);
1063
1064 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1065 if (outFilePath != NULL) {
1066 uprv_strcpy(outFilePath, buffer);
1067 }
1068
1069 if(optEntryPoint != NULL) {
1070 uprv_strcpy(entry+entryOffset, optEntryPoint);
1071 uprv_strcat(entry+entryOffset, "_dat");
1072 }
1073 /* turn dashes in the entry name into underscores */
1074 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1075 for(i=0; i<entryLength; ++i) {
1076 if(entry[entryLengthOffset+i]=='-') {
1077 entry[entryLengthOffset+i]='_';
1078 }
1079 }
1080
1081 /* open the output file */
1082 out=T_FileStream_open(buffer, "wb");
1083 if(out==NULL) {
1084 fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1085 exit(U_FILE_ACCESS_ERROR);
1086 }
1087
1088 #ifdef U_ELF
1089 if(bits==32) {
1090 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1091 header32.e_machine=cpu;
1092
1093 /* 16-align .rodata in the .o file, just in case */
1094 paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1095 if(paddingSize!=0) {
1096 paddingSize=0x10-paddingSize;
1097 sectionHeaders32[4].sh_offset+=paddingSize;
1098 }
1099
1100 sectionHeaders32[4].sh_size=(Elf32_Word)size;
1101
1102 symbols32[1].st_size=(Elf32_Word)size;
1103
1104 /* write .o headers */
1105 T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1106 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1107 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1108 } else /* bits==64 */ {
1109 #ifdef U_ELF64
1110 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1111 header64.e_machine=cpu;
1112
1113 /* 16-align .rodata in the .o file, just in case */
1114 paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1115 if(paddingSize!=0) {
1116 paddingSize=0x10-paddingSize;
1117 sectionHeaders64[4].sh_offset+=paddingSize;
1118 }
1119
1120 sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1121
1122 symbols64[1].st_size=(Elf64_Xword)size;
1123
1124 /* write .o headers */
1125 T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1126 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1127 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1128 #endif
1129 }
1130
1131 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1132 T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1133 if(paddingSize!=0) {
1134 T_FileStream_write(out, padding, paddingSize);
1135 }
1136 #elif U_PLATFORM_HAS_WIN32_API
1137 /* populate the .obj headers */
1138 uprv_memset(&objHeader, 0, sizeof(objHeader));
1139 uprv_memset(&symbols, 0, sizeof(symbols));
1140 uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1141
1142 /* write the linker export directive */
1143 uprv_strcpy(objHeader.linkerOptions, "-export:");
1144 length=8;
1145 uprv_strcpy(objHeader.linkerOptions+length, entry);
1146 length+=entryLength;
1147 uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1148 length+=6;
1149
1150 /* set the file header */
1151 objHeader.fileHeader.Machine=cpu;
1152 objHeader.fileHeader.NumberOfSections=2;
1153 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1154 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1155 objHeader.fileHeader.NumberOfSymbols=1;
1156
1157 /* set the section for the linker options */
1158 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1159 objHeader.sections[0].SizeOfRawData=length;
1160 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1161 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1162
1163 /* set the data section */
1164 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1165 objHeader.sections[1].SizeOfRawData=size;
1166 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1167 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1168
1169 /* set the symbol table */
1170 if(entryLength<=8) {
1171 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1172 symbolNames.sizeofLongNames=4;
1173 } else {
1174 symbols[0].N.Name.Short=0;
1175 symbols[0].N.Name.Long=4;
1176 symbolNames.sizeofLongNames=4+entryLength+1;
1177 uprv_strcpy(symbolNames.longNames, entry);
1178 }
1179 symbols[0].SectionNumber=2;
1180 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1181
1182 /* write the file header and the linker options section */
1183 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1184 #else
1185 # error "Unknown platform for CAN_GENERATE_OBJECTS."
1186 #endif
1187
1188 /* copy the data file into section 2 */
1189 for(;;) {
1190 length=T_FileStream_read(in, buffer, sizeof(buffer));
1191 if(length==0) {
1192 break;
1193 }
1194 T_FileStream_write(out, buffer, (int32_t)length);
1195 }
1196
1197 #if U_PLATFORM_HAS_WIN32_API
1198 /* write the symbol table */
1199 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1200 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1201 #endif
1202
1203 if(T_FileStream_error(in)) {
1204 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1205 exit(U_FILE_ACCESS_ERROR);
1206 }
1207
1208 if(T_FileStream_error(out)) {
1209 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1210 exit(U_FILE_ACCESS_ERROR);
1211 }
1212
1213 T_FileStream_close(out);
1214 T_FileStream_close(in);
1215 }
1216 #endif