git.saurik.com Git - apple/icu.git/blob - icuSources/tools/makeconv/makeconv.cpp

2 // License & terms of use: http://www.unicode.org/copyright.html

3 /*

4 ********************************************************************************

5 *

8 *

9 ********************************************************************************

10 *

11 *

12 * makeconv.cpp:

13 * tool creating a binary (compressed) representation of the conversion mapping

14 * table (IBM NLTC ucmap format).

15 *

16 * 05/04/2000 helena Added fallback mapping into the picture...

17 * 06/29/2000 helena Major rewrite of the callback APIs.

18 */

20 #include <stdio.h>

21 #include "unicode/putil.h"

22 #include "unicode/ucnv_err.h"

23 #include "charstr.h"

24 #include "ucnv_bld.h"

25 #include "ucnv_imp.h"

26 #include "ucnv_cnv.h"

27 #include "cstring.h"

28 #include "cmemory.h"

29 #include "uinvchar.h"

30 #include "filestrm.h"

31 #include "toolutil.h"

32 #include "uoptions.h"

33 #include "unicode/udata.h"

34 #include "unewdata.h"

35 #include "uparse.h"

36 #include "ucm.h"

37 #include "makeconv.h"

38 #include "genmbcs.h"

40 #define DEBUG 0

42 typedef struct ConvData {

43 UCMFile *ucm;

44 NewConverter *cnvData, *extData;

45 UConverterSharedData sharedData;

46 UConverterStaticData staticData;

47 } ConvData;

49 static void

50 initConvData(ConvData *data) {

     uprv_memset(data, 0, sizeof(ConvData));

     data->sharedData.structSize=sizeof(UConverterSharedData);

     data->staticData.structSize=sizeof(UConverterStaticData);

     data->sharedData.staticData=&data->staticData;

55 }

57 static void

58 cleanupConvData(ConvData *data) {

59 if(data!=NULL) {

         if(data->cnvData!=NULL) {

             data->cnvData->close(data->cnvData);

62 data->cnvData=NULL;

63 }

         if(data->extData!=NULL) {

             data->extData->close(data->extData);

66 data->extData=NULL;

67 }

68 ucm_close(data->ucm);

69 data->ucm=NULL;

70 }

71 }

73 /*

74 * from ucnvstat.c - static prototypes of data-based converters

75 */

76 U_CAPI const UConverterStaticData * ucnv_converterStaticData[UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES];

78 /*

79 * Global - verbosity

80 */

81 UBool VERBOSE = FALSE;

82 UBool QUIET = FALSE;

83 UBool SMALL = FALSE;

84 UBool IGNORE_SISO_CHECK = FALSE;

86 static void

 createConverter(ConvData *data, const char* converterName, UErrorCode *pErrorCode);

89 /*

90 * Set up the UNewData and write the converter..

91 */

92 static void

 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status);

95 UBool haveCopyright=TRUE;

97 static UDataInfo dataInfo={

98 sizeof(UDataInfo),

99 0,

100

101 U_IS_BIG_ENDIAN,

102 U_CHARSET_FAMILY,

103 sizeof(UChar),

104 0,

105

     {0x63, 0x6e, 0x76, 0x74},     /* dataFormat="cnvt" */

     {6, 2, 0, 0},                 /* formatVersion */

     {0, 0, 0, 0}                  /* dataVersion (calculated at runtime) */

109 };

110

111 static void

 writeConverterData(ConvData *data, const char *cnvName, const char *cnvDir, UErrorCode *status)

113 {

114 UNewDataMemory *mem = NULL;

115 uint32_t sz2;

116 uint32_t size = 0;

117 int32_t tableType;

118

     if(U_FAILURE(*status))

120 {

121 return;

122 }

123

124 tableType=TABLE_NONE;

     if(data->cnvData!=NULL) {

126 tableType|=TABLE_BASE;

127 }

     if(data->extData!=NULL) {

129 tableType|=TABLE_EXT;

130 }

131

     mem = udata_create(cnvDir, "cnv", cnvName, &dataInfo, haveCopyright ? U_COPYRIGHT_STRING : NULL, status);

133

     if(U_FAILURE(*status))

135 {

         fprintf(stderr, "Couldn't create the udata %s.%s: %s\n",

137 cnvName,

138 "cnv",

139 u_errorName(*status));

140 return;

141 }

142

143 if(VERBOSE)

144 {

         printf("- Opened udata %s.%s\n", cnvName, "cnv");

146 }

147

148

149 /* all read only, clean, platform independent data. Mmmm. :) */

     udata_writeBlock(mem, &data->staticData, sizeof(UConverterStaticData));

     size += sizeof(UConverterStaticData); /* Is 4-aligned  - by size */

152 /* Now, write the table */

153 if(tableType&TABLE_BASE) {

         size += data->cnvData->write(data->cnvData, &data->staticData, mem, tableType);

155 }

156 if(tableType&TABLE_EXT) {

         size += data->extData->write(data->extData, &data->staticData, mem, tableType);

158 }

159

     sz2 = udata_finish(mem, status);

161 if(size != sz2)

162 {

         fprintf(stderr, "error: wrote %u bytes to the .cnv file but counted %u bytes\n", (int)sz2, (int)size);

164 *status=U_INTERNAL_PROGRAM_ERROR;

165 }

166 if(VERBOSE)

167 {

       printf("- Wrote %u bytes to the udata.\n", (int)sz2);

169 }

170 }

171

172 enum {

173 OPT_HELP_H,

174 OPT_HELP_QUESTION_MARK,

175 OPT_COPYRIGHT,

176 OPT_VERSION,

177 OPT_DESTDIR,

178 OPT_VERBOSE,

179 OPT_SMALL,

180 OPT_IGNORE_SISO_CHECK,

181 OPT_QUIET,

182 OPT_SOURCEDIR,

183

184 OPT_COUNT

185 };

186

187 static UOption options[]={

188 UOPTION_HELP_H,

189 UOPTION_HELP_QUESTION_MARK,

190 UOPTION_COPYRIGHT,

191 UOPTION_VERSION,

192 UOPTION_DESTDIR,

193 UOPTION_VERBOSE,

     { "small", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },

     { "ignore-siso-check", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0 },

196 UOPTION_QUIET,

197 UOPTION_SOURCEDIR,

198 };

199

 int main(int argc, char* argv[])

201 {

202 ConvData data;

203 char cnvName[UCNV_MAX_FULL_FILE_NAME_LENGTH];

204

205 U_MAIN_INIT_ARGS(argc, argv);

206

207 /* Set up the ICU version number */

208 UVersionInfo icuVersion;

209 u_getVersion(icuVersion);

     uprv_memcpy(&dataInfo.dataVersion, &icuVersion, sizeof(UVersionInfo));

211

212 /* preset then read command line options */

     options[OPT_DESTDIR].value=u_getDataDirectory();

     argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options);

215

216 /* error handling, printing usage message */

     if(argc<0) {

218 fprintf(stderr,

             "error in command line argument \"%s\"\n",

220 argv[-argc]);

     } else if(argc<2) {

222 argc=-1;

223 }

     if(argc<0 || options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur) {

         FILE *stdfile=argc<0 ? stderr : stdout;

226 fprintf(stdfile,

             "usage: %s [-options] files...\n"

             "\tread .ucm codepage mapping files and write .cnv files\n"

229 "options:\n"

             "\t-h or -? or --help  this usage text\n"

             "\t-V or --version     show a version message\n"

             "\t-c or --copyright   include a copyright notice\n"

             "\t-d or --destdir     destination directory, followed by the path\n"

             "\t-v or --verbose     Turn on verbose output\n"

             "\t-q or --quiet       do not display warnings and progress\n"

             "\t-s or --sourcedir   source directory, followed by the path\n",

237 argv[0]);

238 fprintf(stdfile,

             "\t      --small       Generate smaller .cnv files. They will be\n"

             "\t                    significantly smaller but may not be compatible with\n"

             "\t                    older versions of ICU and will require heap memory\n"

             "\t                    allocation when loaded.\n"

             "\t      --ignore-siso-check         Use SI/SO other than 0xf/0xe.\n");

         return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;

245 }

246

     if(options[OPT_VERSION].doesOccur) {

         printf("makeconv version %u.%u, ICU tool to read .ucm codepage mapping files and write .cnv files\n",

                dataInfo.formatVersion[0], dataInfo.formatVersion[1]);

         printf("%s\n", U_COPYRIGHT_STRING);

251 exit(0);

252 }

253

254 /* get the options values */

255 haveCopyright = options[OPT_COPYRIGHT].doesOccur;

     const char *destdir = options[OPT_DESTDIR].value;

257 VERBOSE = options[OPT_VERBOSE].doesOccur;

258 QUIET = options[OPT_QUIET].doesOccur;

259 SMALL = options[OPT_SMALL].doesOccur;

260

     if (options[OPT_IGNORE_SISO_CHECK].doesOccur) {

262 IGNORE_SISO_CHECK = TRUE;

263 }

264

265 icu::CharString outFileName;

266 UErrorCode err = U_ZERO_ERROR;

     if (destdir != NULL && *destdir != 0) {

         outFileName.append(destdir, err).ensureEndsWithFileSeparator(err);

         if (U_FAILURE(err)) {

270 return err;

271 }

272 }

     int32_t outBasenameStart = outFileName.length();

274

275 #if DEBUG

276 {

277 int i;

       printf("makeconv: processing %d files...\n", argc - 1);

       for(i=1; i<argc; ++i) {

         printf("%s ", argv[i]);

281 }

       printf("\n");

283 fflush(stdout);

284 }

285 #endif

286

     UBool printFilename = (UBool) (argc > 2 || VERBOSE);

288 icu::CharString pathBuf;

     for (++argv; --argc; ++argv)

290 {

291 UErrorCode localError = U_ZERO_ERROR;

         const char *arg = getLongPathname(*argv);

293

         const char* sourcedir = options[OPT_SOURCEDIR].value;

         if (sourcedir != NULL && *sourcedir != 0 && uprv_strcmp(sourcedir, ".") != 0) {

296 pathBuf.clear();

             pathBuf.appendPathPart(sourcedir, localError);

             pathBuf.appendPathPart(arg, localError);

299 arg = pathBuf.data();

300 }

301

302 /*produces the right destination path for display*/

303 outFileName.truncate(outBasenameStart);

         if (outBasenameStart != 0)

305 {

306 /* find the last file sepator */

             const char *basename = findBasename(arg);

             outFileName.append(basename, localError);

309 }

310 else

311 {

             outFileName.append(arg, localError);

313 }

         if (U_FAILURE(localError)) {

315 return localError;

316 }

317

318 /*removes the extension if any is found*/

         int32_t lastDotIndex = outFileName.lastIndexOf('.');

320 if (lastDotIndex >= outBasenameStart) {

321 outFileName.truncate(lastDotIndex);

322 }

323

324 /* the basename without extension is the converter name */

         if ((outFileName.length() - outBasenameStart) >= UPRV_LENGTHOF(cnvName)) {

             fprintf(stderr, "converter name %s too long\n", outFileName.data() + outBasenameStart);

327 return U_BUFFER_OVERFLOW_ERROR;

328 }

         uprv_strcpy(cnvName, outFileName.data() + outBasenameStart);

330

331 /*Adds the target extension*/

         outFileName.append(CONVERTER_FILE_EXTENSION, localError);

         if (U_FAILURE(localError)) {

334 return localError;

335 }

336

337 #if DEBUG

         printf("makeconv: processing %s  ...\n", arg);

339 fflush(stdout);

340 #endif

341 initConvData(&data);

         createConverter(&data, arg, &localError);

343

         if (U_FAILURE(localError))

345 {

346 /* if an error is found, print out an error msg and keep going */

             fprintf(stderr, "Error creating converter for \"%s\" file for \"%s\" (%s)\n",

                     outFileName.data(), arg, u_errorName(localError));

             if(U_SUCCESS(err)) {

350 err = localError;

351 }

352 }

353 else

354 {

355 /* Insure the static data name matches the file name */

356 /* Changed to ignore directory and only compare base name

357 LDH 1/2/08*/

358 char *p;

             p = strrchr(cnvName, U_FILE_SEP_CHAR); /* Find last file separator */

360

             if(p == NULL)            /* OK, try alternate */

362 {

                 p = strrchr(cnvName, U_FILE_ALT_SEP_CHAR);

364 if(p == NULL)

365 {

366 p=cnvName; /* If no separators, no problem */

367 }

368 }

369 else

370 {

371 p++; /* If found separator, don't include it in compare */

372 }

             if(uprv_stricmp(p,data.staticData.name) && !QUIET)

374 {

                 fprintf(stderr, "Warning: %s%s claims to be '%s'\n",

376 cnvName, CONVERTER_FILE_EXTENSION,

377 data.staticData.name);

378 }

379

             uprv_strcpy((char*)data.staticData.name, cnvName);

381

             if(!uprv_isInvariantString((char*)data.staticData.name, -1)) {

383 fprintf(stderr,

384 "Error: A converter name must contain only invariant characters.\n"

                     "%s is not a valid converter name.\n",

386 data.staticData.name);

                 if(U_SUCCESS(err)) {

388 err = U_INVALID_TABLE_FORMAT;

389 }

390 }

391

392 localError = U_ZERO_ERROR;

             writeConverterData(&data, cnvName, destdir, &localError);

394

             if(U_FAILURE(localError))

396 {

397 /* if an error is found, print out an error msg and keep going*/

                 fprintf(stderr, "Error writing \"%s\" file for \"%s\" (%s)\n", outFileName.data(), arg,

399 u_errorName(localError));

                 if(U_SUCCESS(err)) {

401 err = localError;

402 }

403 }

404 else if (printFilename)

405 {

                 puts(outFileName.data() + outBasenameStart);

407 }

408 }

409 fflush(stdout);

410 fflush(stderr);

411

412 cleanupConvData(&data);

413 }

414

415 return err;

416 }

417

418 static void

 getPlatformAndCCSIDFromName(const char *name, int8_t *pPlatform, int32_t *pCCSID) {

     if( (name[0]=='i' || name[0]=='I') &&

         (name[1]=='b' || name[1]=='B') &&

         (name[2]=='m' || name[2]=='M')

423 ) {

424 name+=3;

         if(*name=='-') {

426 ++name;

427 }

428 *pPlatform=UCNV_IBM;

         *pCCSID=(int32_t)uprv_strtoul(name, NULL, 10);

430 } else {

431 *pPlatform=UCNV_UNKNOWN;

432 *pCCSID=0;

433 }

434 }

435

436 static void

437 readHeader(ConvData *data,

438 FileStream* convFile,

439 UErrorCode *pErrorCode) {

440 char line[1024];

     char *s, *key, *value;

442 const UConverterStaticData *prototype;

443 UConverterStaticData *staticData;

444

     if(U_FAILURE(*pErrorCode)) {

446 return;

447 }

448

449 staticData=&data->staticData;

450 staticData->platform=UCNV_IBM;

451 staticData->subCharLen=0;

452

     while(T_FileStream_readLine(convFile, line, sizeof(line))) {

454 /* basic parsing and handling of state-related items */

         if(ucm_parseHeaderLine(data->ucm, line, &key, &value)) {

456 continue;

457 }

458

459 /* stop at the beginning of the mapping section */

         if(uprv_strcmp(line, "CHARMAP")==0) {

461 break;

462 }

463

464 /* collect the information from the header field, ignore unknown keys */

         if(uprv_strcmp(key, "code_set_name")==0) {

             if(*value!=0) {

                 uprv_strcpy((char *)staticData->name, value);

                 getPlatformAndCCSIDFromName(value, &staticData->platform, &staticData->codepage);

469 }

         } else if(uprv_strcmp(key, "subchar")==0) {

471 uint8_t bytes[UCNV_EXT_MAX_BYTES];

472 int8_t length;

473

474 s=value;

             length=ucm_parseBytes(bytes, line, (const char **)&s);

             if(1<=length && length<=4 && *s==0) {

477 staticData->subCharLen=length;

                 uprv_memcpy(staticData->subChar, bytes, length);

479 } else {

                 fprintf(stderr, "error: illegal <subchar> %s\n", value);

481 *pErrorCode=U_INVALID_TABLE_FORMAT;

482 return;

483 }

         } else if(uprv_strcmp(key, "subchar1")==0) {

485 uint8_t bytes[UCNV_EXT_MAX_BYTES];

486

487 s=value;

             if(1==ucm_parseBytes(bytes, line, (const char **)&s) && *s==0) {

                 staticData->subChar1=bytes[0];

490 } else {

                 fprintf(stderr, "error: illegal <subchar1> %s\n", value);

492 *pErrorCode=U_INVALID_TABLE_FORMAT;

493 return;

494 }

495 }

496 }

497

498 /* copy values from the UCMFile to the static data */

     staticData->maxBytesPerChar=(int8_t)data->ucm->states.maxCharLength;

     staticData->minBytesPerChar=(int8_t)data->ucm->states.minCharLength;

     staticData->conversionType=data->ucm->states.conversionType;

502

     if(staticData->conversionType==UCNV_UNSUPPORTED_CONVERTER) {

         fprintf(stderr, "ucm error: missing conversion type (<uconv_class>)\n");

505 *pErrorCode=U_INVALID_TABLE_FORMAT;

506 return;

507 }

508

509 /*

510 * Now that we know the type, copy any 'default' values from the table.

511 * We need not check the type any further because the parser only

512 * recognizes what we have prototypes for.

513 *

514 * For delta (extension-only) tables, copy values from the base file

515 * instead, see createConverter().

516 */

     if(data->ucm->baseName[0]==0) {

518 prototype=ucnv_converterStaticData[staticData->conversionType];

519 if(prototype!=NULL) {

             if(staticData->name[0]==0) {

                 uprv_strcpy((char *)staticData->name, prototype->name);

522 }

523

             if(staticData->codepage==0) {

525 staticData->codepage=prototype->codepage;

526 }

527

             if(staticData->platform==0) {

529 staticData->platform=prototype->platform;

530 }

531

             if(staticData->minBytesPerChar==0) {

533 staticData->minBytesPerChar=prototype->minBytesPerChar;

534 }

535

             if(staticData->maxBytesPerChar==0) {

537 staticData->maxBytesPerChar=prototype->maxBytesPerChar;

538 }

539

             if(staticData->subCharLen==0) {

541 staticData->subCharLen=prototype->subCharLen;

                 if(prototype->subCharLen>0) {

                     uprv_memcpy(staticData->subChar, prototype->subChar, prototype->subCharLen);

544 }

545 }

546 }

547 }

548

     if(data->ucm->states.outputType<0) {

         data->ucm->states.outputType=(int8_t)data->ucm->states.maxCharLength-1;

551 }

552

     if( staticData->subChar1!=0 &&

             (staticData->minBytesPerChar>1 ||

555 (staticData->conversionType!=UCNV_MBCS &&

556 staticData->conversionType!=UCNV_EBCDIC_STATEFUL))

557 ) {

         fprintf(stderr, "error: <subchar1> defined for a type other than MBCS or EBCDIC_STATEFUL\n");

559 *pErrorCode=U_INVALID_TABLE_FORMAT;

560 }

561 }

562

563 /* return TRUE if a base table was read, FALSE for an extension table */

564 static UBool

 readFile(ConvData *data, const char* converterName,

566 UErrorCode *pErrorCode) {

567 char line[1024];

568 char *end;

569 FileStream *convFile;

570

571 UCMStates *baseStates;

572 UBool dataIsBase;

573

     if(U_FAILURE(*pErrorCode)) {

575 return FALSE;

576 }

577

578 data->ucm=ucm_open();

579

     convFile=T_FileStream_open(converterName, "r");

581 if(convFile==NULL) {

582 *pErrorCode=U_FILE_ACCESS_ERROR;

583 return FALSE;

584 }

585

     readHeader(data, convFile, pErrorCode);

     if(U_FAILURE(*pErrorCode)) {

588 return FALSE;

589 }

590

     if(data->ucm->baseName[0]==0) {

592 dataIsBase=TRUE;

593 baseStates=&data->ucm->states;

594 ucm_processStates(baseStates, IGNORE_SISO_CHECK);

595 } else {

596 dataIsBase=FALSE;

597 baseStates=NULL;

598 }

599

600 /* read the base table */

     ucm_readTable(data->ucm, convFile, dataIsBase, baseStates, pErrorCode);

     if(U_FAILURE(*pErrorCode)) {

603 return FALSE;

604 }

605

606 /* read an extension table if there is one */

     while(T_FileStream_readLine(convFile, line, sizeof(line))) {

         end=uprv_strchr(line, 0);

609 while(line<end &&

               (*(end-1)=='\n' || *(end-1)=='\r' || *(end-1)==' ' || *(end-1)=='\t')) {

611 --end;

612 }

613 *end=0;

614

         if(line[0]=='#' || u_skipWhitespace(line)==end) {

616 continue; /* ignore empty and comment lines */

617 }

618

         if(0==uprv_strcmp(line, "CHARMAP")) {

620 /* read the extension table */

             ucm_readTable(data->ucm, convFile, FALSE, baseStates, pErrorCode);

622 } else {

             fprintf(stderr, "unexpected text after the base mapping table\n");

624 }

625 break;

626 }

627

628 T_FileStream_close(convFile);

629

     if(data->ucm->base->flagsType==UCM_FLAGS_MIXED || data->ucm->ext->flagsType==UCM_FLAGS_MIXED) {

         fprintf(stderr, "error: some entries have the mapping precision (with '|'), some do not\n");

632 *pErrorCode=U_INVALID_TABLE_FORMAT;

633 }

634

635 return dataIsBase;

636 }

637

638 static void

 createConverter(ConvData *data, const char *converterName, UErrorCode *pErrorCode) {

640 ConvData baseData;

641 UBool dataIsBase;

642

643 UConverterStaticData *staticData;

644 UCMStates *states, *baseStates;

645

     if(U_FAILURE(*pErrorCode)) {

647 return;

648 }

649

650 initConvData(data);

651

     dataIsBase=readFile(data, converterName, pErrorCode);

     if(U_FAILURE(*pErrorCode)) {

654 return;

655 }

656

657 staticData=&data->staticData;

658 states=&data->ucm->states;

659

660 if(dataIsBase) {

661 /*

662 * Build a normal .cnv file with a base table

663 * and an optional extension table.

664 */

         data->cnvData=MBCSOpen(data->ucm);

         if(data->cnvData==NULL) {

667 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

668

         } else if(!data->cnvData->isValid(data->cnvData,

670 staticData->subChar, staticData->subCharLen)

671 ) {

             fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");

673 *pErrorCode=U_INVALID_TABLE_FORMAT;

674

         } else if(staticData->subChar1!=0 &&

                     !data->cnvData->isValid(data->cnvData, &staticData->subChar1, 1)

677 ) {

             fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");

679 *pErrorCode=U_INVALID_TABLE_FORMAT;

680

681 } else if(

             data->ucm->ext->mappingsLength>0 &&

             !ucm_checkBaseExt(states, data->ucm->base, data->ucm->ext, data->ucm->ext, FALSE)

684 ) {

685 *pErrorCode=U_INVALID_TABLE_FORMAT;

         } else if(data->ucm->base->flagsType&UCM_FLAGS_EXPLICIT) {

687 /* sort the table so that it can be turned into UTF-8-friendly data */

             ucm_sortTable(data->ucm->base);

689 }

690

         if(U_SUCCESS(*pErrorCode)) {

692 if(

693 /* add the base table after ucm_checkBaseExt()! */

                 !data->cnvData->addTable(data->cnvData, data->ucm->base, &data->staticData)

695 ) {

696 *pErrorCode=U_INVALID_TABLE_FORMAT;

697 } else {

698 /*

699 * addTable() may have requested moving more mappings to the extension table

700 * if they fit into the base toUnicode table but not into the

701 * base fromUnicode table.

702 * (Especially for UTF-8-friendly fromUnicode tables.)

703 * Such mappings will have the MBCS_FROM_U_EXT_FLAG set, which causes them

704 * to be excluded from the extension toUnicode data.

705 * See MBCSOkForBaseFromUnicode() for which mappings do not fit into

706 * the base fromUnicode table.

707 */

                 ucm_moveMappings(data->ucm->base, data->ucm->ext);

                 ucm_sortTable(data->ucm->ext);

                 if(data->ucm->ext->mappingsLength>0) {

711 /* prepare the extension table, if there is one */

                     data->extData=CnvExtOpen(data->ucm);

                     if(data->extData==NULL) {

714 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

715 } else if(

                         !data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)

717 ) {

718 *pErrorCode=U_INVALID_TABLE_FORMAT;

719 }

720 }

721 }

722 }

723 } else {

724 /* Build an extension-only .cnv file. */

725 char baseFilename[500];

726 char *basename;

727

728 initConvData(&baseData);

729

730 /* assemble a path/filename for data->ucm->baseName */

731 uprv_strcpy(baseFilename, converterName);

         basename=(char *)findBasename(baseFilename);

         uprv_strcpy(basename, data->ucm->baseName);

         uprv_strcat(basename, ".ucm");

735

736 /* read the base table */

         dataIsBase=readFile(&baseData, baseFilename, pErrorCode);

         if(U_FAILURE(*pErrorCode)) {

739 return;

740 } else if(!dataIsBase) {

             fprintf(stderr, "error: the <icu:base> file \"%s\" is not a base table file\n", baseFilename);

742 *pErrorCode=U_INVALID_TABLE_FORMAT;

743 } else {

744 /* prepare the extension table */

             data->extData=CnvExtOpen(data->ucm);

             if(data->extData==NULL) {

747 *pErrorCode=U_MEMORY_ALLOCATION_ERROR;

748 } else {

749 /* fill in gaps in extension file header fields */

750 UCMapping *m, *mLimit;

751 uint8_t fallbackFlags;

752

753 baseStates=&baseData.ucm->states;

                 if(states->conversionType==UCNV_DBCS) {

                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=2);

                 } else if(states->minCharLength==0) {

                     staticData->minBytesPerChar=(int8_t)(states->minCharLength=baseStates->minCharLength);

758 }

                 if(states->maxCharLength<states->minCharLength) {

                     staticData->maxBytesPerChar=(int8_t)(states->maxCharLength=baseStates->maxCharLength);

761 }

762

                 if(staticData->subCharLen==0) {

                     uprv_memcpy(staticData->subChar, baseData.staticData.subChar, 4);

                     staticData->subCharLen=baseData.staticData.subCharLen;

766 }

767 /*

768 * do not copy subChar1 -

769 * only use what is explicitly specified

770 * because it cannot be unset in the extension file header

771 */

772

773 /* get the fallback flags */

774 fallbackFlags=0;

                 for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;

                     m<mLimit && fallbackFlags!=3;

777 ++m

778 ) {

                     if(m->f==1) {

780 fallbackFlags|=1;

                     } else if(m->f==3) {

782 fallbackFlags|=2;

783 }

784 }

785

                 if(fallbackFlags&1) {

787 staticData->hasFromUnicodeFallback=TRUE;

788 }

                 if(fallbackFlags&2) {

790 staticData->hasToUnicodeFallback=TRUE;

791 }

792

                 if(1!=ucm_countChars(baseStates, staticData->subChar, staticData->subCharLen)) {

                     fprintf(stderr, "       the substitution character byte sequence is illegal in this codepage structure!\n");

795 *pErrorCode=U_INVALID_TABLE_FORMAT;

796

                 } else if(staticData->subChar1!=0 && 1!=ucm_countChars(baseStates, &staticData->subChar1, 1)) {

                     fprintf(stderr, "       the subchar1 byte is illegal in this codepage structure!\n");

799 *pErrorCode=U_INVALID_TABLE_FORMAT;

800

801 } else if(

                     !ucm_checkValidity(data->ucm->ext, baseStates) ||

                     !ucm_checkBaseExt(baseStates, baseData.ucm->base, data->ucm->ext, data->ucm->ext, FALSE)

804 ) {

805 *pErrorCode=U_INVALID_TABLE_FORMAT;

806 } else {

                     if(states->maxCharLength>1) {

808 /*

809 * When building a normal .cnv file with a base table

810 * for an MBCS (not SBCS) table with explicit precision flags,

811 * the MBCSAddTable() function marks some mappings for moving

812 * to the extension table.

813 * They fit into the base toUnicode table but not into the

814 * base fromUnicode table.

815 * (Note: We do have explicit precision flags because they are

816 * required for extension table generation, and

817 * ucm_checkBaseExt() verified it.)

818 *

819 * We do not call MBCSAddTable() here (we probably could)

820 * so we need to do the analysis before building the extension table.

821 * We assume that MBCSAddTable() will build a UTF-8-friendly table.

822 * Redundant mappings in the extension table are ok except they cost some size.

823 *

824 * Do this after ucm_checkBaseExt().

825 */

                         const MBCSData *mbcsData=MBCSGetDummy();

827 int32_t needsMove=0;

                         for(m=baseData.ucm->base->mappings, mLimit=m+baseData.ucm->base->mappingsLength;

829 m<mLimit;

830 ++m

831 ) {

                             if(!MBCSOkForBaseFromUnicode(mbcsData, m->b.bytes, m->bLen, m->u, m->f)) {

833 m->f|=MBCS_FROM_U_EXT_FLAG;

834 m->moveFlag=UCM_MOVE_TO_EXT;

835 ++needsMove;

836 }

837 }

838

                         if(needsMove!=0) {

                             ucm_moveMappings(baseData.ucm->base, data->ucm->ext);

                             ucm_sortTable(data->ucm->ext);

842 }

843 }

                     if(!data->extData->addTable(data->extData, data->ucm->ext, &data->staticData)) {

845 *pErrorCode=U_INVALID_TABLE_FORMAT;

846 }

847 }

848 }

849 }

850

851 cleanupConvData(&baseData);

852 }

853 }

854

855 /*

856 * Hey, Emacs, please set the following:

857 *

858 * Local Variables:

859 * indent-tabs-mode: nil

860 * End:

861 *

862 */