/*
*******************************************************************************
*
-* Copyright (C) 2000-2003, International Business Machines
+* Copyright (C) 2000-2004, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* 05/09/2001 weiv Case bits are now in the CEs, not in front
*/
-#include <stdio.h>
#include "unicode/utypes.h"
+#include "unicode/putil.h"
#include "unicode/udata.h"
+#include "unicode/uclean.h"
#include "ucol_imp.h"
#include "genuca.h"
#include "uoptions.h"
#include "cstring.h"
#include "cmemory.h"
+#include <stdio.h>
+
/*
* Global - verbosity
*/
#else
+static const UDataInfo ucaDataInfo={
+ sizeof(UDataInfo),
+ 0,
+
+ U_IS_BIG_ENDIAN,
+ U_CHARSET_FAMILY,
+ sizeof(UChar),
+ 0,
+
+ {UCA_DATA_FORMAT_0, UCA_DATA_FORMAT_1, UCA_DATA_FORMAT_2, UCA_DATA_FORMAT_3}, /* dataFormat="UCol" */
+ /* 03/26/2002 bumped up version since format has changed */
+ /* 09/16/2002 bumped up version since we went from UColAttributeValue */
+ /* to int32_t in UColOptionSet */
+ /* 05/13/2003 This one also updated since we added UCA and UCD versions */
+ /* to header */
+ /* 09/11/2003 Adding information required by data swapper */
+ {UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1, UCA_FORMAT_VERSION_2, UCA_FORMAT_VERSION_3}, /* formatVersion */
+ {0, 0, 0, 0} /* dataVersion = Unicode Version*/
+};
+
+static const UDataInfo invUcaDataInfo={
+ sizeof(UDataInfo),
+ 0,
+
+ U_IS_BIG_ENDIAN,
+ U_CHARSET_FAMILY,
+ sizeof(UChar),
+ 0,
+
+ {INVUCA_DATA_FORMAT_0, INVUCA_DATA_FORMAT_1, INVUCA_DATA_FORMAT_2, INVUCA_DATA_FORMAT_3}, /* dataFormat="InvC" */
+ /* 03/26/2002 bumped up version since format has changed */
+ /* 04/29/2003 2.1 format - we have added UCA version to header */
+ {INVUCA_FORMAT_VERSION_0, INVUCA_FORMAT_VERSION_1, INVUCA_FORMAT_VERSION_2, INVUCA_FORMAT_VERSION_3}, /* formatVersion */
+ {0, 0, 0, 0} /* dataVersion = Unicode Version*/
+};
+
UCAElements le;
int32_t readElement(char **from, char *to, char separator, UErrorCode *status) {
}
}
+/*
+ * Takes two CEs (lead and continuation) and
+ * compares them as CEs should be compared:
+ * primary vs. primary, secondary vs. secondary
+ * tertiary vs. tertiary
+ */
+static int32_t compareCEs(uint32_t *source, uint32_t *target) {
+ uint32_t s1 = source[0], s2, t1 = target[0], t2;
+ if(isContinuation(source[1])) {
+ s2 = source[1];
+ } else {
+ s2 = 0;
+ }
+ if(isContinuation(target[1])) {
+ t2 = target[1];
+ } else {
+ t2 = 0;
+ }
+
+ uint32_t s = 0, t = 0;
+ if(s1 == t1 && s2 == t2) {
+ return 0;
+ }
+ s = (s1 & 0xFFFF0000)|((s2 & 0xFFFF0000)>>16);
+ t = (t1 & 0xFFFF0000)|((t2 & 0xFFFF0000)>>16);
+ if(s < t) {
+ return -1;
+ } else if(s > t) {
+ return 1;
+ } else {
+ s = (s1 & 0x0000FF00) | (s2 & 0x0000FF00)>>8;
+ t = (t1 & 0x0000FF00) | (t2 & 0x0000FF00)>>8;
+ if(s < t) {
+ return -1;
+ } else if(s > t) {
+ return 1;
+ } else {
+ s = (s1 & 0x000000FF)<<8 | (s2 & 0x000000FF);
+ t = (t1 & 0x000000FF)<<8 | (t2 & 0x000000FF);
+ if(s < t) {
+ return -1;
+ } else {
+ return 1;
+ }
+ }
+ }
+}
+
static uint32_t addToInverse(UCAElements *element, UErrorCode *status) {
- uint32_t comp = 0;
uint32_t position = inversePos;
uint32_t saveElement = element->CEs[0];
+ int32_t compResult = 0;
element->CEs[0] &= 0xFFFFFF3F;
if(element->noOfCEs == 1) {
element->CEs[1] = 0;
if(inversePos == 0) {
inverseTable[0][0] = inverseTable[0][1] = inverseTable[0][2] = 0;
addNewInverse(element, status);
- } else if(inverseTable[inversePos][0] > element->CEs[0]) {
- while(inverseTable[--position][0] > element->CEs[0]) {}
- if(VERBOSE) { fprintf(stdout, "p:%i ", position); }
- if(inverseTable[position][0] == element->CEs[0]) {
- if(isContinuation(element->CEs[1])) {
- comp = element->CEs[1];
- } else {
- comp = 0;
- }
- if(inverseTable[position][1] > comp) {
- while(inverseTable[--position][1] > comp) {}
- }
- if(inverseTable[position][1] == comp) {
- addToExistingInverse(element, position, status);
- } else {
- insertInverse(element, position+1, status);
- }
- } else {
- if(VERBOSE) { fprintf(stdout, "ins"); }
+ } else if(compareCEs(inverseTable[inversePos], element->CEs) > 0) {
+ while((compResult = compareCEs(inverseTable[--position], element->CEs)) > 0);
+ if(VERBOSE) { fprintf(stdout, "p:%u ", (int)position); }
+ if(compResult == 0) {
+ addToExistingInverse(element, position, status);
+ } else {
insertInverse(element, position+1, status);
}
- } else if(inverseTable[inversePos][0] == element->CEs[0]) {
- if(element->noOfCEs > 1 && isContinuation(element->CEs[1])) {
- comp = element->CEs[1];
- if(inverseTable[position][1] > comp) {
- while(inverseTable[--position][1] > comp) {}
- }
- if(inverseTable[position][1] == comp) {
- addToExistingInverse(element, position, status);
- } else {
- insertInverse(element, position+1, status);
- }
- } else {
- addToExistingInverse(element, inversePos, status);
- }
- } else {
+ } else if(compareCEs(inverseTable[inversePos], element->CEs) == 0) {
+ addToExistingInverse(element, inversePos, status);
+ } else {
addNewInverse(element, status);
}
element->CEs[0] = saveElement;
uint32_t i = 0;
result = (InverseUCATableHeader *)uprv_malloc(headerByteSize + inverseTableByteSize + contsByteSize);
+ uprv_memset(result, 0, headerByteSize + inverseTableByteSize + contsByteSize);
if(result != NULL) {
result->byteSize = headerByteSize + inverseTableByteSize + contsByteSize;
inversePos++;
for(i = 2; i<inversePos; i++) {
- if(inverseTable[i-1][0] > inverseTable[i][0]) {
- fprintf(stderr, "Error at %i: %08X & %08X\n", i, inverseTable[i-1][0], inverseTable[i][0]);
+ if(compareCEs(inverseTable[i-1], inverseTable[i]) > 0) {
+ fprintf(stderr, "Error at %i: %08X & %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i][0]);
} else if(inverseTable[i-1][0] == inverseTable[i][0] && !(inverseTable[i-1][1] < inverseTable[i][1])) {
- fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", i, inverseTable[i-1][0], inverseTable[i-1][1], inverseTable[i][0], inverseTable[i][1]);
+ fprintf(stderr, "Continuation error at %i: %08X %08X & %08X %08X\n", (int)i, (int)inverseTable[i-1][0], (int)inverseTable[i-1][1], (int)inverseTable[i][0], (int)inverseTable[i][1]);
}
}
uprv_memcpy(&invUcaInfo, &invUcaDataInfo, sizeof(UDataInfo));
u_getUnicodeVersion(invUcaInfo.dataVersion);
- pData=udata_create(outputDir, INVC_DATA_TYPE, U_ICUDATA_NAME "_" INVC_DATA_NAME, &invUcaInfo,
+ pData=udata_create(outputDir, INVC_DATA_TYPE, INVC_DATA_NAME, &invUcaInfo,
copyright, status);
if(U_FAILURE(*status)) {
- fprintf(stderr, "Error: unable to create data memory, error %d\n", *status);
+ fprintf(stderr, "Error: unable to create %s"INVC_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
return;
}
/* write the data to the file */
if (VERBOSE) {
fprintf(stdout, "Writing out inverse UCA table: %s%c%s.%s\n", outputDir, U_FILE_SEP_CHAR,
- U_ICUDATA_NAME "_" INVC_DATA_NAME,
+ INVC_DATA_NAME,
INVC_DATA_TYPE);
}
udata_writeBlock(pData, data, data->byteSize);
char *endCodePoint = NULL;
char *spacePointer = NULL;
char *result = fgets(buffer, 2048, data);
- int32_t buflen = uprv_strlen(buffer);
+ int32_t buflen = (int32_t)uprv_strlen(buffer);
if(U_FAILURE(*status)) {
return 0;
}
// we don't want any strange stuff after useful data!
while(pointer < commentStart) {
- if(*pointer != ' ') {
+ if(*pointer != ' ' && *pointer != '\t')
+ {
*status=U_INVALID_FORMAT_ERROR;
break;
}
}
if(U_FAILURE(*status)) {
- fprintf(stderr, "problem putting stuff in hash table\n");
+ fprintf(stderr, "problem putting stuff in hash table %s\n", u_errorName(*status));
*status = U_INTERNAL_PROGRAM_ERROR;
return NULL;
}
uint32_t size = data->size;
+ data->UCAConsts = data->size;
+ data->size += paddedsize(sizeof(UCAConstants));
+
if(noOfcontractions != 0) {
contractions[noOfcontractions][0] = 0;
contractions[noOfcontractions][1] = 0;
noOfcontractions++;
- data->UCAConsts = data->size;
- data->size += paddedsize(sizeof(UCAConstants));
data->contractionUCACombos = data->size;
+ data->contractionUCACombosWidth = 3;
+ data->contractionUCACombosSize = noOfcontractions;
data->size += paddedsize((noOfcontractions*3*sizeof(UChar)));
}
uprv_memcpy(&ucaInfo, &ucaDataInfo, sizeof(UDataInfo));
u_getUnicodeVersion(ucaInfo.dataVersion);
- pData=udata_create(outputDir, UCA_DATA_TYPE, U_ICUDATA_NAME "_" UCA_DATA_NAME, &ucaInfo,
+ pData=udata_create(outputDir, UCA_DATA_TYPE, UCA_DATA_NAME, &ucaInfo,
copyright, status);
if(U_FAILURE(*status)) {
- fprintf(stderr, "Error: unable to create data memory, error %d\n", *status);
+ fprintf(stderr, "Error: unable to create %s"UCA_DATA_NAME", error %s\n", outputDir, u_errorName(*status));
return;
}
UChar variableTopValue = 0;
UCATableHeader *myD = (UCATableHeader *)uprv_malloc(sizeof(UCATableHeader));
/* test for NULL */
- if(myD == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- fclose(data);
- return 0;
- }
+ if(myD == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ fclose(data);
+ return 0;
+ }
+ uprv_memset(myD, 0, sizeof(UCATableHeader));
UColOptionSet *opts = (UColOptionSet *)uprv_malloc(sizeof(UColOptionSet));
/* test for NULL */
- if(opts == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- uprv_free(myD);
- fclose(data);
- return 0;
- }
+ if(opts == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ uprv_free(myD);
+ fclose(data);
+ return 0;
+ }
+ uprv_memset(opts, 0, sizeof(UColOptionSet));
UChar contractionCEs[256][3];
+ uprv_memset(contractionCEs, 0, 256*3*sizeof(UChar));
uint32_t noOfContractions = 0;
UCAConstants consts;
+ uprv_memset(&consts, 0, sizeof(consts));
#if 0
UCAConstants consts = {
UCOL_RESET_TOP_VALUE,
return -1;
}
- memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
+ uprv_memset(inverseTable, 0xDA, sizeof(int32_t)*3*0xFFFF);
opts->variableTopValue = variableTopValue;
opts->strength = UCOL_TERTIARY;
opts->caseLevel = UCOL_OFF; /* do we have an extra case level */
opts->normalizationMode = UCOL_OFF; /* attribute for normalization */
opts->hiraganaQ = UCOL_OFF; /* attribute for JIS X 4061, used only in Japanese */
+ opts->numericCollation = UCOL_OFF;
myD->jamoSpecial = FALSE;
- tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, status);
+ tempUCATable *t = uprv_uca_initTempTable(myD, opts, NULL, IMPLICIT_TAG, LEAD_SURROGATE_TAG, status);
if(U_FAILURE(*status))
{
fprintf(stderr, "Failed to init UCA temp table: %s\n", u_errorName(*status));
{0x2F800, 0x2FA1D, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //7 CJK_IMPLICIT_TAG, /* 0x2F800-0x2FA1D*/
#endif
{0xAC00, 0xD7B0, UCOL_SPECIAL_FLAG | (HANGUL_SYLLABLE_TAG << 24) }, //0 HANGUL_SYLLABLE_TAG,/* AC00-D7AF*/
- {0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
+ //{0xD800, 0xDC00, UCOL_SPECIAL_FLAG | (LEAD_SURROGATE_TAG << 24) }, //1 LEAD_SURROGATE_TAG, /* D800-DBFF*/
{0xDC00, 0xE000, UCOL_SPECIAL_FLAG | (TRAIL_SURROGATE_TAG << 24) }, //2 TRAIL_SURROGATE DC00-DFFF
// Now directly handled in the collation code by the swapCJK function.
//{0x3400, 0x4DB6, UCOL_SPECIAL_FLAG | (CJK_IMPLICIT_TAG << 24) }, //3 CJK_IMPLICIT_TAG, /* 0x3400-0x4DB5*/
int32_t surrogateCount = 0;
while(!feof(data)) {
if(U_FAILURE(*status)) {
- fprintf(stderr, "Something returned an error %i (%s) while processing line %i of %s. Exiting...\n",
- *status, u_errorName(*status), line, filename);
+ fprintf(stderr, "Something returned an error %i (%s) while processing line %u of %s. Exiting...\n",
+ *status, u_errorName(*status), (int)line, filename);
exit(*status);
}
element = readAnElement(data, t, &consts, status);
line++;
if(VERBOSE) {
- fprintf(stdout, "%i ", line);
+ fprintf(stdout, "%u ", (int)line);
}
if(element != NULL) {
// we have read the line, now do something sensible with the read data!
fprintf(stderr, "UCA version not specified. Cannot create data file!\n");
return -1;
}
-
+/* {
+ uint32_t trieWord = utrie_get32(t->mapping, 0xDC01, NULL);
+ }*/
if (VERBOSE) {
- fprintf(stdout, "\nLines read: %i\n", line);
- fprintf(stdout, "Surrogate count: %i\n", surrogateCount);
+ fprintf(stdout, "\nLines read: %u\n", (int)line);
+ fprintf(stdout, "Surrogate count: %i\n", (int)surrogateCount);
fprintf(stdout, "Raw data breakdown:\n");
/*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
- fprintf(stdout, "Number of contractions: %i\n", noOfContractions);
- fprintf(stdout, "Contraction image size: %i\n", t->image->contractionSize);
- fprintf(stdout, "Expansions size: %i\n", t->expansions->position);
+ fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
+ fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
+ fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
}
/* produce canonical closure for table */
/* first set up constants for implicit calculation */
- uprv_uca_initImplicitConstants(consts.UCA_PRIMARY_IMPLICIT_MIN);
+ uprv_uca_initImplicitConstants(consts.UCA_PRIMARY_IMPLICIT_MIN, consts.UCA_PRIMARY_IMPLICIT_MAX, status);
/* do the closure */
int32_t noOfClosures = uprv_uca_canonicalClosure(t, status);
if(noOfClosures != 0) {
- fprintf(stderr, "Warning: %i canonical closures occured!\n", noOfClosures);
+ fprintf(stderr, "Warning: %i canonical closures occured!\n", (int)noOfClosures);
}
/* test */
if (VERBOSE) {
fprintf(stdout, "Compacted data breakdown:\n");
/*fprintf(stdout, "Compact array stage1 top: %i, stage2 top: %i\n", t->mapping->stage1Top, t->mapping->stage2Top);*/
- fprintf(stdout, "Number of contractions: %i\n", noOfContractions);
- fprintf(stdout, "Contraction image size: %i\n", t->image->contractionSize);
- fprintf(stdout, "Expansions size: %i\n", t->expansions->position);
+ fprintf(stdout, "Number of contractions: %u\n", (int)noOfContractions);
+ fprintf(stdout, "Contraction image size: %u\n", (int)t->image->contractionSize);
+ fprintf(stdout, "Expansions size: %i\n", (int)t->expansions->position);
+ }
+
+ if(U_FAILURE(*status)) {
+ fprintf(stderr, "Error creating table: %s\n", u_errorName(*status));
+ return -1;
}
/* populate the version info struct with version info*/
argv[0], u_getDataDirectory());
return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
}
-
if(options[3].doesOccur) {
- fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
+ fprintf(stdout, "genuca version %hu.%hu, ICU tool to read UCA text data and create UCA data tables for collation.\n",
#if UCONFIG_NO_COLLATION
0, 0
#else
- ucaDataInfo.formatVersion[0], ucaDataInfo.formatVersion[1]
+ UCA_FORMAT_VERSION_0, UCA_FORMAT_VERSION_1
#endif
);
- fprintf(stdout, "Copyright (C) 2000-2001, International Business Machines\n");
- fprintf(stdout, "Corporation and others. All Rights Reserved.\n");
+ fprintf(stdout, U_COPYRIGHT_STRING"\n");
exit(0);
}
if (options[7].doesOccur) {
u_setDataDirectory(options[7].value);
}
+ /* Initialize ICU */
+ u_init(&status);
+ if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
+ fprintf(stderr, "%s: can not initialize ICU. status = %s\n",
+ argv[0], u_errorName(status));
+ exit(1);
+ }
+ status = U_ZERO_ERROR;
+
/* prepare the filename beginning with the source dir */
uprv_strcpy(filename, srcDir);
UNewDataMemory *pData;
const char *msg;
- msg = "genuca writes dummy " U_ICUDATA_NAME "_" UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
+ msg = "genuca writes dummy " UCA_DATA_NAME "." UCA_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
fprintf(stderr, "%s\n", msg);
- pData = udata_create(destdir, UCA_DATA_TYPE, U_ICUDATA_NAME "_" UCA_DATA_NAME, &dummyDataInfo,
+ pData = udata_create(destdir, UCA_DATA_TYPE, UCA_DATA_NAME, &dummyDataInfo,
NULL, &status);
udata_writeBlock(pData, msg, strlen(msg));
udata_finish(pData, &status);
- msg = "genuca writes dummy " U_ICUDATA_NAME "_" INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
+ msg = "genuca writes dummy " INVC_DATA_NAME "." INVC_DATA_TYPE " because of UCONFIG_NO_COLLATION, see uconfig.h";
fprintf(stderr, "%s\n", msg);
- pData = udata_create(destdir, INVC_DATA_TYPE, U_ICUDATA_NAME "_" INVC_DATA_NAME, &dummyDataInfo,
+ pData = udata_create(destdir, INVC_DATA_TYPE, INVC_DATA_NAME, &dummyDataInfo,
NULL, &status);
udata_writeBlock(pData, msg, strlen(msg));
udata_finish(pData, &status);