/*
*******************************************************************************
*
-* Copyright (C) 2003-2004, International Business Machines
+* Copyright (C) 2003-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "ucm.h"
#include <stdio.h>
+#if !UCONFIG_NO_CONVERSION
+
/* -------------------------------------------------------------------------- */
static void
* allocate mappingsCapacity instead of mappingsLength so that
* if mappings are added, the reverseMap need not be
* reallocated each time
- * (see moveMappings() and ucm_addMapping())
+ * (see ucm_moveMappings() and ucm_addMapping())
*/
t->reverseMap=(int32_t *)uprv_malloc(t->mappingsCapacity*sizeof(int32_t));
if(t->reverseMap==NULL) {
t->isSorted=TRUE;
}
-enum {
- MOVE_TO_EXT=1,
- REMOVE_MAPPING=2
-};
-
/*
- * move mappings with their move flag set from the base table
- * and optionally to the extension table
- *
- * works only with explicit precision flags because it uses some of the
- * flags bits
+ * remove mappings with their move flag set from the base table
+ * and move some of them (with UCM_MOVE_TO_EXT) to the extension table
*/
-static void
-moveMappings(UCMTable *base, UCMTable *ext) {
+U_CAPI void U_EXPORT2
+ucm_moveMappings(UCMTable *base, UCMTable *ext) {
UCMapping *mb, *mbLimit;
int8_t flag;
/* reset the move flag */
mb->moveFlag=0;
- if(ext!=NULL && (flag&MOVE_TO_EXT)) {
+ if(ext!=NULL && (flag&UCM_MOVE_TO_EXT)) {
/* add the mapping to the extension table */
ucm_addMapping(ext, mb, UCM_GET_CODE_POINTS(base, mb), UCM_GET_BYTES(base, mb));
}
- /* move the last base mapping down and overwrite the current one */
+ /* remove this mapping: move the last base mapping down and overwrite the current one */
if(mb<(mbLimit-1)) {
uprv_memcpy(mb, mbLimit-1, sizeof(UCMapping));
}
return result;
}
- if(0<=mb->f && mb->f<=2) {
+ if((0<=mb->f && mb->f<=2) || mb->f==4) {
break;
}
return result;
}
- if(0<=me->f && me->f<=2) {
+ if((0<=me->f && me->f<=2) || me->f==4) {
break;
}
* if ext is DBCS, move DBCS mappings here
* and check SBCS ones for Unicode prefix below
*/
- mb->moveFlag|=MOVE_TO_EXT;
+ mb->moveFlag|=UCM_MOVE_TO_EXT;
result|=NEEDS_MOVE;
/* does mb map from an input sequence that is a prefix of me's? */
) {
if(moveToExt) {
/* mark this mapping to be moved to the extension table */
- mb->moveFlag|=MOVE_TO_EXT;
+ mb->moveFlag|=UCM_MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
if( mb->f==me->f && mb->bLen==me->bLen &&
0==uprv_memcmp(UCM_GET_BYTES(base, mb), UCM_GET_BYTES(ext, me), mb->bLen)
) {
- me->moveFlag|=REMOVE_MAPPING;
+ me->moveFlag|=UCM_REMOVE_MAPPING;
result|=NEEDS_MOVE;
} else if(intersectBase) {
/* mapping in base but not in ext, move it */
- mb->moveFlag|=MOVE_TO_EXT;
+ mb->moveFlag|=UCM_MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
if(cmp<0) {
if(intersectBase) {
/* mapping in base but not in ext, move it */
- mb->moveFlag|=MOVE_TO_EXT;
+ mb->moveFlag|=UCM_MOVE_TO_EXT;
result|=NEEDS_MOVE;
/*
) {
if(moveToExt) {
/* mark this mapping to be moved to the extension table */
- mb->moveFlag|=MOVE_TO_EXT;
+ mb->moveFlag|=UCM_MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
if( mb->f==me->f && mb->uLen==me->uLen &&
0==uprv_memcmp(UCM_GET_CODE_POINTS(base, mb), UCM_GET_CODE_POINTS(ext, me), 4*mb->uLen)
) {
- me->moveFlag|=REMOVE_MAPPING;
+ me->moveFlag|=UCM_REMOVE_MAPPING;
result|=NEEDS_MOVE;
} else if(intersectBase) {
/* mapping in base but not in ext, move it */
- mb->moveFlag|=MOVE_TO_EXT;
+ mb->moveFlag|=UCM_MOVE_TO_EXT;
result|=NEEDS_MOVE;
} else {
fprintf(stderr,
}
if(result&NEEDS_MOVE) {
- moveMappings(ext, NULL);
- moveMappings(base, moveTarget);
+ ucm_moveMappings(ext, NULL);
+ ucm_moveMappings(base, moveTarget);
ucm_sortTable(base);
ucm_sortTable(ext);
if(moveTarget!=NULL) {
if(isSISO && m->bLen==1 && (m->b.bytes[0]==0xe || m->b.bytes[0]==0xf)) {
fprintf(stderr, "warning: removing illegal mapping from an SI/SO-stateful table\n");
ucm_printMapping(table, m, stderr);
- m->moveFlag|=REMOVE_MAPPING;
+ m->moveFlag|=UCM_REMOVE_MAPPING;
needsMove=TRUE;
continue;
}
printMapping(m, UCM_GET_CODE_POINTS(table, m), UCM_GET_BYTES(table, m), stderr);
isOK=FALSE;
} else if(type>0) {
- m->moveFlag|=MOVE_TO_EXT;
+ m->moveFlag|=UCM_MOVE_TO_EXT;
needsMove=TRUE;
}
}
return FALSE;
}
if(needsMove) {
- moveMappings(ucm->base, ucm->ext);
+ ucm_moveMappings(ucm->base, ucm->ext);
return ucm_checkBaseExt(&ucm->states, ucm->base, ucm->ext, ucm->ext, FALSE);
} else {
ucm_sortTable(ucm->base);
break;
} else if(*s=='|') {
f=(int8_t)(s[1]-'0');
- if((uint8_t)f>3) {
- fprintf(stderr, "ucm error: fallback indicator must be |0..|3 - \"%s\"\n", line);
+ if((uint8_t)f>4) {
+ fprintf(stderr, "ucm error: fallback indicator must be |0..|4 - \"%s\"\n", line);
return FALSE;
}
break;
uint8_t bytes[UCNV_EXT_MAX_BYTES]) {
UCMapping *tm;
UChar32 c;
- int32_t index;
+ int32_t idx;
if(table->mappingsLength>=table->mappingsCapacity) {
/* make the mappings array larger */
}
if(m->uLen>1) {
- index=table->codePointsLength;
+ idx=table->codePointsLength;
table->codePointsLength+=m->uLen;
if(table->codePointsLength>table->codePointsCapacity) {
fprintf(stderr, "ucm error: too many code points in multiple-code point mappings\n");
exit(U_MEMORY_ALLOCATION_ERROR);
}
- uprv_memcpy(table->codePoints+index, codePoints, m->uLen*4);
- m->u=index;
+ uprv_memcpy(table->codePoints+idx, codePoints, m->uLen*4);
+ m->u=idx;
}
if(m->bLen>4) {
- index=table->bytesLength;
+ idx=table->bytesLength;
table->bytesLength+=m->bLen;
if(table->bytesLength>table->bytesCapacity) {
fprintf(stderr, "ucm error: too many bytes in mappings with >4 charset bytes\n");
exit(U_MEMORY_ALLOCATION_ERROR);
}
- uprv_memcpy(table->bytes+index, bytes, m->bLen);
- m->b.index=index;
+ uprv_memcpy(table->bytes+idx, bytes, m->bLen);
+ m->b.idx=idx;
}
/* set unicodeMask */
- for(index=0; index<m->uLen; ++index) {
- c=codePoints[index];
+ for(idx=0; idx<m->uLen; ++idx) {
+ c=codePoints[idx];
if(c>=0x10000) {
table->unicodeMask|=UCNV_HAS_SUPPLEMENTARY; /* there are supplementary code points */
} else if(U_IS_SURROGATE(c)) {
/*
 * Release a UCMFile. A NULL ucm is ignored.
 * The base and extension tables are handed to ucm_closeTable()
 * (this change replaces the plain uprv_free() calls on them),
 * and then the UCMFile struct itself is freed with uprv_free().
 */
U_CAPI void U_EXPORT2
ucm_close(UCMFile *ucm) {
if(ucm!=NULL) {
- uprv_free(ucm->base);
- uprv_free(ucm->ext);
+ ucm_closeTable(ucm->base);
+ ucm_closeTable(ucm->ext);
uprv_free(ucm);
}
}
/*
* Suitable for an ICU conversion base table means:
- * - a 1:1 mapping
- * - not a |2 SUB mappings for <subchar1>
- * - not a |1 fallback to 0x00
- * - no leading 0x00 bytes
+ * - a 1:1 mapping (1 Unicode code point : 1 byte sequence)
+ * - precision flag 0..3
+ * - SBCS: any 1:1 mapping
+ * (the table stores additional bits to distinguish mapping types)
+ * - MBCS: not a |2 SUB mapping for <subchar1>
+ * - MBCS: not a |1 fallback to 0x00
+ * - MBCS: not a multi-byte mapping with leading 0x00 bytes
+ *
+ * Further restrictions for fromUnicode tables
+ * are enforced in makeconv (MBCSOkForBaseFromUnicode()).
+ *
+ * All of the MBCS fromUnicode specific tests could be removed from here,
+ * but the ones above are for unusual mappings, and removing the tests
+ * from here would change canonucm output which seems gratuitous.
+ * (Markus Scherer 2006-nov-28)
+ *
+ * Exception: All implicit mappings (f<0) that need to be moved
+ * because of fromUnicode restrictions _must_ be moved here because
+ * makeconv uses a hack for moving mappings only for the fromUnicode table
+ * that only works with non-negative values of f.
*/
- if( m->uLen==1 && count==1 &&
- !((m->f==2 && m->bLen==1 && baseStates->maxCharLength>1) ||
- (m->f==1 && m->bLen==1 && bytes[0]==0) ||
- (m->bLen>1 && bytes[0]==0))
+ if( m->uLen==1 && count==1 && m->f<=3 &&
+ (baseStates->maxCharLength==1 ||
+ !((m->f==2 && m->bLen==1) ||
+ (m->f==1 && bytes[0]==0) ||
+ (m->f<=1 && m->bLen>1 && bytes[0]==0)))
) {
return 0; /* suitable for a base table */
} else {
char line[500];
char *end;
UBool isOK;
-
+
if(U_FAILURE(*pErrorCode)) {
return;
}
*pErrorCode=U_INVALID_TABLE_FORMAT;
}
}
+#endif