#if !UCONFIG_NO_COLLATION
+#ifdef XP_CPLUSPLUS
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
+#endif
#include "unicode/ucol.h"
#include "utrie.h"
#include "cmemory.h"
* same formatVersion as in ucadata.icu's UDataInfo header
* (formatVersion 2.3)
*
- * uint8_t reserved[84]; - currently unused
+ * uint32_t offset to the table that remaps reordering codes to lead CE bytes
+ * uint32_t offset to the table that maps lead CE bytes to reordering codes
+ *
+ * uint8_t reserved[76]; - currently unused
*
* -------------------------------------------------------------
*
#define UCA_DATA_FORMAT_2 ((uint8_t)0x6f)
#define UCA_DATA_FORMAT_3 ((uint8_t)0x6c)
-#define UCA_FORMAT_VERSION_0 ((uint8_t)2)
-#define UCA_FORMAT_VERSION_1 ((uint8_t)3)
+#define UCA_FORMAT_VERSION_0 ((uint8_t)3) /* first of the four format-version bytes; with _1.._3 below this reads 3.0.0.0 */
+#define UCA_FORMAT_VERSION_1 ((uint8_t)0) /* second format-version byte */
#define UCA_FORMAT_VERSION_2 ((uint8_t)0)
#define UCA_FORMAT_VERSION_3 ((uint8_t)0)
/* by index */
#define UCOL_USE_ITERATOR 64
+#define UCOL_FORCE_HAN_IMPLICIT 128 /* next bit after UCOL_USE_ITERATOR; presumably forces implicit CEs for Han characters -- TODO confirm against the code that tests this flag */
+
#define NFC_ZERO_CC_BLOCK_LIMIT_ 0x300
-typedef struct collIterate {
- UChar *string; /* Original string */
+#ifdef XP_CPLUSPLUS
+
+U_NAMESPACE_BEGIN
+
+typedef struct collIterate : public UMemory { /* C++-only definition (inside the ICU namespace); C code gets the incomplete typedef in the #else branch below */
+ const UChar *string; /* Original string */
/* UChar *start; Pointer to the start of the source string. Either points to string
or to writableBuffer */
- UChar *endp; /* string end ptr. Is undefined for null terminated strings */
- UChar *pos; /* This is position in the string. Can be to original or writable buf */
+ const UChar *endp; /* string end ptr. Is undefined for null terminated strings */
+ const UChar *pos; /* This is position in the string. Can be to original or writable buf */
uint32_t *toReturn; /* This is the CE from CEs buffer that should be returned */
uint32_t *CEpos; /* This is the position to which we have stored processed CEs */
int32_t offsetRepeatCount; /* Repeat stored offset if non-zero */
int32_t offsetRepeatValue; /* offset value to repeat */
- UChar *writableBuffer;
- uint32_t writableBufSize;
- UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
+ UnicodeString writableBuffer; /* Writable buffer that pos can point into instead of the original string */
+ const UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
const UCollator *coll;
+ const Normalizer2 *nfd; /* Normalizer2 instance; presumably the NFD normalizer -- TODO confirm where it is set */
uint8_t flags;
uint8_t origFlags;
uint32_t *extendCEs; /* This is use if CEs is not big enough */
int32_t extendCEsSize; /* Holds the size of the dynamic CEs buffer */
uint32_t CEs[UCOL_EXPAND_CE_BUFFER_SIZE]; /* This is where we store CEs */
- UChar stackWritableBuffer[UCOL_WRITABLE_BUFFER_SIZE]; /* A writable buffer. */
int32_t *offsetBuffer; /* A dynamic buffer to hold offsets */
int32_t offsetBufferSize; /* The size of the offset buffer */
UCharIterator *iterator;
/*int32_t iteratorIndex;*/
+
+ // The offsetBuffer should probably be a UVector32, but helper functions
+ // are an improvement over duplicated code.
+ void appendOffset(int32_t offset, UErrorCode &errorCode); // presumably appends one value to offsetBuffer and reports failure through errorCode -- confirm in the implementation
} collIterate;
+U_NAMESPACE_END
+
+#else
+
+typedef struct collIterate collIterate; /* C code only sees an incomplete (opaque) type */
+
+#endif
+
#define paddedsize(something) ((something)+((((something)%4)!=0)?(4-(something)%4):0))
#define headersize (paddedsize(sizeof(UCATableHeader))+paddedsize(sizeof(UColOptionSet)))
data similar to collIterate.
*/
struct collIterateState {
- UChar *pos; /* This is position in the string. Can be to original or writable buf */
- UChar *returnPos;
- UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
- UChar *bufferaddress; /* address of the normalization buffer */
- uint32_t buffersize;
+ const UChar *pos; /* This is position in the string. Can be to original or writable buf */
+ const UChar *returnPos;
+ const UChar *fcdPosition; /* Position in the original string to continue FCD check from. */
+ const UChar *bufferaddress; /* address of the normalization buffer */
+ int32_t buffersize; /* presumably the length of the normalization buffer at bufferaddress -- TODO confirm units (UChars vs. bytes) */
uint8_t flags;
uint8_t origFlags;
uint32_t iteratorIndex;
int32_t iteratorMove;
};
-U_CAPI void U_EXPORT2
-uprv_init_collIterate(const UCollator *collator, const UChar *sourceString, int32_t sourceLen, collIterate *s);
+U_CAPI void U_EXPORT2
+uprv_init_collIterate(const UCollator *collator,
+ const UChar *sourceString, int32_t sourceLen,
+ U_NAMESPACE_QUALIFIER collIterate *s, UErrorCode *status); /* presumably sets up s to iterate over sourceString for collator; status is the ICU error code -- NOTE(review): failure conditions are not visible in this header */
+
+/* Internal functions for C test code.
+ * In C, collIterate is only an incomplete type (see the #else typedef above),
+ * so C callers can neither declare one nor read its members and must go through these functions. */
+U_CAPI U_NAMESPACE_QUALIFIER collIterate * U_EXPORT2
+uprv_new_collIterate(UErrorCode *status);
+
+U_CAPI void U_EXPORT2
+uprv_delete_collIterate(U_NAMESPACE_QUALIFIER collIterate *s);
+
+/* @return s->pos == s->endp */
+U_CAPI UBool U_EXPORT2
+uprv_collIterateAtEnd(U_NAMESPACE_QUALIFIER collIterate *s);
+
+#ifdef XP_CPLUSPLUS
U_NAMESPACE_BEGIN
U_NAMESPACE_END
-struct UCollationElements
+struct UCollationElements : public U_NAMESPACE_QUALIFIER UMemory
{
/**
* Struct wrapper for source data
*/
- collIterate iteratordata_;
+ U_NAMESPACE_QUALIFIER collIterate iteratordata_;
/**
* Indicates if this data has been reset.
*/
U_NAMESPACE_QUALIFIER UCollationPCE *pce;
};
+#else
+/* Opaque type for C code: the full definition above is only compiled as C++. */
+struct UCollationElements;
+#endif
U_CAPI void U_EXPORT2
uprv_init_pce(const struct UCollationElements *elems);
(((uint32_t)(ch) - 0x1161) <= (0x1175 - 0x1161)) || \
(((uint32_t)(ch) - 0x11A8) <= (0x11C2 - 0x11A8)))
+/* Han character ranges */
+#define UCOL_FIRST_HAN 0x4E00
+#define UCOL_LAST_HAN 0x9FFF
+#define UCOL_FIRST_HAN_A 0x3400
+#define UCOL_LAST_HAN_A 0x4DBF
+/* Unified ideographs inside the CJK Compatibility Ideographs block start at U+FA0E.
+ * The earlier value 0xFAE0 was larger than UCOL_LAST_HAN_COMPAT, which left the range empty. */
+#define UCOL_FIRST_HAN_COMPAT 0xFA0E
+#define UCOL_LAST_HAN_COMPAT 0xFA2F
+
+/* Han extension B is in plane 2 (supplementary code points above U+FFFF) */
+#define UCOL_FIRST_HAN_B 0x20000
+#define UCOL_LAST_HAN_B 0x2A6DF
+
+/* Hangul range (the precomposed Hangul Syllables block) */
+#define UCOL_FIRST_HANGUL 0xAC00
+#define UCOL_LAST_HANGUL 0xD7AF
+
+/* Jamo ranges: first leading consonant (L), first vowel (V), first and last trailing consonant (T) */
+#define UCOL_FIRST_L_JAMO 0x1100
+#define UCOL_FIRST_V_JAMO 0x1161
+#define UCOL_FIRST_T_JAMO 0x11A8
+#define UCOL_LAST_T_JAMO 0x11F9
#if 0
}
U_CFUNC
-uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE, collIterate *source, UErrorCode *status);
+uint32_t ucol_prv_getSpecialCE(const UCollator *coll, UChar ch, uint32_t CE,
+ U_NAMESPACE_QUALIFIER collIterate *source, UErrorCode *status); /* collIterate is namespace-qualified here and below because it is declared between U_NAMESPACE_BEGIN/END */
U_CFUNC
uint32_t ucol_prv_getSpecialPrevCE(const UCollator *coll, UChar ch, uint32_t CE,
- collIterate *source, UErrorCode *status);
-U_CAPI uint32_t U_EXPORT2 ucol_getNextCE(const UCollator *coll, collIterate *collationSource, UErrorCode *status);
+ U_NAMESPACE_QUALIFIER collIterate *source, UErrorCode *status);
+U_CAPI uint32_t U_EXPORT2 ucol_getNextCE(const UCollator *coll,
+ U_NAMESPACE_QUALIFIER collIterate *collationSource, UErrorCode *status);
U_CFUNC uint32_t U_EXPORT2 ucol_getPrevCE(const UCollator *coll,
- collIterate *collationSource,
- UErrorCode *status);
+ U_NAMESPACE_QUALIFIER collIterate *collationSource,
+ UErrorCode *status);
/* function used by C++ getCollationKey to prevent restarting the calculation */
U_CFUNC int32_t
ucol_getSortKeyWithAllocation(const UCollator *coll,
U_CFUNC
int32_t
-ucol_getSortKeySize(const UCollator *coll, collIterate *s,
+ucol_getSortKeySize(const UCollator *coll, U_NAMESPACE_QUALIFIER collIterate *s, /* s: namespace-qualified iterator state, an incomplete type for C callers */
int32_t currentSize, UColAttributeValue strength,
int32_t len);
/**
#define getCETag(CE) (((CE)&UCOL_TAG_MASK)>>UCOL_TAG_SHIFT)
#define isContraction(CE) (isSpecial((CE)) && (getCETag((CE)) == CONTRACTION_TAG))
#define isPrefix(CE) (isSpecial((CE)) && (getCETag((CE)) == SPEC_PROC_TAG))
-#define constructContractCE(tag, CE) (UCOL_SPECIAL_FLAG | ((tag)<<UCOL_TAG_SHIFT) | ((CE))&0xFFFFFF)
-#define constructSpecProcCE(CE) (UCOL_SPECIAL_FLAG | (SPEC_PROC_TAG<<UCOL_TAG_SHIFT) | ((CE))&0xFFFFFF)
+#define constructContractCE(tag, CE) (UCOL_SPECIAL_FLAG | ((tag)<<UCOL_TAG_SHIFT) | ((CE)&0xFFFFFF)) /* the parentheses make explicit that the 24-bit mask applies to CE only, before the ORs */
+#define constructSpecProcCE(CE) (UCOL_SPECIAL_FLAG | (SPEC_PROC_TAG<<UCOL_TAG_SHIFT) | ((CE)&0xFFFFFF)) /* same layout as constructContractCE with the tag fixed to SPEC_PROC_TAG */
#define getContractOffset(CE) ((CE)&0xFFFFFF)
#define getExpansionOffset(CE) (((CE)&0x00FFFFF0)>>4)
#define getExpansionCount(CE) ((CE)&0xF)
#define getExpansionSuffix(coleiter) ((coleiter)->iteratordata_.CEpos - (coleiter)->iteratordata_.toReturn)
-#define setExpansionSuffix(coleiter, offset) ((coleiter)->iteratordata_.toReturn = (coleiter)->iteratordata_.CEpos - leftoverces)
+/* Moves toReturn so that `offset` CEs lie between it and CEpos, i.e. the inverse of getExpansionSuffix.
+ * Uses the macro parameter; the macro used to ignore it and read a caller-scope variable named leftoverces. */
+#define setExpansionSuffix(coleiter, offset) ((coleiter)->iteratordata_.toReturn = (coleiter)->iteratordata_.CEpos - (offset))
-/* This is an enum that lists magic special byte values from the fractional UCA */
+/* This is an enum that lists magic special byte values from the fractional UCA.
+ * See also http://site.icu-project.org/design/collation/bytes */
/* TODO: all the #defines that refer to special byte values from the UCA should be changed to point here */
enum {
UCOL_BYTE_FIRST_TAILORED = 0x04,
UCOL_BYTE_COMMON = 0x05,
UCOL_BYTE_FIRST_UCA = UCOL_BYTE_COMMON,
- UCOL_CODAN_PLACEHOLDER = 0x27,
- UCOL_BYTE_LAST_LATIN_PRIMARY = 0x4C,
- UCOL_BYTE_FIRST_NON_LATIN_PRIMARY = 0x4D,
+ /* TODO: Make the following values dynamic since they change with almost every UCA version. */
+ UCOL_CODAN_PLACEHOLDER = 0x12, /* NOTE(review): meaning is not documented here; presumably the lead byte reserved for numeric ("digits as numbers") collation -- confirm */
+ UCOL_BYTE_FIRST_NON_LATIN_PRIMARY = 0x5B, /* presumably the first primary lead byte after the Latin range -- confirm against the current UCA data */
UCOL_BYTE_UNSHIFTED_MAX = 0xFF
};
UVersionInfo UCAVersion; /* version of the UCA, read from file */
UVersionInfo UCDVersion; /* UCD version, obtained by u_getUnicodeVersion */
UVersionInfo formatVersion; /* format version from the UDataInfo header */
- uint8_t reserved[84]; /* for future use */
+ uint32_t scriptToLeadByte; /* offset to script to lead collation byte mapping data */
+ uint32_t leadByteToScript; /* offset to lead collation byte to script mapping data */
+ uint8_t reserved[76]; /* for future use */
} UCATableHeader;
#define U_UNKNOWN_STATE 0
const uint32_t *expansion;
const UChar *contractionIndex;
const uint32_t *contractionCEs;
- /*const uint8_t *scriptOrder;*/
const uint32_t *endExpansionCE; /* array of last ces in an expansion ce.
corresponds to expansionCESize */
uint8_t tertiaryBottomCount;
UVersionInfo dataVersion; /* Data info of UCA table */
+ int32_t* reorderCodes;
+ int32_t reorderCodesLength;
+ uint8_t* leadBytePermutationTable;
};
U_CDECL_END
U_CAPI UChar32 U_EXPORT2
uprv_uca_getCodePointFromRaw(UChar32 i);
+typedef const UChar* GetCollationRulesFunction(void* context, const char* locale, const char* type, int32_t* pLength, UErrorCode* status); /* callback type of the importFunc parameter below; presumably returns the rules for locale/type and stores their length in *pLength -- TODO confirm */
+/* Opens a collator from a rule string. rules..parseError and status mirror the public ucol_openRules().
+ * importFunc and context are additional: presumably importFunc is called with context to fetch
+ * rules that `rules` imports from another locale -- TODO confirm in the implementation. */
+U_CAPI UCollator* U_EXPORT2
+ucol_openRulesForImport( const UChar *rules,
+ int32_t rulesLength,
+ UColAttributeValue normalizationMode,
+ UCollationStrength strength,
+ UParseError *parseError,
+ GetCollationRulesFunction importFunc,
+ void* context,
+ UErrorCode *status);
+
+/* NOTE(review): presumably fills the leadBytePermutationTable field from the reorderCodes field
+ * declared earlier in this header -- confirm in the implementation. */
+U_CAPI void U_EXPORT2 ucol_buildPermutationTable(UCollator *coll, UErrorCode *status);
#ifdef XP_CPLUSPLUS
#endif /* XP_CPLUSPLUS */
/* The offsetBuffer in collIterate might need to be freed to avoid memory leaks. */
-static void freeOffsetBuffer(collIterate *s) {
- if (s != NULL && s->offsetBuffer != NULL) {
- uprv_free(s->offsetBuffer);
- s->offsetBuffer = NULL;
- s->offsetBufferSize = 0;
- }
-}
-
+void ucol_freeOffsetBuffer(U_NAMESPACE_QUALIFIER collIterate *s); /* NOTE(review): declared without U_CFUNC/U_CAPI, unlike the other prototypes in this header -- confirm the linkage is intended and that every caller is C++ */
#endif /* #if !UCONFIG_NO_COLLATION */