-* Checking collation element validity given the boundary arguments.
-*/
-/*
-* Contract of checkCEValidity:
-*   coll          collator whose collation elements are inspected
-*   codepoints    text to iterate over; codepoints[0] is also used in one
-*                 of the error messages
-*   length        number of UChars in codepoints handed to the iterator
-*   primarymax    non-zero primary weights below this value are rejected
-*   secondarymax  for the first non-zero CE only, a secondary weight in the
-*                 range [6, secondarymax] is rejected
-* Returns TRUE when every collation element passed every check. Returns
-* FALSE (after reporting through log_err) on the first failed check or
-* when the element iterator could not be created.
-*/
-static UBool checkCEValidity(const UCollator *coll, const UChar *codepoints,
-                             int length, uint32_t primarymax,
-                             uint32_t secondarymax)
-{
-    UErrorCode status = U_ZERO_ERROR;
-    UCollationElements *iter = ucol_openElements(coll, codepoints, length,
-                                                 &status);
-    uint32_t ce;
-    UBool first = TRUE;
-    UBool valid = FALSE;
-
-    if (U_FAILURE(status) || iter == NULL) {
-        log_err("Error creating iterator for testing validity\n");
-        /* Without a usable iterator there is nothing to walk; bail out
-           instead of handing a failed iterator to ucol_next. */
-        if (iter != NULL) {
-            ucol_closeElements(iter);
-        }
-        return FALSE;
-    }
-
-    for (ce = ucol_next(iter, &status); ce != UCOL_NULLORDER;
-         ce = ucol_next(iter, &status)) {
-        uint32_t primary;
-        uint32_t secondary;
-        uint32_t tertiary;
-        uint32_t lowByte;
-        uint32_t highByte;
-
-        /* completely zero elements carry no weights to validate */
-        if (ce == 0) {
-            continue;
-        }
-
-        primary = UCOL_PRIMARYORDER(ce);
-        secondary = UCOL_SECONDARYORDER(ce);
-        tertiary = UCOL_TERTIARYORDER(ce);
-        lowByte = primary & 0xFF;
-        highByte = (primary >> 8) & 0xFF;
-
-        if ((tertiary == 0 && secondary != 0) ||
-            (tertiary < 0xC0 && secondary == 0 && primary != 0)) {
-            /* n-1th level is not zero when the nth level is
-               except for continuations, this is wrong */
-            log_err("Lower level weight not 0 when high level weight is 0\n");
-            goto done;
-        }
-
-        /* checks if any byte is illegal ie = 01 02 03. */
-        if (checkByteBounds(ce, 0x3, 0x1)) {
-            log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
-            goto done;
-        }
-
-        if ((primary != 0 && primary < primarymax)
-            || (lowByte == 0xFF) || (highByte == 0xFF)
-            || (lowByte && (lowByte <= 0x03))
-            || (highByte && (highByte <= 0x03))
-            || (primary >= 0xFE00 && !isContinuation(ce))) {
-            log_err("UCA primary weight out of bounds: %04X for string starting with %04X\n",
-                    primary, codepoints[0]);
-            goto done;
-        }
-
-        /* case matching not done since data generated by ken */
-        if (first) {
-            /* only the first non-zero element gets the secondary check */
-            if (secondary >= 6 && secondary <= secondarymax) {
-                log_err("Secondary weight out of range\n");
-                goto done;
-            }
-            first = FALSE;
-        }
-    }
-    valid = TRUE;
-
-done:
-    /* single exit: the iterator is released on success and failure alike */
-    ucol_closeElements(iter);
-    return valid;
-}
-
-/*
-* Runs checkCEValidity over three sets of input:
-*   1. every code point sequence listed in the fractional UCA data file,
-*   2. every defined code point from U+0000 up to (not including) U+FFFF,
-*   3. the characters of every token in the rules of tailored collators
-*      (a fixed locale list when QUICK is set, otherwise all available
-*      locales that have collation elements).
-* Failures are reported through log_err; nothing is returned. The collator
-* and the data file are released on every exit path.
-*/
-static void TestCEValidity()
-{
-    /* testing UCA collation elements */
-    UErrorCode status = U_ZERO_ERROR;
-    /* collator for the root locale, used for the UCA checks below */
-    UCollator *coll = ucol_open("root", &status);
-    /* tailored locales */
-    char locale[][11] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN", "zh__PINYIN"};
-    const char *loc;
-    FileStream *file = getFractionalUCA();
-    char line[1024];
-    UChar codepoints[10];
-    int count = 0;
-    int maxCount = 0;
-    UParseError parseError;
-
-    if (U_FAILURE(status)) {
-        log_err("en_US collator creation failed\n");
-        goto cleanup;
-    }
-    log_verbose("Testing UCA elements\n");
-    if (file == NULL) {
-        log_err("Fractional UCA data can not be opened\n");
-        goto cleanup;
-    }
-
-    while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
-        /* skip empty lines, comments, bare line ends and '[' lines */
-        if (line[0] == 0 || line[0] == '#' || line[0] == '\n' ||
-            line[0] == 0x000D || line[0] == '[') {
-            continue;
-        }
-
-        getCodePoints(line, codepoints);
-        checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
-    }
-
-    log_verbose("Testing UCA elements for the whole range of unicode characters\n");
-    codepoints[0] = 0;
-    while (codepoints[0] < 0xFFFF) {
-        if (u_isdefined((UChar32)codepoints[0])) {
-            checkCEValidity(coll, codepoints, 1, 5, 86);
-        }
-        codepoints[0] ++;
-    }
-
-    ucol_close(coll);
-    coll = NULL;   /* so the cleanup path does not close it twice */
-
-    /* testing tailored collation elements */
-    log_verbose("Testing tailored elements\n");
-    if (QUICK) {
-        maxCount = sizeof(locale)/sizeof(locale[0]);
-    } else {
-        maxCount = uloc_countAvailable();
-    }
-    while (count < maxCount) {
-        const UChar *rules = NULL;
-        UChar *rulesCopy = NULL;
-        int32_t ruleLen = 0;
-        UBool startOfRules = TRUE;
-        UColOptionSet opts;
-        UColTokenParser src;
-
-        if (QUICK) {
-            loc = locale[count];
-        } else {
-            loc = uloc_getAvailable(count);
-            if (!hasCollationElements(loc)) {
-                count++;
-                continue;
-            }
-        }
-
-        log_verbose("Testing CEs for %s\n", loc);
-
-        coll = ucol_open(loc, &status);
-        if (U_FAILURE(status)) {
-            log_err("%s collator creation failed\n", loc);
-            goto cleanup;
-        }
-
-        src.opts = &opts;
-        rules = ucol_getRules(coll, &ruleLen);
-
-        if (ruleLen > 0) {
-            /* the parser works on a private copy with extra room at the end */
-            rulesCopy = (UChar *)malloc((ruleLen +
-                        UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
-            if (rulesCopy == NULL) {
-                log_err("Out of memory while copying the rules of %s\n", loc);
-                goto cleanup;
-            }
-            uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
-            src.current = src.source = rulesCopy;
-            src.end = rulesCopy + ruleLen;
-            src.extraCurrent = src.end;
-            src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
-
-            while (ucol_tok_parseNextToken(&src, startOfRules, &parseError, &status) != NULL) {
-                uint32_t chOffset = src.parsedToken.charsOffset;
-                uint32_t chLen = src.parsedToken.charsLen;
-
-                startOfRules = FALSE;
-                /* Check the token characters in place: the length is passed
-                   explicitly, so no terminated copy is needed and tokens
-                   longer than the small codepoints buffer cannot overflow it. */
-                checkCEValidity(coll, src.source + chOffset, (int)chLen, 4, 85);
-            }
-            free(rulesCopy);
-        }
-
-        ucol_close(coll);
-        coll = NULL;
-        count ++;
-    }
-
-cleanup:
-    /* shared exit: release whatever is still open */
-    if (coll != NULL) {
-        ucol_close(coll);
-    }
-    if (file != NULL) {
-        T_FileStream_close(file);
-    }
-}
-
-/*
-* Reports an invalid sort key through log_err: first the `length` code
-* units of `codepoints` in hex, then the `sklen` bytes of `sortkey` in hex.
-*/
-static void printSortKeyError(const UChar *codepoints, int length,
-                              uint8_t *sortkey, int sklen)
-{
-    int i;
-
-    log_err("Sortkey not valid for ");
-    for (i = 0; i < length; i++) {
-        log_err("0x%04x ", codepoints[i]);
-    }
-
-    log_err("\nSortkey : ");
-    for (i = 0; i < sklen; i++) {
-        log_err("0x%02x ", sortkey[i]);
-    }
-    log_err("\n");
-}
-
-/**
-* Checking sort key validity for all levels