/********************************************************************
- * COPYRIGHT:
- * Copyright (c) 1997-2003, International Business Machines Corporation and
+ * COPYRIGHT:
+ * Copyright (c) 1997-2011, International Business Machines Corporation and
* others. All Rights Reserved.
********************************************************************/
#include "unicode/sortkey.h"
#include "regcoll.h"
#include "sfwdchit.h"
+#include "testutil.h"
+#include "cmemory.h"
#define ARRAY_LENGTH(array) ((int32_t)(sizeof array / sizeof array[0]))
if(U_FAILURE(status)) {
delete en_us;
en_us = 0;
- errln("Collator creation failed with %s", u_errorName(status));
+ errcheckln(status, "Collator creation failed with %s", u_errorName(status));
return;
}
}
"A\u0001", ">", "~\u0002", // Ensure A and ~ are not compared bitwise
"\u00C0", "=", "A\u0300" // Decomp should make these equal
};
-
+
String[] nodecomp = {
"\u00C0", ">", "A\u0300" // A-grave vs. A combining-grave
};
{0x00c0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}
};
-
+
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
// @bug 4058613
//
-// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
+// Collator::createInstance() causes an ArrayIndexOutofBoundsException for Korean
//
void CollationRegressionTest::Test4058613(/* char* par */)
{
// Creating a default collator doesn't work when Korean is the default
// locale
-
+
Locale oldDefault = Locale::getDefault();
UErrorCode status = U_ZERO_ERROR;
-
+
Locale::setDefault(Locale::getKorean(), status);
if (U_FAILURE(status))
}
Collator *c = NULL;
-
+
c = Collator::createInstance("en_US", status);
if (c == NULL || U_FAILURE(status))
delete c;
return;
}
-
+
// Since the fix to this bug was to turn off decomposition for Korean collators,
// ensure that's what we got
if (c->getAttribute(UCOL_NORMALIZATION_MODE, status) != UCOL_OFF)
RuleBasedCollator *c = NULL;
UnicodeString rules = "< a < b , c/a < d < z";
-
+
c = new RuleBasedCollator(rules, status);
if (c == NULL || U_FAILURE(status))
rules += ", i, I";
RuleBasedCollator *c = NULL;
-
+
c = new RuleBasedCollator(rules, status);
if (c == NULL || U_FAILURE(status))
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *c = NULL;
-
- c = (RuleBasedCollator *) Collator::createInstance(Locale::getFrance(), status);
+
+ c = (RuleBasedCollator *) Collator::createInstance(Locale::getCanadaFrench(), status);
if (c == NULL || U_FAILURE(status))
{
- errln("Failed to create collator for Locale::getFrance()");
+ errln("Failed to create collator for Locale::getCanadaFrench()");
delete c;
return;
}
{
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *c = NULL;
-
- c = (RuleBasedCollator *)Collator::createInstance(Locale::getFrance(), status);
+
+ c = (RuleBasedCollator *)Collator::createInstance(Locale::getCanadaFrench(), status);
if (c == NULL || U_FAILURE(status))
{
- errln("Failure creating collator for Locale::getFrance()");
+ errln("Failure creating collator for Locale::getCanadaFrench()");
delete c;
return;
}
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
c->setStrength(Collator::TERTIARY);
-
+
// Now that the default collators are set to NO_DECOMPOSITION
// (as a result of fixing bug 4114077), we must set it explicitly
// when we're testing reordering behavior. -- lwerner, 5/5/98
UErrorCode status = U_ZERO_ERROR;
Locale da_DK("da", "DK");
RuleBasedCollator *c = NULL;
-
+
c = (RuleBasedCollator *) Collator::createInstance(da_DK, status);
if (c == NULL || U_FAILURE(status))
UErrorCode status = U_ZERO_ERROR;
Locale el("el", "");
Collator *c = NULL;
-
+
c = Collator::createInstance(el, status);
if (c == NULL || U_FAILURE(status))
RuleBasedCollator *c = NULL;
UnicodeString rules = "< a < b";
UnicodeString nothing = "";
-
+
c = new RuleBasedCollator(rules, status);
if (c == NULL || U_FAILURE(status))
// From UTR #15:
// *In earlier versions of Unicode, jamo characters like ksf
- // had compatibility mappings to kf + sf. These mappings were
+ // had compatibility mappings to kf + sf. These mappings were
// removed in Unicode 2.1.9 to ensure that Hangul syllables are maintained.)
// That is, the following test is obsolete as of 2.1.9
{
UErrorCode status = U_ZERO_ERROR;
Collator *coll = NULL;
-
+
coll = Collator::createInstance(Locale::getJapan(), status);
-
+
if (coll == NULL || U_FAILURE(status))
{
errln("Failed to create collator for Locale::JAPAN");
static const UChar test[] = {0x41, 0x0308, 0x62, 0x63, 0};
CollationKey key;
-
+
coll->getCollationKey(test, key, status);
if (key.isBogus() || U_FAILURE(status))
UErrorCode status = U_ZERO_ERROR;
Collator *c = NULL;
-
- c = Collator::createInstance(Locale::getFrance(), status);
+
+ c = Collator::createInstance(Locale::getCanadaFrench(), status);
c->setStrength(Collator::TERTIARY);
-
+
if (c == NULL || U_FAILURE(status))
{
- errln("Failed to create a collator for Locale::getFrance()");
+ errln("Failed to create a collator for Locale::getCanadaFrench()");
delete c;
return;
}
// @bug 4114077
//
-// Collation with decomposition off doesn't work for Europe
+// Collation with decomposition off doesn't work for Europe
//
void CollationRegressionTest::Test4114077(/* char* par */)
{
// Ensure that we get the same results with decomposition off
// as we do with it on....
-
+
UErrorCode status = U_ZERO_ERROR;
RuleBasedCollator *c = (RuleBasedCollator *) en_us->clone();
c->setStrength(Collator::TERTIARY);
-
+
static const UChar test1[][CollationRegressionTest::MAX_TOKEN_LEN] =
{
{0x00C0, 0}, {0x3d, 0}, {0x41, 0x0300, 0}, // Should be equivalent
// @bug 4141640
//
-// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
+// Support for Swedish gone in 1.1.6 (Can't create Swedish collator)
//
void CollationRegressionTest::Test4141640(/* char* par */)
{
UErrorCode status = U_ZERO_ERROR;
int32_t i, localeCount;
const Locale *locales = Locale::getAvailableLocales(localeCount);
-
+
for (i = 0; i < localeCount; i += 1)
{
Collator *c = NULL;
// @bug 4139572
//
-// getCollationKey throws exception for spanish text
+// getCollationKey throws exception for spanish text
// Cannot reproduce this bug on 1.2, however it DOES fail on 1.1.6
//
void CollationRegressionTest::Test4139572(/* char* par */)
UErrorCode status = U_ZERO_ERROR;
Locale l("es", "es");
Collator *col = NULL;
-
+
col = Collator::createInstance(l, status);
if (col == NULL || U_FAILURE(status))
}
/* HSYS : RuleBasedCollator::compare() performance enhancements
compare() does not create CollationElementIterator() anymore.*/
-
+
class My4146160Collator : public RuleBasedCollator
{
public:
CollationElementIterator *createCollationElementIterator(const UnicodeString &text) const;
CollationElementIterator *createCollationElementIterator(const CharacterIterator &text) const;
-
+
static int32_t count;
};
My4146160Collator::count = 0;
My4146160Collator *mc = NULL;
-
+
mc = new My4146160Collator(*en_us, status);
if (mc == NULL || U_FAILURE(status))
delete mc;
#endif
}
+
+// Ticket 7189
+//
+// nextSortKeyPart incorrect for EO_S1 collation
+// Builds a sort key for text by calling ucol_nextSortKeyPart() repeatedly,
+// asking for a fixed-size chunk of key bytes on every call and appending
+// each chunk to keyBuf until a call produces no more bytes.
+//
+// coll       collator used to generate the key parts
+// text, len  source string handed to uiter_setString()
+// keyBuf     destination buffer for the concatenated key parts
+// keyBufLen  capacity of keyBuf in bytes; it must leave room for one full
+//            chunk on every call, including the final call that returns 0
+// status     ICU error code; nothing is done if it already signals failure
+//
+// Returns the total number of key bytes written, or -1 on failure. When
+// keyBuf cannot hold the next chunk, status is set to
+// U_BUFFER_OVERFLOW_ERROR instead of writing past the end of the buffer.
+static int32_t calcKeyIncremental(UCollator *coll, const UChar* text, int32_t len, uint8_t *keyBuf, int32_t keyBufLen, UErrorCode& status) {
+    // Number of key bytes requested from each ucol_nextSortKeyPart() call.
+    const int32_t count = 8;
+    UCharIterator uiter;
+    uint32_t state[2] = { 0, 0 };
+    int32_t keyLen = 0;
+
+    if (U_FAILURE(status)) {
+        return -1;
+    }
+
+    uiter_setString(&uiter, text, len);
+    while (TRUE) {
+        // A call may write up to count bytes at keyBuf[keyLen]; stop before
+        // that could run past the end of the caller's buffer.
+        if (keyBufLen - keyLen < count) {
+            status = U_BUFFER_OVERFLOW_ERROR;
+            return -1;
+        }
+
+        int32_t keyPartLen = ucol_nextSortKeyPart(coll, &uiter, state, &keyBuf[keyLen], count, &status);
+        if (U_FAILURE(status)) {
+            return -1;
+        }
+        if (keyPartLen == 0) {
+            // No more key bytes: the key is complete.
+            break;
+        }
+        keyLen += keyPartLen;
+    }
+    return keyLen;
+}
+
+// Regression test for ticket 7189. For each pair of strings in text1/text2
+// (the pairs differ only in letter case), an incremental sort key is built
+// with calcKeyIncremental() using a collator opened from the short string
+// "EO_S1". An error is reported when both strings of a pair yield
+// byte-identical keys; otherwise the two keys are logged.
+void CollationRegressionTest::TestT7189() {
+    static const UChar text1[][CollationRegressionTest::MAX_TOKEN_LEN] = {
+        // "Achter De Hoven"
+        { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x44, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
+        // "ABC"
+        { 0x41, 0x42, 0x43, 0x00 },
+        // "HELLO world!"
+        { 0x48, 0x45, 0x4C, 0x4C, 0x4F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
+    };
+
+    static const UChar text2[][CollationRegressionTest::MAX_TOKEN_LEN] = {
+        // "Achter de Hoven"
+        { 0x41, 0x63, 0x68, 0x74, 0x65, 0x72, 0x20, 0x64, 0x65, 0x20, 0x48, 0x6F, 0x76, 0x65, 0x6E, 0x00 },
+        // "abc"
+        { 0x61, 0x62, 0x63, 0x00 },
+        // "hello world!"
+        { 0x68, 0x65, 0x6C, 0x6C, 0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x00 }
+    };
+
+    // Open the collator
+    UErrorCode status = U_ZERO_ERROR;
+    UCollator *coll = ucol_openFromShortString("EO_S1", FALSE, NULL, &status);
+    if (U_FAILURE(status)) {
+        errln("Failed to create a collator for short string EO_S1");
+        return;
+    }
+
+    // Each row of text1 is MAX_TOKEN_LEN UChars wide, so this is the row count.
+    const uint32_t pairCount = (uint32_t)(sizeof(text1) / sizeof(text1[0]));
+
+    for (uint32_t idx = 0; idx < pairCount; ++idx) {
+        uint8_t firstKey[100];
+        uint8_t secondKey[100];
+
+        const int32_t firstLen = calcKeyIncremental(coll, text1[idx], -1, firstKey, sizeof(firstKey), status);
+        if (U_FAILURE(status)) {
+            errln(UnicodeString("Failed to get a partial collation key for ") + text1[idx]);
+            break;
+        }
+
+        const int32_t secondLen = calcKeyIncremental(coll, text2[idx], -1, secondKey, sizeof(secondKey), status);
+        if (U_FAILURE(status)) {
+            errln(UnicodeString("Failed to get a partial collation key for ") + text2[idx]);
+            break;
+        }
+
+        // The keys only count as identical when both length and bytes match.
+        const UBool keysDiffer = (firstLen != secondLen) || (uprv_memcmp(firstKey, secondKey, firstLen) != 0);
+        if (keysDiffer) {
+            logln(UnicodeString("Keys produced -\n")
+                  + " text1: " + text1[idx] + "\n"
+                  + " key1 : " + TestUtility::hex(firstKey, firstLen) + "\n"
+                  + " text2: " + text2[idx] + "\n"
+                  + " key2 : " + TestUtility::hex(secondKey, secondLen));
+        } else {
+            errln(UnicodeString("Failed: Identical key\n")
+                  + " text1: " + text1[idx] + "\n"
+                  + " text2: " + text2[idx] + "\n"
+                  + " key : " + TestUtility::hex(firstKey, firstLen));
+        }
+    }
+
+    // Reached on normal completion and after a break out of the loop.
+    ucol_close(coll);
+}
+
+// Runs caseFirstCompressionSub() on a clone of en_us three times: with the
+// clone's initial settings, with UCOL_CASE_FIRST set to UCOL_UPPER_FIRST,
+// and with UCOL_CASE_FIRST set to UCOL_LOWER_FIRST. If an attribute cannot
+// be set, the failure is reported and the test stops. The clone is deleted
+// on every exit path.
+void CollationRegressionTest::TestCaseFirstCompression() {
+    RuleBasedCollator *col = (RuleBasedCollator *) en_us->clone();
+    UErrorCode status = U_ZERO_ERROR;
+
+    // default
+    caseFirstCompressionSub(col, "default");
+
+    // Upper first
+    col->setAttribute(UCOL_CASE_FIRST, UCOL_UPPER_FIRST, status);
+    if (U_FAILURE(status)) {
+        errln("Failed to set UCOL_UPPER_FIRST");
+        delete col;  // this function owns the clone; do not leak it on failure
+        return;
+    }
+    caseFirstCompressionSub(col, "upper first");
+
+    // Lower first
+    col->setAttribute(UCOL_CASE_FIRST, UCOL_LOWER_FIRST, status);
+    if (U_FAILURE(status)) {
+        errln("Failed to set UCOL_LOWER_FIRST");
+        delete col;  // this function owns the clone; do not leak it on failure
+        return;
+    }
+    caseFirstCompressionSub(col, "lower first");
+
+    delete col;
+}
+
+// For every length from 1 to 50, builds two strings of that length: str2 is
+// all 'a', and str1 is the same except that its last character is 'A'.
+// The strings are compared twice with col: once through their collation
+// keys and once directly through Collator::compare(). An error is reported
+// if any call fails or if the two results disagree.
+//
+// col  collator under test
+// opt  label for the collator configuration, included in the error message
+void CollationRegressionTest::caseFirstCompressionSub(Collator *col, UnicodeString opt) {
+    const int32_t maxLength = 50;
+    const UChar smallA = (UChar)0x61;   // 'a'
+    const UChar capitalA = (UChar)0x41; // 'A'
+
+    UChar str1[maxLength];
+    UChar str2[maxLength];
+
+    CollationKey key1, key2;
+
+    for (int32_t len = 1; len <= maxLength; len++) {
+        // Fill both strings with 'a', then make the last char of str1 'A'.
+        const int32_t last = len - 1;
+        for (int32_t pos = 0; pos <= last; pos++) {
+            str1[pos] = smallA;
+            str2[pos] = smallA;
+        }
+        str1[last] = capitalA;
+
+        UErrorCode status = U_ZERO_ERROR;
+        col->getCollationKey(str1, len, key1, status);
+        col->getCollationKey(str2, len, key2, status);
+
+        const UCollationResult cmpKey = key1.compareTo(key2, status);
+        const UCollationResult cmpCol = col->compare(str1, len, str2, len, status);
+
+        if (U_FAILURE(status)) {
+            errln("Error in caseFirstCompressionSub");
+            continue;
+        }
+
+        if (cmpKey == cmpCol) {
+            continue;
+        }
+
+        errln(UnicodeString("Inconsistent comparison(") + opt
+              + "): str1=" + UnicodeString(str1, len) + ", str2=" + UnicodeString(str2, len)
+              + ", cmpKey=" + cmpKey + ", cmpCol=" + cmpCol);
+    }
+}
+
+
+
void CollationRegressionTest::compareArray(Collator &c,
const UChar tests[][CollationRegressionTest::MAX_TOKEN_LEN],
int32_t testCount)
if (c1 != c2)
{
UnicodeString msg, msg1(" ");
-
+
msg += msg1 + count;
msg += ": strength(0x";
appendHex(c1, 8, msg);
case 28: name = "Test4139572"; if (exec) Test4139572(/* par */); break;
case 29: name = "Test4141640"; if (exec) Test4141640(/* par */); break;
case 30: name = "Test4146160"; if (exec) Test4146160(/* par */); break;
+ case 31: name = "TestT7189"; if (exec) TestT7189(); break;
+ case 32: name = "TestCaseFirstCompression"; if (exec) TestCaseFirstCompression(); break;
default: name = ""; break;
}
} else {
- errln("Class collator not instantiated");
+ dataerrln("Class collator not instantiated");
name = "";
}
}