/*
**********************************************************************
-* Copyright (c) 2002-2003, International Business Machines
+* Copyright (c) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
-* $Source: /cvs/root/ICU/icuSources/test/cintltst/usettest.c,v $
-**********************************************************************
*/
#include "unicode/uset.h"
#include "unicode/ustring.h"
#include "cintltst.h"
+#include "cmemory.h"
#include <stdlib.h>
#include <string.h>
-#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
-
#define TEST(x) addTest(root, &x, "uset/" # x)
static void TestAPI(void);
static void Testj2269(void);
static void TestSerialized(void);
+static void TestNonInvariantPattern(void);
+static void TestBadPattern(void);
+static void TestFreezable(void);
+static void TestSpan(void);
void addUSetTest(TestNode** root);
TEST(TestAPI);
TEST(Testj2269);
TEST(TestSerialized);
+ TEST(TestNonInvariantPattern);
+ TEST(TestBadPattern);
+ TEST(TestFreezable);
+ TEST(TestSpan);
}
/*------------------------------------------------------------------
}
static const UChar PAT[] = {91,97,45,99,123,97,98,125,93,0}; /* "[a-c{ab}]" */
-static const int32_t PAT_LEN = (sizeof(PAT) / sizeof(PAT[0])) - 1;
+static const int32_t PAT_LEN = UPRV_LENGTHOF(PAT) - 1; /* element count of PAT minus the terminating 0 */
+
+static const UChar PAT_lb[] = {0x6C, 0x62, 0}; /* "lb": property alias passed to uset_applyPropertyAlias() in TestAPI */
+static const int32_t PAT_lb_LEN = UPRV_LENGTHOF(PAT_lb) - 1;
+
+static const UChar VAL_SP[] = {0x53, 0x50, 0}; /* "SP": property value passed together with PAT_lb */
+static const int32_t VAL_SP_LEN = UPRV_LENGTHOF(VAL_SP) - 1;
static const UChar STR_bc[] = {98,99,0}; /* "bc" */
-static const int32_t STR_bc_LEN = (sizeof(STR_bc) / sizeof(STR_bc[0])) - 1;
+static const int32_t STR_bc_LEN = UPRV_LENGTHOF(STR_bc) - 1;
static const UChar STR_ab[] = {97,98,0}; /* "ab" */
-static const int32_t STR_ab_LEN = (sizeof(STR_ab) / sizeof(STR_ab[0])) - 1;
+static const int32_t STR_ab_LEN = UPRV_LENGTHOF(STR_ab) - 1;
/**
* Basic API test for uset.x
*/
static void TestAPI() {
USet* set;
+ USet* set2;
UErrorCode ec;
/* [] */
+ set = uset_openEmpty();
+ expect(set, "", "abc{ab}", NULL);
+ uset_close(set);
+
+ set = uset_open(1, 0);
+ expect(set, "", "abc{ab}", NULL);
+ uset_close(set);
+
set = uset_open(1, 1);
uset_clear(set);
expect(set, "", "abc{ab}", NULL);
ec = U_ZERO_ERROR;
set = uset_openPattern(PAT, PAT_LEN, &ec);
if(U_FAILURE(ec)) {
- log_data_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
+ log_err("uset_openPattern([a-c{ab}]) failed - %s\n", u_errorName(ec));
return;
}
+ if(!uset_resemblesPattern(PAT, PAT_LEN, 0)) {
+ log_err("uset_resemblesPattern of PAT failed\n");
+ }
expect(set, "abc{ab}", "def{bc}", &ec);
/* [a-d{ab}] */
uset_removeRange(set, 0x0050, 0x0063);
expect(set, "de{bc}", "bcfg{ab}", NULL);
+ /* [g-l] */
+ uset_set(set, 0x0067, 0x006C);
+ expect(set, "ghijkl", "de{bc}", NULL);
+
+ if (uset_indexOf(set, 0x0067) != 0) {
+ log_err("uset_indexOf failed finding correct index of 'g'\n");
+ }
+
+ if (uset_charAt(set, 0) != 0x0067) {
+ log_err("uset_charAt failed finding correct char 'g' at index 0\n");
+ }
+
+ /* How to test this one...? */
+ uset_compact(set);
+
+ /* [g-i] */
+ uset_retain(set, 0x0067, 0x0069);
+ expect(set, "ghi", "dejkl{bc}", NULL);
+
+ /* UCHAR_ASCII_HEX_DIGIT */
+ uset_applyIntPropertyValue(set, UCHAR_ASCII_HEX_DIGIT, 1, &ec);
+ if(U_FAILURE(ec)) {
+ log_err("uset_applyIntPropertyValue([UCHAR_ASCII_HEX_DIGIT]) failed - %s\n", u_errorName(ec));
+ return;
+ }
+ expect(set, "0123456789ABCDEFabcdef", "GHIjkl{bc}", NULL);
+
+ /* [ab] */
+ uset_clear(set);
+ uset_addAllCodePoints(set, STR_ab, STR_ab_LEN);
+ expect(set, "ab", "def{ab}", NULL);
+ if (uset_containsAllCodePoints(set, STR_bc, STR_bc_LEN)){
+ log_err("set should not conatin all characters of \"bc\" \n");
+ }
+
+ /* [] */
+ set2 = uset_open(1, 1);
+ uset_clear(set2);
+
+ /* space */
+ uset_applyPropertyAlias(set2, PAT_lb, PAT_lb_LEN, VAL_SP, VAL_SP_LEN, &ec);
+ expect(set2, " ", "abcdefghi{bc}", NULL);
+
+ /* [a-c] */
+ uset_set(set2, 0x0061, 0x0063);
+ /* [g-i] */
+ uset_set(set, 0x0067, 0x0069);
+
+ /* [a-c g-i] */
+ if (uset_containsSome(set, set2)) {
+ log_err("set should not contain some of set2 yet\n");
+ }
+ uset_complementAll(set, set2);
+ if (!uset_containsSome(set, set2)) {
+ log_err("set should contain some of set2\n");
+ }
+ expect(set, "abcghi", "def{bc}", NULL);
+
+ /* [g-i] */
+ uset_removeAll(set, set2);
+ expect(set, "ghi", "abcdef{bc}", NULL);
+
+ /* [a-c g-i] */
+ uset_addAll(set2, set);
+ expect(set2, "abcghi", "def{bc}", NULL);
+
+ /* [g-i] */
+ uset_retainAll(set2, set);
+ expect(set2, "ghi", "abcdef{bc}", NULL);
+
uset_close(set);
+ uset_close(set2);
}
/*------------------------------------------------------------------
const char* list,
UBool isIn) {
const char* p = list;
- UChar ustr[128];
- char pat[128];
+ UChar ustr[4096];
+ char *pat;
UErrorCode ec;
- int32_t rangeStart = -1, rangeEnd = -1;
+ int32_t rangeStart = -1, rangeEnd = -1, length;
ec = U_ZERO_ERROR;
- uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
- u_UCharsToChars(ustr, pat, u_strlen(ustr)+1);
+ length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
+ if(U_FAILURE(ec)) {
+ log_err("FAIL: uset_toPattern() fails in expectContainment() - %s\n", u_errorName(ec));
+ return;
+ }
+ pat=aescstrdup(ustr, length);
while (*p) {
if (*p=='{') {
int32_t stringLength = 0;
char strCopy[64];
- while (*p++ != '}') {}
- stringLength = p - stringStart - 1;
+ while (*p++ != '}') {
+ }
+ stringLength = (int32_t)(p - stringStart - 1);
strncpy(strCopy, stringStart, stringLength);
strCopy[stringLength] = 0;
(isIn ? "contains" : "does not contain"),
strCopy);
} else {
- log_err("FAIL: %s %s \"%s\"\n", pat,
+ log_data_err("FAIL: %s %s \"%s\" (Are you missing data?)\n", pat,
(isIn ? "does not contain" : "contains"),
strCopy);
}
(isIn ? "contains" : "does not contain"),
*p);
} else {
- log_err("FAIL: %s %s '%c'\n", pat,
+ log_data_err("FAIL: %s %s '%c' (Are you missing data?)\n", pat,
(isIn ? "does not contain" : "contains"),
*p);
}
(isIn ? "contains" : "does not contain"),
rangeStart, rangeEnd);
} else {
- log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
+ log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
(isIn ? "does not contain" : "contains"),
rangeStart, rangeEnd);
}
(isIn ? "contains" : "does not contain"),
rangeStart, rangeEnd);
} else {
- log_err("FAIL: %s %s U+%04X-U+%04X\n", pat,
+ log_data_err("FAIL: %s %s U+%04X-U+%04X (Are you missing data?)\n", pat,
(isIn ? "does not contain" : "contains"),
rangeStart, rangeEnd);
}
}
}
-/* This only works for BMP chars */
+/* This only works for invariant BMP chars */
static char oneUCharToChar(UChar32 c) {
UChar ubuf[1];
char buf[1];
static void expectItems(const USet* set,
const char* items) {
const char* p = items;
- UChar ustr[128], itemStr[128];
- char pat[128], buf[128];
+ UChar ustr[4096], itemStr[4096];
+ char buf[4096];
+ char *pat;
UErrorCode ec;
int32_t expectedSize = 0;
int32_t itemCount = uset_getItemCount(set);
int32_t itemIndex = 0;
UChar32 start = 1, end = 0;
- int32_t itemLen = 0;
+ int32_t itemLen = 0, length;
ec = U_ZERO_ERROR;
- uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
+ length = uset_toPattern(set, ustr, sizeof(ustr), TRUE, &ec);
if (U_FAILURE(ec)) {
log_err("FAIL: uset_toPattern => %s\n", u_errorName(ec));
return;
}
- u_UCharsToChars(ustr, pat, u_strlen(ustr)+1);
+ pat=aescstrdup(ustr, length);
if (uset_isEmpty(set) != (strlen(items)==0)) {
- log_err("FAIL: %s should return %s from isEmpty\n",
+ log_data_err("FAIL: %s should return %s from isEmpty (Are you missing data?)\n",
pat,
strlen(items)==0 ? "TRUE" : "FALSE");
}
/* Don't test patterns starting with "[^" */
- if (strlen(pat) > 2 && pat[1] == '^') {
+ if (u_strlen(ustr) > 2 && ustr[1] == 0x5e /*'^'*/) {
return;
}
if (start > end || start == -1) {
/* Fetch our next item */
if (itemIndex >= itemCount) {
- log_err("FAIL: ran out of items iterating %s\n", pat);
+ log_data_err("FAIL: ran out of items iterating %s (Are you missing data?)\n", pat);
return;
}
int32_t stringLength = 0;
char strCopy[64];
- while (*p++ != '}') {}
- stringLength = p - stringStart - 1;
+ while (*p++ != '}') {
+ }
+ stringLength = (int32_t)(p - stringStart - 1);
strncpy(strCopy, stringStart, stringLength);
strCopy[stringLength] = 0;
errorCode=U_ZERO_ERROR;
set=uset_openPattern(pattern, -1, &errorCode);
if(U_FAILURE(errorCode)) {
- log_data_err("uset_openPattern([:Cf:]) failed - %s\n", u_errorName(errorCode));
+ log_data_err("uset_openPattern([:Cf:]) failed - %s (Are you missing data?)\n", u_errorName(errorCode));
return;
}
- length=uset_serialize(set, buffer, LENGTHOF(buffer), &errorCode);
+ length=uset_serialize(set, buffer, UPRV_LENGTHOF(buffer), &errorCode);
if(U_FAILURE(errorCode)) {
log_err("unable to uset_serialize([:Cf:]) - %s\n", u_errorName(errorCode));
uset_close(set);
uset_close(set);
}
+/**
+ * Regression test for JB#3795: handing uset_openPattern() a pattern that
+ * contains a non-invariant character must not cause an ugly failure mode
+ * (e.g. an assertion failure).
+ */
+static void
+TestNonInvariantPattern() {
+    enum { UPATTERN_CAPACITY = 256 };
+    /* What matters about this pattern is that it contains a
+       non-invariant character. */
+    static const char *source = "[:ccc!=0:]";
+    UChar upattern[UPATTERN_CAPACITY];
+    UErrorCode status = U_ZERO_ERROR;
+    int32_t upatternLength;
+    USet *opened;
+
+    upatternLength = u_unescape(source, upattern, UPATTERN_CAPACITY);
+
+    /* Nothing is asserted here: the test 'fails' only if the call below
+       hits an assertion failure, and passes by running to completion.
+       The status and the returned set are therefore not examined. */
+    opened = uset_openPattern(upattern, upatternLength, &status);
+    uset_close(opened);
+}
+
+/**
+ * Verifies that uset_openPatternOptions() rejects the unterminated pattern
+ * "[": the call is expected to return NULL and leave a failure code in
+ * status. Anything else is logged as an error.
+ */
+static void TestBadPattern(void) {
+    UErrorCode status = U_ZERO_ERROR;
+    USet *pat;
+    U_STRING_DECL(pattern, "[", 1);
+    U_STRING_INIT(pattern, "[", 1);
+    pat = uset_openPatternOptions(pattern, u_strlen(pattern), 0, &status);
+    if (pat != NULL || U_SUCCESS(status)) {
+        log_err("uset_openPatternOptions did not fail as expected %s\n", u_errorName(status));
+    }
+    /* If the call unexpectedly produced a set, release it so the failing
+       path does not leak. In the expected case pat is NULL; this file
+       already passes possibly-NULL sets to uset_close() elsewhere. */
+    uset_close(pat);
+}
+
+/**
+ * Opens a set from the pattern "[:ID_Continue:]".
+ *
+ * The error code produced by uset_openPattern() is discarded; the callers
+ * in this file test the returned pointer against NULL instead, and
+ * release the set with uset_close() when they are done with it.
+ *
+ * @return the result of uset_openPattern() for that pattern
+ */
+static USet *openIDSet() {
+    enum { ID_PATTERN_LENGTH = 15 };    /* strlen("[:ID_Continue:]") */
+    UErrorCode status = U_ZERO_ERROR;
+    USet *opened;
+    U_STRING_DECL(idContinuePattern, "[:ID_Continue:]", ID_PATTERN_LENGTH);
+    U_STRING_INIT(idContinuePattern, "[:ID_Continue:]", ID_PATTERN_LENGTH);
+
+    opened = uset_openPattern(idContinuePattern, ID_PATTERN_LENGTH, &status);
+    return opened;
+}
+
+/**
+ * Tests the freezing API: uset_clone(), uset_freeze(), uset_isFrozen() and
+ * uset_cloneAsThawed(), using the [:ID_Continue:] set from openIDSet().
+ *
+ * Every set opened here is released on one shared exit path, so that an
+ * early exit after a failed step cannot leak the sets opened before it.
+ */
+static void TestFreezable() {
+    USet *idSet;
+    USet *frozen = NULL;
+    USet *thawed = NULL;
+
+    idSet=openIDSet();
+
+    if (idSet == NULL) {
+        /* Nothing has been opened yet, so there is nothing to release. */
+        log_data_err("openIDSet() returned NULL. (Are you missing data?)\n");
+        return;
+    }
+
+    frozen=uset_clone(idSet);
+
+    if (frozen == NULL) {
+        log_err("uset_clone() returned NULL\n");
+        goto cleanup;
+    }
+
+    if(!uset_equals(frozen, idSet)) {
+        log_err("uset_clone() did not make an equal copy\n");
+    }
+
+    /* The check below requires that the add on the frozen set changed
+       nothing: frozen must still equal idSet, and only frozen may report
+       itself as frozen. */
+    uset_freeze(frozen);
+    uset_addRange(frozen, 0xd802, 0xd805);
+
+    if(uset_isFrozen(idSet) || !uset_isFrozen(frozen) || !uset_equals(frozen, idSet)) {
+        log_err("uset_freeze() or uset_isFrozen() does not work\n");
+    }
+
+    thawed=uset_cloneAsThawed(frozen);
+
+    if (thawed == NULL) {
+        log_err("uset_cloneAsThawed(frozen) returned NULL\n");
+        goto cleanup;
+    }
+
+    /* The same add must take effect on the thawed clone, making it differ
+       from idSet. */
+    uset_addRange(thawed, 0xd802, 0xd805);
+
+    if(uset_isFrozen(thawed) || uset_equals(thawed, idSet) || !uset_containsRange(thawed, 0xd802, 0xd805)) {
+        log_err("uset_cloneAsThawed() does not work\n");
+    }
+
+cleanup:
+    /* frozen and thawed are still NULL if their step was never reached or
+       failed; uset_close() is simply handed NULL in that case. */
+    uset_close(thawed);
+    uset_close(frozen);
+    uset_close(idSet);
+}
+
+/**
+ * Checks uset_span()/uset_spanBack() and their UTF-8 counterparts against
+ * the [:ID_Continue:] set, first on the set as opened and then again after
+ * uset_freeze().
+ *
+ * The sample text is U+0E01 followed by U+3000; the UTF-8 form holds the
+ * same two code points at three bytes each. The expected lengths encode
+ * that the first code point is in the set and the second is not.
+ */
+static void TestSpan() {
+    static const UChar utf16Text[2]={ 0xe01, 0x3000 };
+    static const char* utf8Text="\xE0\xB8\x81\xE3\x80\x80";
+    int32_t pass;
+    USet *idSet;
+
+    idSet=openIDSet();
+    if (idSet == NULL) {
+        log_data_err("openIDSet() returned NULL (Are you missing data?)\n");
+        return;
+    }
+
+    /* Pass 0 runs on the set as opened, pass 1 on the same set frozen. */
+    for (pass = 0; pass < 2; ++pass) {
+        UBool utf16Ok;
+        UBool utf8Ok;
+
+        if (pass == 1) {
+            uset_freeze(idSet);
+        }
+
+        utf16Ok = (UBool)(
+            uset_span(idSet, utf16Text, 2, USET_SPAN_CONTAINED) == 1 &&
+            uset_span(idSet, utf16Text, 2, USET_SPAN_NOT_CONTAINED) == 0 &&
+            uset_spanBack(idSet, utf16Text, 2, USET_SPAN_CONTAINED) == 2 &&
+            uset_spanBack(idSet, utf16Text, 2, USET_SPAN_NOT_CONTAINED) == 1);
+        if (!utf16Ok) {
+            if (pass == 0) {
+                log_err("uset_span() or uset_spanBack() does not work\n");
+            } else {
+                log_err("uset_span(frozen) or uset_spanBack(frozen) does not work\n");
+            }
+        }
+
+        utf8Ok = (UBool)(
+            uset_spanUTF8(idSet, utf8Text, 6, USET_SPAN_CONTAINED) == 3 &&
+            uset_spanUTF8(idSet, utf8Text, 6, USET_SPAN_NOT_CONTAINED) == 0 &&
+            uset_spanBackUTF8(idSet, utf8Text, 6, USET_SPAN_CONTAINED) == 6 &&
+            uset_spanBackUTF8(idSet, utf8Text, 6, USET_SPAN_NOT_CONTAINED) == 3);
+        if (!utf8Ok) {
+            if (pass == 0) {
+                log_err("uset_spanUTF8() or uset_spanBackUTF8() does not work\n");
+            } else {
+                log_err("uset_spanUTF8(frozen) or uset_spanBackUTF8(frozen) does not work\n");
+            }
+        }
+    }
+
+    uset_close(idSet);
+}
+
/*eof*/