+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
-* Copyright (C) 2002-2014, International Business Machines
+* Copyright (C) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: custrtst.c
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/ucnv.h"
#include "unicode/uiter.h"
#include "cintltst.h"
+#include "cmemory.h"
#include <string.h>
-#define LENGTHOF(array) (sizeof(array)/sizeof((array)[0]))
-
/*
 * Get the sign of an integer: evaluates to 0 for zero, -1 for a negative
 * value and 1 for a positive value.
 *
 * The argument is narrowed to int32_t before its sign is tested, and it is
 * evaluated twice when it is non-zero, so it must not have side effects.
 * A comparison is used instead of a right shift because right-shifting a
 * negative signed value is implementation-defined in C.
 */
#define _SIGN(value) ((value)==0 ? 0 : ((int32_t)(value)<0 ? -1 : 1))
static void TestUnescape(void);
static void TestCountChar32(void);
static void TestUCharIterator(void);
+static void TestIsWellFormed(void);
void addUStringTest(TestNode** root);
addTest(root, &TestUnescape, "tsutil/custrtst/TestUnescape");
addTest(root, &TestCountChar32, "tsutil/custrtst/TestCountChar32");
addTest(root, &TestUCharIterator, "tsutil/custrtst/TestUCharIterator");
+ addTest(root, &TestIsWellFormed, "tsutil/custrtst/TestIsWellFormed");
}
/* test data for TestStringFunctions ---------------------------------------- */
if(temp[k] != 0xa4)
log_err("something threw an error in u_strncpy()\n");
- u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
+ u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
u_uastrncpy(temp, raw[i][j], k-1);
if(u_strncmp(temp, dataTable[i][j],k-1)!=0)
log_err("something threw an error in u_uastrncpy(k-1)\n");
if(temp[k-1] != 0x3F)
log_err("something threw an error in u_uastrncpy(k-1)\n");
- u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
+ u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
u_uastrncpy(temp, raw[i][j], k+1);
if(u_strcmp(temp, dataTable[i][j])!=0)
log_err("something threw an error in u_uastrncpy(k+1)\n");
if(temp[k] != 0)
log_err("something threw an error in u_uastrncpy(k+1)\n");
- u_memset(temp, 0x3F, (sizeof(temp) / sizeof(UChar)) - 1);
+ u_memset(temp, 0x3F, UPRV_LENGTHOF(temp) - 1);
u_uastrncpy(temp, raw[i][j], k);
if(u_strncmp(temp, dataTable[i][j], k)!=0)
log_err("something threw an error in u_uastrncpy(k)\n");
currToken++;
}
- if (currToken != sizeof(tokens)/sizeof(tokens[0])) {
+ if (currToken != UPRV_LENGTHOF(tokens)) {
log_err("Didn't get correct number of tokens\n");
}
state = delimBuf; /* Give it an "invalid" saveState */
UCharIterator iter1, iter2;
int32_t len1, len2, r1, r2;
- for(i=0; i<(sizeof(strings)/sizeof(strings[0])-1); ++i) {
+ for(i=0; i<(UPRV_LENGTHOF(strings)-1); ++i) {
if(u_strcmpCodePointOrder(strings[i], strings[i+1])>=0) {
log_err("error: u_strcmpCodePointOrder() fails for string %d and the following one\n", i);
}
if(
first!=u_strchr(s, nul) ||
first!=u_strchr32(s, nul) ||
- first!=u_memchr(s, nul, LENGTHOF(s)) ||
- first!=u_memchr32(s, nul, LENGTHOF(s)) ||
+ first!=u_memchr(s, nul, UPRV_LENGTHOF(s)) ||
+ first!=u_memchr32(s, nul, UPRV_LENGTHOF(s)) ||
first!=u_strrchr(s, nul) ||
first!=u_strrchr32(s, nul) ||
- first!=u_memrchr(s, nul, LENGTHOF(s)) ||
- first!=u_memrchr32(s, nul, LENGTHOF(s))
+ first!=u_memrchr(s, nul, UPRV_LENGTHOF(s)) ||
+ first!=u_memrchr32(s, nul, UPRV_LENGTHOF(s))
) {
log_err("error: one of the u_str[|mem][r]chr[32](s, nul) does not find the terminator of s\n");
}
s!=u_strstr(s, &nul) ||
s!=u_strFindFirst(s, -1, &nul, -1) ||
s!=u_strFindFirst(s, -1, &nul, 0) ||
- s!=u_strFindFirst(s, LENGTHOF(s), &nul, -1) ||
- s!=u_strFindFirst(s, LENGTHOF(s), &nul, 0) ||
+ s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, -1) ||
+ s!=u_strFindFirst(s, UPRV_LENGTHOF(s), &nul, 0) ||
s!=u_strrstr(s, &nul) ||
s!=u_strFindLast(s, -1, &nul, -1) ||
s!=u_strFindLast(s, -1, &nul, 0) ||
- s!=u_strFindLast(s, LENGTHOF(s), &nul, -1) ||
- s!=u_strFindLast(s, LENGTHOF(s), &nul, 0)
+ s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, -1) ||
+ s!=u_strFindLast(s, UPRV_LENGTHOF(s), &nul, 0)
) {
log_err("error: one of the u_str[str etc](s, \"\") does not find s itself\n");
}
0x50, 0x72, 0x69, 0x76, 0x61, 0x74, 0x65, 0x73, 0x20,
0x5a, 0x65, 0x69, 0x63, 0x68, 0x65, 0x6e, 0x3a, 0x20, 0xdbc8, 0xdf45, 0x1b, 0x03, 0x0a, 0x20, 0x1b, 0x263A, 0
};
- static const int32_t explength = sizeof(expect)/sizeof(expect[0])-1;
+ static const int32_t explength = UPRV_LENGTHOF(expect)-1;
int32_t length;
/* test u_unescape() */
- length=u_unescape(input, buffer, sizeof(buffer)/sizeof(buffer[0]));
+ length=u_unescape(input, buffer, UPRV_LENGTHOF(buffer));
if(length!=explength || u_strcmp(buffer, expect)!=0) {
log_err("failure in u_unescape(): length %d!=%d and/or incorrect result string\n", length,
explength);
}
/* try preflighting */
- length=u_unescape(input, NULL, sizeof(buffer)/sizeof(buffer[0]));
+ length=u_unescape(input, NULL, UPRV_LENGTHOF(buffer));
if(length!=explength || u_strcmp(buffer, expect)!=0) {
log_err("failure in u_unescape(preflighting): length %d!=%d\n", length, explength);
}
int32_t i, length, number;
/* test u_strHasMoreChar32Than() with length>=0 */
- length=LENGTHOF(string);
+ length=UPRV_LENGTHOF(string);
while(length>=0) {
for(i=0; i<=length; ++i) {
for(number=-1; number<=((length-i)+2); ++number) {
}
/* test u_strHasMoreChar32Than() with NUL-termination (length=-1) */
- length=LENGTHOF(string);
+ length=UPRV_LENGTHOF(string);
u_memcpy(buffer, string, length);
while(length>=0) {
buffer[length]=0;
for(i=0; i<=length; ++i) {
for(number=-1; number<=((length-i)+2); ++number) {
- _testStrHasMoreChar32Than(string+i, i, -1, number);
+ _testStrHasMoreChar32Than(buffer+i, i, -1, number);
}
}
--length;
}
/* test get/set state */
- length=LENGTHOF(text)-1;
+ length=UPRV_LENGTHOF(text)-1;
uiter_setString(&iter1, text, -1);
uiter_setString(&iter2, text, length);
testIteratorState(&iter1, &iter2, "UTF16IteratorState", length/2);
compareIterators(&iter1, "UTF16Iterator", &iter2, "UTF8Iterator_1");
/* test get/set state */
- length=LENGTHOF(text)-1;
+ length=UPRV_LENGTHOF(text)-1;
uiter_setUTF8(&iter1, bytes, -1);
testIteratorState(&iter1, &iter2, "UTF8IteratorState", length/2);
testIteratorState(&iter1, &iter2, "UTF8IteratorStatePlus1", length/2+1);
/* ### TODO test other iterators: CharacterIterator, Replaceable */
}
+/*
+ * UTF-16 inputs for TestIsWellFormed that the wellFormedTests table pairs
+ * with an expected u_strIsWellFormed() result of TRUE.
+ * Every array ends with a 0 code unit, so it can be passed with length -1
+ * as well as with the length reported by u_strlen().
+ */
+static const UChar valid0[] = { 0 }; // test empty string
+static const UChar valid1[] = { 0x0061,0x270C,0xFE0E, // victory hand with text variation selector
+ 0x0062,0x270C,0xFE0F, // victory hand with emoji variation selector
+ 0x0063,0x270C,0xD83C,0xDFFD, // victory hand with skin tone modifier
+ 0x0064,0x270C,0xFE0F,0xD83C,0xDFFD, // victory hand with emoji variation selector and skin tone modifier (obsolete sequence)
+ 0x0065,0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83C,0xDFEB, // woman teacher (ZWJ sequence) with skin tone
+ 0x0066,0xD83D,0xDC69,0x200D,0xD83D,0xDC69,0x200D,0xD83D,0xDC67, // family (woman, woman, girl - ZWJ sequence)
+ 0x0067,0x0030,0xFE0F,0x20E3, // keypad 0 (emoji combining seq)
+ 0x0068,0xD83C,0xDDEC,0xD83C,0xDDE7, // flag of UK (regional indicator pair)
+ 0x0069,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0xDB40,0xDC7F, // flag of England (tag seq)
+ 0x006A,0 };
+static const UChar valid2[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
+ 0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 29 combining marks
+ 0x006C,0 };
+static const UChar valid3[] = { // sample from Bill Siegrist, 100 UTF16 units
+ 0xD83D,0xDC2E, // U+1F42E 🐮
+ 0xD83D,0xDC3C, // U+1F43C 🐼
+ 0xD83D,0xDC39, // U+1F439 🐹
+ 0xD83D,0xDC31, // U+1F431 🐱
+ 0xD83D,0xDE4B,0x200D,0x2640,0xFE0F, // U+1F64B U+200D U+2640 U+FE0F 🙋♀️
+ 0xD83D,0xDE47,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F647 U+1F3FC U+200D U+2642 U+FE0F 🙇🏼♂️
+ 0xD83D,0xDE46,0x200D,0x2642,0xFE0F, // U+1F646 U+200D U+2642 U+FE0F 🙆♂️
+ 0xD83E,0xDDDA,0xD83C,0xDFFF,0x200D,0x2640,0xFE0F, // U+1F9DA U+1F3FF U+200D U+2640 U+FE0F 🧚🏿♀️
+ 0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D U+2642 U+FE0F 🧖🏾♂️
+ 0xD83E,0xDDD6,0xD83C,0xDFFE,0x200D,0x2642,0xFE0F, // U+1F9D6 U+1F3FE U+200D U+2642 U+FE0F 🧖🏾♂️
+ 0xD83E,0xDDDB,0xD83C,0xDFFC,0x200D,0x2642,0xFE0F, // U+1F9DB U+1F3FC U+200D U+2642 U+FE0F 🧛🏼♂️
+ 0xD83E,0xDDD9,0x200D,0x2640,0xFE0F, // U+1F9D9 U+200D U+2640 U+FE0F 🧙♀️
+ 0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0x2696,0xFE0F, // U+1F468 U+1F3FE U+200D U+2696 U+FE0F 👨🏾⚖️
+ 0xD83D,0xDC69,0xD83C,0xDFFC,0x200D,0xD83D,0xDD27, // U+1F469 U+1F3FC U+200D U+1F527 👩🏼🔧
+ 0xD83D,0xDC69,0xD83C,0xDFFF,0x200D,0xD83C,0xDFEB, // U+1F469 U+1F3FF U+200D U+1F3EB 👩🏿🏫
+ 0xD83D,0xDC68,0xD83C,0xDFFE,0x200D,0xD83D,0xDCBB, // U+1F468 U+1F3FE U+200D U+1F4BB 👨🏾💻
+ 0xD83D,0xDC69,0xD83C,0xDFFD,0x200D,0xD83D,0xDD2C, // U+1F469 U+1F3FD U+200D U+1F52C 👩🏽🔬
+ 0xD83D,0xDC68,0xD83C,0xDFFC,0x200D,0xD83D,0xDE92, // U+1F468 U+1F3FC U+200D U+1F692 👨🏼🚒
+ 0 };
+static const UChar valid4[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
+ 0x0061,0x2066,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x2069, // to level 122 in LRI then pop to 112
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066, // to level 122 again
+ 0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 90
+ 0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C, // pop to level 58
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 74
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 90
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 106
+ 0x000A, // pop to 0
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
+ 0x000A,0 };
+/*
+ * UTF-16 inputs for TestIsWellFormed that the wellFormedTests table pairs
+ * with an expected u_strIsWellFormed() result of FALSE.
+ * The trailing comment on each array names the condition it is meant to
+ * exercise. Every array ends with a 0 code unit.
+ */
+static const UChar malformed1[] = { 0x0061,0xFFFF,0 }; // non-character (BMP)
+static const UChar malformed2[] = { 0x0062,0xD87F,0xDFFE,0 }; // non-character (supplemental)
+static const UChar malformed3[] = { 0x0063,0xD7FC,0 }; // unassigned
+static const UChar malformed4[] = { 0x0064,0xD800,0 }; // unpaired high surrogate
+static const UChar malformed5[] = { 0x0065,0xDC00,0 }; // unpaired low surrogate
+static const UChar malformed6[] = { 0x0066,0xFE0F,0 }; // emoji variation selector on non-emoji
+static const UChar malformed7[] = { 0x0067,0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence on non-emoji
+static const UChar malformed8[] = { 0xDB40,0xDC67,0xDB40,0xDC7F,0 }; // tag sequence with no base
+static const UChar malformed9[] = { 0x0068,0xD83C,0xDFF4,0xDB40,0xDC67,0xDB40,0xDC62,0xDB40,0xDC65,0xDB40,0xDC6E,0xDB40,0xDC67,0x0069,0 }; // tag sequence with no term
+static const UChar malformedA[] = { 0x006A,0xD83C,0xDFF4,0xDB40,0xDC7F,0 }; // tag sequence with no tag_spec, just term
+static const UChar malformedB[] = { 0x006B,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,
+ 0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300,0x0300, // 31 combining marks
+ 0x006C,0 };
+static const UChar malformedC[] = { 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 16
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 32
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 48
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 64
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 80
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 96
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 112
+ 0x0061,0x202A,0x0062,0x202A,0x0063,0x202A,0x0064,0x202A,0x0065,0x202A,0x0066,0x202A,0x0067,0x202A,0x0068,0x202A, // to level 128 (error)
+ 0x0061,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0x202C,0 }; // start PDFs, too late
+/*
+ * One TestIsWellFormed case: a labelled string together with the value
+ * that u_strIsWellFormed() is expected to return for it.
+ */
+typedef struct {
+ const char* descrip; // label printed in failure messages; NULL marks the end of the table
+ const UChar* string; // 0-terminated UTF-16 text handed to u_strIsWellFormed()
+ UBool result; // expected return value of u_strIsWellFormed()
+} StringAndResult;
+/*
+ * Table driving TestIsWellFormed: each entry names one of the valid* or
+ * malformed* arrays and gives the result expected for it.
+ * The final entry has a NULL descrip; TestIsWellFormed stops when it reaches it.
+ */
+static const StringAndResult wellFormedTests[] = {
+ { "valid0", valid0, TRUE },
+ { "valid1", valid1, TRUE },
+ { "valid2", valid2, TRUE },
+ { "valid3", valid3, TRUE },
+ { "valid4", valid4, TRUE },
+ { "malformed1", malformed1, FALSE },
+ { "malformed2", malformed2, FALSE },
+ { "malformed3", malformed3, FALSE },
+ { "malformed4", malformed4, FALSE },
+ { "malformed5", malformed5, FALSE },
+ { "malformed6", malformed6, FALSE },
+ { "malformed7", malformed7, FALSE },
+ { "malformed8", malformed8, FALSE },
+ { "malformed9", malformed9, FALSE },
+ { "malformedA", malformedA, FALSE },
+ { "malformedB", malformedB, FALSE },
+ { "malformedC", malformedC, FALSE },
+ { NULL, NULL, 0 } // end-of-table sentinel
+};
+
+/*
+ * Checks u_strIsWellFormed() against every entry of wellFormedTests.
+ * Each string is tested twice: once with length -1 and once with the
+ * length reported by u_strlen(). An error is logged whenever the returned
+ * value differs from the entry's expected result.
+ */
+static void
+TestIsWellFormed() {
+    const StringAndResult* entry = wellFormedTests;
+
+    /* the table ends with an entry whose descrip is NULL */
+    while (entry->descrip != NULL) {
+        int32_t explicitLength;
+        UBool actual;
+
+        /* first pass: length -1 */
+        actual = u_strIsWellFormed(entry->string, -1);
+        if (actual != entry->result) {
+            log_err("test %s with length -1, expected %d, got %d\n", entry->descrip, entry->result, actual);
+        }
+
+        /* second pass: same string with its length given explicitly */
+        explicitLength = u_strlen(entry->string);
+        actual = u_strIsWellFormed(entry->string, explicitLength);
+        if (actual != entry->result) {
+            log_err("test %s with length %d, expected %d, got %d\n", entry->descrip, explicitLength, entry->result, actual);
+        }
+
+        ++entry;
+    }
+}