git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/cintltst/callcoll.c

... / ...

Commit	Line	Data
	1	/********************************************************************
	2	* COPYRIGHT:
	3	* Copyright (c) 1997-2003, International Business Machines Corporation and
	4	* others. All Rights Reserved.
	5	********************************************************************/
	6	/********************************************************************************
	7	*
	8	* File CALLCOLL.C
	9	*
	10	* Modification History:
	11	* Name Description
	12	* Madhu Katragadda Ported for C API
	13	*********************************************************************************
	14	*/
	15
	16	/*
	17	* Important: This file is included into intltest/allcoll.cpp so that the
	18	* test data is shared. This makes it easier to maintain the test data,
	19	* especially since the Unicode data must be portable and quoted character
	20	* literals will not work.
	21	* If it is included, then there will be a #define INCLUDE_CALLCOLL_C
	22	* that must prevent the actual code in here from being part of the
	23	* allcoll.cpp compilation.
	24	*/
	25
	26	/**
	27	* CollationDummyTest is a third level test class. This tests creation of
	28	* a customized collator object. For example, number 1 to be sorted
	29	* equivalent to word 'one'.
	30	*/
	31
	32	#include <string.h>
	33	#include <stdlib.h>
	34
	35	#include "unicode/utypes.h"
	36
	37	#if !UCONFIG_NO_COLLATION
	38
	39	#include "unicode/ucol.h"
	40	#include "unicode/uloc.h"
	41	#include "unicode/ucoleitr.h"
	42	#include "unicode/ustring.h"
	43
	44	#include "cintltst.h"
	45	#include "ccolltst.h"
	46	#include "callcoll.h"
	47	#include "calldata.h"
	48	#include "cstring.h"
	49	#include "cmemory.h"
	50	#include "ucol_imp.h"
	51
	/*
	 * Forward declarations of the test cases registered by addAllCollTest().
	 * All of them are file-local and take no arguments.
	 */

	/* perform test with strength PRIMARY */
	static void TestPrimary(void);

	/* perform test with strength SECONDARY */
	static void TestSecondary(void);

	/* perform test with strength TERTIARY */
	static void TestTertiary(void);

	/* perform tests with strength IDENTICAL */
	static void TestIdentical(void);

	/* perform extra tests */
	static void TestExtra(void);

	/* Test jitterbug 581 */
	static void TestJB581(void);

	/* Test jitterbug 1401 */
	static void TestJB1401(void);

	/* Test [variable top] in the rule syntax */
	static void TestVariableTop(void);

	/* Test surrogates */
	static void TestSurrogates(void);

	/* Test that invalid rules report the expected parse-error context */
	static void TestInvalidRules(void);

	/* Test jitterbug 1098 (quote and backslash escaping in rules) */
	static void TestJitterbug1098(void);
	82
	83	const UCollationResult results[] = {
	84	UCOL_LESS,
	85	UCOL_LESS, /UCOL_GREATER,/
	86	UCOL_LESS,
	87	UCOL_LESS,
	88	UCOL_LESS,
	89	UCOL_LESS,
	90	UCOL_LESS,
	91	UCOL_GREATER,
	92	UCOL_GREATER,
	93	UCOL_LESS, /* 10 */
	94	UCOL_GREATER,
	95	UCOL_LESS,
	96	UCOL_GREATER,
	97	UCOL_GREATER,
	98	UCOL_LESS,
	99	UCOL_LESS,
	100	UCOL_LESS,
	101	/* test primary > 17 */
	102	UCOL_EQUAL,
	103	UCOL_EQUAL,
	104	UCOL_EQUAL, /* 20 */
	105	UCOL_LESS,
	106	UCOL_LESS,
	107	UCOL_EQUAL,
	108	UCOL_EQUAL,
	109	UCOL_EQUAL,
	110	UCOL_LESS,
	111	/* test secondary > 26 */
	112	UCOL_EQUAL,
	113	UCOL_EQUAL,
	114	UCOL_EQUAL,
	115	UCOL_EQUAL,
	116	UCOL_EQUAL, /* 30 */
	117	UCOL_EQUAL,
	118	UCOL_LESS,
	119	UCOL_EQUAL, /* 34 */
	120	UCOL_EQUAL,
	121	UCOL_EQUAL,
	122	UCOL_LESS /* 37 */
	123	};
	124
	125
	126	void addAllCollTest(TestNode** root)
	127	{
	128
	129
	130	addTest(root, &TestPrimary, "tscoll/callcoll/TestPrimary");
	131	addTest(root, &TestSecondary, "tscoll/callcoll/TestSecondary");
	132	addTest(root, &TestTertiary, "tscoll/callcoll/TestTertiary");
	133	addTest(root, &TestIdentical, "tscoll/callcoll/TestIdentical");
	134	addTest(root, &TestExtra, "tscoll/callcoll/TestExtra");
	135	addTest(root, &TestJB581, "tscoll/callcoll/TestJB581");
	136	addTest(root, &TestVariableTop, "tscoll/callcoll/TestVariableTop");
	137	addTest(root, &TestSurrogates, "tscoll/callcoll/TestSurrogates");
	138	addTest(root, &TestInvalidRules, "tscoll/callcoll/TestInvalidRules");
	139	addTest(root, &TestJB1401, "tscoll/callcoll/TestJB1401");
	140	addTest(root, &TestJitterbug1098, "tscoll/callcoll/TestJitterbug1098");
	141
	142	}
	143
	144	static UCollationResult compareUsingPartials(UCollator coll, const UChar source[], int32_t sLen, const UChar target[], int32_t tLen, int32_t pieceSize, UErrorCode status) {
	145	int32_t partialSKResult = 0;
	146	UCharIterator sIter, tIter;
	147	uint32_t sState[2], tState[2];
	148	int32_t sSize = pieceSize, tSize = pieceSize;
	149	int32_t i = 0;
	150	uint8_t sBuf[16384], tBuf[16384];
	151	if(pieceSize > 16384) {
	152	log_err("Partial sortkey size buffer too small. Please consider increasing the buffer!\n");
	153	*status = U_BUFFER_OVERFLOW_ERROR;
	154	return UCOL_EQUAL;
	155	}
	156	*status = U_ZERO_ERROR;
	157	sState[0] = 0; sState[1] = 0;
	158	tState[0] = 0; tState[1] = 0;
	159	while(sSize == pieceSize && tSize == pieceSize && partialSKResult == 0) {
	160	uiter_setString(&sIter, source, sLen);
	161	uiter_setString(&tIter, target, tLen);
	162	sSize = ucol_nextSortKeyPart(coll, &sIter, sState, sBuf, pieceSize, status);
	163	tSize = ucol_nextSortKeyPart(coll, &tIter, tState, tBuf, pieceSize, status);
	164
	165	if(sState[0] != 0 \|\| tState[0] != 0) {
	166	log_verbose("State != 0 : %08X %08X\n", sState[0], tState[0]);
	167	}
	168	log_verbose("%i ", i++);
	169
	170	partialSKResult = memcmp(sBuf, tBuf, pieceSize);
	171	}
	172
	173	if(partialSKResult < 0) {
	174	return UCOL_LESS;
	175	} else if(partialSKResult > 0) {
	176	return UCOL_GREATER;
	177	} else {
	178	return UCOL_EQUAL;
	179	}
	180	}
	181
	182	static void doTestVariant(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
	183	{
	184	int32_t sortklen1, sortklen2, sortklenmax, sortklenmin;
	185	int temp=0, gSortklen1=0,gSortklen2=0;
	186	UCollationResult compareResult, compareResulta, keyResult, compareResultIter = result;
	187	uint8_t sortKey1, sortKey2, sortKey1a, sortKey2a;
	188	uint32_t sLen = u_strlen(source);
	189	uint32_t tLen = u_strlen(target);
	190	char buffer[256];
	191	uint32_t len;
	192	UErrorCode status = U_ZERO_ERROR;
	193	UColAttributeValue norm = ucol_getAttribute(myCollation, UCOL_NORMALIZATION_MODE, &status);
	194
	195	UCharIterator sIter, tIter;
	196	uiter_setString(&sIter, source, sLen);
	197	uiter_setString(&tIter, target, tLen);
	198	compareResultIter = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
	199	if(compareResultIter != result) {
	200	log_err("different results in iterative comparison for UTF-16 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
	201	}
	202
	203	/* convert the strings to UTF-8 and do try comparing with char iterator */
	204	if(QUICK <= 0) { /!QUICK/
	205	char utf8Source[256], utf8Target[256];
	206	int32_t utf8SourceLen = 0, utf8TargetLen = 0;
	207	u_strToUTF8(utf8Source, 256, &utf8SourceLen, source, sLen, &status);
	208	if(U_FAILURE(status)) { /* probably buffer is not big enough */
	209	log_verbose("Src UTF-8 buffer too small! Will not compare!\n");
	210	} else {
	211	u_strToUTF8(utf8Target, 256, &utf8TargetLen, target, tLen, &status);
	212	if(U_SUCCESS(status)) { /* probably buffer is not big enough */
	213	UCollationResult compareResultUTF8 = result, compareResultUTF8Norm = result;
	214	/UCharIterator sIter, tIter;/
	215	/log_verbose("Strings converted to UTF-8:%s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));/
	216	uiter_setUTF8(&sIter, utf8Source, utf8SourceLen);
	217	uiter_setUTF8(&tIter, utf8Target, utf8TargetLen);
	218	/*uiter_setString(&sIter, source, sLen);
	219	uiter_setString(&tIter, target, tLen);*/
	220	compareResultUTF8 = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
	221	ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
	222	sIter.move(&sIter, 0, UITER_START);
	223	tIter.move(&tIter, 0, UITER_START);
	224	compareResultUTF8Norm = ucol_strcollIter(myCollation, &sIter, &tIter, &status);
	225	ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
	226	if(compareResultUTF8 != compareResultIter) {
	227	log_err("different results in iterative comparison for UTF-16 and UTF-8 encoded strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
	228	}
	229	if(compareResultUTF8 != compareResultUTF8Norm) {
	230	log_err("different results in iterative when normalization is turned on with UTF-8 strings. %s, %s\n", aescstrdup(source,-1), aescstrdup(target,-1));
	231	}
	232	} else {
	233	log_verbose("Target UTF-8 buffer too small! Did not compare!\n");
	234	}
	235	if(U_FAILURE(status)) {
	236	log_verbose("UTF-8 strcoll failed! Ignoring result\n");
	237	}
	238	}
	239	}
	240
	241	/* testing the partial sortkeys */
	242	if(1) { /!QUICK/
	243	int32_t i = 0;
	244	int32_t partialSizes[] = { 3, 1, 2, 4, 8, 20, 80 }; /* just size 3 in the quick mode */
	245	int32_t partialSizesSize = 1;
	246	if(QUICK <= 0) {
	247	partialSizesSize = 7;
	248	}
	249	log_verbose("partial sortkey test piecesize=");
	250	for(i = 0; i < partialSizesSize; i++) {
	251	UCollationResult partialSKResult = result, partialNormalizedSKResult = result;
	252	log_verbose("%i ", partialSizes[i]);
	253
	254	partialSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
	255	if(partialSKResult != result) {
	256	log_err("Partial sortkey comparison returned wrong result: %s, %s (size %i)\n",
	257	aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
	258	}
	259
	260	if(QUICK <= 0 && norm != UCOL_ON) {
	261	log_verbose("N ");
	262	ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
	263	partialNormalizedSKResult = compareUsingPartials(myCollation, source, sLen, target, tLen, partialSizes[i], &status);
	264	ucol_setAttribute(myCollation, UCOL_NORMALIZATION_MODE, norm, &status);
	265	if(partialSKResult != partialNormalizedSKResult) {
	266	log_err("Partial sortkey comparison gets different result when normalization is on: %s, %s (size %i)\n",
	267	aescstrdup(source,-1), aescstrdup(target,-1), partialSizes[i]);
	268	}
	269	}
	270	}
	271	log_verbose("\n");
	272	}
	273
	274
	275	compareResult = ucol_strcoll(myCollation, source, sLen, target, tLen);
	276	compareResulta = ucol_strcoll(myCollation, source, -1, target, -1);
	277	if (compareResult != compareResulta) {
	278	log_err("ucol_strcoll result from null terminated and explicit length strings differs.\n");
	279	}
	280
	281	sortklen1=ucol_getSortKey(myCollation, source, sLen, NULL, 0);
	282	sortklen2=ucol_getSortKey(myCollation, target, tLen, NULL, 0);
	283
	284	sortklenmax = (sortklen1>sortklen2?sortklen1:sortklen2);
	285	sortklenmin = (sortklen1<sortklen2?sortklen1:sortklen2);
	286
	287	sortKey1 =(uint8_t)malloc(sizeof(uint8_t) (sortklenmax+1));
	288	sortKey1a=(uint8_t)malloc(sizeof(uint8_t) (sortklenmax+1));
	289	ucol_getSortKey(myCollation, source, sLen, sortKey1, sortklen1+1);
	290	ucol_getSortKey(myCollation, source, -1, sortKey1a, sortklen1+1);
	291
	292	sortKey2 =(uint8_t)malloc(sizeof(uint8_t) (sortklenmax+1));
	293	sortKey2a=(uint8_t)malloc(sizeof(uint8_t) (sortklenmax+1));
	294	ucol_getSortKey(myCollation, target, tLen, sortKey2, sortklen2+1);
	295	ucol_getSortKey(myCollation, target, -1, sortKey2a, sortklen2+1);
	296
	297	/* Check that sort key generated with null terminated string is identical */
	298	/* to that generted with a length specified. */
	299	if (uprv_strcmp((const char )sortKey1, (const char )sortKey1a) != 0 \|\|
	300	uprv_strcmp((const char )sortKey2, (const char )sortKey2a) != 0 ) {
	301	log_err("Sort Keys from null terminated and explicit length strings differ.\n");
	302	}
	303
	304	/memcmp(sortKey1, sortKey2,sortklenmax);/
	305	temp= uprv_strcmp((const char )sortKey1, (const char )sortKey2);
	306	gSortklen1 = uprv_strlen((const char *)sortKey1)+1;
	307	gSortklen2 = uprv_strlen((const char *)sortKey2)+1;
	308	if(sortklen1 != gSortklen1){
	309	log_err("SortKey length does not match Expected: %i Got: %i\n",sortklen1, gSortklen1);
	310	log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey1, buffer, &len));
	311	}
	312	if(sortklen2!= gSortklen2){
	313	log_err("SortKey length does not match Expected: %i Got: %i\n", sortklen2, gSortklen2);
	314	log_verbose("Generated sortkey: %s\n", ucol_sortKeyToString(myCollation, sortKey2, buffer, &len));
	315	}
	316
	317	if(temp < 0) {
	318	keyResult=UCOL_LESS;
	319	}
	320	else if(temp > 0) {
	321	keyResult= UCOL_GREATER;
	322	}
	323	else {
	324	keyResult = UCOL_EQUAL;
	325	}
	326	reportCResult( source, target, sortKey1, sortKey2, compareResult, keyResult, compareResultIter, result );
	327	free(sortKey1);
	328	free(sortKey2);
	329	free(sortKey1a);
	330	free(sortKey2a);
	331
	332	}
	333
	334	void doTest(UCollator* myCollation, const UChar source[], const UChar target[], UCollationResult result)
	335	{
	336	if(myCollation) {
	337	doTestVariant(myCollation, source, target, result);
	338	if(result == UCOL_LESS) {
	339	doTestVariant(myCollation, target, source, UCOL_GREATER);
	340	} else if(result == UCOL_GREATER) {
	341	doTestVariant(myCollation, target, source, UCOL_LESS);
	342	} else {
	343	doTestVariant(myCollation, target, source, UCOL_EQUAL);
	344	}
	345	} else {
	346	log_data_err("No collator! Any data around?\n");
	347	}
	348	}
	349
	350	static void TestTertiary()
	351	{
	352	int32_t len,i;
	353	UChar *rules;
	354	UCollator *myCollation;
	355	UErrorCode status=U_ZERO_ERROR;
	356	const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
	357	len = strlen(str);
	358	rules=(UChar)malloc(sizeof(UChar) * (len+1));
	359	u_uastrcpy(rules, str);
	360
	361	myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
	362	if(U_FAILURE(status)){
	363	log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
	364	}
	365
	366	ucol_setStrength(myCollation, UCOL_TERTIARY);
	367	for (i = 0; i < 17 ; i++)
	368	{
	369	doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
	370	}
	371	free(rules);
	372	ucol_close(myCollation);
	373	myCollation = 0;
	374	}
	375
	376	static void TestPrimary( )
	377	{
	378	int32_t len,i;
	379	UChar *rules;
	380	UCollator *myCollation;
	381	UErrorCode status=U_ZERO_ERROR;
	382	const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
	383	len = strlen(str);
	384	rules=(UChar)malloc(sizeof(UChar) * (len+1));
	385	u_uastrcpy(rules, str);
	386
	387	myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
	388	if(U_FAILURE(status)){
	389	log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
	390	}
	391	ucol_setStrength(myCollation, UCOL_PRIMARY);
	392
	393	for (i = 17; i < 26 ; i++)
	394	{
	395
	396	doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
	397	}
	398	free(rules);
	399	ucol_close(myCollation);
	400	myCollation = 0;
	401	}
	402
	403	static void TestSecondary()
	404	{
	405	int32_t i;
	406	int32_t len;
	407	UChar *rules;
	408	UCollator *myCollation;
	409	UErrorCode status=U_ZERO_ERROR;
	410	const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
	411	len = strlen(str);
	412	rules=(UChar)malloc(sizeof(UChar) * (len+1));
	413	u_uastrcpy(rules, str);
	414
	415	myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
	416	if(U_FAILURE(status)){
	417	log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
	418	}
	419	ucol_setStrength(myCollation, UCOL_SECONDARY);
	420	for (i = 26; i < 34 ; i++)
	421	{
	422	doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
	423	}
	424	free(rules);
	425	ucol_close(myCollation);
	426	myCollation = 0;
	427	}
	428
	429	static void TestIdentical()
	430	{
	431	int32_t i;
	432	int32_t len;
	433	UChar *rules = 0;
	434	UCollator *myCollation;
	435	UErrorCode status=U_ZERO_ERROR;
	436	const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
	437	len = strlen(str);
	438	rules=(UChar)malloc(sizeof(UChar) * (len+1));
	439	u_uastrcpy(rules, str);
	440
	441	myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_IDENTICAL, NULL,&status);
	442	if(U_FAILURE(status)){
	443	log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
	444	}
	445	for(i= 34; i<37; i++)
	446	{
	447	doTest(myCollation, testSourceCases[i], testTargetCases[i], results[i]);
	448	}
	449	free(rules);
	450	ucol_close(myCollation);
	451	myCollation = 0;
	452	}
	453
	454	static void TestExtra()
	455	{
	456	int32_t i, j;
	457	int32_t len;
	458	UChar *rules;
	459	UCollator *myCollation;
	460	UErrorCode status = U_ZERO_ERROR;
	461	const char* str="& C < ch, cH, Ch, CH & Five, 5 & Four, 4 & one, 1 & Ampersand; '&' & Two, 2 ";
	462	len = strlen(str);
	463	rules=(UChar)malloc(sizeof(UChar) * (len+1));
	464	u_uastrcpy(rules, str);
	465
	466	myCollation=ucol_openRules(rules, len, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
	467	if(U_FAILURE(status)){
	468	log_err("ERROR: in creation of rule based collator :%s\n", myErrorName(status));
	469	}
	470	ucol_setStrength(myCollation, UCOL_TERTIARY);
	471	for (i = 0; i < COUNT_TEST_CASES-1 ; i++)
	472	{
	473	for (j = i + 1; j < COUNT_TEST_CASES; j += 1)
	474	{
	475
	476	doTest(myCollation, testCases[i], testCases[j], UCOL_LESS);
	477	}
	478	}
	479	free(rules);
	480	ucol_close(myCollation);
	481	myCollation = 0;
	482	}
	483
	484	static void TestJB581(void)
	485	{
	486	UChar dispName [100];
	487	int32_t bufferLen = 0;
	488	UChar source [100];
	489	UChar target [100];
	490	UCollationResult result = UCOL_EQUAL;
	491	uint8_t sourceKeyArray [100];
	492	uint8_t targetKeyArray [100];
	493	int32_t sourceKeyOut = 0,
	494	targetKeyOut = 0;
	495	UCollator *myCollator = 0;
	496	UErrorCode status = U_ZERO_ERROR;
	497
	498	/u_uastrcpy(source, "This is a test.");/
	499	/u_uastrcpy(target, "THISISATEST.");/
	500	u_uastrcpy(source, "THISISATEST.");
	501	u_uastrcpy(target, "Thisisatest.");
	502
	503	myCollator = ucol_open("en_US", &status);
	504	if (U_FAILURE(status)){
	505	bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
	506	/Report the error with display name... /
	507	log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
	508	return;
	509	}
	510	result = ucol_strcoll(myCollator, source, -1, target, -1);
	511	/* result is 1, secondary differences only for ignorable space characters*/
	512	if (result != 1)
	513	{
	514	log_err("Comparing two strings with only secondary differences in C failed.\n");
	515	}
	516	/* To compare them with just primary differences */
	517	ucol_setStrength(myCollator, UCOL_PRIMARY);
	518	result = ucol_strcoll(myCollator, source, -1, target, -1);
	519	/* result is 0 */
	520	if (result != 0)
	521	{
	522	log_err("Comparing two strings with no differences in C failed.\n");
	523	}
	524	/* Now, do the same comparison with keys */
	525	sourceKeyOut = ucol_getSortKey(myCollator, source, -1, sourceKeyArray, 100);
	526	targetKeyOut = ucol_getSortKey(myCollator, target, -1, targetKeyArray, 100);
	527	result = 0;
	528	bufferLen = ((targetKeyOut > 100) ? 100 : targetKeyOut);
	529	result = memcmp(sourceKeyArray, targetKeyArray, bufferLen);
	530	if (result != 0)
	531	{
	532	log_err("Comparing two strings with sort keys in C failed.\n");
	533	}
	534	ucol_close(myCollator);
	535	}
	536
	537	static void TestJB1401(void)
	538	{
	539	UCollator *myCollator = 0;
	540	UErrorCode status = U_ZERO_ERROR;
	541	static UChar NFD_UnsafeStartChars[] = {
	542	0x0f73, /* Tibetan Vowel Sign II */
	543	0x0f75, /* Tibetan Vowel Sign UU */
	544	0x0f81, /* Tibetan Vowel Sign Reversed II */
	545	0
	546	};
	547	int i;
	548
	549
	550	myCollator = ucol_open("en_US", &status);
	551	if (U_FAILURE(status)){
	552	int32_t bufferLen = 0;
	553	UChar dispName [100];
	554	bufferLen = uloc_getDisplayName("en_US", 0, dispName, 100, &status);
	555	/Report the error with display name... /
	556	log_err("ERROR: Failed to create the collator for : \"%s\"\n", dispName);
	557	return;
	558	}
	559	ucol_setAttribute(myCollator, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
	560	if (U_FAILURE(status)){
	561	log_err("ERROR: Failed to set normalization mode ON for collator.\n");
	562	return;
	563	}
	564
	565	for (i=0; ; i++) {
	566	UChar c;
	567	UChar X[4];
	568	UChar Y[20];
	569	UChar Z[20];
	570
	571	/* Get the next funny character to be tested, and set up the
	572	* three test strings X, Y, Z, consisting of an A-grave + test char,
	573	* in original form, NFD, and then NFC form.
	574	*/
	575	c = NFD_UnsafeStartChars[i];
	576	if (c==0) {break;}
	577
	578	X[0]=0xC0; X[1]=c; X[2]=0; /* \u00C0 is A Grave*/
	579
	580	unorm_normalize(X, -1, UNORM_NFD, 0, Y, 20, &status);
	581	unorm_normalize(Y, -1, UNORM_NFC, 0, Z, 20, &status);
	582	if (U_FAILURE(status)){
	583	log_err("ERROR: Failed to normalize test of character %x\n", c);
	584	return;
	585	}
	586
	587	/* Collation test. All three strings should be equal.
	588	* doTest does both strcoll and sort keys, with params in both orders.
	589	*/
	590	doTest(myCollator, X, Y, UCOL_EQUAL);
	591	doTest(myCollator, X, Z, UCOL_EQUAL);
	592	doTest(myCollator, Y, Z, UCOL_EQUAL);
	593
	594	/* Run collation element iterators over the three strings. Results should be same for each.
	595	*/
	596	{
	597	UCollationElements ceiX, ceiY, *ceiZ;
	598	int32_t ceX, ceY, ceZ;
	599	int j;
	600
	601	ceiX = ucol_openElements(myCollator, X, -1, &status);
	602	ceiY = ucol_openElements(myCollator, Y, -1, &status);
	603	ceiZ = ucol_openElements(myCollator, Z, -1, &status);
	604	if (U_FAILURE(status)) {
	605	log_err("ERROR: uucol_openElements failed.\n");
	606	return;
	607	}
	608
	609	for (j=0;; j++) {
	610	ceX = ucol_next(ceiX, &status);
	611	ceY = ucol_next(ceiY, &status);
	612	ceZ = ucol_next(ceiZ, &status);
	613	if (U_FAILURE(status)) {
	614	log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
	615	break;
	616	}
	617	if (ceX != ceY \|\| ceY != ceZ) {
	618	log_err("ERROR: ucol_next failed for iteration #%d.\n", j);
	619	break;
	620	}
	621	if (ceX == UCOL_NULLORDER) {
	622	break;
	623	}
	624	}
	625	ucol_closeElements(ceiX);
	626	ucol_closeElements(ceiY);
	627	ucol_closeElements(ceiZ);
	628	}
	629	}
	630	ucol_close(myCollator);
	631	}
	632
	633
	634
	635	/**
	636	* Tests the [variable top] tag in rule syntax. Since the default [alternate]
	637	* tag has the value shifted, any codepoints before [variable top] should give
	638	* a primary ce of 0.
	639	*/
	640	static void TestVariableTop(void)
	641	{
	642	const char *str = "&z = [variable top]";
	643	int len = strlen(str);
	644	UChar *rules;
	645	UCollator *myCollation;
	646	UCollator *enCollation;
	647	UErrorCode status = U_ZERO_ERROR;
	648	UChar source[1];
	649	UChar ch;
	650	uint8_t result[20];
	651	uint8_t expected[20];
	652
	653	rules = (UChar)malloc(sizeof(UChar) * (len + 1));
	654	u_uastrcpy(rules, str);
	655
	656	enCollation = ucol_open("en_US", &status);
	657	myCollation = ucol_openRules(rules, len, UCOL_OFF,
	658	UCOL_PRIMARY,NULL, &status);
	659	if (U_FAILURE(status)) {
	660	log_err("ERROR: in creation of rule based collator :%s\n",
	661	myErrorName(status));
	662	return;
	663	}
	664
	665	ucol_setStrength(enCollation, UCOL_PRIMARY);
	666	ucol_setAttribute(enCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
	667	&status);
	668	ucol_setAttribute(myCollation, UCOL_ALTERNATE_HANDLING, UCOL_SHIFTED,
	669	&status);
	670
	671	if (ucol_getAttribute(myCollation, UCOL_ALTERNATE_HANDLING, &status) !=
	672	UCOL_SHIFTED \|\| U_FAILURE(status)) {
	673	log_err("ERROR: ALTERNATE_HANDLING value can not be set to SHIFTED\n");
	674	}
	675
	676	uprv_memset(expected, 0, 20);
	677
	678	/* space is supposed to be a variable */
	679	source[0] = ' ';
	680	len = ucol_getSortKey(enCollation, source, 1, result,
	681	sizeof(result));
	682
	683	if (uprv_memcmp(expected, result, len) != 0) {
	684	log_err("ERROR: SHIFTED alternate does not return 0 for primary of space\n");
	685	}
	686
	687	ch = 'a';
	688	while (ch < 'z') {
	689	source[0] = ch;
	690	len = ucol_getSortKey(myCollation, source, 1, result,
	691	sizeof(result));
	692	if (uprv_memcmp(expected, result, len) != 0) {
	693	log_err("ERROR: SHIFTED alternate does not return 0 for primary of %c\n",
	694	ch);
	695	}
	696	ch ++;
	697	}
	698
	699	free(rules);
	700	ucol_close(enCollation);
	701	ucol_close(myCollation);
	702	enCollation = NULL;
	703	myCollation = NULL;
	704	}
	705
	706	/**
	707	* Tests surrogate support.
	708	* NOTE: This test used \\uD801\\uDC01 pair, which is now assigned to Desseret
	709	* Therefore, another (unassigned) code point was used for this test.
	710	*/
	711	static void TestSurrogates(void)
	712	{
	713	const char *str =
	714	"&z<'\\uD800\\uDC00'<'\\uD800\\uDC0A\\u0308'<A";
	715	int len = strlen(str);
	716	int rlen = 0;
	717	UChar *rules;
	718	UCollator *myCollation;
	719	UCollator *enCollation;
	720	UErrorCode status = U_ZERO_ERROR;
	721	UChar source[][4] =
	722	{{'z', 0, 0}, {0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {0xD800, 0xDC02}};
	723	UChar target[][4] =
	724	{{0xD800, 0xDC00, 0}, {0xD800, 0xDC0A, 0x0308, 0}, {'A', 0, 0}, {0xD800, 0xDC03}};
	725	int count = 0;
	726	uint8_t enresult[20], myresult[20];
	727	int enlen, mylen;
	728
	729	/* tests for open rules with surrogate rules */
	730	rules = (UChar)malloc(sizeof(UChar) * (len + 1));
	731	rlen = u_unescape(str, rules, len);
	732
	733	enCollation = ucol_open("en_US", &status);
	734	myCollation = ucol_openRules(rules, rlen, UCOL_OFF,
	735	UCOL_TERTIARY,NULL, &status);
	736	if (U_FAILURE(status)) {
	737	log_err("ERROR: in creation of rule based collator :%s\n",
	738	myErrorName(status));
	739	return;
	740	}
	741
	742	/*
	743	this test is to verify the supplementary sort key order in the english
	744	collator
	745	*/
	746	log_verbose("start of english collation supplementary characters test\n");
	747	while (count < 2) {
	748	doTest(enCollation, source[count], target[count], UCOL_LESS);
	749	count ++;
	750	}
	751	doTest(enCollation, source[count], target[count], UCOL_GREATER);
	752
	753	log_verbose("start of tailored collation supplementary characters test\n");
	754	count = 0;
	755	/* tests getting collation elements for surrogates for tailored rules */
	756	while (count < 4) {
	757	doTest(myCollation, source[count], target[count], UCOL_LESS);
	758	count ++;
	759	}
	760
	761	/* tests that \uD800\uDC02 still has the same value, not changed */
	762	enlen = ucol_getSortKey(enCollation, source[3], 2, enresult, 20);
	763	mylen = ucol_getSortKey(myCollation, source[3], 2, myresult, 20);
	764	if (enlen != mylen \|\|
	765	uprv_memcmp(enresult, myresult, enlen) != 0) {
	766	log_verbose("Failed : non-tailored supplementary characters should have the same value\n");
	767	}
	768
	769	free(rules);
	770	ucol_close(enCollation);
	771	ucol_close(myCollation);
	772	enCollation = NULL;
	773	myCollation = NULL;
	774	}
	775
	776	/*
	777	*### TODO: Add more invalid rules to test all different scenarios.
	778	*
	779	*/
	780	static void
	781	TestInvalidRules(){
	782	#define MAX_ERROR_STATES 2
	783
	784	static const char* rulesArr[MAX_ERROR_STATES] = {
	785	"& C < ch, cH, Ch[this should fail]<d",
	786	"& C < ch, cH, & Ch[variable top]"
	787	};
	788	static const char* preContextArr[MAX_ERROR_STATES] = {
	789	"his should fail",
	790	"& C < ch, cH, ",
	791
	792	};
	793	static const char* postContextArr[MAX_ERROR_STATES] = {
	794	"<d",
	795	" Ch[variable t"
	796	};
	797	int i;
	798
	799	for(i = 0;i<MAX_ERROR_STATES;i++){
	800	UChar rules[1000] = { '\0' };
	801	UChar preContextExp[1000] = { '\0' };
	802	UChar postContextExp[1000] = { '\0' };
	803	UParseError parseError;
	804	UErrorCode status = U_ZERO_ERROR;
	805	UCollator* coll=0;
	806	u_charsToUChars(rulesArr[i],rules,uprv_strlen(rulesArr[i])+1);
	807	u_charsToUChars(preContextArr[i],preContextExp,uprv_strlen(preContextArr[i])+1);
	808	u_charsToUChars(postContextArr[i],postContextExp,uprv_strlen(postContextArr[i])+1);
	809	/* clean up stuff in parseError */
	810	u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
	811	u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
	812	/* open the rules and test */
	813	coll = ucol_openRules(rules,u_strlen(rules),UCOL_OFF,UCOL_DEFAULT_STRENGTH,&parseError,&status);
	814	if(u_strcmp(parseError.preContext,preContextExp)!=0){
	815	log_err("preContext in UParseError for ucol_openRules does not match\n");
	816	}
	817	if(u_strcmp(parseError.postContext,postContextExp)!=0){
	818	log_err("postContext in UParseError for ucol_openRules does not match\n");
	819	}
	820	}
	821	}
	822
	823	static void
	824	TestJitterbug1098(){
	825	UChar rule[1000];
	826	UCollator* c1 = NULL;
	827	UErrorCode status = U_ZERO_ERROR;
	828	UParseError parseError;
	829	char preContext[200]={0};
	830	char postContext[200]={0};
	831	int i=0;
	832	const char* rules[] = {
	833	"&''<\\\\",
	834	"&\\'<\\\\",
	835	"&\\\"<'\\'",
	836	"&'\"'<\\'",
	837	'\0'
	838
	839	};
	840	const UCollationResult results1098[] = {
	841	UCOL_LESS,
	842	UCOL_LESS,
	843	UCOL_LESS,
	844	UCOL_LESS,
	845	};
	846	const UChar input[][2]= {
	847	{0x0027,0x005c},
	848	{0x0027,0x005c},
	849	{0x0022,0x005c},
	850	{0x0022,0x0027},
	851	};
	852	UChar X[2] ={0};
	853	UChar Y[2] ={0};
	854	u_memset(parseError.preContext,0x0000,U_PARSE_CONTEXT_LEN);
	855	u_memset(parseError.postContext,0x0000,U_PARSE_CONTEXT_LEN);
	856	for(;rules[i]!=0;i++){
	857	u_uastrcpy(rule, rules[i]);
	858	c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, &parseError, &status);
	859	if(U_FAILURE(status)){
	860	u_UCharsToChars(parseError.preContext,preContext,20);
	861	u_UCharsToChars(parseError.postContext,postContext,20);
	862	log_err("Could not parse the rules syntax. Error: %s ", u_errorName(status));
	863	log_verbose("\n\tPre-Context: %s \n\tPost-Context:%s \n",preContext,postContext);
	864	return;
	865	}
	866	X[0] = input[i][0];
	867	Y[0] = input[i][1];
	868	doTest(c1,X,Y,results1098[i]);
	869	ucol_close(c1);
	870	}
	871	}
	872
	873
	874	#endif /* #if !UCONFIG_NO_COLLATION */