git.saurik.com Git - apple/icu.git/blame_incremental - icuSources/test/cintltst/citertst.c

... / ...

Commit	Line	Data
	1	/********************************************************************
	2	* COPYRIGHT:
	3	* Copyright (c) 1997-2003, International Business Machines Corporation and
	4	* others. All Rights Reserved.
	5	********************************************************************/
	6	/********************************************************************************
	7	*
	8	* File CITERTST.C
	9	*
	10	* Modification History:
	11	* Date Name Description
	12	* Madhu Katragadda Ported for C API
	13	* 02/19/01 synwee Modified test case for new collation iterator
	14	*********************************************************************************/
	15	/*
	16	* Collation Iterator tests.
	17	* (Let me reiterate my position...)
	18	*/
	19
	20	#include "unicode/utypes.h"
	21
	22	#if !UCONFIG_NO_COLLATION
	23
	24	#include "unicode/ucol.h"
	25	#include "unicode/uloc.h"
	26	#include "unicode/uchar.h"
	27	#include "unicode/ustring.h"
	28	#include "cmemory.h"
	29	#include "cintltst.h"
	30	#include "citertst.h"
	31	#include "ccolltst.h"
	32	#include "filestrm.h"
	33	#include "cstring.h"
	34	#include "ucol_imp.h"
	35	#include "ucol_tok.h"
	36	#include <stdio.h>
	37
	38	extern uint8_t ucol_uprv_getCaseBits(const UChar , uint32_t, UErrorCode );
	39
	40	void addCollIterTest(TestNode** root)
	41	{
	42	addTest(root, &TestPrevious, "tscoll/citertst/TestPrevious");
	43	addTest(root, &TestOffset, "tscoll/citertst/TestOffset");
	44	addTest(root, &TestSetText, "tscoll/citertst/TestSetText");
	45	addTest(root, &TestMaxExpansion, "tscoll/citertst/TestMaxExpansion");
	46	addTest(root, &TestUnicodeChar, "tscoll/citertst/TestUnicodeChar");
	47	addTest(root, &TestNormalizedUnicodeChar,
	48	"tscoll/citertst/TestNormalizedUnicodeChar");
	49	addTest(root, &TestNormalization, "tscoll/citertst/TestNormalization");
	50	addTest(root, &TestBug672, "tscoll/citertst/TestBug672");
	51	addTest(root, &TestBug672Normalize, "tscoll/citertst/TestBug672Normalize");
	52	addTest(root, &TestSmallBuffer, "tscoll/citertst/TestSmallBuffer");
	53	addTest(root, &TestCEs, "tscoll/citertst/TestCEs");
	54	addTest(root, &TestDiscontiguos, "tscoll/citertst/TestDiscontiguos");
	55	addTest(root, &TestCEBufferOverflow, "tscoll/citertst/TestCEBufferOverflow");
	56	addTest(root, &TestCEValidity, "tscoll/citertst/TestCEValidity");
	57	addTest(root, &TestSortKeyValidity, "tscoll/citertst/TestSortKeyValidity");
	58	}
	59
	60	/* The locales we support */
	61
	62	static const char * LOCALES[] = {"en_AU", "en_BE", "en_CA"};
	63
	64	static void TestBug672() {
	65	UErrorCode status = U_ZERO_ERROR;
	66	UChar pattern[20];
	67	UChar text[50];
	68	int i;
	69	int result[3][3];
	70
	71	u_uastrcpy(pattern, "resume");
	72	u_uastrcpy(text, "Time to resume updating my resume.");
	73
	74	for (i = 0; i < 3; ++ i) {
	75	UCollator *coll = ucol_open(LOCALES[i], &status);
	76	UCollationElements *pitr = ucol_openElements(coll, pattern, -1,
	77	&status);
	78	UCollationElements *titer = ucol_openElements(coll, text, -1,
	79	&status);
	80	if (U_FAILURE(status)) {
	81	log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
	82	myErrorName(status));
	83	return;
	84	}
	85
	86	log_verbose("locale tested %s\n", LOCALES[i]);
	87
	88	while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
	89	U_SUCCESS(status)) {
	90	}
	91	if (U_FAILURE(status)) {
	92	log_err("ERROR: reversing collation iterator :%s\n",
	93	myErrorName(status));
	94	return;
	95	}
	96	ucol_reset(pitr);
	97
	98	ucol_setOffset(titer, u_strlen(pattern), &status);
	99	if (U_FAILURE(status)) {
	100	log_err("ERROR: setting offset in collator :%s\n",
	101	myErrorName(status));
	102	return;
	103	}
	104	result[i][0] = ucol_getOffset(titer);
	105	log_verbose("Text iterator set to offset %d\n", result[i][0]);
	106
	107	/* Use previous() */
	108	ucol_previous(titer, &status);
	109	result[i][1] = ucol_getOffset(titer);
	110	log_verbose("Current offset %d after previous\n", result[i][1]);
	111
	112	/* Add one to index */
	113	log_verbose("Adding one to current offset...\n");
	114	ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
	115	if (U_FAILURE(status)) {
	116	log_err("ERROR: setting offset in collator :%s\n",
	117	myErrorName(status));
	118	return;
	119	}
	120	result[i][2] = ucol_getOffset(titer);
	121	log_verbose("Current offset in text = %d\n", result[i][2]);
	122	ucol_closeElements(pitr);
	123	ucol_closeElements(titer);
	124	ucol_close(coll);
	125	}
	126
	127	if (uprv_memcmp(result[0], result[1], 3) != 0 \|\|
	128	uprv_memcmp(result[1], result[2], 3) != 0) {
	129	log_err("ERROR: Different locales have different offsets at the same character\n");
	130	}
	131	}
	132
	133
	134
	135	/* Running this test with normalization enabled showed up a bug in the incremental
	136	normalization code. */
	137	static void TestBug672Normalize() {
	138	UErrorCode status = U_ZERO_ERROR;
	139	UChar pattern[20];
	140	UChar text[50];
	141	int i;
	142	int result[3][3];
	143
	144	u_uastrcpy(pattern, "resume");
	145	u_uastrcpy(text, "Time to resume updating my resume.");
	146
	147	for (i = 0; i < 3; ++ i) {
	148	UCollator *coll = ucol_open(LOCALES[i], &status);
	149	UCollationElements *pitr = NULL;
	150	UCollationElements *titer = NULL;
	151
	152	ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
	153
	154	pitr = ucol_openElements(coll, pattern, -1, &status);
	155	titer = ucol_openElements(coll, text, -1, &status);
	156	if (U_FAILURE(status)) {
	157	log_err("ERROR: in creation of either the collator or the collation iterator :%s\n",
	158	myErrorName(status));
	159	return;
	160	}
	161
	162	log_verbose("locale tested %s\n", LOCALES[i]);
	163
	164	while (ucol_next(pitr, &status) != UCOL_NULLORDER &&
	165	U_SUCCESS(status)) {
	166	}
	167	if (U_FAILURE(status)) {
	168	log_err("ERROR: reversing collation iterator :%s\n",
	169	myErrorName(status));
	170	return;
	171	}
	172	ucol_reset(pitr);
	173
	174	ucol_setOffset(titer, u_strlen(pattern), &status);
	175	if (U_FAILURE(status)) {
	176	log_err("ERROR: setting offset in collator :%s\n",
	177	myErrorName(status));
	178	return;
	179	}
	180	result[i][0] = ucol_getOffset(titer);
	181	log_verbose("Text iterator set to offset %d\n", result[i][0]);
	182
	183	/* Use previous() */
	184	ucol_previous(titer, &status);
	185	result[i][1] = ucol_getOffset(titer);
	186	log_verbose("Current offset %d after previous\n", result[i][1]);
	187
	188	/* Add one to index */
	189	log_verbose("Adding one to current offset...\n");
	190	ucol_setOffset(titer, ucol_getOffset(titer) + 1, &status);
	191	if (U_FAILURE(status)) {
	192	log_err("ERROR: setting offset in collator :%s\n",
	193	myErrorName(status));
	194	return;
	195	}
	196	result[i][2] = ucol_getOffset(titer);
	197	log_verbose("Current offset in text = %d\n", result[i][2]);
	198	ucol_closeElements(pitr);
	199	ucol_closeElements(titer);
	200	ucol_close(coll);
	201	}
	202
	203	if (uprv_memcmp(result[0], result[1], 3) != 0 \|\|
	204	uprv_memcmp(result[1], result[2], 3) != 0) {
	205	log_err("ERROR: Different locales have different offsets at the same character\n");
	206	}
	207	}
	208
	209
	210
	211
	212	/**
	213	* Test for CollationElementIterator previous and next for the whole set of
	214	* unicode characters.
	215	*/
	216	static void TestUnicodeChar()
	217	{
	218	UChar source[0x100];
	219	UCollator *en_us;
	220	UCollationElements *iter;
	221	UErrorCode status = U_ZERO_ERROR;
	222	UChar codepoint;
	223
	224	UChar *test;
	225	en_us = ucol_open("en_US", &status);
	226	if (U_FAILURE(status)){
	227	log_err("ERROR: in creation of collation data using ucol_open()\n %s\n",
	228	myErrorName(status));
	229	return;
	230	}
	231
	232	for (codepoint = 1; codepoint < 0xFFFE;)
	233	{
	234	test = source;
	235
	236	while (codepoint % 0xFF != 0)
	237	{
	238	if (u_isdefined(codepoint))
	239	*(test ++) = codepoint;
	240	codepoint ++;
	241	}
	242
	243	if (u_isdefined(codepoint))
	244	*(test ++) = codepoint;
	245
	246	if (codepoint != 0xFFFF)
	247	codepoint ++;
	248
	249	*test = 0;
	250	iter=ucol_openElements(en_us, source, u_strlen(source), &status);
	251	if(U_FAILURE(status)){
	252	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	253	myErrorName(status));
	254	ucol_close(en_us);
	255	return;
	256	}
	257	/* A basic test to see if it's working at all */
	258	log_verbose("codepoint testing %x\n", codepoint);
	259	backAndForth(iter);
	260	ucol_closeElements(iter);
	261
	262	/* null termination test */
	263	iter=ucol_openElements(en_us, source, -1, &status);
	264	if(U_FAILURE(status)){
	265	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	266	myErrorName(status));
	267	ucol_close(en_us);
	268	return;
	269	}
	270	/* A basic test to see if it's working at all */
	271	backAndForth(iter);
	272	ucol_closeElements(iter);
	273	}
	274
	275	ucol_close(en_us);
	276	}
	277
	278	/**
	279	* Test for CollationElementIterator previous and next for the whole set of
	280	* unicode characters with normalization on.
	281	*/
	282	static void TestNormalizedUnicodeChar()
	283	{
	284	UChar source[0x100];
	285	UCollator *th_th;
	286	UCollationElements *iter;
	287	UErrorCode status = U_ZERO_ERROR;
	288	UChar codepoint;
	289
	290	UChar *test;
	291	/* thai should have normalization on */
	292	th_th = ucol_open("th_TH", &status);
	293	if (U_FAILURE(status)){
	294	log_err("ERROR: in creation of thai collation using ucol_open()\n %s\n",
	295	myErrorName(status));
	296	return;
	297	}
	298
	299	for (codepoint = 1; codepoint < 0xFFFE;)
	300	{
	301	test = source;
	302
	303	while (codepoint % 0xFF != 0)
	304	{
	305	if (u_isdefined(codepoint))
	306	*(test ++) = codepoint;
	307	codepoint ++;
	308	}
	309
	310	if (u_isdefined(codepoint))
	311	*(test ++) = codepoint;
	312
	313	if (codepoint != 0xFFFF)
	314	codepoint ++;
	315
	316	*test = 0;
	317	iter=ucol_openElements(th_th, source, u_strlen(source), &status);
	318	if(U_FAILURE(status)){
	319	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	320	myErrorName(status));
	321	ucol_close(th_th);
	322	return;
	323	}
	324
	325	backAndForth(iter);
	326	ucol_closeElements(iter);
	327
	328	iter=ucol_openElements(th_th, source, -1, &status);
	329	if(U_FAILURE(status)){
	330	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	331	myErrorName(status));
	332	ucol_close(th_th);
	333	return;
	334	}
	335
	336	backAndForth(iter);
	337	ucol_closeElements(iter);
	338	}
	339
	340	ucol_close(th_th);
	341	}
	342
	343	/**
	344	* Test the incremental normalization
	345	*/
	346	static void TestNormalization()
	347	{
	348	UErrorCode status = U_ZERO_ERROR;
	349	const char *str =
	350	"&a < \\u0300\\u0315 < A\\u0300\\u0315 < \\u0316\\u0315B < \\u0316\\u0300\\u0315";
	351	UCollator *coll;
	352	UChar rule[50];
	353	int rulelen = u_unescape(str, rule, 50);
	354	int count = 0;
	355	const char *testdata[] =
	356	{"\\u1ED9", "o\\u0323\\u0302",
	357	"\\u0300\\u0315", "\\u0315\\u0300",
	358	"A\\u0300\\u0315B", "A\\u0315\\u0300B",
	359	"A\\u0316\\u0315B", "A\\u0315\\u0316B",
	360	"\\u0316\\u0300\\u0315", "\\u0315\\u0300\\u0316",
	361	"A\\u0316\\u0300\\u0315B", "A\\u0315\\u0300\\u0316B",
	362	"\\u0316\\u0315\\u0300", "A\\u0316\\u0315\\u0300B"};
	363	int32_t srclen;
	364	UChar source[10];
	365	UCollationElements *iter;
	366
	367	coll = ucol_openRules(rule, rulelen, UCOL_ON, UCOL_TERTIARY, NULL, &status);
	368	ucol_setAttribute(coll, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
	369	if (U_FAILURE(status)){
	370	log_err("ERROR: in creation of collator using ucol_openRules()\n %s\n",
	371	myErrorName(status));
	372	return;
	373	}
	374
	375	srclen = u_unescape(testdata[0], source, 10);
	376	iter = ucol_openElements(coll, source, srclen, &status);
	377	backAndForth(iter);
	378	ucol_closeElements(iter);
	379
	380	srclen = u_unescape(testdata[1], source, 10);
	381	iter = ucol_openElements(coll, source, srclen, &status);
	382	backAndForth(iter);
	383	ucol_closeElements(iter);
	384
	385	while (count < 12) {
	386	srclen = u_unescape(testdata[count], source, 10);
	387	iter = ucol_openElements(coll, source, srclen, &status);
	388
	389	if (U_FAILURE(status)){
	390	log_err("ERROR: in creation of collator element iterator\n %s\n",
	391	myErrorName(status));
	392	return;
	393	}
	394	backAndForth(iter);
	395	ucol_closeElements(iter);
	396
	397	iter = ucol_openElements(coll, source, -1, &status);
	398
	399	if (U_FAILURE(status)){
	400	log_err("ERROR: in creation of collator element iterator\n %s\n",
	401	myErrorName(status));
	402	return;
	403	}
	404	backAndForth(iter);
	405	ucol_closeElements(iter);
	406	count ++;
	407	}
	408	ucol_close(coll);
	409	}
	410
	411	/**
	412	* Test for CollationElementIterator.previous()
	413	*
	414	* @bug 4108758 - Make sure it works with contracting characters
	415	*
	416	*/
	417	static void TestPrevious()
	418	{
	419	UCollator *coll=NULL;
	420	UChar rule[50];
	421	UChar *source;
	422	UCollator c1, c2, *c3;
	423	UCollationElements *iter;
	424	UErrorCode status = U_ZERO_ERROR;
	425
	426	test1=(UChar)malloc(sizeof(UChar) 50);
	427	test2=(UChar)malloc(sizeof(UChar) 50);
	428	u_uastrcpy(test1, "What subset of all possible test cases?");
	429	u_uastrcpy(test2, "has the highest probability of detecting");
	430	coll = ucol_open("en_US", &status);
	431
	432	iter=ucol_openElements(coll, test1, u_strlen(test1), &status);
	433	log_verbose("English locale testing back and forth\n");
	434	if(U_FAILURE(status)){
	435	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	436	myErrorName(status));
	437	ucol_close(coll);
	438	return;
	439	}
	440	/* A basic test to see if it's working at all */
	441	backAndForth(iter);
	442	ucol_closeElements(iter);
	443	ucol_close(coll);
	444
	445	/* Test with a contracting character sequence */
	446	u_uastrcpy(rule, "&a,A < b,B < c,C, d,D < z,Z < ch,cH,Ch,CH");
	447	c1 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
	448
	449	log_verbose("Contraction rule testing back and forth with no normalization\n");
	450
	451	if (c1 == NULL \|\| U_FAILURE(status))
	452	{
	453	log_err("Couldn't create a RuleBasedCollator with a contracting sequence\n %s\n",
	454	myErrorName(status));
	455	return;
	456	}
	457	source=(UChar)malloc(sizeof(UChar) 20);
	458	u_uastrcpy(source, "abchdcba");
	459	iter=ucol_openElements(c1, source, u_strlen(source), &status);
	460	if(U_FAILURE(status)){
	461	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	462	myErrorName(status));
	463	return;
	464	}
	465	backAndForth(iter);
	466	ucol_closeElements(iter);
	467	ucol_close(c1);
	468
	469	/* Test with an expanding character sequence */
	470	u_uastrcpy(rule, "&a < b < c/abd < d");
	471	c2 = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL, &status);
	472	log_verbose("Expansion rule testing back and forth with no normalization\n");
	473	if (c2 == NULL \|\| U_FAILURE(status))
	474	{
	475	log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
	476	myErrorName(status));
	477	return;
	478	}
	479	u_uastrcpy(source, "abcd");
	480	iter=ucol_openElements(c2, source, u_strlen(source), &status);
	481	if(U_FAILURE(status)){
	482	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	483	myErrorName(status));
	484	return;
	485	}
	486	backAndForth(iter);
	487	ucol_closeElements(iter);
	488	ucol_close(c2);
	489	/* Now try both */
	490	u_uastrcpy(rule, "&a < b < c/aba < d < z < ch");
	491	c3 = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH,NULL, &status);
	492	log_verbose("Expansion/contraction rule testing back and forth with no normalization\n");
	493
	494	if (c3 == NULL \|\| U_FAILURE(status))
	495	{
	496	log_err("Couldn't create a RuleBasedCollator with a contracting sequence.\n %s\n",
	497	myErrorName(status));
	498	return;
	499	}
	500	u_uastrcpy(source, "abcdbchdc");
	501	iter=ucol_openElements(c3, source, u_strlen(source), &status);
	502	if(U_FAILURE(status)){
	503	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	504	myErrorName(status));
	505	return;
	506	}
	507	backAndForth(iter);
	508	ucol_closeElements(iter);
	509	ucol_close(c3);
	510	source[0] = 0x0e41;
	511	source[1] = 0x0e02;
	512	source[2] = 0x0e41;
	513	source[3] = 0x0e02;
	514	source[4] = 0x0e27;
	515	source[5] = 0x61;
	516	source[6] = 0x62;
	517	source[7] = 0x63;
	518	source[8] = 0;
	519
	520	coll = ucol_open("th_TH", &status);
	521	log_verbose("Thai locale testing back and forth with normalization\n");
	522	iter=ucol_openElements(coll, source, u_strlen(source), &status);
	523	if(U_FAILURE(status)){
	524	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	525	myErrorName(status));
	526	return;
	527	}
	528	backAndForth(iter);
	529	ucol_closeElements(iter);
	530	ucol_close(coll);
	531
	532	/* prev test */
	533	source[0] = 0x0061;
	534	source[1] = 0x30CF;
	535	source[2] = 0x3099;
	536	source[3] = 0x30FC;
	537	source[4] = 0;
	538
	539	coll = ucol_open("ja_JP", &status);
	540	log_verbose("Japanese locale testing back and forth with normalization\n");
	541	iter=ucol_openElements(coll, source, u_strlen(source), &status);
	542	if(U_FAILURE(status)){
	543	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	544	myErrorName(status));
	545	return;
	546	}
	547	backAndForth(iter);
	548	ucol_closeElements(iter);
	549	ucol_close(coll);
	550
	551	free(source);
	552	free(test1);
	553	free(test2);
	554	}
	555
	556	/**
	557	* Test for getOffset() and setOffset()
	558	*/
	559	static void TestOffset()
	560	{
	561	UErrorCode status= U_ZERO_ERROR;
	562	UCollator *en_us=NULL;
	563	UCollationElements iter, pristine;
	564	int32_t offset;
	565	int32_t *orders;
	566	int32_t orderLength=0;
	567	int count = 0;
	568	test1=(UChar)malloc(sizeof(UChar) 50);
	569	test2=(UChar)malloc(sizeof(UChar) 50);
	570	u_uastrcpy(test1, "What subset of all possible test cases?");
	571	u_uastrcpy(test2, "has the highest probability of detecting");
	572	en_us = ucol_open("en_US", &status);
	573	log_verbose("Testing getOffset and setOffset for CollationElements\n");
	574	iter = ucol_openElements(en_us, test1, u_strlen(test1), &status);
	575	if(U_FAILURE(status)){
	576	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	577	myErrorName(status));
	578	ucol_close(en_us);
	579	return;
	580	}
	581	/* Run all the way through the iterator, then get the offset */
	582
	583	orders = getOrders(iter, &orderLength);
	584
	585	offset = ucol_getOffset(iter);
	586
	587	if (offset != u_strlen(test1))
	588	{
	589	log_err("offset at end != length %d vs %d\n", offset,
	590	u_strlen(test1) );
	591	}
	592
	593	/* Now set the offset back to the beginning and see if it works */
	594	pristine=ucol_openElements(en_us, test1, u_strlen(test1), &status);
	595	if(U_FAILURE(status)){
	596	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	597	myErrorName(status));
	598	ucol_close(en_us);
	599	return;
	600	}
	601	status = U_ZERO_ERROR;
	602
	603	ucol_setOffset(iter, 0, &status);
	604	if (U_FAILURE(status))
	605	{
	606	log_err("setOffset failed. %s\n", myErrorName(status));
	607	}
	608	else
	609	{
	610	assertEqual(iter, pristine);
	611	}
	612
	613	ucol_closeElements(pristine);
	614	ucol_closeElements(iter);
	615	free(orders);
	616
	617	/* testing offsets in normalization buffer */
	618	test1[0] = 0x61;
	619	test1[1] = 0x300;
	620	test1[2] = 0x316;
	621	test1[3] = 0x62;
	622	test1[4] = 0;
	623	ucol_setAttribute(en_us, UCOL_NORMALIZATION_MODE, UCOL_ON, &status);
	624	iter = ucol_openElements(en_us, test1, 4, &status);
	625	if(U_FAILURE(status)){
	626	log_err("ERROR: in creation of collation element iterator using ucol_openElements()\n %s\n",
	627	myErrorName(status));
	628	ucol_close(en_us);
	629	return;
	630	}
	631
	632	count = 0;
	633	while (ucol_next(iter, &status) != UCOL_NULLORDER &&
	634	U_SUCCESS(status)) {
	635	switch (count) {
	636	case 0:
	637	if (ucol_getOffset(iter) != 1) {
	638	log_err("ERROR: Offset of iteration should be 0\n");
	639	}
	640	break;
	641	case 3:
	642	if (ucol_getOffset(iter) != 4) {
	643	log_err("ERROR: Offset of iteration should be 4\n");
	644	}
	645	break;
	646	default:
	647	if (ucol_getOffset(iter) != 3) {
	648	log_err("ERROR: Offset of iteration should be 3\n");
	649	}
	650	}
	651	count ++;
	652	}
	653
	654	ucol_reset(iter);
	655	count = 0;
	656	while (ucol_previous(iter, &status) != UCOL_NULLORDER &&
	657	U_SUCCESS(status)) {
	658	switch (count) {
	659	case 0:
	660	if (ucol_getOffset(iter) != 3) {
	661	log_err("ERROR: Offset of iteration should be 3\n");
	662	}
	663	break;
	664	default:
	665	if (ucol_getOffset(iter) != 0) {
	666	log_err("ERROR: Offset of iteration should be 0\n");
	667	}
	668	}
	669	count ++;
	670	}
	671
	672	if(U_FAILURE(status)){
	673	log_err("ERROR: in iterating collation elements %s\n",
	674	myErrorName(status));
	675	}
	676
	677	ucol_closeElements(iter);
	678	ucol_close(en_us);
	679	free(test1);
	680	free(test2);
	681	}
	682
	683	/**
	684	* Test for setText()
	685	*/
	686	static void TestSetText()
	687	{
	688	int32_t c,i;
	689	UErrorCode status = U_ZERO_ERROR;
	690	UCollator *en_us=NULL;
	691	UCollationElements iter1, iter2;
	692	test1=(UChar)malloc(sizeof(UChar) 50);
	693	test2=(UChar)malloc(sizeof(UChar) 50);
	694	u_uastrcpy(test1, "What subset of all possible test cases?");
	695	u_uastrcpy(test2, "has the highest probability of detecting");
	696	en_us = ucol_open("en_US", &status);
	697	log_verbose("testing setText for Collation elements\n");
	698	iter1=ucol_openElements(en_us, test1, u_strlen(test1), &status);
	699	if(U_FAILURE(status)){
	700	log_err("ERROR: in creation of collation element iterator1 using ucol_openElements()\n %s\n",
	701	myErrorName(status));
	702	ucol_close(en_us);
	703	return;
	704	}
	705	iter2=ucol_openElements(en_us, test2, u_strlen(test2), &status);
	706	if(U_FAILURE(status)){
	707	log_err("ERROR: in creation of collation element iterator2 using ucol_openElements()\n %s\n",
	708	myErrorName(status));
	709	ucol_close(en_us);
	710	return;
	711	}
	712
	713	/* Run through the second iterator just to exercise it */
	714	c = ucol_next(iter2, &status);
	715	i = 0;
	716
	717	while ( ++i < 10 && (c != UCOL_NULLORDER))
	718	{
	719	if (U_FAILURE(status))
	720	{
	721	log_err("iter2->next() returned an error. %s\n", myErrorName(status));
	722	ucol_closeElements(iter2);
	723	ucol_closeElements(iter1);
	724	ucol_close(en_us);
	725	return;
	726	}
	727
	728	c = ucol_next(iter2, &status);
	729	}
	730
	731	/* Now set it to point to the same string as the first iterator */
	732	ucol_setText(iter2, test1, u_strlen(test1), &status);
	733	if (U_FAILURE(status))
	734	{
	735	log_err("call to iter2->setText(test1) failed. %s\n", myErrorName(status));
	736	}
	737	else
	738	{
	739	assertEqual(iter1, iter2);
	740	}
	741
	742	/* Now set it to point to a null string with fake length*/
	743	ucol_setText(iter2, NULL, 2, &status);
	744	if (U_FAILURE(status))
	745	{
	746	log_err("call to iter2->setText(null) failed. %s\n", myErrorName(status));
	747	}
	748	else
	749	{
	750	if (ucol_next(iter2, &status) != UCOL_NULLORDER) {
	751	log_err("iter2 with null text expected to return UCOL_NULLORDER\n");
	752	}
	753	}
	754
	755	ucol_closeElements(iter2);
	756	ucol_closeElements(iter1);
	757	ucol_close(en_us);
	758	free(test1);
	759	free(test2);
	760	}
	761
	762
	763
	764	static void backAndForth(UCollationElements *iter)
	765	{
	766	/* Run through the iterator forwards and stick it into an array */
	767	int32_t index, o;
	768	UErrorCode status = U_ZERO_ERROR;
	769	int32_t orderLength = 0;
	770	int32_t *orders;
	771	orders= getOrders(iter, &orderLength);
	772
	773
	774	/* Now go through it backwards and make sure we get the same values */
	775	index = orderLength;
	776	ucol_reset(iter);
	777
	778	/* synwee : changed */
	779	while ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
	780	{
	781	if (o != orders[-- index])
	782	{
	783	if (o == 0)
	784	index ++;
	785	else
	786	{
	787	while (index > 0 && orders[-- index] == 0)
	788	{
	789	}
	790	if (o != orders[index])
	791	{
	792	log_err("Mismatch at index : 0x%x\n", index);
	793	return;
	794	}
	795
	796	}
	797	}
	798	}
	799
	800	while (index != 0 && orders[index - 1] == 0) {
	801	index --;
	802	}
	803
	804	if (index != 0)
	805	{
	806	log_err("Didn't get back to beginning - index is %d\n", index);
	807
	808	ucol_reset(iter);
	809	log_err("\nnext: ");
	810	if ((o = ucol_next(iter, &status)) != UCOL_NULLORDER)
	811	{
	812	log_err("Error at %x\n", o);
	813	}
	814	log_err("\nprev: ");
	815	if ((o = ucol_previous(iter, &status)) != UCOL_NULLORDER)
	816	{
	817	log_err("Error at %x\n", o);
	818	}
	819	log_verbose("\n");
	820	}
	821
	822	free(orders);
	823	}
	824
	825	/** @bug 4108762
	826	* Test for getMaxExpansion()
	827	*/
	828	static void TestMaxExpansion()
	829	{
	830	UErrorCode status = U_ZERO_ERROR;
	831	UCollator coll ;/= ucol_open("en_US", &status);*/
	832	UChar ch = 0;
	833	UChar supplementary[2] = {0xD800, 0xDC00};
	834	uint32_t sorder = 0;
	835	UCollationElements iter ;/= ucol_openElements(coll, &ch, 1, &status);*/
	836	uint32_t temporder = 0;
	837
	838	UChar rule[256];
	839	u_uastrcpy(rule, "&a < ab < c/aba < d < z < ch");
	840	coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
	841	UCOL_DEFAULT_STRENGTH,NULL, &status);
	842	if(U_SUCCESS(status) && coll) {
	843	iter = ucol_openElements(coll, &ch, 1, &status);
	844
	845	while (ch < 0xFFFF && U_SUCCESS(status)) {
	846	int count = 1;
	847	uint32_t order;
	848	int32_t size = 0;
	849
	850	ch ++;
	851
	852	ucol_setText(iter, &ch, 1, &status);
	853	order = ucol_previous(iter, &status);
	854
	855	/* thai management */
	856	if (order == 0)
	857	order = ucol_previous(iter, &status);
	858
	859	while (U_SUCCESS(status) &&
	860	ucol_previous(iter, &status) != UCOL_NULLORDER) {
	861	count ++;
	862	}
	863
	864	size = ucol_getMaxExpansion(iter, order);
	865	if (U_FAILURE(status) \|\| size < count) {
	866	log_err("Failure at codepoint %d, maximum expansion count < %d\n",
	867	ch, count);
	868	}
	869	}
	870
	871	/* testing for exact max expansion */
	872	ch = 0;
	873	while (ch < 0x61) {
	874	uint32_t order;
	875	int32_t size;
	876	ucol_setText(iter, &ch, 1, &status);
	877	order = ucol_previous(iter, &status);
	878	size = ucol_getMaxExpansion(iter, order);
	879	if (U_FAILURE(status) \|\| size != 1) {
	880	log_err("Failure at codepoint %d, maximum expansion count < %d\n",
	881	ch, 1);
	882	}
	883	ch ++;
	884	}
	885
	886	ch = 0x63;
	887	ucol_setText(iter, &ch, 1, &status);
	888	temporder = ucol_previous(iter, &status);
	889
	890	if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) != 3) {
	891	log_err("Failure at codepoint %d, maximum expansion count != %d\n",
	892	ch, 3);
	893	}
	894
	895	ch = 0x64;
	896	ucol_setText(iter, &ch, 1, &status);
	897	temporder = ucol_previous(iter, &status);
	898
	899	if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) != 1) {
	900	log_err("Failure at codepoint %d, maximum expansion count != %d\n",
	901	ch, 3);
	902	}
	903
	904	ucol_setText(iter, supplementary, 2, &status);
	905	sorder = ucol_previous(iter, &status);
	906
	907	if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, sorder) != 2) {
	908	log_err("Failure at codepoint %d, maximum expansion count < %d\n",
	909	ch, 2);
	910	}
	911
	912	/* testing jamo */
	913	ch = 0x1165;
	914
	915	ucol_setText(iter, &ch, 1, &status);
	916	temporder = ucol_previous(iter, &status);
	917	if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) > 3) {
	918	log_err("Failure at codepoint %d, maximum expansion count > %d\n",
	919	ch, 3);
	920	}
	921
	922	ucol_closeElements(iter);
	923	ucol_close(coll);
	924
	925	/* testing special jamo &a<\u1160 */
	926	rule[0] = 0x26;
	927	rule[1] = 0x71;
	928	rule[2] = 0x3c;
	929	rule[3] = 0x1165;
	930	rule[4] = 0x2f;
	931	rule[5] = 0x71;
	932	rule[6] = 0x71;
	933	rule[7] = 0x71;
	934	rule[8] = 0x71;
	935	rule[9] = 0;
	936
	937	coll = ucol_openRules(rule, u_strlen(rule), UCOL_DEFAULT,
	938	UCOL_DEFAULT_STRENGTH,NULL, &status);
	939	iter = ucol_openElements(coll, &ch, 1, &status);
	940
	941	temporder = ucol_previous(iter, &status);
	942	if (U_FAILURE(status) \|\| ucol_getMaxExpansion(iter, temporder) != 6) {
	943	log_err("Failure at codepoint %d, maximum expansion count > %d\n",
	944	ch, 5);
	945	}
	946
	947	ucol_closeElements(iter);
	948	ucol_close(coll);
	949	} else {
	950	log_data_err("Couldn't open collator\n");
	951	}
	952
	953	}
	954
	955	/**
	956	* Return an integer array containing all of the collation orders
	957	* returned by calls to next on the specified iterator
	958	*/
	959	static int32_t* getOrders(UCollationElements iter, int32_t orderLength)
	960	{
	961	UErrorCode status;
	962	int32_t order;
	963	int32_t maxSize = 100;
	964	int32_t size = 0;
	965	int32_t *temp;
	966	int32_t orders =(int32_t)malloc(sizeof(int32_t) * maxSize);
	967	status= U_ZERO_ERROR;
	968
	969
	970	while ((order=ucol_next(iter, &status)) != UCOL_NULLORDER)
	971	{
	972	if (size == maxSize)
	973	{
	974	maxSize *= 2;
	975	temp = (int32_t)malloc(sizeof(int32_t) maxSize);
	976
	977	memcpy(temp, orders, size * sizeof(int32_t));
	978	free(orders);
	979	orders = temp;
	980
	981	}
	982
	983	orders[size++] = order;
	984	}
	985
	986	if (maxSize > size)
	987	{
	988	if (size == 0) {
	989	size = 1;
	990	temp = (int32_t)malloc(sizeof(int32_t) size);
	991	temp[0] = 0;
	992	}
	993	else {
	994	temp = (int32_t)malloc(sizeof(int32_t) size);
	995	memcpy(temp, orders, size * sizeof(int32_t));
	996	}
	997
	998	free(orders);
	999	orders = temp;
	1000	}
	1001
	1002	*orderLength = size;
	1003	return orders;
	1004	}
	1005
	1006
	1007	static void assertEqual(UCollationElements i1, UCollationElements i2)
	1008	{
	1009	int32_t c1, c2;
	1010	int32_t count = 0;
	1011	UErrorCode status = U_ZERO_ERROR;
	1012
	1013	do
	1014	{
	1015	c1 = ucol_next(i1, &status);
	1016	c2 = ucol_next(i2, &status);
	1017
	1018	if (c1 != c2)
	1019	{
	1020	log_err("Error in iteration %d assetEqual between\n %d and %d, they are not equal\n", count, c1, c2);
	1021	break;
	1022	}
	1023
	1024	count += 1;
	1025	}
	1026	while (c1 != UCOL_NULLORDER);
	1027	}
	1028
	1029	/**
	1030	* Testing iterators with extremely small buffers
	1031	*/
	1032	static void TestSmallBuffer()
	1033	{
	1034	UErrorCode status = U_ZERO_ERROR;
	1035	UCollator *coll;
	1036	UCollationElements *testiter,
	1037	*iter;
	1038	int32_t count = 0;
	1039	int32_t *testorders,
	1040	*orders;
	1041
	1042	UChar teststr[500];
	1043	UChar str[] = {0x300, 0x31A, 0};
	1044	/*
	1045	creating a long string of decomposable characters,
	1046	since by default the writable buffer is of size 256
	1047	*/
	1048	while (count < 500) {
	1049	if ((count & 1) == 0) {
	1050	teststr[count ++] = 0x300;
	1051	}
	1052	else {
	1053	teststr[count ++] = 0x31A;
	1054	}
	1055	}
	1056
	1057	coll = ucol_open("th_TH", &status);
	1058	if(U_SUCCESS(status) && coll) {
	1059	testiter = ucol_openElements(coll, teststr, 500, &status);
	1060	iter = ucol_openElements(coll, str, 2, &status);
	1061
	1062	orders = getOrders(iter, &count);
	1063	if (count != 2) {
	1064	log_err("Error collation elements size is not 2 for \\u0300\\u031A\n");
	1065	}
	1066
	1067	/*
	1068	this will rearrange the string data to 250 characters of 0x300 first then
	1069	250 characters of 0x031A
	1070	*/
	1071	testorders = getOrders(testiter, &count);
	1072
	1073	if (count != 500) {
	1074	log_err("Error decomposition does not give the right sized collation elements\n");
	1075	}
	1076
	1077	while (count != 0) {
	1078	/* UCA collation element for 0x0F76 */
	1079	if ((count > 250 && testorders[-- count] != orders[1]) \|\|
	1080	(count <= 250 && testorders[-- count] != orders[0])) {
	1081	log_err("Error decomposition does not give the right collation element at %d count\n", count);
	1082	break;
	1083	}
	1084	}
	1085
	1086	free(testorders);
	1087	free(orders);
	1088
	1089	ucol_reset(testiter);
	1090	/* ensures that the writable buffer was cleared */
	1091	if (testiter->iteratordata_.writableBuffer !=
	1092	testiter->iteratordata_.stackWritableBuffer) {
	1093	log_err("Error Writable buffer in collation element iterator not reset\n");
	1094	}
	1095
	1096	/* ensures closing of elements done properly to clear writable buffer */
	1097	ucol_next(testiter, &status);
	1098	ucol_next(testiter, &status);
	1099	ucol_closeElements(testiter);
	1100	ucol_closeElements(iter);
	1101	ucol_close(coll);
	1102	} else {
	1103	log_data_err("Couldn't open collator\n");
	1104	}
	1105	}
	1106
	1107	/**
	1108	* Sniplets of code from genuca
	1109	*/
	1110	static int32_t hex2num(char hex) {
	1111	if(hex>='0' && hex <='9') {
	1112	return hex-'0';
	1113	} else if(hex>='a' && hex<='f') {
	1114	return hex-'a'+10;
	1115	} else if(hex>='A' && hex<='F') {
	1116	return hex-'A'+10;
	1117	} else {
	1118	return 0;
	1119	}
	1120	}
	1121
	1122	/**
	1123	* Getting codepoints from a string
	1124	* @param str character string contain codepoints seperated by space and ended
	1125	* by a semicolon
	1126	* @param codepoints array for storage, assuming size > 5
	1127	* @return position at the end of the codepoint section
	1128	*/
	1129	static char * getCodePoints(char str, UChar codepoints) {
	1130	char *pStartCP = str;
	1131	char *pEndCP = str + 4;
	1132
	1133	codepoints = (UChar)((hex2num(pStartCP) << 12) \|
	1134	(hex2num(*(pStartCP + 1)) << 8) \|
	1135	(hex2num(*(pStartCP + 2)) << 4) \|
	1136	(hex2num(*(pStartCP + 3))));
	1137	codepoints ++;
	1138	while (*pEndCP != ';') {
	1139	pStartCP = pEndCP + 1;
	1140	codepoints = (UChar)((hex2num(pStartCP) << 12) \|
	1141	(hex2num(*(pStartCP + 1)) << 8) \|
	1142	(hex2num(*(pStartCP + 2)) << 4) \|
	1143	(hex2num(*(pStartCP + 3))));
	1144	codepoints ++;
	1145	pEndCP = pStartCP + 4;
	1146	}
	1147	*codepoints = 0;
	1148	return pEndCP + 1;
	1149	}
	1150
	1151	/**
	1152	* Sniplets of code from genuca
	1153	*/
	1154	static int32_t
	1155	readElement(char *from, char to, char separator, UErrorCode *status)
	1156	{
	1157	if (U_SUCCESS(*status)) {
	1158	char buffer[1024];
	1159	int32_t i = 0;
	1160	while (**from != separator) {
	1161	if (**from != ' ') {
	1162	(buffer+i++) = *from;
	1163	}
	1164	(*from)++;
	1165	}
	1166	(*from)++;
	1167	*(buffer + i) = 0;
	1168	strcpy(to, buffer);
	1169	return i/2;
	1170	}
	1171
	1172	return 0;
	1173	}
	1174
	1175	/**
	1176	* Sniplets of code from genuca
	1177	*/
	1178	static uint32_t
	1179	getSingleCEValue(char primary, char secondary, char *tertiary,
	1180	UErrorCode *status)
	1181	{
	1182	if (U_SUCCESS(*status)) {
	1183	uint32_t value = 0;
	1184	char primsave = '\0';
	1185	char secsave = '\0';
	1186	char tersave = '\0';
	1187	char *primend = primary+4;
	1188	char *secend = secondary+2;
	1189	char *terend = tertiary+2;
	1190	uint32_t primvalue;
	1191	uint32_t secvalue;
	1192	uint32_t tervalue;
	1193
	1194	if (uprv_strlen(primary) > 4) {
	1195	primsave = *primend;
	1196	*primend = '\0';
	1197	}
	1198
	1199	if (uprv_strlen(secondary) > 2) {
	1200	secsave = *secend;
	1201	*secend = '\0';
	1202	}
	1203
	1204	if (uprv_strlen(tertiary) > 2) {
	1205	tersave = *terend;
	1206	*terend = '\0';
	1207	}
	1208
	1209	primvalue = (*primary!='\0')?uprv_strtoul(primary, &primend, 16):0;
	1210	secvalue = (*secondary!='\0')?uprv_strtoul(secondary, &secend, 16):0;
	1211	tervalue = (*tertiary!='\0')?uprv_strtoul(tertiary, &terend, 16):0;
	1212	if(primvalue <= 0xFF) {
	1213	primvalue <<= 8;
	1214	}
	1215
	1216	value = ((primvalue << UCOL_PRIMARYORDERSHIFT) & UCOL_PRIMARYORDERMASK)
	1217	\| ((secvalue << UCOL_SECONDARYORDERSHIFT) & UCOL_SECONDARYORDERMASK)
	1218	\| (tervalue & UCOL_TERTIARYORDERMASK);
	1219
	1220	if(primsave!='\0') {
	1221	*primend = primsave;
	1222	}
	1223	if(secsave!='\0') {
	1224	*secend = secsave;
	1225	}
	1226	if(tersave!='\0') {
	1227	*terend = tersave;
	1228	}
	1229	return value;
	1230	}
	1231	return 0;
	1232	}
	1233
	1234	/**
	1235	* Getting collation elements generated from a string
	1236	* @param str character string contain collation elements contained in [] and
	1237	* seperated by space
	1238	* @param ce array for storage, assuming size > 20
	1239	* @param status error status
	1240	* @return position at the end of the codepoint section
	1241	*/
	1242	static char * getCEs(char str, uint32_t ces, UErrorCode *status) {
	1243	char *pStartCP = uprv_strchr(str, '[');
	1244	int count = 0;
	1245	char *pEndCP;
	1246	char primary[100];
	1247	char secondary[100];
	1248	char tertiary[100];
	1249
	1250	while (*pStartCP == '[') {
	1251	uint32_t primarycount = 0;
	1252	uint32_t secondarycount = 0;
	1253	uint32_t tertiarycount = 0;
	1254	uint32_t CEi = 1;
	1255	pEndCP = strchr(pStartCP, ']');
	1256	if(pEndCP == NULL) {
	1257	break;
	1258	}
	1259	pStartCP ++;
	1260
	1261	primarycount = readElement(&pStartCP, primary, ',', status);
	1262	secondarycount = readElement(&pStartCP, secondary, ',', status);
	1263	tertiarycount = readElement(&pStartCP, tertiary, ']', status);
	1264
	1265	/* I want to get the CEs entered right here, including continuation */
	1266	ces[count ++] = getSingleCEValue(primary, secondary, tertiary, status);
	1267	if (U_FAILURE(*status)) {
	1268	break;
	1269	}
	1270
	1271	while (2 * CEi < primarycount \|\| CEi < secondarycount \|\|
	1272	CEi < tertiarycount) {
	1273	uint32_t value = UCOL_CONTINUATION_MARKER; /* Continuation marker */
	1274	if (2 * CEi < primarycount) {
	1275	value \|= ((hex2num((primary + 4 CEi)) & 0xF) << 28);
	1276	value \|= ((hex2num((primary + 4 CEi + 1)) & 0xF) << 24);
	1277	}
	1278
	1279	if (2 * CEi + 1 < primarycount) {
	1280	value \|= ((hex2num((primary + 4 CEi + 2)) & 0xF) << 20);
	1281	value \|= ((hex2num((primary + 4 CEi + 3)) &0xF) << 16);
	1282	}
	1283
	1284	if (CEi < secondarycount) {
	1285	value \|= ((hex2num((secondary + 2 CEi)) & 0xF) << 12);
	1286	value \|= ((hex2num((secondary + 2 CEi + 1)) & 0xF) << 8);
	1287	}
	1288
	1289	if (CEi < tertiarycount) {
	1290	value \|= ((hex2num((tertiary + 2 CEi)) & 0x3) << 4);
	1291	value \|= (hex2num((tertiary + 2 CEi + 1)) & 0xF);
	1292	}
	1293
	1294	CEi ++;
	1295	ces[count ++] = value;
	1296	}
	1297
	1298	pStartCP = pEndCP + 1;
	1299	}
	1300	ces[count] = 0;
	1301	return pStartCP;
	1302	}
	1303
	1304	/**
	1305	* Getting the FractionalUCA.txt file stream
	1306	*/
	1307	static FileStream * getFractionalUCA(void)
	1308	{
	1309	char newPath[256];
	1310	char backupPath[256];
	1311	FileStream *result = NULL;
	1312
	1313	/* Look inside ICU_DATA first */
	1314	uprv_strcpy(newPath, u_getDataDirectory());
	1315	uprv_strcat(newPath, "unidata" U_FILE_SEP_STRING );
	1316	uprv_strcat(newPath, "FractionalUCA.txt");
	1317
	1318	/* As a fallback, try to guess where the source data was located
	1319	* at the time ICU was built, and look there.
	1320	*/
	1321	#if defined (U_TOPSRCDIR)
	1322	strcpy(backupPath, U_TOPSRCDIR U_FILE_SEP_STRING "data");
	1323	#else
	1324	{
	1325	UErrorCode errorCode = U_ZERO_ERROR;
	1326	strcpy(backupPath, loadTestData(&errorCode));
	1327	strcat(backupPath, U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING ".." U_FILE_SEP_STRING "data");
	1328	}
	1329	#endif
	1330	strcat(backupPath, U_FILE_SEP_STRING "unidata" U_FILE_SEP_STRING "FractionalUCA.txt");
	1331
	1332	result = T_FileStream_open(newPath, "rb");
	1333
	1334	if (result == NULL) {
	1335	result = T_FileStream_open(backupPath, "rb");
	1336	if (result == NULL) {
	1337	log_err("Failed to open either %s or %s\n", newPath, backupPath);
	1338	}
	1339	}
	1340	return result;
	1341	}
	1342
	1343	/**
	1344	* Testing the CEs returned by the iterator
	1345	*/
	1346	static void TestCEs() {
	1347	FileStream *file = NULL;
	1348	char line[1024];
	1349	char *str;
	1350	UChar codepoints[5];
	1351	uint32_t ces[20];
	1352	UErrorCode status = U_ZERO_ERROR;
	1353	UCollator *coll = ucol_open("", &status);
	1354	uint32_t lineNo = 0;
	1355
	1356	if (U_FAILURE(status)) {
	1357	log_err("Error in opening root collator\n");
	1358	return;
	1359	}
	1360
	1361	file = getFractionalUCA();
	1362
	1363	if (file == NULL) {
	1364	log_err("* unable to open input FractionalUCA.txt file *\n");
	1365	return;
	1366	}
	1367
	1368
	1369	while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
	1370	int count = 0;
	1371	UCollationElements *iter;
	1372	lineNo++;
	1373	/* skip this line if it is empty or a comment or is a return value
	1374	or start of some variable section */
	1375	if(line[0] == 0 \|\| line[0] == '#' \|\| line[0] == '\n' \|\|
	1376	line[0] == 0x000D \|\| line[0] == '[') {
	1377	continue;
	1378	}
	1379
	1380	str = getCodePoints(line, codepoints);
	1381
	1382	/* these are 'fake' codepoints in the fractional UCA, and are used just
	1383	* for positioning of indirect values. They should not go through this
	1384	* test.
	1385	*/
	1386	if(*codepoints == 0xFDD0) {
	1387	continue;
	1388	}
	1389
	1390	getCEs(str, ces, &status);
	1391	if (U_FAILURE(status)) {
	1392	log_err("Error in parsing collation elements in FractionalUCA.txt\n");
	1393	break;
	1394	}
	1395	iter = ucol_openElements(coll, codepoints, -1, &status);
	1396	if (U_FAILURE(status)) {
	1397	log_err("Error in opening collation elements\n");
	1398	break;
	1399	}
	1400	for (;;) {
	1401	uint32_t ce = (uint32_t)ucol_next(iter, &status);
	1402	if (ce == 0xFFFFFFFF) {
	1403	ce = 0;
	1404	}
	1405	/* we now unconditionally reorder Thai/Lao prevowels, so this
	1406	* test would fail if we don't skip here.
	1407	*/
	1408	if(UCOL_ISTHAIPREVOWEL(*codepoints) && ce == 0 && count == 0) {
	1409	continue;
	1410	}
	1411	if (ce != ces[count] \|\| U_FAILURE(status)) {
	1412	log_err("Collation elements in FractionalUCA.txt and iterators do not match!\n");
	1413	break;
	1414	}
	1415	if (ces[count] == 0) {
	1416	break;
	1417	}
	1418	count ++;
	1419	}
	1420	ucol_closeElements(iter);
	1421	}
	1422
	1423	T_FileStream_close(file);
	1424	ucol_close(coll);
	1425	}
	1426
	1427	/**
	1428	* Testing the discontigous contractions
	1429	*/
	1430	static void TestDiscontiguos() {
	1431	const char *rulestr =
	1432	"&z < AB < X\\u0300 < ABC < X\\u0300\\u0315";
	1433	UChar rule[50];
	1434	int rulelen = u_unescape(rulestr, rule, 50);
	1435	const char *src[] = {
	1436	"ADB", "ADBC", "A\\u0315B", "A\\u0315BC",
	1437	/* base character blocked */
	1438	"XD\\u0300", "XD\\u0300\\u0315",
	1439	/* non blocking combining character */
	1440	"X\\u0319\\u0300", "X\\u0319\\u0300\\u0315",
	1441	/* blocking combining character */
	1442	"X\\u0314\\u0300", "X\\u0314\\u0300\\u0315",
	1443	/* contraction prefix */
	1444	"ABDC", "AB\\u0315C","X\\u0300D\\u0315", "X\\u0300\\u0319\\u0315",
	1445	"X\\u0300\\u031A\\u0315",
	1446	/* ends not with a contraction character */
	1447	"X\\u0319\\u0300D", "X\\u0319\\u0300\\u0315D", "X\\u0300D\\u0315D",
	1448	"X\\u0300\\u0319\\u0315D", "X\\u0300\\u031A\\u0315D"
	1449	};
	1450	const char *tgt[] = {
	1451	/* non blocking combining character */
	1452	"A D B", "A D BC", "A \\u0315 B", "A \\u0315 BC",
	1453	/* base character blocked */
	1454	"X D \\u0300", "X D \\u0300\\u0315",
	1455	/* non blocking combining character */
	1456	"X\\u0300 \\u0319", "X\\u0300\\u0315 \\u0319",
	1457	/* blocking combining character */
	1458	"X \\u0314 \\u0300", "X \\u0314 \\u0300\\u0315",
	1459	/* contraction prefix */
	1460	"AB DC", "AB \\u0315 C","X\\u0300 D \\u0315", "X\\u0300\\u0315 \\u0319",
	1461	"X\\u0300 \\u031A \\u0315",
	1462	/* ends not with a contraction character */
	1463	"X\\u0300 \\u0319D", "X\\u0300\\u0315 \\u0319D", "X\\u0300 D\\u0315D",
	1464	"X\\u0300\\u0315 \\u0319D", "X\\u0300 \\u031A\\u0315D"
	1465	};
	1466	int size = 20;
	1467	UCollator *coll;
	1468	UErrorCode status = U_ZERO_ERROR;
	1469	int count = 0;
	1470	UCollationElements *iter;
	1471	UCollationElements *resultiter;
	1472
	1473	coll = ucol_openRules(rule, rulelen, UCOL_OFF, UCOL_DEFAULT_STRENGTH,NULL, &status);
	1474	iter = ucol_openElements(coll, rule, 1, &status);
	1475	resultiter = ucol_openElements(coll, rule, 1, &status);
	1476
	1477	if (U_FAILURE(status)) {
	1478	log_err("Error opening collation rules\n");
	1479	return;
	1480	}
	1481
	1482	while (count < size) {
	1483	UChar str[20];
	1484	UChar tstr[20];
	1485	int strLen = u_unescape(src[count], str, 20);
	1486	UChar *s;
	1487
	1488	ucol_setText(iter, str, strLen, &status);
	1489	if (U_FAILURE(status)) {
	1490	log_err("Error opening collation iterator\n");
	1491	return;
	1492	}
	1493
	1494	u_unescape(tgt[count], tstr, 20);
	1495	s = tstr;
	1496
	1497	log_verbose("count %d\n", count);
	1498
	1499	for (;;) {
	1500	uint32_t ce;
	1501	UChar *e = u_strchr(s, 0x20);
	1502	if (e == 0) {
	1503	e = u_strchr(s, 0);
	1504	}
	1505	ucol_setText(resultiter, s, (int32_t)(e - s), &status);
	1506	ce = ucol_next(resultiter, &status);
	1507	if (U_FAILURE(status)) {
	1508	log_err("Error manipulating collation iterator\n");
	1509	return;
	1510	}
	1511	while (ce != UCOL_NULLORDER) {
	1512	if (ce != (uint32_t)ucol_next(iter, &status) \|\|
	1513	U_FAILURE(status)) {
	1514	log_err("Discontiguos contraction test mismatch\n");
	1515	return;
	1516	}
	1517	ce = ucol_next(resultiter, &status);
	1518	if (U_FAILURE(status)) {
	1519	log_err("Error getting next collation element\n");
	1520	return;
	1521	}
	1522	}
	1523	s = e + 1;
	1524	if (*e == 0) {
	1525	break;
	1526	}
	1527	}
	1528	ucol_reset(iter);
	1529	backAndForth(iter);
	1530	count ++;
	1531	}
	1532	ucol_closeElements(resultiter);
	1533	ucol_closeElements(iter);
	1534	ucol_close(coll);
	1535	}
	1536
	1537	static void TestCEBufferOverflow()
	1538	{
	1539	UChar str[UCOL_EXPAND_CE_BUFFER_SIZE + 1];
	1540	UErrorCode status = U_ZERO_ERROR;
	1541	UChar rule[10];
	1542	UCollator *coll;
	1543	UCollationElements *iter;
	1544
	1545	u_uastrcpy(rule, "&z < AB");
	1546	coll = ucol_openRules(rule, u_strlen(rule), UCOL_OFF, UCOL_DEFAULT_STRENGTH, NULL,&status);
	1547	if (U_FAILURE(status)) {
	1548	log_err("Rule based collator not created for testing ce buffer overflow\n");
	1549	return;
	1550	}
	1551
	1552	/* 0xDCDC is a trail surrogate hence deemed unsafe by the heuristic
	1553	test. this will cause an overflow in getPrev */
	1554	str[0] = 0x0041; /* 'A' */
	1555	/uprv_memset(str + 1, 0xE0, sizeof(UChar) UCOL_EXPAND_CE_BUFFER_SIZE);*/
	1556	uprv_memset(str + 1, 0xDC, sizeof(UChar) * UCOL_EXPAND_CE_BUFFER_SIZE);
	1557	str[UCOL_EXPAND_CE_BUFFER_SIZE] = 0x0042; /* 'B' */
	1558	iter = ucol_openElements(coll, str, UCOL_EXPAND_CE_BUFFER_SIZE + 1,
	1559	&status);
	1560	if (ucol_previous(iter, &status) != UCOL_NULLORDER \|\|
	1561	status != U_BUFFER_OVERFLOW_ERROR) {
	1562	log_err("CE buffer expected to overflow with long string of trail surrogates\n");
	1563	}
	1564	ucol_closeElements(iter);
	1565	ucol_close(coll);
	1566	}
	1567
	1568	/**
	1569	* Byte bounds checks. Checks if each byte in data is between upper and lower
	1570	* inclusive.
	1571	*/
	1572	static UBool checkByteBounds(uint32_t data, char upper, char lower)
	1573	{
	1574	int count = 4;
	1575	while (count > 0) {
	1576	char b = (char)(data & 0xFF);
	1577	if (b > upper \|\| b < lower) {
	1578	return FALSE;
	1579	}
	1580	data = data >> 8;
	1581	count --;
	1582	}
	1583	return TRUE;
	1584	}
	1585
	1586	/**
	1587	* Determines case of the string of codepoints.
	1588	* If it is a multiple codepoints it has to treated as a contraction.
	1589	*/
	1590	#if 0
	1591	static uint8_t getCase(const UChar *s, uint32_t len) {
	1592	UBool lower = FALSE;
	1593	UBool upper = FALSE;
	1594	UBool title = FALSE;
	1595	UErrorCode status = U_ZERO_ERROR;
	1596	UChar str[256];
	1597	const UChar *ps = s;
	1598
	1599	if (len == 0) {
	1600	return UCOL_LOWER_CASE;
	1601	}
	1602
	1603	while (len > 0) {
	1604	UChar c = *ps ++;
	1605
	1606	if (u_islower(c)) {
	1607	lower = TRUE;
	1608	}
	1609	if (u_isupper(c)) {
	1610	upper = TRUE;
	1611	}
	1612	if (u_istitle(c)) {
	1613	title = TRUE;
	1614	}
	1615
	1616	len --;
	1617	}
	1618	if ((lower && !upper && !title) \|\| (!lower && !upper && !title)){
	1619	return UCOL_LOWER_CASE;
	1620	}
	1621	if (upper && !lower && !title) {
	1622	return UCOL_UPPER_CASE;
	1623	}
	1624	/* mix of cases here */
	1625	/* len = unorm_normalize(s, len, UNORM_NFKD, 0, str, 256, &status);
	1626	if (U_FAILURE(status)) {
	1627	log_err("Error normalizing data string\n");
	1628	return UCOL_LOWER_CASE;
	1629	}*/
	1630
	1631	if ((title && len >= 2) \|\| (lower && upper)) {
	1632	return UCOL_MIXED_CASE;
	1633	}
	1634	if (u_isupper(s[0])) {
	1635	return UCOL_UPPER_CASE;
	1636	}
	1637	return UCOL_LOWER_CASE;
	1638	}
	1639	#endif
	1640
	1641	/**
	1642	* Checking collation element validity given the boundary arguments.
	1643	*/
	1644	static UBool checkCEValidity(const UCollator coll, const UChar codepoints,
	1645	int length, uint32_t primarymax,
	1646	uint32_t secondarymax)
	1647	{
	1648	UErrorCode status = U_ZERO_ERROR;
	1649	UCollationElements *iter = ucol_openElements(coll, codepoints, length,
	1650	&status);
	1651	uint32_t ce;
	1652	UBool first = TRUE;
	1653	/*
	1654	UBool upper = FALSE;
	1655	UBool lower = FALSE;
	1656	*/
	1657
	1658	if (U_FAILURE(status)) {
	1659	log_err("Error creating iterator for testing validity\n");
	1660	}
	1661
	1662	ce = ucol_next(iter, &status);
	1663
	1664	while (ce != UCOL_NULLORDER) {
	1665	if (ce != 0) {
	1666	uint32_t primary = UCOL_PRIMARYORDER(ce);
	1667	uint32_t secondary = UCOL_SECONDARYORDER(ce);
	1668	uint32_t tertiary = UCOL_TERTIARYORDER(ce);
	1669	/* uint32_t scasebits = tertiary & 0xC0;*/
	1670
	1671	if ((tertiary == 0 && secondary != 0) \|\|
	1672	(tertiary < 0xC0 && secondary == 0 && primary != 0)) {
	1673	/* n-1th level is not zero when the nth level is
	1674	except for continuations, this is wrong */
	1675	log_err("Lower level weight not 0 when high level weight is 0\n");
	1676	goto fail;
	1677	}
	1678	else {
	1679	/* checks if any byte is illegal ie = 01 02 03. */
	1680	if (checkByteBounds(ce, 0x3, 0x1)) {
	1681	log_err("Byte range in CE lies in illegal bounds 0x1 - 0x3\n");
	1682	goto fail;
	1683	}
	1684	}
	1685	if ((primary != 0 && primary < primarymax) \|\| (primary >= 0xFF00 && !isContinuation(ce))) {
	1686	log_err("UCA primary weight out of bounds\n");
	1687	goto fail;
	1688	}
	1689	/* case matching not done since data generated by ken */
	1690	if (first) {
	1691	if (secondary >= 6 && secondary <= secondarymax) {
	1692	log_err("Secondary weight out of range\n");
	1693	goto fail;
	1694	}
	1695	first = FALSE;
	1696	}
	1697	}
	1698	ce = ucol_next(iter, &status);
	1699	}
	1700	ucol_closeElements(iter);
	1701	return TRUE;
	1702	fail :
	1703	ucol_closeElements(iter);
	1704	return FALSE;
	1705	}
	1706
	1707	static void TestCEValidity()
	1708	{
	1709	/* testing UCA collation elements */
	1710	UErrorCode status = U_ZERO_ERROR;
	1711	/* en_US has no tailorings */
	1712	UCollator *coll = ucol_open("en_US", &status);
	1713	/* tailored locales */
	1714	char locale[][6] = {"fr_FR", "ko_KR", "sh_YU", "th_TH", "zh_CN"};
	1715	FileStream *file = getFractionalUCA();
	1716	char line[1024];
	1717	UChar codepoints[10];
	1718	int count = 0;
	1719	UParseError parseError;
	1720	if (U_FAILURE(status)) {
	1721	log_err("en_US collator creation failed\n");
	1722	return;
	1723	}
	1724	log_verbose("Testing UCA elements\n");
	1725	if (file == NULL) {
	1726	log_err("Fractional UCA data can not be opened\n");
	1727	return;
	1728	}
	1729
	1730	while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
	1731	if(line[0] == 0 \|\| line[0] == '#' \|\| line[0] == '\n' \|\|
	1732	line[0] == 0x000D \|\| line[0] == '[') {
	1733	continue;
	1734	}
	1735
	1736	getCodePoints(line, codepoints);
	1737	checkCEValidity(coll, codepoints, u_strlen(codepoints), 5, 86);
	1738	}
	1739
	1740	log_verbose("Testing UCA elements for the whole range of unicode characters\n");
	1741	codepoints[0] = 0;
	1742	while (codepoints[0] < 0xFFFF) {
	1743	if (u_isdefined((UChar32)codepoints[0])) {
	1744	checkCEValidity(coll, codepoints, 1, 5, 86);
	1745	}
	1746	codepoints[0] ++;
	1747	}
	1748
	1749	ucol_close(coll);
	1750
	1751	/* testing tailored collation elements */
	1752	log_verbose("Testing tailored elements\n");
	1753	while (count < 5) {
	1754	const UChar *rules = NULL,
	1755	*current = NULL;
	1756	UChar *rulesCopy = NULL;
	1757	int32_t ruleLen = 0;
	1758
	1759	uint32_t chOffset = 0;
	1760	uint32_t chLen = 0;
	1761	uint32_t exOffset = 0;
	1762	uint32_t exLen = 0;
	1763	uint32_t prefixOffset = 0;
	1764	uint32_t prefixLen = 0;
	1765	UBool startOfRules = TRUE;
	1766	UColOptionSet opts;
	1767
	1768	UColTokenParser src;
	1769	uint32_t strength = 0;
	1770	uint16_t specs = 0;
	1771
	1772	coll = ucol_open(locale[count], &status);
	1773	if (U_FAILURE(status)) {
	1774	log_err("%s collator creation failed\n", locale[count]);
	1775	return;
	1776	}
	1777
	1778	src.opts = &opts;
	1779	rules = ucol_getRules(coll, &ruleLen);
	1780
	1781	if (ruleLen > 0) {
	1782	rulesCopy = (UChar *)malloc((ruleLen +
	1783	UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
	1784	uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
	1785	src.current = src.source = rulesCopy;
	1786	src.end = rulesCopy + ruleLen;
	1787	src.extraCurrent = src.end;
	1788	src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
	1789
	1790	while ((current = ucol_tok_parseNextToken(&src, startOfRules, &parseError,&status)) != NULL) {
	1791	strength = src.parsedToken.strength;
	1792	chOffset = src.parsedToken.charsOffset;
	1793	chLen = src.parsedToken.charsLen;
	1794	exOffset = src.parsedToken.extensionOffset;
	1795	exLen = src.parsedToken.extensionLen;
	1796	prefixOffset = src.parsedToken.prefixOffset;
	1797	prefixLen = src.parsedToken.prefixLen;
	1798	specs = src.parsedToken.flags;
	1799
	1800	startOfRules = FALSE;
	1801	uprv_memcpy(codepoints, src.source + chOffset,
	1802	chLen * sizeof(UChar));
	1803	codepoints[chLen] = 0;
	1804	checkCEValidity(coll, codepoints, chLen, 4, 85);
	1805	}
	1806	free(rulesCopy);
	1807	}
	1808
	1809	ucol_close(coll);
	1810	count ++;
	1811	}
	1812	T_FileStream_close(file);
	1813	}
	1814
	1815	static void printSortKeyError(const UChar *codepoints, int length,
	1816	uint8_t *sortkey, int sklen)
	1817	{
	1818	int count = 0;
	1819	log_err("Sortkey not valid for ");
	1820	while (length > 0) {
	1821	log_err("0x%04x ", *codepoints);
	1822	length --;
	1823	codepoints ++;
	1824	}
	1825	log_err("\nSortkey : ");
	1826	while (count < sklen) {
	1827	log_err("0x%02x ", sortkey[count]);
	1828	count ++;
	1829	}
	1830	log_err("\n");
	1831	}
	1832
	1833	/**
	1834	* Checking sort key validity for all levels
	1835	*/
	1836	static UBool checkSortKeyValidity(UCollator *coll,
	1837	const UChar *codepoints,
	1838	int length)
	1839	{
	1840	UErrorCode status = U_ZERO_ERROR;
	1841	UCollationStrength strength[5] = {UCOL_PRIMARY, UCOL_SECONDARY,
	1842	UCOL_TERTIARY, UCOL_QUATERNARY,
	1843	UCOL_IDENTICAL};
	1844	int strengthlen = 5;
	1845	int index = 0;
	1846	int caselevel = 0;
	1847
	1848	while (caselevel < 1) {
	1849	if (caselevel == 0) {
	1850	ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_OFF, &status);
	1851	}
	1852	else {
	1853	ucol_setAttribute(coll, UCOL_CASE_LEVEL, UCOL_ON, &status);
	1854	}
	1855
	1856	while (index < strengthlen) {
	1857	int count01 = 0;
	1858	uint32_t count = 0;
	1859	uint8_t sortkey[128];
	1860	uint32_t sklen;
	1861
	1862	ucol_setStrength(coll, strength[index]);
	1863	sklen = ucol_getSortKey(coll, codepoints, length, sortkey, 128);
	1864	while (sortkey[count] != 0) {
	1865	if (sortkey[count] == 2 \|\| (sortkey[count] == 3 && count01 > 0 && index != 4)) {
	1866	printSortKeyError(codepoints, length, sortkey, sklen);
	1867	return FALSE;
	1868	}
	1869	if (sortkey[count] == 1) {
	1870	count01 ++;
	1871	}
	1872	count ++;
	1873	}
	1874
	1875	if (count + 1 != sklen \|\| (count01 != index + caselevel)) {
	1876	printSortKeyError(codepoints, length, sortkey, sklen);
	1877	return FALSE;
	1878	}
	1879	index ++;
	1880	}
	1881	caselevel ++;
	1882	}
	1883	return TRUE;
	1884	}
	1885
	1886	static void TestSortKeyValidity(void)
	1887	{
	1888	/* testing UCA collation elements */
	1889	UErrorCode status = U_ZERO_ERROR;
	1890	/* en_US has no tailorings */
	1891	UCollator *coll = ucol_open("en_US", &status);
	1892	/* tailored locales */
	1893	char locale[][6] = {"fr_FR\0", "ko_KR\0", "sh_YU\0", "th_TH\0", "zh_CN\0"};
	1894	FileStream *file = getFractionalUCA();
	1895	char line[1024];
	1896	UChar codepoints[10];
	1897	int count = 0;
	1898	UParseError parseError;
	1899	if (U_FAILURE(status)) {
	1900	log_err("en_US collator creation failed\n");
	1901	return;
	1902	}
	1903	log_verbose("Testing UCA elements\n");
	1904	if (file == NULL) {
	1905	log_err("Fractional UCA data can not be opened\n");
	1906	return;
	1907	}
	1908
	1909	while (T_FileStream_readLine(file, line, sizeof(line)) != NULL) {
	1910	if(line[0] == 0 \|\| line[0] == '#' \|\| line[0] == '\n' \|\|
	1911	line[0] == 0x000D \|\| line[0] == '[') {
	1912	continue;
	1913	}
	1914
	1915	getCodePoints(line, codepoints);
	1916	checkSortKeyValidity(coll, codepoints, u_strlen(codepoints));
	1917	}
	1918
	1919	log_verbose("Testing UCA elements for the whole range of unicode characters\n");
	1920	codepoints[0] = 0;
	1921
	1922	while (codepoints[0] < 0xFFFF) {
	1923	if (u_isdefined((UChar32)codepoints[0])) {
	1924	checkSortKeyValidity(coll, codepoints, 1);
	1925	}
	1926	codepoints[0] ++;
	1927	}
	1928
	1929	ucol_close(coll);
	1930
	1931	/* testing tailored collation elements */
	1932	log_verbose("Testing tailored elements\n");
	1933	while (count < 5) {
	1934	const UChar *rules = NULL,
	1935	*current = NULL;
	1936	UChar *rulesCopy = NULL;
	1937	int32_t ruleLen = 0;
	1938
	1939	uint32_t chOffset = 0;
	1940	uint32_t chLen = 0;
	1941	uint32_t exOffset = 0;
	1942	uint32_t exLen = 0;
	1943	uint32_t prefixOffset = 0;
	1944	uint32_t prefixLen = 0;
	1945	UBool startOfRules = TRUE;
	1946	UColOptionSet opts;
	1947
	1948	UColTokenParser src;
	1949	uint32_t strength = 0;
	1950	uint16_t specs = 0;
	1951
	1952	coll = ucol_open(locale[count], &status);
	1953	if (U_FAILURE(status)) {
	1954	log_err("%s collator creation failed\n", locale[count]);
	1955	return;
	1956	}
	1957
	1958	src.opts = &opts;
	1959	rules = ucol_getRules(coll, &ruleLen);
	1960
	1961	if (ruleLen > 0) {
	1962	rulesCopy = (UChar *)malloc((ruleLen +
	1963	UCOL_TOK_EXTRA_RULE_SPACE_SIZE) * sizeof(UChar));
	1964	uprv_memcpy(rulesCopy, rules, ruleLen * sizeof(UChar));
	1965	src.current = src.source = rulesCopy;
	1966	src.end = rulesCopy + ruleLen;
	1967	src.extraCurrent = src.end;
	1968	src.extraEnd = src.end + UCOL_TOK_EXTRA_RULE_SPACE_SIZE;
	1969
	1970	while ((current = ucol_tok_parseNextToken(&src, startOfRules,&parseError, &status)) != NULL) {
	1971	strength = src.parsedToken.strength;
	1972	chOffset = src.parsedToken.charsOffset;
	1973	chLen = src.parsedToken.charsLen;
	1974	exOffset = src.parsedToken.extensionOffset;
	1975	exLen = src.parsedToken.extensionLen;
	1976	prefixOffset = src.parsedToken.prefixOffset;
	1977	prefixLen = src.parsedToken.prefixLen;
	1978	specs = src.parsedToken.flags;
	1979
	1980	startOfRules = FALSE;
	1981	uprv_memcpy(codepoints, src.source + chOffset,
	1982	chLen * sizeof(UChar));
	1983	codepoints[chLen] = 0;
	1984	checkSortKeyValidity(coll, codepoints, chLen);
	1985	}
	1986	free(rulesCopy);
	1987	}
	1988
	1989	ucol_close(coll);
	1990	count ++;
	1991	}
	1992	T_FileStream_close(file);
	1993	}
	1994
	1995	#endif /* #if !UCONFIG_NO_COLLATION */