1 /********************************************************************
3 * Copyright (C) 2001-2005 IBM, Inc. All Rights Reserved.
5 ********************************************************************/
6 /********************************************************************************
10 * Modification History:
11 * Name Date Description
12 * synwee May 31 2001 Creation
14 *********************************************************************************
18 * This program outputs the collation elements used for a requested tailoring.
21 * dumpce options... please check main function.
23 #include <unicode/utypes.h>
24 #include <unicode/ucol.h>
25 #include <unicode/uloc.h>
26 #include <unicode/ucoleitr.h>
27 #include <unicode/uchar.h>
28 #include <unicode/uscript.h>
29 #include <unicode/utf16.h>
30 #include <unicode/putil.h>
31 #include <unicode/ustring.h>
40 #include <unicode/ures.h>
41 #include <unicode/uniset.h>
42 #include <unicode/usetiter.h>
45 * Command line option variables.
46 * These global variables are set according to the options specified on the
47 * command line by the user.
49 static UOption options
[]={
50 /* 00 */ UOPTION_HELP_H
,
51 /* 01 */ UOPTION_HELP_QUESTION_MARK
,
52 /* 02 */ {"locale", NULL
, NULL
, NULL
, 'l', UOPT_REQUIRES_ARG
, 0},
53 /* 03 */ {"serialize", NULL
, NULL
, NULL
, 'z', UOPT_NO_ARG
, 0},
54 /* 04 */ UOPTION_DESTDIR
,
55 /* 05 */ UOPTION_SOURCEDIR
,
56 /* 06 */ {"attribute", NULL
, NULL
, NULL
, 'a', UOPT_REQUIRES_ARG
, 0},
57 /* 07 */ {"rule", NULL
, NULL
, NULL
, 'r', UOPT_REQUIRES_ARG
, 0},
58 /* 08 */ {"normalization", NULL
, NULL
, NULL
, 'n', UOPT_REQUIRES_ARG
, 0},
59 /* 09 */ {"scripts", NULL
, NULL
, NULL
, 't', UOPT_NO_ARG
, 0},
60 /* 10 */ {"reducehan", NULL
, NULL
, NULL
, 'e', UOPT_NO_ARG
, 0},
61 /* 11 */ UOPTION_VERBOSE
,
62 /* 12 */ {"wholescripts", NULL
, NULL
, NULL
, 'W', UOPT_NO_ARG
, 0}
66 * Collator used in this program
68 static UCollator
*COLLATOR_
;
70 * Output stream used in this program
74 static UColAttributeValue ATTRIBUTE_
[UCOL_ATTRIBUTE_COUNT
] = {
75 UCOL_DEFAULT
, UCOL_DEFAULT
, UCOL_DEFAULT
, UCOL_DEFAULT
, UCOL_DEFAULT
,
76 UCOL_DEFAULT
, UCOL_DEFAULT
, UCOL_DEFAULT
,
84 static const EnumNameValuePair ATTRIBUTE_NAME_
[] = {
85 {UCOL_FRENCH_COLLATION
, "UCOL_FRENCH_COLLATION"},
86 {UCOL_ALTERNATE_HANDLING
, "UCOL_ALTERNATE_HANDLING"},
87 {UCOL_CASE_FIRST
, "UCOL_CASE_FIRST"},
88 {UCOL_CASE_LEVEL
, "UCOL_CASE_LEVEL"},
89 {UCOL_NORMALIZATION_MODE
,
90 "UCOL_NORMALIZATION_MODE|UCOL_DECOMPOSITION_MODE"},
91 {UCOL_STRENGTH
, "UCOL_STRENGTH"},
92 {UCOL_HIRAGANA_QUATERNARY_MODE
, "UCOL_HIRAGANA_QUATERNARY_MODE"},
93 {UCOL_NUMERIC_COLLATION
, "UCOL_NUMERIC_COLLATION"},
97 static const EnumNameValuePair ATTRIBUTE_VALUE_
[] = {
98 {UCOL_PRIMARY
, "UCOL_PRIMARY"},
99 {UCOL_SECONDARY
, "UCOL_SECONDARY"},
100 {UCOL_TERTIARY
, "UCOL_TERTIARY|UCOL_DEFAULT_STRENGTH"},
101 {UCOL_QUATERNARY
, "UCOL_QUATERNARY"},
102 {UCOL_IDENTICAL
, "UCOL_IDENTICAL"},
103 {UCOL_OFF
, "UCOL_OFF"},
104 {UCOL_ON
, "UCOL_ON"},
105 {UCOL_SHIFTED
, "UCOL_SHIFTED"},
106 {UCOL_NON_IGNORABLE
, "UCOL_NON_IGNORABLE"},
107 {UCOL_LOWER_FIRST
, "UCOL_LOWER_FIRST"},
108 {UCOL_UPPER_FIRST
, "UCOL_UPPER_FIRST"},
114 int count
; // number of codepoint
119 * Writes the hexadecimal of a null-terminated array of codepoints into a
121 * @param f FILE stream to write to
122 * @param c codepoints array
124 void serialize(FILE *f
, const UChar
*c
)
128 fprintf(f
, " %04x", cp
);
132 fprintf(f
, " %04x", cp
);
137 * Writes the hexadecimal of a non-null-terminated array of codepoints into a
139 * @param f FILE stream to write to
140 * @param c codepoints array
141 * @param l codepoints array length
143 void serialize(FILE *f
, const UChar
*c
, int l
)
148 fprintf(f
, " %04x", cp
);
152 fprintf(f
, " %04x", cp
);
158 * Sets the iterator to the argument string and outputs the collation elements.
159 * @param f file output stream
160 * @param iter collation element iterator
162 void serialize(FILE *f
, UCollationElements
*iter
) {
163 UChar
*codepoint
= iter
->iteratordata_
.string
;
164 // unlikely that sortkeys will be over this size
166 uint8_t *psortkey
= sortkey
;
167 int sortkeylength
= 0;
169 if (iter
->iteratordata_
.flags
& UCOL_ITER_HASLEN
) {
170 serialize(f
, codepoint
, iter
->iteratordata_
.endp
- codepoint
);
171 sortkeylength
= ucol_getSortKey(iter
->iteratordata_
.coll
, codepoint
,
172 iter
->iteratordata_
.endp
- codepoint
, sortkey
, 64);
175 serialize(f
, codepoint
);
176 sortkeylength
= ucol_getSortKey(iter
->iteratordata_
.coll
, codepoint
,
179 if (options
[11].doesOccur
) {
180 serialize(stdout
, codepoint
);
181 fprintf(stdout
, "\n");
186 UErrorCode error
= U_ZERO_ERROR
;
187 uint32_t ce
= ucol_next(iter
, &error
);
188 if (U_FAILURE(error
)) {
189 fprintf(f
, "Error retrieving collation elements\n");
195 if (UCOL_PRIMARYORDER(ce
) != 0) {
196 fprintf(f
, "%04x", UCOL_PRIMARYORDER(ce
));
199 if (UCOL_SECONDARYORDER(ce
) != 0) {
200 fprintf(f
, " %02x", UCOL_SECONDARYORDER(ce
));
203 if (UCOL_TERTIARYORDER(ce
) != 0) {
204 fprintf(f
, " %02x", UCOL_TERTIARYORDER(ce
));
208 ce
= ucol_next(iter
, &error
);
209 if (ce
== UCOL_NULLORDER
) {
212 if (U_FAILURE(error
)) {
213 fprintf(stdout
, "Error retrieving collation elements");
218 if (sortkeylength
> 64) {
219 fprintf(f
, "Sortkey exceeds pre-allocated size");
224 fprintf(f
, "%02x", *psortkey
);
226 if ((*psortkey
) == 0) {
235 * Serializes the contraction within the given argument rule
236 * @param f file output stream
238 * @param rlen rule length
239 * @param contractiononly flag to indicate if only contractions are to be
240 * output or all collation elements
241 * @param iter iterator to iterate over collation elements
243 void serialize(FILE *f
, UChar
*rule
, int rlen
, UBool contractiononly
,
244 UCollationElements
*iter
) {
245 const UChar
*current
= NULL
;
246 uint32_t strength
= 0;
247 uint32_t chOffset
= 0;
249 uint32_t exOffset
= 0;
251 uint32_t prefixOffset
= 0;
252 uint32_t prefixLen
= 0;
257 UParseError parseError
;
258 UErrorCode error
= U_ZERO_ERROR
;
264 src
.end
= rule
+ rlen
;
265 src
.extraCurrent
= src
.end
;
266 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
269 while ((current
= ucol_tok_parseNextToken(&src
, rstart
, &parseError
,
271 chOffset
= src
.parsedToken
.charsOffset
;
272 chLen
= src
.parsedToken
.charsLen
;
273 // contractions handled here
274 if (!contractiononly
|| chLen
> 1) {
275 ucol_setText(iter
, rule
+ chOffset
, chLen
, &error
);
276 if (U_FAILURE(error
)) {
277 fprintf(stdout
, "Error setting text in iterator\n");
287 * Prints the attribute values in the argument collator into the output stream
290 void outputAttribute(UCollator
*collator
, UErrorCode
*error
)
292 UColAttribute attribute
= UCOL_FRENCH_COLLATION
;
293 while (attribute
< UCOL_ATTRIBUTE_COUNT
) {
296 // getting attribute name
297 if (ATTRIBUTE_NAME_
[count
].value
== attribute
) {
298 fprintf(OUTPUT_
, "%s = ", ATTRIBUTE_NAME_
[count
].name
);
304 int attributeval
= ucol_getAttribute(collator
, attribute
, error
);
305 if (U_FAILURE(*error
)) {
306 fprintf(stdout
, "Failure in reading collator attribute\n");
310 // getting attribute value
311 if (ATTRIBUTE_VALUE_
[count
].value
== attributeval
) {
312 fprintf(OUTPUT_
, "%s\n", ATTRIBUTE_VALUE_
[count
].name
);
317 attribute
= (UColAttribute
)(attribute
+ 1);
322 * Prints the normalization mode in the argument collator into the output stream
325 void outputNormalization(UCollator
*collator
)
327 UErrorCode status
= U_ZERO_ERROR
;
328 int normmode
= ucol_getAttribute(collator
, UCOL_NORMALIZATION_MODE
, &status
);
331 // getting attribute name
332 if (ATTRIBUTE_VALUE_
[count
].value
== normmode
) {
337 fprintf(OUTPUT_
, "NORMALIZATION MODE = %s\n",
338 ATTRIBUTE_VALUE_
[count
].name
);
342 * Output the collation element belonging to the locale into a file
343 * @param locale string
344 * @param tailoredonly flag to indicate if only tailored collation elements are to
345 * be output or all collation elements
347 void serialize(const char *locale
, UBool tailoredonly
) {
348 UErrorCode error
= U_ZERO_ERROR
;
352 fprintf(OUTPUT_
, "# This file contains the serialized collation elements\n");
353 fprintf(OUTPUT_
, "# as of the collation version indicated below.\n");
354 fprintf(OUTPUT_
, "# Data format: xxxx xxxx..; [yyyy, yy, yy] [yyyy, yy, yy] ... [yyyy, yy, yy] [zz zz..\n");
355 fprintf(OUTPUT_
, "# where xxxx are codepoints in hexadecimals,\n");
356 fprintf(OUTPUT_
, "# yyyyyyyy are the corresponding\n");
357 fprintf(OUTPUT_
, "# collation elements in hexadecimals\n");
358 fprintf(OUTPUT_
, "# and zz are the sortkey values in hexadecimals\n");
360 fprintf(OUTPUT_
, "\n# Collator information\n");
362 fprintf(OUTPUT_
, "\nLocale: %s\n", locale
);
363 fprintf(stdout
, "Locale: %s\n", locale
);
364 UVersionInfo version
;
365 ucol_getVersion(COLLATOR_
, version
);
366 fprintf(OUTPUT_
, "Version number: %d.%d.%d.%d\n",
367 version
[0], version
[1], version
[2], version
[3]);
368 outputAttribute(COLLATOR_
, &error
);
369 outputNormalization(COLLATOR_
);
371 UCollationElements
*iter
= ucol_openElements(COLLATOR_
, str
, strlen
,
373 if (U_FAILURE(error
)) {
374 fprintf(stdout
, "Error creating iterator\n");
379 fprintf(OUTPUT_
, "\n# Range of unicode characters\n\n");
380 UChar32 codepoint
= 0;
381 while (codepoint
<= UCHAR_MAX_VALUE
) {
382 if (u_isdefined(codepoint
)) {
384 UTF16_APPEND_CHAR_UNSAFE(str
, strlen
, codepoint
);
386 ucol_setText(iter
, str
, strlen
, &error
);
387 if (U_FAILURE(error
)) {
388 fprintf(stdout
, "Error setting text in iterator\n");
391 serialize(OUTPUT_
, iter
);
397 UChar ucarules
[0x10000];
399 int32_t rulelength
= 0;
403 int32_t rulelength
= 0;
404 const UChar
*temp
= ucol_getRules(COLLATOR_
, &rulelength
);
405 if (rulelength
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
> 0x10000) {
406 rules
= (UChar
*)malloc(sizeof(UChar
) *
407 (rulelength
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
));
409 memcpy(rules
, temp
, rulelength
* sizeof(UChar
));
410 rules
[rulelength
] = 0;
411 fprintf(OUTPUT_
, "\n# Tailorings\n\n");
412 serialize(OUTPUT_
, rules
, rulelength
, FALSE
, iter
);
413 if (rules
!= ucarules
) {
418 rulelength
= ucol_getRulesEx(COLLATOR_
, UCOL_FULL_RULES
, ucarules
,
420 if (rulelength
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
> 0x10000) {
421 rules
= (UChar
*)malloc(sizeof(UChar
) *
422 (rulelength
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
));
423 rulelength
= ucol_getRulesEx(COLLATOR_
, UCOL_FULL_RULES
, rules
,
426 fprintf(OUTPUT_
, "\n# Contractions\n\n");
427 serialize(OUTPUT_
, rules
, rulelength
, TRUE
, iter
);
428 if (rules
!= ucarules
) {
433 ucol_closeElements(iter
);
437 * Sets the collator with the attribute values
439 * @param error status
441 void setAttributes(UCollator
*collator
, UErrorCode
*error
)
444 while (count
< UCOL_ATTRIBUTE_COUNT
) {
445 if (ATTRIBUTE_
[count
] != UCOL_DEFAULT
) {
446 ucol_setAttribute(collator
, (UColAttribute
)count
,
447 ATTRIBUTE_
[count
], error
);
448 if (U_FAILURE(*error
)) {
457 * Appends directory path with an ending seperator if necessary.
458 * @param path with enough space to append one seperator
459 * @return new directory path length
461 int appendDirSeparator(char *dir
)
463 int dirlength
= strlen(dir
);
464 char dirending
= dir
[dirlength
- 1];
465 if (dirending
!= U_FILE_SEP_CHAR
) {
466 dir
[dirlength
] = U_FILE_SEP_CHAR
;
467 dir
[dirlength
+ 1] = 0;
468 return dirlength
+ 1;
474 * Output the collation element into a file
480 if (options
[4].doesOccur
) {
481 strcpy(filename
, options
[4].value
);
482 dirlength
= appendDirSeparator(filename
);
485 if (options
[2].doesOccur
) {
486 const char *locale
= (char *)options
[2].value
;
487 int32_t localeindex
= 0;
489 if (strcmp(locale
, "all") == 0) {
490 if (options
[4].doesOccur
) {
491 strcat(filename
, "UCA.txt");
492 OUTPUT_
= fopen(filename
, "w");
493 if (OUTPUT_
== NULL
) {
494 fprintf(stdout
, "Cannot open file:%s\n", filename
);
498 fprintf(stdout
, "UCA\n");
499 UErrorCode error
= U_ZERO_ERROR
;
500 COLLATOR_
= ucol_open("en_US", &error
);
501 if (U_FAILURE(error
)) {
502 fprintf(stdout
, "Collator creation failed:");
503 fprintf(stdout
, u_errorName(error
));
507 setAttributes(COLLATOR_
, &error
);
508 if (U_FAILURE(error
)) {
509 fprintf(stdout
, "Collator attribute setting failed:");
510 fprintf(stdout
, u_errorName(error
));
515 serialize("UCA", FALSE
);
517 if (options
[4].doesOccur
) {
518 filename
[dirlength
] = 0;
521 ucol_close(COLLATOR_
);
522 localeindex
= ucol_countAvailable() - 1;
523 fprintf(stdout
, "Number of locales: %d\n", localeindex
+ 1);
524 locale
= ucol_getAvailable(localeindex
);
528 UErrorCode error
= U_ZERO_ERROR
;
529 COLLATOR_
= ucol_open(locale
, &error
);
530 if (U_FAILURE(error
)) {
531 fprintf(stdout
, "Collator creation failed:");
532 fprintf(stdout
, u_errorName(error
));
536 setAttributes(COLLATOR_
, &error
);
537 if (U_FAILURE(error
)) {
538 fprintf(stdout
, "Collator attribute setting failed:");
539 fprintf(stdout
, u_errorName(error
));
544 if (options
[4].doesOccur
) {
545 strcat(filename
, locale
);
546 strcat(filename
, ".txt");
547 OUTPUT_
= fopen(filename
, "w");
548 if (OUTPUT_
== NULL
) {
549 fprintf(stdout
, "Cannot open file:%s\n", filename
);
554 if (options
[3].doesOccur
) {
555 serialize(locale
, TRUE
);
558 ucol_close(COLLATOR_
);
561 if (options
[4].doesOccur
) {
562 filename
[dirlength
] = 0;
567 if (localeindex
< 0) {
570 locale
= ucol_getAvailable(localeindex
);
574 if (options
[7].doesOccur
) {
575 char inputfilename
[128];
576 // rules are to be used
577 if (options
[5].doesOccur
) {
578 strcpy(inputfilename
, options
[5].value
);
579 appendDirSeparator(inputfilename
);
581 strcat(inputfilename
, options
[7].value
);
582 FILE *input
= fopen(inputfilename
, "r");
584 fprintf(stdout
, "Cannot open file:%s\n", filename
);
592 // synwee TODO: make this part dynamic
593 while (fscanf(input
, "%[^\n]s", s
) != EOF
) {
594 size
-= u_unescape(s
, prule
, size
);
595 prule
= prule
+ u_strlen(prule
);
599 if (options
[4].doesOccur
) {
600 strcat(filename
, "Rules.txt");
601 OUTPUT_
= fopen(filename
, "w");
602 if (OUTPUT_
== NULL
) {
603 fprintf(stdout
, "Cannot open file:%s\n", filename
);
608 fprintf(stdout
, "Rules\n");
609 UErrorCode error
= U_ZERO_ERROR
;
610 UParseError parseError
;
611 COLLATOR_
= ucol_openRules(rule
, u_strlen(rule
), UCOL_DEFAULT
,
612 UCOL_DEFAULT_STRENGTH
, &parseError
, &error
);
613 if (U_FAILURE(error
)) {
614 fprintf(stdout
, "Collator creation failed:");
615 fprintf(stdout
, u_errorName(error
));
619 setAttributes(COLLATOR_
, &error
);
620 if (U_FAILURE(error
)) {
621 fprintf(stdout
, "Collator attribute setting failed:");
622 fprintf(stdout
, u_errorName(error
));
627 serialize("Rule-based", TRUE
);
628 ucol_close(COLLATOR_
);
631 if (options
[4].doesOccur
) {
632 filename
[dirlength
] = 0;
639 * Parse for enum values.
640 * Note this only works for positive enum values.
641 * @param enumarray array containing names of the enum values in string and
642 * their corresponding value.
643 * declared enum value.
644 * @param str string to be parsed
645 * @return corresponding integer enum value or -1 if value is not found.
647 int parseEnums(const EnumNameValuePair enumarray
[], const char *str
)
649 const char *enumname
= enumarray
[0].name
;
650 int result
= atoi(str
);
651 if (result
== 0 && str
[0] != '0') {
652 while (strcmp(enumname
, str
) != 0) {
653 // checking for multiple enum names sharing the same values
654 enumname
= strstr(enumname
, str
);
655 if (enumname
!= NULL
) {
656 int size
= strchr(enumname
, '|') - enumname
;
658 size
= strlen(enumname
);
660 if (size
== (int)strlen(str
)) {
661 return enumarray
[result
].value
;
665 if (&(enumarray
[result
]) == NULL
) {
668 enumname
= enumarray
[result
].name
;
675 * Parser for attribute name value pair
677 void parseAttributes() {
679 const char *pname
= options
[6].value
;
680 const char *pend
= options
[6].value
+ strlen(options
[6].value
);
683 while (pname
< pend
) {
684 pvalue
= strchr(pname
, '=');
685 if (pvalue
== NULL
) {
687 "No matching value found for attribute argument %s\n",
691 int count
= pvalue
- pname
;
692 strncpy(str
, pname
, count
);
695 int name
= parseEnums(ATTRIBUTE_NAME_
, str
);
697 fprintf(stdout
, "Attribute name not found: %s\n", str
);
702 // getting corresponding enum value
703 pname
= strchr(pvalue
, ',');
707 count
= pname
- pvalue
;
708 strncpy(str
, pvalue
, count
);
710 int value
= parseEnums(ATTRIBUTE_VALUE_
, str
);
712 fprintf(stdout
, "Attribute value not found: %s\n", str
);
715 ATTRIBUTE_
[name
] = (UColAttributeValue
)value
;
721 * Checks if the locale argument is a base language
722 * @param locale to be checked
723 * @return TRUE if it is a base language
725 inline UBool
checkLocaleForLanguage(const char *locale
)
727 return strlen(locale
) <= 2;
731 * Converts a UChar array into its string form "xxxx xxxx"
732 * @param ch array of UChar characters
733 * @param count number of UChar characters
735 void outputUChar(UChar ch
[], int count
)
737 for (int i
= 0; i
< count
; i
++) {
738 fprintf(OUTPUT_
, "%04X ", ch
[i
]);
743 * If it is a primary difference returns -1 or 1.
744 * If it is a secondary difference returns -2 or 2.
745 * If it is a tertiary difference returns -3 or 3.
746 * If equals returns 0.
748 int compareSortKey(const void *elem1
, const void *elem2
)
750 // compare the 2 script element sort key
751 UChar
*ch1
= ((ScriptElement
*)elem1
)->ch
;
752 UChar
*ch2
= ((ScriptElement
*)elem2
)->ch
;
753 int size1
= ((ScriptElement
*)elem1
)->count
;
754 int size2
= ((ScriptElement
*)elem2
)->count
;
755 UErrorCode error
= U_ZERO_ERROR
;
757 ucol_setStrength(COLLATOR_
, UCOL_PRIMARY
);
758 int result
= ucol_strcoll(COLLATOR_
, ch1
, size1
, ch2
, size2
);
760 ucol_setStrength(COLLATOR_
, UCOL_SECONDARY
);
761 result
= ucol_strcoll(COLLATOR_
, ch1
, size1
, ch2
, size2
);
763 ucol_setStrength(COLLATOR_
, UCOL_TERTIARY
);
764 result
= ucol_strcoll(COLLATOR_
, ch1
, size1
, ch2
, size2
);
783 * Output serialized script elements
784 * @param element the element to output
785 * @param compare the comparison with the previous element
786 * @param expansion flags TRUE if element has an expansion
788 void outputScriptElem(ScriptElement
&element
, int compare
, UBool expansion
)
793 fprintf(OUTPUT_
, "<tr><td class='eq' title='[");
796 fprintf(OUTPUT_
, "<tr><td class='q' title='[");
801 fprintf(OUTPUT_
, "<tr><td class='ep' title='[");
804 fprintf(OUTPUT_
, "<tr><td class='p' title='[");
809 fprintf(OUTPUT_
, "<tr><td class='es' title='[");
812 fprintf(OUTPUT_
, "<tr><td class='s' title='[");
817 fprintf(OUTPUT_
, "<tr><td class='et' title='[");
820 fprintf(OUTPUT_
, "<tr><td class='t' title='[");
825 ucol_setStrength(COLLATOR_
, UCOL_TERTIARY
);
826 ucol_getSortKey(COLLATOR_
, element
.ch
, element
.count
, sortkey
, 32);
828 while (sortkey
[i
] != 0) {
829 if (sortkey
[i
] == 1) {
830 fprintf(OUTPUT_
, " | ");
833 fprintf(OUTPUT_
, "%02x", sortkey
[i
]);
839 fprintf(OUTPUT_
, "]'>");
841 UErrorCode error
= U_ZERO_ERROR
;
844 int32_t length
= unorm_normalize(element
.ch
, element
.count
, UNORM_NFC
, 0, nfc
,
846 if (U_FAILURE(error
)) {
847 fprintf(stdout
, "Error normalizing contractions to NFC\n");
849 u_strToUTF8(utf8
, 64, &length
, nfc
, length
, &error
);
850 if (U_FAILURE(error
)) {
851 fprintf(stdout
, "Error converting UChar to utf8\n");
855 fprintf(OUTPUT_
, "%s<br>", utf8
);
856 fprintf(OUTPUT_
, "<tt>");
857 outputUChar(element
.ch
, element
.count
);
860 fprintf(OUTPUT_
, "</tt></td><td> </td><td> </td><td> </td><td>Q</td><td>");
862 else if (compare
== -1) {
863 fprintf(OUTPUT_
, "</tt></td><td>P</td><td> </td><td> </td><td> </td><td>");
865 else if (compare
== -2) {
866 fprintf(OUTPUT_
, "</tt></td><td> </td><td>S</td><td> </td><td> </td><td>");
868 else if (compare
== -3) {
869 fprintf(OUTPUT_
, "</tt></td><td> </td><td> </td><td>T</td><td> </td><td>");
873 while (i
< element
.count
) {
876 UTF_NEXT_CHAR(element
.ch
, i
, element
.count
, codepoint
);
877 int32_t temp
= u_charName(codepoint
, U_UNICODE_CHAR_NAME
, str
, 128,
879 if (U_FAILURE(error
)) {
880 fprintf(stdout
, "Error getting character name\n");
883 if (element
.tailored
) {
884 fprintf(OUTPUT_
, "<b>");
886 fprintf(OUTPUT_
, "%s", str
);
887 if (element
.tailored
) {
888 fprintf(OUTPUT_
, " *</b>");
890 if (i
< element
.count
) {
891 fprintf(OUTPUT_
, "<br>\n");
895 fprintf(OUTPUT_
, "</td></tr>\n");
899 * Checks if codepoint belongs to scripts
901 * @param scriptcount number of scripts
902 * @param codepoint to test
903 * @return TRUE if codepoint belongs to scripts
905 UBool
checkInScripts(UScriptCode script
[], int scriptcount
,
908 UErrorCode error
= U_ZERO_ERROR
;
909 for (int i
= 0; i
< scriptcount
; i
++) {
910 if (script
[i
] == USCRIPT_HAN
&& options
[10].doesOccur
) {
911 if ((codepoint
>= 0x2E80 && codepoint
<= 0x2EE4) ||
912 (codepoint
>= 0x2A672 && codepoint
<= 0x2A6D6)) {
917 else if (uscript_getScript(codepoint
, &error
) == script
[i
]) {
920 if (U_FAILURE(error
)) {
921 fprintf(stdout
, "Error checking character in scripts\n");
929 * Checks if the set of codepoints belongs to the script
931 * @param scriptcount number of scripts
933 * @return TRUE if all codepoints belongs to the script
935 inline UBool
checkInScripts(UScriptCode script
[], int scriptcount
,
936 ScriptElement scriptelem
)
939 while (i
< scriptelem
.count
) {
941 UTF_NEXT_CHAR(scriptelem
.ch
, i
, scriptelem
.count
, codepoint
);
942 UErrorCode error
= U_ZERO_ERROR
;
943 if (checkInScripts(script
, scriptcount
, codepoint
)) {
951 * Gets the script elements and contractions belonging to the script
952 * @param scriptelem output list
953 * @param locale locale
954 * @return number of script elements
957 int getScriptElementsFromExemplars(ScriptElement scriptelem
[], const char* locale
) {
958 UErrorCode error
= U_ZERO_ERROR
;
959 UChar32 codepoint
= 0;
961 UResourceBundle
* ures
= ures_open(NULL
, locale
, &error
);
962 if (U_FAILURE(error
)) {
963 fprintf(stdout
, "Can not find resource bundle for locale: %s\n", locale
);
967 const UChar
* exemplarChars
= ures_getStringByKey(ures
, "ExemplarCharacters", &length
, &error
);
969 if (U_FAILURE(error
)) {
970 fprintf(stdout
, "Can not find ExemplarCharacters in resource bundle\n");
974 UChar
* upperChars
= new UChar
[length
*2];
975 if (upperChars
== 0) {
976 fprintf(stdout
, "Memory error\n");
980 int32_t destLength
= u_strToUpper(upperChars
, length
*2, exemplarChars
, -1, locale
, &error
);
981 if (U_FAILURE(error
)) {
982 fprintf(stdout
, "Error when u_strToUpper() \n");
986 UChar
* pattern
= new UChar
[length
+ destLength
+ 10];
987 UChar left
[2] = {0x005b, 0x0};
988 UChar right
[2] = {0x005d, 0x0};
989 pattern
= u_strcpy(pattern
, left
);
990 pattern
= u_strcat(pattern
, exemplarChars
);
991 pattern
= u_strcat(pattern
, upperChars
);
992 pattern
= u_strcat(pattern
, right
);
994 UnicodeSet
* uniset
= new UnicodeSet(UnicodeString(pattern
), error
);
995 if (U_FAILURE(error
)) {
996 fprintf(stdout
, "Can not open USet \n");
1000 UnicodeSetIterator
* usetiter
= new UnicodeSetIterator(*uniset
);
1004 while (usetiter
-> next()) {
1005 if (usetiter
-> isString()) {
1006 UnicodeString strItem
= usetiter
-> getString();
1008 scriptelem
[count
].count
= 0;
1009 for (int i
= 0; i
< strItem
.length(); i
++) {
1010 codepoint
= strItem
.char32At(i
);
1011 UTF16_APPEND_CHAR_UNSAFE(scriptelem
[count
].ch
,
1012 scriptelem
[count
].count
, codepoint
);
1013 scriptelem
[count
].tailored
= FALSE
;
1016 codepoint
= usetiter
-> getCodepoint();
1017 scriptelem
[count
].count
= 0;
1018 UTF16_APPEND_CHAR_UNSAFE(scriptelem
[count
].ch
,
1019 scriptelem
[count
].count
, codepoint
);
1020 scriptelem
[count
].tailored
= FALSE
;
1030 * Gets the script elements and contractions belonging to the script
1031 * @param script list
1032 * @param scriptcount number of scripts
1033 * @param scriptelem output list
1034 * @return number of script elements
1036 int getScriptElements(UScriptCode script
[], int scriptcount
,
1037 ScriptElement scriptelem
[])
1039 UErrorCode error
= U_ZERO_ERROR
;
1040 UChar32 codepoint
= 0;
1042 while (codepoint
<= UCHAR_MAX_VALUE
) {
1043 if (checkInScripts(script
, scriptcount
, codepoint
)) {
1044 scriptelem
[count
].count
= 0;
1045 UTF16_APPEND_CHAR_UNSAFE(scriptelem
[count
].ch
,
1046 scriptelem
[count
].count
, codepoint
);
1047 scriptelem
[count
].tailored
= FALSE
;
1050 if (U_FAILURE(error
)) {
1051 fprintf(stdout
, "Error determining codepoint in script\n");
1057 const UChar
*current
= NULL
;
1058 uint32_t strength
= 0;
1059 uint32_t chOffset
= 0;
1061 uint32_t exOffset
= 0;
1063 uint32_t prefixOffset
= 0;
1064 uint32_t prefixLen
= 0;
1066 UBool rstart
= TRUE
;
1067 UColTokenParser src
;
1069 UParseError parseError
;
1071 int32_t rulelength
= ucol_getRulesEx(COLLATOR_
, UCOL_FULL_RULES
, NULL
, 0);
1072 src
.source
= (UChar
*)malloc(sizeof(UChar
) *
1073 (rulelength
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
));
1074 rulelength
= ucol_getRulesEx(COLLATOR_
, UCOL_FULL_RULES
, src
.source
,
1076 src
.current
= src
.source
;
1077 src
.end
= src
.source
+ rulelength
;
1078 src
.extraCurrent
= src
.end
;
1079 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
1083 ucol_tok_parseNextToken(&src, &strength, &chOffset,
1084 &chLen, &exOffset, &exLen,
1085 &prefixOffset, &prefixLen,
1086 &specs, rstart, &parseError,
1089 while ((current
= ucol_tok_parseNextToken(&src
, rstart
, &parseError
,
1091 // contractions handled here
1093 u_strncpy(scriptelem
[count
].ch
, src
.source
+ chOffset
, chLen
);
1094 scriptelem
[count
].count
= chLen
;
1095 if (checkInScripts(script
, scriptcount
, scriptelem
[count
])) {
1096 scriptelem
[count
].tailored
= FALSE
;
1102 if (U_FAILURE(error
)) {
1103 fprintf(stdout
, "Error parsing rules: %s\n", u_errorName(error
));
1105 // rule might have been reallocated, so delete this instead
1110 int compareCodepoints(const void *elem1
, const void *elem2
)
1112 UChar
*ch1
= ((ScriptElement
*)elem1
)->ch
; // key
1113 UChar
*ch2
= ((ScriptElement
*)elem2
)->ch
;
1114 ch1
[((ScriptElement
*)elem1
)->count
] = 0;
1115 ch2
[((ScriptElement
*)elem2
)->count
] = 0;
1117 // compare the 2 codepoints
1118 return u_strcmp(ch1
, ch2
);
1121 UBool
hasSubNFD(ScriptElement
&se
, ScriptElement
&key
)
1124 UChar
*ch2
= key
.ch
; // key
1128 // compare the 2 codepoints
1129 if (u_strstr(ch1
, ch2
) != NULL
) {
1133 // check the decomposition
1135 UErrorCode error
= U_ZERO_ERROR
;
1136 int size
= unorm_normalize(ch1
, se
.count
, UNORM_NFD
, 0, norm
, 32,
1138 if (U_FAILURE(error
)) {
1139 fprintf(stdout
, "Error normalizing\n");
1141 if (u_strstr(norm
, ch2
) != NULL
) {
1148 * Marks tailored elements
1149 * @param script list
1150 * @param scriptcount number of scripts
1151 * @param scriptelem script element list
1152 * @param scriptelemlength size of the script element list
1154 void markTailored(UScriptCode script
[], int scriptcount
,
1155 ScriptElement scriptelem
[], int scriptelemlength
)
1158 const UChar
*rule
= ucol_getRules(COLLATOR_
, &rulelength
);
1160 const UChar
*current
= NULL
;
1161 uint32_t strength
= 0;
1162 uint32_t chOffset
= 0;
1164 uint32_t exOffset
= 0;
1166 uint32_t prefixOffset
= 0;
1167 uint32_t prefixLen
= 0;
1169 UBool rstart
= TRUE
;
1170 UColTokenParser src
;
1172 UParseError parseError
;
1175 src
.source
= (UChar
*)malloc(
1176 (rulelength
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
) * sizeof(UChar
));
1177 memcpy(src
.source
, rule
, rulelength
* sizeof(UChar
));
1178 src
.current
= src
.source
;
1179 src
.end
= (UChar
*)src
.source
+ rulelength
;
1180 src
.extraCurrent
= src
.end
;
1181 src
.extraEnd
= src
.end
+ UCOL_TOK_EXTRA_RULE_SPACE_SIZE
;
1183 UErrorCode error
= U_ZERO_ERROR
;
1185 while ((current
= ucol_tok_parseNextToken(&src
, rstart
, &parseError
,
1187 if (chLen
>= 1 && strength
!= UCOL_TOK_RESET
) {
1188 // skipping the reset characters and non useful stuff.
1190 u_strncpy(se
.ch
, src
.source
+ chOffset
, chLen
);
1193 if (checkInScripts(script
, scriptcount
, se
)) {
1195 ScriptElement *tse = (ScriptElement *)bsearch(&se, scriptelem,
1197 sizeof(ScriptElement),
1200 for (int i
= 0; i
< scriptelemlength
; i
++) {
1201 if (!scriptelem
[i
].tailored
&&
1202 hasSubNFD(scriptelem
[i
], se
)) {
1203 scriptelem
[i
].tailored
= TRUE
;
1211 if (U_FAILURE(error
)) {
1212 fprintf(stdout
, "Error parsing rules\n");
1217 * Checks if the collation iterator has more than 1 collation element
1218 * @param coleiter collation element iterator
1219 * @return TRUE if collation iterator has more than 1 collation element
1221 UBool
hasExpansions(UCollationElements
*coleiter
)
1223 UErrorCode error
= U_ZERO_ERROR
;
1224 int32_t ce
= ucol_next(coleiter
, &error
);
1227 if (U_FAILURE(error
)) {
1228 fprintf(stdout
, "Error getting next collation element\n");
1230 while (ce
!= UCOL_NULLORDER
) {
1231 if ((UCOL_PRIMARYORDER(ce
) != 0) && !isContinuation(ce
)) {
1237 ce
= ucol_next(coleiter
, &error
);
1238 if (U_FAILURE(error
)) {
1239 fprintf(stdout
, "Error getting next collation element\n");
1246 * Prints the footer for index.html
1247 * @param file output file
1249 void outputHTMLFooter()
1251 fprintf(OUTPUT_
, "</table>\n");
1252 fprintf(OUTPUT_
, "</body>\n");
1253 fprintf(OUTPUT_
, "</html>\n");
1257 * Serialize the codepoints from start to end into an html file.
1258 * Arranging them into ascending collation order.
1259 * @param script code list
1260 * @param scriptcount number of scripts
1262 //void serializeScripts(UScriptCode script[], int scriptcount)
1264 void serializeScripts(UScriptCode script
[], int scriptcount
, const char* locale
= NULL
)
1266 UErrorCode error
= U_ZERO_ERROR
;
1268 ScriptElement
*scriptelem
=
1269 (ScriptElement
*)malloc(sizeof(ScriptElement
) * 0x20000);
1270 if (scriptelem
== NULL
) {
1271 fprintf(stdout
, "Memory error\n");
1276 count
= getScriptElementsFromExemplars(scriptelem
, locale
);
1278 count
= getScriptElements(script
, scriptcount
, scriptelem
);
1281 // Sort script elements using Quicksort algorithm:
1282 qsort(scriptelem
, count
, sizeof(ScriptElement
), compareCodepoints
);
1283 markTailored(script
, scriptcount
, scriptelem
, count
);
1284 // Sort script elements using Quicksort algorithm:
1285 qsort(scriptelem
, count
, sizeof(ScriptElement
), compareSortKey
);
1287 UCollationElements
* coleiter
= ucol_openElements(COLLATOR_
,
1289 scriptelem
[0].count
,
1291 if (U_FAILURE(error
)) {
1292 fprintf(stdout
, "Error creating collation element iterator\n");
1296 outputScriptElem(scriptelem
[0], -1, hasExpansions(coleiter
));
1297 for (int i
= 0; i
< count
- 1; i
++) {
1298 ucol_setText(coleiter
, scriptelem
[i
+ 1].ch
, scriptelem
[i
+ 1].count
,
1300 if (U_FAILURE(error
)) {
1301 fprintf(stdout
, "Error setting text in collation element iterator\n");
1304 outputScriptElem(scriptelem
[i
+ 1],
1305 compareSortKey(scriptelem
+ i
, scriptelem
+ i
+ 1),
1306 hasExpansions(coleiter
));
1313 * Prints the header for the html
1314 * @param locale name
1316 * @param scriptcount number of scripts
// Writes the top of a locale's collation chart page to the global OUTPUT_
// stream: the HTML head, an explanatory HTML comment, a summary table
// (locale display name, scripts, rules link, collator version, selected
// collator attributes, generation date) and the opening of the result table
// together with its column-header row.
//   locale      - locale id; used for the display-name lookup and in the
//                 rules link
//   script      - script codes whose names are listed in the "Script(s)" row
//   scriptcount - used below as the number of entries of script to print;
//                 its declaration is not visible in this listing (TODO confirm)
// NOTE(review): this listing carries a stray number at the start of each
// original line and has lost some lines (braces, the rest of the parameter
// list, a few statements); the notes below only describe what is visible.
1318 void outputHTMLHeader(const char *locale
, UScriptCode script
[],
// Document head: UTF-8 content type, stylesheet link, title, base target.
1321 fprintf(OUTPUT_
, "<html>\n");
1322 fprintf(OUTPUT_
, "<head>\n");
1323 fprintf(OUTPUT_
, "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n");
1324 fprintf(OUTPUT_
, "<meta http-equiv=\"Content-Language\" content=\"en-us\">\n");
1325 fprintf(OUTPUT_
, "<link rel=\"stylesheet\" href=\"charts.css\" type=\"text/css\">\n");
1326 fprintf(OUTPUT_
, "<title>ICU Collation charts</title>\n");
1327 fprintf(OUTPUT_
, "<base target=\"main\">\n");
1328 fprintf(OUTPUT_
, "</head>\n");
1330 fprintf(OUTPUT_
, "<body bgcolor=#FFFFFF>\n");
// An HTML comment embedded in the page that explains the colour legend.
// NOTE(review): the comment is closed with "--!>" rather than "-->";
// confirm that the intended consumers treat this as a comment terminator.
1331 fprintf(OUTPUT_
, "<!--\n");
1332 fprintf(OUTPUT_
, "This file contains sorted characters in ascending order according to the locale stated\n");
1333 fprintf(OUTPUT_
, "If the character is in red, it is tailored in the collation rules.\n");
1334 fprintf(OUTPUT_
, "Background colours have certain meanings:\n");
1335 fprintf(OUTPUT_
, "White - equals the previous character\n");
1336 fprintf(OUTPUT_
, "dark blue - primary greater than the previous character\n");
1337 fprintf(OUTPUT_
, "blue - secondary greater than the previous character\n");
1338 fprintf(OUTPUT_
, "light blue - tertiary greater than the previous character\n");
1339 fprintf(OUTPUT_
, "--!>\n");
// Summary table. The locale's display name is requested in en_US as UTF-16
// (64 UChars) and then converted into a 128-byte UTF-8 buffer for printing.
1341 fprintf(OUTPUT_
, "<table border=0>\n");
1342 UChar displayname
[64];
1343 UErrorCode error
= U_ZERO_ERROR
;
1344 int32_t size
= uloc_getDisplayName(locale
, "en_US", displayname
, 64, &error
);
1345 char utf8displayname
[128];
// If the lookup failed, print an empty name instead.
1346 if (U_FAILURE(error
)) {
1347 utf8displayname
[0] = 0;
// Presumably the else-branch of the check above (the branch line itself is
// not visible in this listing): convert the display name to UTF-8.
1350 int32_t utf8size
= 0;
1351 u_strToUTF8(utf8displayname
, 128, &utf8size
, displayname
, size
, &error
);
1354 fprintf(OUTPUT_
, "<tr><th>Locale</th><td class='noborder'>%s</td></tr>\n", utf8displayname
);
// Script names, separated by ", " (no separator after the last one).
1355 fprintf(OUTPUT_
, "<tr><th>Script(s)</th>");
1356 fprintf(OUTPUT_
, "<td class='noborder'>");
1357 for (int i
= 0; i
< scriptcount
; i
++) {
1358 fprintf(OUTPUT_
, "%s", uscript_getName(script
[i
]));
1359 if (i
+ 1 != scriptcount
) {
1360 fprintf(OUTPUT_
, ", ");
1363 fprintf(OUTPUT_
, "</td></tr>\n");
// Link to the rule file; the locale id is used both in the URL and as the
// link text.
1365 fprintf(OUTPUT_
, "<tr><th>Rules</th><td class='noborder'><a href=\"http://dev.icu-project.org/cgi-bin/viewcvs.cgi/*checkout*/icu/source/data/coll/%s.txt\">%s.txt</a></td></tr>\n", locale
, locale
);
// Four-part version of the global collator.
1367 UVersionInfo version
;
1368 ucol_getVersion(COLLATOR_
, version
);
1369 fprintf(OUTPUT_
, "<tr><th>Collator version</th><td class='noborder'>%d.%d.%d.%d</td></tr>\n",
1370 version
[0], version
[1], version
[2], version
[3]);
// Walk every attribute from UCOL_FRENCH_COLLATION up to (excluding)
// UCOL_ATTRIBUTE_COUNT and print a row for each listed attribute whose value
// is neither UCOL_DEFAULT nor the value tested against in its branch.
// NOTE(review): no reset of error is visible between the display-name
// handling above and this loop; a failure left over from there would
// presumably also trip the check below -- confirm.
1372 UColAttribute attr
= UCOL_FRENCH_COLLATION
;
1373 while (attr
< UCOL_ATTRIBUTE_COUNT
) {
1374 UColAttributeValue value
= ucol_getAttribute(COLLATOR_
, attr
, &error
);
1375 if (U_FAILURE(error
)) {
1376 fprintf(stdout
, "Error getting attribute\n");
1379 if (value
!= UCOL_DEFAULT
) {
1380 if (attr
== UCOL_FRENCH_COLLATION
&& value
!= UCOL_OFF
) {
1381 fprintf(OUTPUT_
, "<tr><th>French Collation</th><td class='noborder'>on, code %d</td></tr>\n", value
);
// NOTE(review): unlike the other rows, this format has no space in "code%d".
1383 if (attr
== UCOL_ALTERNATE_HANDLING
&& value
!= UCOL_NON_IGNORABLE
) {
1384 fprintf(OUTPUT_
, "<tr><th>Alternate Handling</th><td class='noborder'>shifted, code%d</td></tr>\n", value
);
1386 if (attr
== UCOL_CASE_FIRST
&& value
!= UCOL_OFF
) {
1387 fprintf(OUTPUT_
, "<tr><th>Case First</th><td class='noborder'>on, code %d</td></tr>\n", value
);
1389 if (attr
== UCOL_CASE_LEVEL
&& value
!= UCOL_OFF
) {
1390 fprintf(OUTPUT_
, "<tr><th>Case Level</th><td class='noborder'>on, code %d</td></tr>\n", value
);
1392 if (attr
== UCOL_NORMALIZATION_MODE
&& value
!= UCOL_OFF
) {
1393 fprintf(OUTPUT_
, "<tr><th>Normalization</th><td class='noborder'>on, code %d</td></tr>\n", value
);
// Strength is reported only when it differs from UCOL_TERTIARY.
1395 if (attr
== UCOL_STRENGTH
&& value
!= UCOL_TERTIARY
) {
1396 fprintf(OUTPUT_
, "<tr><th>Strength</th><td class='noborder'>code %d</td></tr>\n", value
);
1398 if (attr
== UCOL_HIRAGANA_QUATERNARY_MODE
&& value
!= UCOL_OFF
) {
1399 fprintf(OUTPUT_
, "<tr><th>Hiragana Quaternary</th><td class='noborder'>on, code %d</td></tr>\n", value
);
// Step to the next attribute; the cast is needed because attr is an enum.
1402 attr
= (UColAttribute
)(attr
+ 1);
1405 // Get UNIX-style time and display as number and string.
// NOTE(review): "ctime(<ime" looks like an entity-decoding corruption of
// "ctime(&ltime)"; the declaration and assignment of that time variable are
// not visible in this listing -- confirm against the original file.
1408 fprintf(OUTPUT_
, "<tr><th>Date Generated</th><td class='noborder'>%s</td></tr>", ctime(<ime
));
1410 fprintf(OUTPUT_
, "</table>\n");
// Help and bug-report links, then the result table and its header row.
1412 fprintf(OUTPUT_
, "<p><a href=help.html>How to read the table</a><br>\n");
1413 fprintf(OUTPUT_
, "<a href=http://www.jtcsv.com/cgi-bin/icu-bugs/ target=new>Submit a bug</a></p>\n");
1414 fprintf(OUTPUT_
, "\n<table>\n");
1415 fprintf(OUTPUT_
, "\n<tr><th>Codepoint</th><th>P</th><th>S</th><th>T</th><th>Q</th><th>Name</th></tr>\n");
/**
 * Prints the header for index.html.
 *
 * Emits the document head, the opening <body> tag, the page heading, the
 * start of a centred paragraph and the link to the UCA charts. The <p>,
 * <body> and <html> elements are left open for the caller to fill and close.
 *
 * @param file output file; must be an open, writable stream
 */
void outputListHTMLHeader(FILE *file)
{
    /* The text is constant, so it is written with fputs (no format-string
       parsing); the bytes emitted are the same as one fprintf per line. */
    fputs("<html>\n"
          "<head>\n"
          "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n"
          "<meta http-equiv=\"Content-Language\" content=\"en-us\">\n"
          "<title>ICU Collation Charts</title>\n"
          "<base target=\"main\">\n"
          "</head>\n"
          "<body bgcolor=#FFFFFF>\n"
          "<h2 align=center>ICU Collation Charts</h2>\n"
          "<p align=center>\n"
          "<a href=http://www.unicode.org/charts/collation/ target=new>UCA Charts</a><br>",
          file);
}
/**
 * Prints the footer for index.html.
 *
 * Closes the <p>, <body> and <html> elements, in that order.
 *
 * @param file output file; must be an open, writable stream
 */
void outputListHTMLFooter(FILE *file)
{
    /* A centred logo image was once written between </p> and </body>:
       //fprintf(file, "<center><image src=http://oss.software.ibm.com/icu/images/w24.gif></center>\n");
       It is disabled and therefore not emitted. */
    fputs("</p>\n"
          "</body>\n"
          "</html>\n",
          file);
}
1450 * Gets all scripts and serializes their codepoints into an html file.
// Driver for the HTML chart output. As far as this listing shows, it writes
// an index page ("list.html") with one link per selected collation locale,
// and for each of those locales opens a collator, creates "<locale>.html",
// writes its header and hands over to one of the serializeScripts overloads.
// NOTE(review): this listing carries a stray number at the start of each
// original line and has lost some lines; the declarations of filename,
// dirlength, locale and localesize, the loop construct and several
// braces/branches are not visible. The notes below only describe what is
// visible.
1452 void serializeScripts() {
// options[4] is the destination-directory option: when given, filename starts
// with that directory followed by a separator, and dirlength receives the
// return value of appendDirSeparator (used below as the truncation point).
// NOTE(review): strcpy/strcat into filename are unbounded and the buffer
// size is not visible here -- confirm it can hold directory + name.
1456 if (options
[4].doesOccur
) {
1457 strcpy(filename
, options
[4].value
);
1458 dirlength
= appendDirSeparator(filename
);
// localelist is the index of the current locale among the available
// collation locales; localesize is how many there are.
1464 int32_t localelist
= 0;
1467 localesize
= ucol_countAvailable();
1468 locale
= ucol_getAvailable(localelist
);
// Open <dir>list.html for writing, then cut filename back to the directory
// part so that per-locale names can be appended later.
1470 strcat(filename
, "list.html");
1471 FILE *list
= fopen(filename
, "w");
1472 filename
[dirlength
] = 0;
// NOTE(review): filename has already been truncated above, so this message
// would show only the directory part, not "list.html" -- confirm intent.
1474 fprintf(stdout
, "Cannot open file: %s\n", filename
);
1478 outputListHTMLHeader(list
);
1479 fprintf(list
, "<blockquote>\n");
// Per-locale work (the enclosing loop statement is not visible in this
// listing): open the collator for the current locale into the global
// COLLATOR_.
1481 UErrorCode error
= U_ZERO_ERROR
;
1482 COLLATOR_
= ucol_open(locale
, &error
);
1483 if (U_FAILURE(error
)) {
1484 fprintf(stdout
, "Collator creation failed:");
// NOTE(review): the error name is passed as the format string itself.
1485 fprintf(stdout
, u_errorName(error
));
// Process the locale if opening it produced neither a fallback nor a default
// warning, or if checkLocaleForLanguage(locale) says so.
1488 if ((error
!= U_USING_FALLBACK_WARNING
&& // not tailored
1489 error
!= U_USING_DEFAULT_WARNING
) ||
1490 checkLocaleForLanguage(locale
)) {
// Add the locale's link to the index page, then apply the attribute settings
// through setAttributes.
1491 fprintf(list
, "<a href=%s.html>%s</a> ", locale
, locale
);
1492 setAttributes(COLLATOR_
, &error
);
1493 if (U_FAILURE(error
)) {
1494 fprintf(stdout
, "Collator attribute setting failed:");
1495 fprintf(stdout
, u_errorName(error
));
// Look up the script codes for the locale (capacity 32); the remaining
// arguments of this call are not visible in this listing.
1499 UScriptCode scriptcode
[32];
1500 uint32_t scriptcount
= uscript_getCode(locale
, scriptcode
, 32,
// NOTE(review): the message below contains a typo ("lcale").
1502 if (U_FAILURE(error
)) {
1503 fprintf(stdout
, "Error getting lcale scripts\n");
// Build <dir><locale>.html and make it the global output stream.
1507 strcat(filename
, locale
);
1508 strcat(filename
, ".html");
1509 OUTPUT_
= fopen(filename
, "w");
1510 if (OUTPUT_
== NULL
) {
1511 fprintf(stdout
, "Cannot open file:%s\n", filename
);
// Page header into OUTPUT_; the locale id is also echoed to stdout.
1514 outputHTMLHeader(locale
, scriptcode
, scriptcount
);
1515 fprintf(stdout
, "%s\n", locale
);
// options[12] is the "wholescripts" option: it selects the overload without
// the locale argument; the other call presumably sits in an else-branch that
// is not visible in this listing.
1517 if(options
[12].doesOccur
) {
1518 // use whole scripts
1519 serializeScripts(scriptcode
, scriptcount
);
1521 // use exemplar chars
1522 serializeScripts(scriptcode
, scriptcount
, locale
);
// Done with this locale: release the collator and restore filename to the
// directory part.
1526 ucol_close(COLLATOR_
);
1528 filename
[dirlength
] = 0;
// End-of-list check and fetch of the next locale; the statement that changes
// localelist is not visible in this listing.
1530 if (localelist
== localesize
) {
1533 locale
= ucol_getAvailable(localelist
);
// Finish the index page.
// NOTE(review): no fclose of list or OUTPUT_ is visible in this listing --
// confirm that the streams are closed in the original file.
1535 fprintf(list
, "<br><a href=help.html>help</a><br>");
1536 fprintf(list
, "</blockquote>\n");
1537 outputListHTMLFooter(list
);
1542 * Main -- process command line, read in and pre-process the test file,
1543 * call other functions to do the actual tests.
1545 int main(int argc
, char *argv
[]) {
1547 argc
= u_parseArgs(argc
, argv
, sizeof(options
)/sizeof(options
[0]),
1550 // error handling, printing usage message
1552 fprintf(stdout
, "error in command line argument: ");
1553 fprintf(stdout
, argv
[-argc
]);
1554 fprintf(stdout
, "\n");
1556 if (argc
< 0 || options
[0].doesOccur
|| options
[1].doesOccur
) {
1557 fprintf(stdout
, "Usage: dumpce options...\n"
1559 " Display this message.\n"
1560 "--locale name|all\n"
1561 " ICU locale to use. Default is en_US\n"
1563 " Serializes the collation elements in -locale or all locales available and outputs them into --outputdir/locale_ce.txt\n"
1564 "--destdir dir_name\n"
1565 " Path for outputing the serialized collation elements. Defaults to stdout if no defined\n"
1566 "--sourcedir dir_name\n"
1567 " Path for the input rule file for collation\n"
1568 "--attribute name=value,name=value...\n"
1569 " Pairs of attribute names and values for setting\n"
1571 " Name of file containing the collation rules.\n"
1572 "--normalizaton mode\n"
1573 " UNormalizationMode mode to be used.\n"
1575 " Codepoints from all scripts are sorted and serialized.\n"
1577 " Only 200 Han script characters will be displayed with the use of --scripts.\n"
1579 " Show collation order for whole scripts instead of just for exemplar characters of a locale\n\n");
1581 fprintf(stdout
, "Example to generate *.txt files : dumpce --serialize --locale af --destdir /temp --attribute UCOL_STRENGTH=UCOL_DEFAULT_STRENGTH,4=17\n\n");
1582 fprintf(stdout
, "Example to generate *.html files for oss web display: dumpce --scripts --destdir /temp --reducehan\n");
1583 return argc
< 0 ? U_ILLEGAL_ARGUMENT_ERROR
: U_ZERO_ERROR
;
1587 if (options
[6].doesOccur
) {
1588 fprintf(stdout
, "attributes %s\n", options
[6].value
);
1591 if (options
[3].doesOccur
) {
1594 if (options
[9].doesOccur
) {