]>
Commit | Line | Data |
---|---|---|
51004dcb A |
1 | /* |
2 | ********************************************************************** | |
57a6839d | 3 | * Copyright (C) 2002-2013, International Business Machines |
51004dcb A |
4 | * Corporation and others. All Rights Reserved. |
5 | ********************************************************************** | |
6 | * | |
7 | * File gendict.cpp | |
8 | */ | |
9 | ||
10 | #include "unicode/utypes.h" | |
11 | #include "unicode/uchar.h" | |
12 | #include "unicode/ucnv.h" | |
13 | #include "unicode/uniset.h" | |
14 | #include "unicode/unistr.h" | |
15 | #include "unicode/uclean.h" | |
16 | #include "unicode/udata.h" | |
17 | #include "unicode/putil.h" | |
18 | #include "unicode/ucharstriebuilder.h" | |
19 | #include "unicode/bytestriebuilder.h" | |
20 | #include "unicode/ucharstrie.h" | |
21 | #include "unicode/bytestrie.h" | |
22 | #include "unicode/ucnv.h" | |
23 | #include "unicode/utf16.h" | |
24 | ||
25 | #include "charstr.h" | |
26 | #include "dictionarydata.h" | |
27 | #include "uoptions.h" | |
28 | #include "unewdata.h" | |
29 | #include "cmemory.h" | |
30 | #include "uassert.h" | |
31 | #include "ucbuf.h" | |
32 | #include "toolutil.h" | |
33 | #include "cstring.h" | |
34 | ||
35 | #include <stdio.h> | |
36 | #include <stdlib.h> | |
37 | #include <string.h> | |
38 | ||
39 | #include "putilimp.h" | |
57a6839d | 40 | UDate startTime; |
51004dcb A |
41 | |
42 | static int elapsedTime() { | |
43 | return (int)uprv_floor((uprv_getRawUTCtime()-startTime)/1000.0); | |
44 | } | |
45 | ||
46 | #if U_PLATFORM_IMPLEMENTS_POSIX && !U_PLATFORM_HAS_WIN32_API | |
57a6839d | 47 | |
51004dcb A |
48 | #include <signal.h> |
49 | #include <unistd.h> | |
50 | ||
51 | const char *wToolname="gendict"; | |
52 | const char *wOutname="(some file)"; | |
53 | ||
54 | const int firstSeconds = 5; /* seconds between notices*/ | |
55 | const int nextSeconds = 15; /* seconds between notices*/ | |
56 | ||
57 | static void alarm_fn(int /*n*/) { | |
58 | printf("%s: still writing\t%s (%ds)\t...\n", wToolname, wOutname, elapsedTime()); | |
59 | ||
60 | signal(SIGALRM, &alarm_fn); | |
61 | alarm(nextSeconds); // reset the alarm | |
62 | } | |
63 | ||
64 | static void install_watchdog(const char *toolName, const char *outFileName) { | |
65 | wToolname=toolName; | |
66 | wOutname=outFileName; | |
67 | ||
51004dcb A |
68 | signal(SIGALRM, &alarm_fn); |
69 | ||
70 | alarm(firstSeconds); // set the alarm | |
71 | } | |
72 | ||
73 | #else | |
// Fallback for platforms that do not take the POSIX branch above:
// the watchdog is not implemented, so this is a no-op.
static void install_watchdog(const char * /*toolName*/, const char * /*outFileName*/) {
}
77 | #endif | |
78 | ||
79 | ||
80 | ||
81 | ||
82 | U_NAMESPACE_USE | |
83 | ||
84 | static char *progName; | |
85 | static UOption options[]={ | |
86 | UOPTION_HELP_H, /* 0 */ | |
87 | UOPTION_HELP_QUESTION_MARK, /* 1 */ | |
88 | UOPTION_VERBOSE, /* 2 */ | |
89 | UOPTION_ICUDATADIR, /* 4 */ | |
90 | UOPTION_COPYRIGHT, /* 5 */ | |
91 | { "uchars", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 6 */ | |
92 | { "bytes", NULL, NULL, NULL, '\1', UOPT_NO_ARG, 0}, /* 7 */ | |
93 | { "transform", NULL, NULL, NULL, '\1', UOPT_REQUIRES_ARG, 0}, /* 8 */ | |
94 | }; | |
95 | ||
// Indices into options[]; each value is the position of the matching
// entry in that table.
enum arguments {
    ARG_HELP       = 0,  // -h / --help
    ARG_QMARK      = 1,  // -?
    ARG_VERBOSE    = 2,  // verbose output
    ARG_ICUDATADIR = 3,  // ICU data directory
    ARG_COPYRIGHT  = 4,  // include copyright notice
    ARG_UCHARS     = 5,  // --uchars
    ARG_BYTES      = 6,  // --bytes
    ARG_TRANSFORM  = 7   // --transform <spec>
};
106 | ||
107 | // prints out the standard usage method describing command line arguments, | |
108 | // then bails out with the desired exit code | |
109 | static void usageAndDie(UErrorCode retCode) { | |
110 | fprintf((U_SUCCESS(retCode) ? stdout : stderr), "Usage: %s -trietype [-options] input-dictionary-file output-file\n", progName); | |
111 | fprintf((U_SUCCESS(retCode) ? stdout : stderr), | |
112 | "\tRead in a word list and write out a string trie dictionary\n" | |
113 | "options:\n" | |
114 | "\t-h or -? or --help this usage text\n" | |
115 | "\t-V or --version show a version message\n" | |
116 | "\t-c or --copyright include a copyright notice\n" | |
117 | "\t-v or --verbose turn on verbose output\n" | |
118 | "\t-i or --icudatadir directory for locating any needed intermediate data files,\n" // TODO: figure out if we need this option | |
119 | "\t followed by path, defaults to %s\n" | |
120 | "\t--uchars output a UCharsTrie (mutually exclusive with -b!)\n" | |
121 | "\t--bytes output a BytesTrie (mutually exclusive with -u!)\n" | |
122 | "\t--transform the kind of transform to use (eg --transform offset-40A3,\n" | |
123 | "\t which specifies an offset transform with constant 0x40A3)\n", | |
124 | u_getDataDirectory()); | |
125 | exit(retCode); | |
126 | } | |
127 | ||
128 | ||
129 | /* UDataInfo cf. udata.h */ | |
130 | static UDataInfo dataInfo = { | |
131 | sizeof(UDataInfo), | |
132 | 0, | |
133 | ||
134 | U_IS_BIG_ENDIAN, | |
135 | U_CHARSET_FAMILY, | |
136 | U_SIZEOF_UCHAR, | |
137 | 0, | |
138 | ||
139 | { 0x44, 0x69, 0x63, 0x74 }, /* "Dict" */ | |
140 | { 1, 0, 0, 0 }, /* format version */ | |
141 | { 0, 0, 0, 0 } /* data version */ | |
142 | }; | |
143 | ||
144 | #if !UCONFIG_NO_BREAK_ITERATION | |
145 | ||
146 | // A wrapper for both BytesTrieBuilder and UCharsTrieBuilder. | |
147 | // may want to put this somewhere in ICU, as it could be useful outside | |
148 | // of this tool? | |
149 | class DataDict { | |
150 | private: | |
151 | BytesTrieBuilder *bt; | |
152 | UCharsTrieBuilder *ut; | |
153 | UChar32 transformConstant; | |
154 | int32_t transformType; | |
155 | public: | |
156 | // constructs a new data dictionary. if there is an error, | |
157 | // it will be returned in status | |
158 | // isBytesTrie != 0 will produce a BytesTrieBuilder, | |
159 | // isBytesTrie == 0 will produce a UCharsTrieBuilder | |
160 | DataDict(UBool isBytesTrie, UErrorCode &status) : bt(NULL), ut(NULL), | |
161 | transformConstant(0), transformType(DictionaryData::TRANSFORM_NONE) { | |
162 | if (isBytesTrie) { | |
163 | bt = new BytesTrieBuilder(status); | |
164 | } else { | |
165 | ut = new UCharsTrieBuilder(status); | |
166 | } | |
167 | } | |
168 | ||
169 | ~DataDict() { | |
170 | delete bt; | |
171 | delete ut; | |
172 | } | |
173 | ||
174 | private: | |
175 | char transform(UChar32 c, UErrorCode &status) { | |
176 | if (transformType == DictionaryData::TRANSFORM_TYPE_OFFSET) { | |
177 | if (c == 0x200D) { return (char)0xFF; } | |
178 | else if (c == 0x200C) { return (char)0xFE; } | |
179 | int32_t delta = c - transformConstant; | |
180 | if (delta < 0 || 0xFD < delta) { | |
181 | fprintf(stderr, "Codepoint U+%04lx out of range for --transform offset-%04lx!\n", | |
182 | (long)c, (long)transformConstant); | |
183 | exit(U_ILLEGAL_ARGUMENT_ERROR); // TODO: should return and print the line number | |
184 | } | |
185 | return (char)delta; | |
186 | } else { // no such transform type | |
187 | status = U_INTERNAL_PROGRAM_ERROR; | |
188 | return (char)c; // it should be noted this transform type will not generally work | |
189 | } | |
190 | } | |
191 | ||
192 | void transform(const UnicodeString &word, CharString &buf, UErrorCode &errorCode) { | |
193 | UChar32 c = 0; | |
194 | int32_t len = word.length(); | |
195 | for (int32_t i = 0; i < len; i += U16_LENGTH(c)) { | |
196 | c = word.char32At(i); | |
197 | buf.append(transform(c, errorCode), errorCode); | |
198 | } | |
199 | } | |
200 | ||
201 | public: | |
202 | // sets the desired transformation data. | |
203 | // should be populated from a command line argument | |
204 | // so far the only acceptable format is offset-<hex constant> | |
205 | // eventually others (mask-<hex constant>?) may be enabled | |
206 | // more complex functions may be more difficult | |
207 | void setTransform(const char *t) { | |
208 | if (strncmp(t, "offset-", 7) == 0) { | |
209 | char *end; | |
210 | unsigned long base = uprv_strtoul(t + 7, &end, 16); | |
211 | if (end == (t + 7) || *end != 0 || base > 0x10FF80) { | |
212 | fprintf(stderr, "Syntax for offset value in --transform offset-%s invalid!\n", t + 7); | |
213 | usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | |
214 | } | |
215 | transformType = DictionaryData::TRANSFORM_TYPE_OFFSET; | |
216 | transformConstant = (UChar32)base; | |
217 | } | |
218 | else { | |
219 | fprintf(stderr, "Invalid transform specified: %s\n", t); | |
220 | usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | |
221 | } | |
222 | } | |
223 | ||
224 | // add a word to the trie | |
225 | void addWord(const UnicodeString &word, int32_t value, UErrorCode &status) { | |
226 | if (bt) { | |
227 | CharString buf; | |
228 | transform(word, buf, status); | |
229 | bt->add(buf.toStringPiece(), value, status); | |
230 | } | |
231 | if (ut) { ut->add(word, value, status); } | |
232 | } | |
233 | ||
234 | // if we are a bytestrie, give back the StringPiece representing the serialized version of us | |
235 | StringPiece serializeBytes(UErrorCode &status) { | |
236 | return bt->buildStringPiece(USTRINGTRIE_BUILD_SMALL, status); | |
237 | } | |
238 | ||
239 | // if we are a ucharstrie, produce the UnicodeString representing the serialized version of us | |
240 | void serializeUChars(UnicodeString &s, UErrorCode &status) { | |
241 | ut->buildUnicodeString(USTRINGTRIE_BUILD_SMALL, s, status); | |
242 | } | |
243 | ||
244 | int32_t getTransform() { | |
245 | return (int32_t)(transformType | transformConstant); | |
246 | } | |
247 | }; | |
248 | #endif | |
249 | ||
250 | static const UChar LINEFEED_CHARACTER = 0x000A; | |
251 | static const UChar CARRIAGE_RETURN_CHARACTER = 0x000D; | |
252 | ||
253 | static UBool readLine(UCHARBUF *f, UnicodeString &fileLine, IcuToolErrorCode &errorCode) { | |
254 | int32_t lineLength; | |
255 | const UChar *line = ucbuf_readline(f, &lineLength, errorCode); | |
256 | if(line == NULL || errorCode.isFailure()) { return FALSE; } | |
257 | // Strip trailing CR/LF, comments, and spaces. | |
258 | const UChar *comment = u_memchr(line, 0x23, lineLength); // '#' | |
259 | if(comment != NULL) { | |
260 | lineLength = (int32_t)(comment - line); | |
261 | } else { | |
262 | while(lineLength > 0 && (line[lineLength - 1] == CARRIAGE_RETURN_CHARACTER || line[lineLength - 1] == LINEFEED_CHARACTER)) { --lineLength; } | |
263 | } | |
264 | while(lineLength > 0 && u_isspace(line[lineLength - 1])) { --lineLength; } | |
265 | fileLine.setTo(FALSE, line, lineLength); | |
266 | return TRUE; | |
267 | } | |
268 | ||
269 | //---------------------------------------------------------------------------- | |
270 | // | |
271 | // main for gendict | |
272 | // | |
273 | //---------------------------------------------------------------------------- | |
274 | int main(int argc, char **argv) { | |
275 | // | |
276 | // Pick up and check the command line arguments, | |
277 | // using the standard ICU tool utils option handling. | |
278 | // | |
279 | U_MAIN_INIT_ARGS(argc, argv); | |
280 | progName = argv[0]; | |
281 | argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options); | |
282 | if(argc<0) { | |
283 | // Unrecognized option | |
284 | fprintf(stderr, "error in command line argument \"%s\"\n", argv[-argc]); | |
285 | usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | |
286 | } | |
287 | ||
288 | if(options[ARG_HELP].doesOccur || options[ARG_QMARK].doesOccur) { | |
289 | // -? or -h for help. | |
290 | usageAndDie(U_ZERO_ERROR); | |
291 | } | |
292 | ||
293 | UBool verbose = options[ARG_VERBOSE].doesOccur; | |
294 | ||
295 | if (argc < 3) { | |
296 | fprintf(stderr, "input and output file must both be specified.\n"); | |
297 | usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | |
298 | } | |
299 | const char *outFileName = argv[2]; | |
300 | const char *wordFileName = argv[1]; | |
301 | ||
57a6839d | 302 | startTime = uprv_getRawUTCtime(); // initialize start timer |
51004dcb A |
303 | // set up the watchdog |
304 | install_watchdog(progName, outFileName); | |
305 | ||
306 | if (options[ARG_ICUDATADIR].doesOccur) { | |
307 | u_setDataDirectory(options[ARG_ICUDATADIR].value); | |
308 | } | |
309 | ||
310 | const char *copyright = NULL; | |
311 | if (options[ARG_COPYRIGHT].doesOccur) { | |
312 | copyright = U_COPYRIGHT_STRING; | |
313 | } | |
314 | ||
315 | if (options[ARG_UCHARS].doesOccur == options[ARG_BYTES].doesOccur) { | |
316 | fprintf(stderr, "you must specify exactly one type of trie to output!\n"); | |
317 | usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | |
318 | } | |
319 | UBool isBytesTrie = options[ARG_BYTES].doesOccur; | |
320 | if (isBytesTrie != options[ARG_TRANSFORM].doesOccur) { | |
321 | fprintf(stderr, "you must provide a transformation for a bytes trie, and must not provide one for a uchars trie!\n"); | |
322 | usageAndDie(U_ILLEGAL_ARGUMENT_ERROR); | |
323 | } | |
324 | ||
325 | IcuToolErrorCode status("gendict/main()"); | |
326 | ||
327 | #if UCONFIG_NO_BREAK_ITERATION || UCONFIG_NO_FILE_IO | |
328 | const char* outDir=NULL; | |
329 | ||
330 | UNewDataMemory *pData; | |
331 | char msg[1024]; | |
332 | UErrorCode tempstatus = U_ZERO_ERROR; | |
333 | ||
334 | /* write message with just the name */ // potential for a buffer overflow here... | |
335 | sprintf(msg, "gendict writes dummy %s because of UCONFIG_NO_BREAK_ITERATION and/or UCONFIG_NO_FILE_IO, see uconfig.h", outFileName); | |
336 | fprintf(stderr, "%s\n", msg); | |
337 | ||
338 | /* write the dummy data file */ | |
339 | pData = udata_create(outDir, NULL, outFileName, &dataInfo, NULL, &tempstatus); | |
340 | udata_writeBlock(pData, msg, strlen(msg)); | |
341 | udata_finish(pData, &tempstatus); | |
342 | return (int)tempstatus; | |
343 | ||
344 | #else | |
345 | // Read in the dictionary source file | |
346 | if (verbose) { printf("Opening file %s...\n", wordFileName); } | |
347 | const char *codepage = "UTF-8"; | |
348 | UCHARBUF *f = ucbuf_open(wordFileName, &codepage, TRUE, FALSE, status); | |
349 | if (status.isFailure()) { | |
350 | fprintf(stderr, "error opening input file: ICU Error \"%s\"\n", status.errorName()); | |
351 | exit(status.reset()); | |
352 | } | |
353 | if (verbose) { printf("Initializing dictionary builder of type %s...\n", (isBytesTrie ? "BytesTrie" : "UCharsTrie")); } | |
354 | DataDict dict(isBytesTrie, status); | |
355 | if (status.isFailure()) { | |
356 | fprintf(stderr, "new DataDict: ICU Error \"%s\"\n", status.errorName()); | |
357 | exit(status.reset()); | |
358 | } | |
359 | if (options[ARG_TRANSFORM].doesOccur) { | |
360 | dict.setTransform(options[ARG_TRANSFORM].value); | |
361 | } | |
362 | ||
363 | UnicodeString fileLine; | |
364 | if (verbose) { puts("Adding words to dictionary..."); } | |
365 | UBool hasValues = FALSE; | |
366 | UBool hasValuelessContents = FALSE; | |
367 | int lineCount = 0; | |
57a6839d A |
368 | int wordCount = 0; |
369 | int minlen = 255; | |
370 | int maxlen = 0; | |
51004dcb A |
371 | UBool isOk = TRUE; |
372 | while (readLine(f, fileLine, status)) { | |
373 | lineCount++; | |
374 | if (fileLine.isEmpty()) continue; | |
375 | ||
376 | // Parse word [spaces value]. | |
377 | int32_t keyLen; | |
378 | for (keyLen = 0; keyLen < fileLine.length() && !u_isspace(fileLine[keyLen]); ++keyLen) {} | |
379 | if (keyLen == 0) { | |
380 | fprintf(stderr, "Error: no word on line %i!\n", lineCount); | |
381 | isOk = FALSE; | |
382 | continue; | |
383 | } | |
384 | int32_t valueStart; | |
385 | for (valueStart = keyLen; | |
386 | valueStart < fileLine.length() && u_isspace(fileLine[valueStart]); | |
387 | ++valueStart) {} | |
388 | ||
389 | if (keyLen < valueStart) { | |
390 | int32_t valueLength = fileLine.length() - valueStart; | |
391 | if (valueLength > 15) { | |
392 | fprintf(stderr, "Error: value too long on line %i!\n", lineCount); | |
393 | isOk = FALSE; | |
394 | continue; | |
395 | } | |
396 | char s[16]; | |
397 | fileLine.extract(valueStart, valueLength, s, 16, US_INV); | |
398 | char *end; | |
399 | unsigned long value = uprv_strtoul(s, &end, 0); | |
400 | if (end == s || *end != 0 || (int32_t)uprv_strlen(s) != valueLength || value > 0xffffffff) { | |
401 | fprintf(stderr, "Error: value syntax error or value too large on line %i!\n", lineCount); | |
402 | isOk = FALSE; | |
403 | continue; | |
404 | } | |
405 | dict.addWord(fileLine.tempSubString(0, keyLen), (int32_t)value, status); | |
406 | hasValues = TRUE; | |
57a6839d A |
407 | wordCount++; |
408 | if (keyLen < minlen) minlen = keyLen; | |
409 | if (keyLen > maxlen) maxlen = keyLen; | |
51004dcb A |
410 | } else { |
411 | dict.addWord(fileLine.tempSubString(0, keyLen), 0, status); | |
57a6839d A |
412 | hasValuelessContents = TRUE; |
413 | wordCount++; | |
414 | if (keyLen < minlen) minlen = keyLen; | |
415 | if (keyLen > maxlen) maxlen = keyLen; | |
51004dcb A |
416 | } |
417 | ||
418 | if (status.isFailure()) { | |
419 | fprintf(stderr, "ICU Error \"%s\": Failed to add word to trie at input line %d in input file\n", | |
420 | status.errorName(), lineCount); | |
421 | exit(status.reset()); | |
422 | } | |
423 | } | |
57a6839d | 424 | if (verbose) { printf("Processed %d lines, added %d words, minlen %d, maxlen %d\n", lineCount, wordCount, minlen, maxlen); } |
51004dcb A |
425 | |
426 | if (!isOk && status.isSuccess()) { | |
427 | status.set(U_ILLEGAL_ARGUMENT_ERROR); | |
428 | } | |
429 | if (hasValues && hasValuelessContents) { | |
430 | fprintf(stderr, "warning: file contained both valued and unvalued strings!\n"); | |
431 | } | |
432 | ||
57a6839d | 433 | if (verbose) { printf("Serializing data...isBytesTrie? %d\n", isBytesTrie); } |
51004dcb A |
434 | int32_t outDataSize; |
435 | const void *outData; | |
436 | UnicodeString usp; | |
437 | if (isBytesTrie) { | |
438 | StringPiece sp = dict.serializeBytes(status); | |
439 | outDataSize = sp.size(); | |
440 | outData = sp.data(); | |
441 | } else { | |
442 | dict.serializeUChars(usp, status); | |
443 | outDataSize = usp.length() * U_SIZEOF_UCHAR; | |
444 | outData = usp.getBuffer(); | |
445 | } | |
446 | if (status.isFailure()) { | |
57a6839d | 447 | fprintf(stderr, "gendict: got failure of type %s while serializing, if U_ILLEGAL_ARGUMENT_ERROR possibly due to duplicate dictionary entries\n", status.errorName()); |
51004dcb A |
448 | exit(status.reset()); |
449 | } | |
450 | if (verbose) { puts("Opening output file..."); } | |
451 | UNewDataMemory *pData = udata_create(NULL, NULL, outFileName, &dataInfo, copyright, status); | |
452 | if (status.isFailure()) { | |
453 | fprintf(stderr, "gendict: could not open output file \"%s\", \"%s\"\n", outFileName, status.errorName()); | |
454 | exit(status.reset()); | |
455 | } | |
456 | ||
457 | if (verbose) { puts("Writing to output file..."); } | |
458 | int32_t indexes[DictionaryData::IX_COUNT] = { | |
459 | DictionaryData::IX_COUNT * sizeof(int32_t), 0, 0, 0, 0, 0, 0, 0 | |
460 | }; | |
461 | int32_t size = outDataSize + indexes[DictionaryData::IX_STRING_TRIE_OFFSET]; | |
462 | indexes[DictionaryData::IX_RESERVED1_OFFSET] = size; | |
463 | indexes[DictionaryData::IX_RESERVED2_OFFSET] = size; | |
464 | indexes[DictionaryData::IX_TOTAL_SIZE] = size; | |
465 | ||
466 | indexes[DictionaryData::IX_TRIE_TYPE] = isBytesTrie ? DictionaryData::TRIE_TYPE_BYTES : DictionaryData::TRIE_TYPE_UCHARS; | |
467 | if (hasValues) { | |
468 | indexes[DictionaryData::IX_TRIE_TYPE] |= DictionaryData::TRIE_HAS_VALUES; | |
469 | } | |
470 | ||
471 | indexes[DictionaryData::IX_TRANSFORM] = dict.getTransform(); | |
472 | udata_writeBlock(pData, indexes, sizeof(indexes)); | |
473 | udata_writeBlock(pData, outData, outDataSize); | |
474 | size_t bytesWritten = udata_finish(pData, status); | |
475 | if (status.isFailure()) { | |
476 | fprintf(stderr, "gendict: error \"%s\" writing the output file\n", status.errorName()); | |
477 | exit(status.reset()); | |
478 | } | |
479 | ||
480 | if (bytesWritten != (size_t)size) { | |
481 | fprintf(stderr, "Error writing to output file \"%s\"\n", outFileName); | |
482 | exit(U_INTERNAL_PROGRAM_ERROR); | |
483 | } | |
484 | ||
485 | printf("%s: done writing\t%s (%ds).\n", progName, outFileName, elapsedTime()); | |
486 | ||
487 | #ifdef TEST_GENDICT | |
488 | if (isBytesTrie) { | |
489 | BytesTrie::Iterator it(outData, outDataSize, status); | |
490 | while (it.hasNext()) { | |
491 | it.next(status); | |
492 | const StringPiece s = it.getString(); | |
493 | int32_t val = it.getValue(); | |
494 | printf("%s -> %i\n", s.data(), val); | |
495 | } | |
496 | } else { | |
497 | UCharsTrie::Iterator it((const UChar *)outData, outDataSize, status); | |
498 | while (it.hasNext()) { | |
499 | it.next(status); | |
500 | const UnicodeString s = it.getString(); | |
501 | int32_t val = it.getValue(); | |
502 | char tmp[1024]; | |
503 | s.extract(0, s.length(), tmp, 1024); | |
504 | printf("%s -> %i\n", tmp, val); | |
505 | } | |
506 | } | |
507 | #endif | |
508 | ||
509 | return 0; | |
510 | #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ | |
511 | } |