]> git.saurik.com Git - apple/icu.git/blame - icuSources/samples/ustring/ustring.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / samples / ustring / ustring.cpp
CommitLineData
b75a7d8f
A
1/*
2*******************************************************************************
3*
f3c0d7a5
A
4* © 2016 and later: Unicode, Inc. and others.
5* License & terms of use: http://www.unicode.org/copyright.html#License
6*
7*******************************************************************************
8*******************************************************************************
9*
b331163b 10* Copyright (C) 2000-2014, International Business Machines
b75a7d8f
A
11* Corporation and others. All Rights Reserved.
12*
13*******************************************************************************
14* file name: ustring.cpp
f3c0d7a5 15* encoding: UTF-8
b75a7d8f
A
16* tab size: 8 (not used)
17* indentation:4
18*
19* created on: 2000aug15
20* created by: Markus W. Scherer
21*
22* This file contains sample code that illustrates the use of Unicode strings
23* with ICU.
24*/
25
3d1f044b
A
26#define __STDC_FORMAT_MACROS 1
27#include <inttypes.h>
28
b75a7d8f
A
29#include <stdio.h>
30#include "unicode/utypes.h"
31#include "unicode/uchar.h"
32#include "unicode/locid.h"
33#include "unicode/ustring.h"
34#include "unicode/ucnv.h"
35#include "unicode/unistr.h"
36
0f5d89e8
A
37using namespace icu;
38
f3c0d7a5
A
39#ifndef UPRV_LENGTHOF
40#define UPRV_LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))
41#endif
42
b75a7d8f
A
43// helper functions -------------------------------------------------------- ***
44
45// default converter for the platform encoding
46static UConverter *cnv=NULL;
47
48static void
49printUString(const char *announce, const UChar *s, int32_t length) {
50 static char out[200];
51 UChar32 c;
52 int32_t i;
53 UErrorCode errorCode=U_ZERO_ERROR;
54
55 /*
56 * Convert to the "platform encoding". See notes in printUnicodeString().
57 * ucnv_fromUChars(), like most ICU APIs understands length==-1
58 * to mean that the string is NUL-terminated.
59 */
60 ucnv_fromUChars(cnv, out, sizeof(out), s, length, &errorCode);
61 if(U_FAILURE(errorCode) || errorCode==U_STRING_NOT_TERMINATED_WARNING) {
62 printf("%sproblem converting string from Unicode: %s\n", announce, u_errorName(errorCode));
63 return;
64 }
65
66 printf("%s%s {", announce, out);
67
68 /* output the code points (not code units) */
69 if(length>=0) {
70 /* s is not NUL-terminated */
71 for(i=0; i<length; /* U16_NEXT post-increments */) {
72 U16_NEXT(s, i, length, c);
73 printf(" %04x", c);
74 }
75 } else {
76 /* s is NUL-terminated */
77 for(i=0; /* condition in loop body */; /* U16_NEXT post-increments */) {
78 U16_NEXT(s, i, length, c);
79 if(c==0) {
80 break;
81 }
82 printf(" %04x", c);
83 }
84 }
85 printf(" }\n");
86}
87
88static void
89printUnicodeString(const char *announce, const UnicodeString &s) {
90 static char out[200];
91 int32_t i, length;
92
93 // output the string, converted to the platform encoding
94
95 // Note for Windows: The "platform encoding" defaults to the "ANSI codepage",
96 // which is different from the "OEM codepage" in the console window.
97 // However, if you pipe the output into a file and look at it with Notepad
98 // or similar, then "ANSI" characters will show correctly.
99 // Production code should be aware of what encoding is required,
100 // and use a UConverter or at least a charset name explicitly.
101 out[s.extract(0, 99, out)]=0;
102 printf("%s%s {", announce, out);
103
104 // output the code units (not code points)
105 length=s.length();
106 for(i=0; i<length; ++i) {
107 printf(" %04x", s.charAt(i));
108 }
109 printf(" }\n");
110}
111
112// sample code for utf.h macros -------------------------------------------- ***
113
114static void
115demo_utf_h_macros() {
116 static UChar input[]={ 0x0061, 0xd800, 0xdc00, 0xdbff, 0xdfff, 0x0062 };
117 UChar32 c;
118 int32_t i;
119 UBool isError;
120
121 printf("\n* demo_utf_h_macros() -------------- ***\n\n");
122
b331163b
A
123 printUString("iterate forward through: ", input, UPRV_LENGTHOF(input));
124 for(i=0; i<UPRV_LENGTHOF(input); /* U16_NEXT post-increments */) {
b75a7d8f
A
125 /* Iterating forwards
126 Codepoint at offset 0: U+0061
127 Codepoint at offset 1: U+10000
128 Codepoint at offset 3: U+10ffff
129 Codepoint at offset 5: U+0062
130 */
131 printf("Codepoint at offset %d: U+", i);
b331163b 132 U16_NEXT(input, i, UPRV_LENGTHOF(input), c);
b75a7d8f
A
133 printf("%04x\n", c);
134 }
135
136 puts("");
137
138 isError=FALSE;
139 i=1; /* write position, gets post-incremented so needs to be in an l-value */
b331163b 140 U16_APPEND(input, i, UPRV_LENGTHOF(input), 0x0062, isError);
b75a7d8f 141
b331163b
A
142 printUString("iterate backward through: ", input, UPRV_LENGTHOF(input));
143 for(i=UPRV_LENGTHOF(input); i>0; /* U16_PREV pre-decrements */) {
b75a7d8f
A
144 U16_PREV(input, 0, i, c);
145 /* Iterating backwards
146 Codepoint at offset 5: U+0062
147 Codepoint at offset 3: U+10ffff
148 Codepoint at offset 2: U+dc00 -- unpaired surrogate because lead surr. overwritten
149 Codepoint at offset 1: U+0062 -- by this BMP code point
150 Codepoint at offset 0: U+0061
151 */
152 printf("Codepoint at offset %d: U+%04x\n", i, c);
153 }
154}
155
156// sample code for Unicode strings in C ------------------------------------ ***
157
158static void demo_C_Unicode_strings() {
159 printf("\n* demo_C_Unicode_strings() --------- ***\n\n");
160
161 static const UChar text[]={ 0x41, 0x42, 0x43, 0 }; /* "ABC" */
162 static const UChar appendText[]={ 0x61, 0x62, 0x63, 0 }; /* "abc" */
163 static const UChar cmpText[]={ 0x61, 0x53, 0x73, 0x43, 0 }; /* "aSsC" */
164 UChar buffer[32];
165 int32_t compare;
166 int32_t length=u_strlen(text); /* length=3 */
167
168 /* simple ANSI C-style functions */
169 buffer[0]=0; /* empty, NUL-terminated string */
170 u_strncat(buffer, text, 1); /* append just n=1 character ('A') */
171 u_strcat(buffer, appendText); /* buffer=="Aabc" */
172 length=u_strlen(buffer); /* length=4 */
173 printUString("should be \"Aabc\": ", buffer, -1);
174
175 /* bitwise comparing buffer with text */
176 compare=u_strcmp(buffer, text);
177 if(compare<=0) {
178 printf("String comparison error, expected \"Aabc\" > \"ABC\"\n");
179 }
180
181 /* Build "A<sharp s>C" in the buffer... */
182 u_strcpy(buffer, text);
183 buffer[1]=0xdf; /* sharp s, case-compares equal to "ss" */
184 printUString("should be \"A<sharp s>C\": ", buffer, -1);
185
186 /* Compare two strings case-insensitively using full case folding */
187 compare=u_strcasecmp(buffer, cmpText, U_FOLD_CASE_DEFAULT);
188 if(compare!=0) {
189 printf("String case insensitive comparison error, expected \"AbC\" to be equal to \"ABC\"\n");
190 }
191}
192
193// sample code for case mappings with C APIs -------------------------------- ***
194
195static void demoCaseMapInC() {
196 /*
197 * input=
198 * "aB<capital sigma>"
199 * "iI<small dotless i><capital dotted I> "
200 * "<sharp s> <small lig. ffi>"
201 * "<small final sigma><small sigma><capital sigma>"
202 */
203 static const UChar input[]={
204 0x61, 0x42, 0x3a3,
205 0x69, 0x49, 0x131, 0x130, 0x20,
206 0xdf, 0x20, 0xfb03,
207 0x3c2, 0x3c3, 0x3a3, 0
208 };
209 UChar buffer[32];
210
211 UErrorCode errorCode;
212 UChar32 c;
213 int32_t i, j, length;
214 UBool isError;
215
216 printf("\n* demoCaseMapInC() ----------------- ***\n\n");
217
218 /*
219 * First, use simple case mapping functions which provide
220 * 1:1 code point mappings without context/locale ID.
221 *
222 * Note that some mappings will not be "right" because some "real"
223 * case mappings require context, depend on the locale ID,
224 * and/or result in a change in the number of code points.
225 */
226 printUString("input string: ", input, -1);
227
228 /* uppercase */
229 isError=FALSE;
b331163b 230 for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
b75a7d8f
A
231 U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
232 if(c==0) {
233 break; /* stop at terminating NUL, no need to terminate buffer */
234 }
235 c=u_toupper(c);
b331163b 236 U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
b75a7d8f
A
237 }
238 printUString("simple-uppercased: ", buffer, j);
239 /* lowercase */
240 isError=FALSE;
b331163b 241 for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
b75a7d8f
A
242 U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
243 if(c==0) {
244 break; /* stop at terminating NUL, no need to terminate buffer */
245 }
246 c=u_tolower(c);
b331163b 247 U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
b75a7d8f
A
248 }
249 printUString("simple-lowercased: ", buffer, j);
250 /* titlecase */
251 isError=FALSE;
b331163b 252 for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
b75a7d8f
A
253 U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
254 if(c==0) {
255 break; /* stop at terminating NUL, no need to terminate buffer */
256 }
257 c=u_totitle(c);
b331163b 258 U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
b75a7d8f
A
259 }
260 printUString("simple-titlecased: ", buffer, j);
261 /* case-fold/default */
262 isError=FALSE;
b331163b 263 for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
b75a7d8f
A
264 U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
265 if(c==0) {
266 break; /* stop at terminating NUL, no need to terminate buffer */
267 }
268 c=u_foldCase(c, U_FOLD_CASE_DEFAULT);
b331163b 269 U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
b75a7d8f
A
270 }
271 printUString("simple-case-folded/default: ", buffer, j);
272 /* case-fold/Turkic */
273 isError=FALSE;
b331163b 274 for(i=j=0; j<UPRV_LENGTHOF(buffer) && !isError; /* U16_NEXT post-increments */) {
b75a7d8f
A
275 U16_NEXT(input, i, INT32_MAX, c); /* without length because NUL-terminated */
276 if(c==0) {
277 break; /* stop at terminating NUL, no need to terminate buffer */
278 }
279 c=u_foldCase(c, U_FOLD_CASE_EXCLUDE_SPECIAL_I);
b331163b 280 U16_APPEND(buffer, j, UPRV_LENGTHOF(buffer), c, isError);
b75a7d8f
A
281 }
282 printUString("simple-case-folded/Turkic: ", buffer, j);
283
284 /*
285 * Second, use full case mapping functions which provide
286 * 1:n code point mappings (n can be 0!) and are sensitive to context and locale ID.
287 *
288 * Note that lower/upper/titlecasing take a locale ID while case-folding
289 * has bit flag options instead, by design of the Unicode SpecialCasing.txt UCD file.
290 *
291 * Also, string titlecasing requires a BreakIterator to find starts of words.
292 * The sample code here passes in a NULL pointer; u_strToTitle() will open and close a default
293 * titlecasing BreakIterator automatically.
294 * For production code where many strings are titlecased it would be more efficient
295 * to open a BreakIterator externally and pass it in.
296 */
297 printUString("\ninput string: ", input, -1);
298
299 /* lowercase/English */
300 errorCode=U_ZERO_ERROR;
b331163b 301 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
b75a7d8f
A
302 if(U_SUCCESS(errorCode)) {
303 printUString("full-lowercased/en: ", buffer, length);
304 } else {
3d1f044b 305 printf("error in u_strToLower(en)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
306 }
307 /* lowercase/Turkish */
308 errorCode=U_ZERO_ERROR;
b331163b 309 length=u_strToLower(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
b75a7d8f
A
310 if(U_SUCCESS(errorCode)) {
311 printUString("full-lowercased/tr: ", buffer, length);
312 } else {
3d1f044b 313 printf("error in u_strToLower(tr)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
314 }
315 /* uppercase/English */
316 errorCode=U_ZERO_ERROR;
b331163b 317 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "en", &errorCode);
b75a7d8f
A
318 if(U_SUCCESS(errorCode)) {
319 printUString("full-uppercased/en: ", buffer, length);
320 } else {
3d1f044b 321 printf("error in u_strToUpper(en)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
322 }
323 /* uppercase/Turkish */
324 errorCode=U_ZERO_ERROR;
b331163b 325 length=u_strToUpper(buffer, UPRV_LENGTHOF(buffer), input, -1, "tr", &errorCode);
b75a7d8f
A
326 if(U_SUCCESS(errorCode)) {
327 printUString("full-uppercased/tr: ", buffer, length);
328 } else {
3d1f044b 329 printf("error in u_strToUpper(tr)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
330 }
331 /* titlecase/English */
332 errorCode=U_ZERO_ERROR;
b331163b 333 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "en", &errorCode);
b75a7d8f
A
334 if(U_SUCCESS(errorCode)) {
335 printUString("full-titlecased/en: ", buffer, length);
336 } else {
3d1f044b 337 printf("error in u_strToTitle(en)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
338 }
339 /* titlecase/Turkish */
340 errorCode=U_ZERO_ERROR;
b331163b 341 length=u_strToTitle(buffer, UPRV_LENGTHOF(buffer), input, -1, NULL, "tr", &errorCode);
b75a7d8f
A
342 if(U_SUCCESS(errorCode)) {
343 printUString("full-titlecased/tr: ", buffer, length);
344 } else {
3d1f044b 345 printf("error in u_strToTitle(tr)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
346 }
347 /* case-fold/default */
348 errorCode=U_ZERO_ERROR;
b331163b 349 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_DEFAULT, &errorCode);
b75a7d8f
A
350 if(U_SUCCESS(errorCode)) {
351 printUString("full-case-folded/default: ", buffer, length);
352 } else {
3d1f044b 353 printf("error in u_strFoldCase(default)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
354 }
355 /* case-fold/Turkic */
356 errorCode=U_ZERO_ERROR;
b331163b 357 length=u_strFoldCase(buffer, UPRV_LENGTHOF(buffer), input, -1, U_FOLD_CASE_EXCLUDE_SPECIAL_I, &errorCode);
b75a7d8f
A
358 if(U_SUCCESS(errorCode)) {
359 printUString("full-case-folded/Turkic: ", buffer, length);
360 } else {
3d1f044b 361 printf("error in u_strFoldCase(Turkic)=%" PRId32 " error=%s\n", length, u_errorName(errorCode));
b75a7d8f
A
362 }
363}
364
365// sample code for case mappings with C++ APIs ------------------------------ ***
366
367static void demoCaseMapInCPlusPlus() {
368 /*
369 * input=
370 * "aB<capital sigma>"
371 * "iI<small dotless i><capital dotted I> "
372 * "<sharp s> <small lig. ffi>"
373 * "<small final sigma><small sigma><capital sigma>"
374 */
375 static const UChar input[]={
376 0x61, 0x42, 0x3a3,
377 0x69, 0x49, 0x131, 0x130, 0x20,
378 0xdf, 0x20, 0xfb03,
379 0x3c2, 0x3c3, 0x3a3, 0
380 };
381
382 printf("\n* demoCaseMapInCPlusPlus() --------- ***\n\n");
383
384 UnicodeString s(input), t;
385 const Locale &en=Locale::getEnglish();
386 Locale tr("tr");
387
388 /*
389 * Full case mappings as in demoCaseMapInC(), using UnicodeString functions.
390 * These functions modify the string object itself.
391 * Since we want to keep the input string around, we copy it each time
392 * and case-map the copy.
393 */
394 printUnicodeString("input string: ", s);
395
396 /* lowercase/English */
397 printUnicodeString("full-lowercased/en: ", (t=s).toLower(en));
398 /* lowercase/Turkish */
399 printUnicodeString("full-lowercased/tr: ", (t=s).toLower(tr));
400 /* uppercase/English */
401 printUnicodeString("full-uppercased/en: ", (t=s).toUpper(en));
402 /* uppercase/Turkish */
403 printUnicodeString("full-uppercased/tr: ", (t=s).toUpper(tr));
404 /* titlecase/English */
405 printUnicodeString("full-titlecased/en: ", (t=s).toTitle(NULL, en));
406 /* titlecase/Turkish */
407 printUnicodeString("full-titlecased/tr: ", (t=s).toTitle(NULL, tr));
408 /* case-folde/default */
409 printUnicodeString("full-case-folded/default: ", (t=s).foldCase(U_FOLD_CASE_DEFAULT));
410 /* case-folde/Turkic */
411 printUnicodeString("full-case-folded/Turkic: ", (t=s).foldCase(U_FOLD_CASE_EXCLUDE_SPECIAL_I));
412}
413
414// sample code for UnicodeString storage models ----------------------------- ***
415
416static const UChar readonly[]={
417 0x61, 0x31, 0x20ac
418};
419static UChar writeable[]={
420 0x62, 0x32, 0xdbc0, 0xdc01 // includes a surrogate pair for a supplementary code point
421};
422static char out[100];
423
424static void
425demoUnicodeStringStorage() {
426 // These sample code lines illustrate how to use UnicodeString, and the
427 // comments tell what happens internally. There are no APIs to observe
428 // most of this programmatically, except for stepping into the code
429 // with a debugger.
430 // This is by design to hide such details from the user.
431 int32_t i;
432
433 printf("\n* demoUnicodeStringStorage() ------- ***\n\n");
434
435 // * UnicodeString with internally stored contents
436 // instantiate a UnicodeString from a single code point
437 // the few (2) UChars will be stored in the object itself
438 UnicodeString one((UChar32)0x24001);
439 // this copies the few UChars into the "two" object
440 UnicodeString two=one;
441 printf("length of short string copy: %d\n", two.length());
442 // set "one" to contain the 3 UChars from readonly
443 // this setTo() variant copies the characters
b331163b 444 one.setTo(readonly, UPRV_LENGTHOF(readonly));
b75a7d8f
A
445
446 // * UnicodeString with allocated contents
447 // build a longer string that will not fit into the object's buffer
b331163b 448 one+=UnicodeString(writeable, UPRV_LENGTHOF(writeable));
b75a7d8f
A
449 one+=one;
450 one+=one;
451 printf("length of longer string: %d\n", one.length());
452 // copying will use the same allocated buffer and increment the reference
453 // counter
454 two=one;
455 printf("length of longer string copy: %d\n", two.length());
456
457 // * UnicodeString using readonly-alias to a const UChar array
458 // construct a string that aliases a readonly buffer
b331163b 459 UnicodeString three(FALSE, readonly, UPRV_LENGTHOF(readonly));
b75a7d8f
A
460 printUnicodeString("readonly-alias string: ", three);
461 // copy-on-write: any modification to the string results in
462 // a copy to either the internal buffer or to a newly allocated one
463 three.setCharAt(1, 0x39);
464 printUnicodeString("readonly-aliasing string after modification: ", three);
465 // the aliased array is not modified
466 for(i=0; i<three.length(); ++i) {
3d1f044b 467 printf("readonly buffer[%d] after modifying its string: 0x%" PRId32 "\n",
b75a7d8f
A
468 i, readonly[i]);
469 }
470 // setTo() readonly alias
b331163b 471 one.setTo(FALSE, writeable, UPRV_LENGTHOF(writeable));
b75a7d8f
A
472 // copying the readonly-alias object with fastCopyFrom() (new in ICU 2.4)
473 // will readonly-alias the same buffer
474 two.fastCopyFrom(one);
475 printUnicodeString("fastCopyFrom(readonly alias of \"writeable\" array): ", two);
476 printf("verify that a fastCopyFrom(readonly alias) uses the same buffer pointer: %d (should be 1)\n",
477 one.getBuffer()==two.getBuffer());
478 // a normal assignment will clone the contents (new in ICU 2.4)
479 two=one;
480 printf("verify that a regular copy of a readonly alias uses a different buffer pointer: %d (should be 0)\n",
481 one.getBuffer()==two.getBuffer());
482
483 // * UnicodeString using writeable-alias to a non-const UChar array
b331163b 484 UnicodeString four(writeable, UPRV_LENGTHOF(writeable), UPRV_LENGTHOF(writeable));
b75a7d8f
A
485 printUnicodeString("writeable-alias string: ", four);
486 // a modification writes through to the buffer
487 four.setCharAt(1, 0x39);
488 for(i=0; i<four.length(); ++i) {
3d1f044b 489 printf("writeable-alias backing buffer[%d]=0x%" PRId32 " "
b75a7d8f
A
490 "after modification\n", i, writeable[i]);
491 }
492 // a copy will not alias any more;
493 // instead, it will get a copy of the contents into allocated memory
494 two=four;
495 two.setCharAt(1, 0x21);
496 for(i=0; i<two.length(); ++i) {
3d1f044b 497 printf("writeable-alias backing buffer[%d]=0x%" PRId32 " after "
b75a7d8f
A
498 "modification of string copy\n", i, writeable[i]);
499 }
500 // setTo() writeable alias, capacity==length
b331163b 501 one.setTo(writeable, UPRV_LENGTHOF(writeable), UPRV_LENGTHOF(writeable));
b75a7d8f
A
502 // grow the string - it will not fit into the backing buffer any more
503 // and will get copied before modification
504 one.append((UChar)0x40);
505 // shrink it back so it would fit
506 one.truncate(one.length()-1);
507 // we still operate on the copy
508 one.setCharAt(1, 0x25);
3d1f044b
A
509 printf("string after growing too much and then shrinking[1]=0x%" PRId32 "\n"
510 " backing store for this[1]=0x%" PRId32 "\n",
b75a7d8f
A
511 one.charAt(1), writeable[1]);
512 // if we need it in the original buffer, then extract() to it
513 // extract() does not do anything if the string aliases that same buffer
514 // i=min(one.length(), length of array)
b331163b 515 if(one.length()<UPRV_LENGTHOF(writeable)) {
b75a7d8f
A
516 i=one.length();
517 } else {
b331163b 518 i=UPRV_LENGTHOF(writeable);
b75a7d8f
A
519 }
520 one.extract(0, i, writeable);
b331163b 521 for(i=0; i<UPRV_LENGTHOF(writeable); ++i) {
3d1f044b 522 printf("writeable-alias backing buffer[%d]=0x%" PRId32 " after re-extract\n",
b75a7d8f
A
523 i, writeable[i]);
524 }
525}
526
527// sample code for UnicodeString instantiations ----------------------------- ***
528
529static void
530demoUnicodeStringInit() {
531 // *** Make sure to read about invariant characters in utypes.h! ***
532 // Initialization of Unicode strings from C literals works _only_ for
533 // invariant characters!
534
535 printf("\n* demoUnicodeStringInit() ---------- ***\n\n");
536
537 // the string literal is 32 chars long - this must be counted for the macro
538 UnicodeString invariantOnly=UNICODE_STRING("such characters are safe 123 %-.", 32);
539
540 /*
541 * In C, we need two macros: one to declare the UChar[] array, and
542 * one to populate it; the second one is a noop on platforms where
543 * wchar_t is compatible with UChar and ASCII-based.
544 * The length of the string literal must be counted for both macros.
545 */
546 /* declare the invString array for the string */
547 U_STRING_DECL(invString, "such characters are safe 123 %-.", 32);
548 /* populate it with the characters */
549 U_STRING_INIT(invString, "such characters are safe 123 %-.", 32);
550
551 // compare the C and C++ strings
552 printf("C and C++ Unicode strings are equal: %d\n", invariantOnly==UnicodeString(TRUE, invString, 32));
553
554 /*
555 * convert between char * and UChar * strings that
556 * contain only invariant characters
557 */
558 static const char *cs1="such characters are safe 123 %-.";
559 static UChar us1[40];
560 static char cs2[40];
561 u_charsToUChars(cs1, us1, 33); /* include the terminating NUL */
562 u_UCharsToChars(us1, cs2, 33);
563 printf("char * -> UChar * -> char * with only "
564 "invariant characters: \"%s\"\n",
565 cs2);
566
567 // initialize a UnicodeString from a string literal that contains
568 // escape sequences written with invariant characters
569 // do not forget to duplicate the backslashes for ICU to see them
570 // then, count each double backslash only once!
571 UnicodeString german=UNICODE_STRING(
572 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n", 64).
573 unescape();
574 printUnicodeString("german UnicodeString from unescaping:\n ", german);
575
576 /*
577 * C: convert and unescape a char * string with only invariant
578 * characters to fill a UChar * string
579 */
580 UChar buffer[200];
581 int32_t length;
582 length=u_unescape(
583 "Sch\\u00f6nes Auto: \\u20ac 11240.\\fPrivates Zeichen: \\U00102345\\n",
b331163b 584 buffer, UPRV_LENGTHOF(buffer));
b75a7d8f
A
585 printf("german C Unicode string from char * unescaping: (length %d)\n ", length);
586 printUnicodeString("", UnicodeString(buffer));
587}
588
589extern int
590main(int argc, const char *argv[]) {
591 UErrorCode errorCode=U_ZERO_ERROR;
592
593 // Note: Using a global variable for any object is not exactly thread-safe...
594
595 // You can change this call to e.g. ucnv_open("UTF-8", &errorCode) if you pipe
596 // the output to a file and look at it with a Unicode-capable editor.
597 // This will currently affect only the printUString() function, see the code above.
598 // printUnicodeString() could use this, too, by changing to an extract() overload
599 // that takes a UConverter argument.
600 cnv=ucnv_open(NULL, &errorCode);
601 if(U_FAILURE(errorCode)) {
602 fprintf(stderr, "error %s opening the default converter\n", u_errorName(errorCode));
603 return errorCode;
604 }
605
606 ucnv_setFromUCallBack(cnv, UCNV_FROM_U_CALLBACK_ESCAPE, UCNV_ESCAPE_C, NULL, NULL, &errorCode);
607 if(U_FAILURE(errorCode)) {
608 fprintf(stderr, "error %s setting the escape callback in the default converter\n", u_errorName(errorCode));
609 ucnv_close(cnv);
610 return errorCode;
611 }
612
613 demo_utf_h_macros();
614 demo_C_Unicode_strings();
615 demoCaseMapInC();
616 demoCaseMapInCPlusPlus();
617 demoUnicodeStringStorage();
618 demoUnicodeStringInit();
619
620 ucnv_close(cnv);
621 return 0;
622}