]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/unistr.h
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / unicode / unistr.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
3* Copyright (C) 1998-2003, International Business Machines
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11* Date Name Description
12* 09/25/98 stephen Creation.
13* 11/11/98 stephen Changed per 11/9 code review.
14* 04/20/99 stephen Overhauled per 4/16 code review.
15* 11/18/99 aliu Made to inherit from Replaceable. Added method
16* handleReplaceBetween(); other methods unchanged.
17* 06/25/01 grhoten Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
24#include "unicode/utypes.h"
25#include "unicode/uobject.h"
26#include "unicode/rep.h"
27
28struct UConverter; // unicode/ucnv.h
29class StringThreadTest;
30
31#ifndef U_COMPARE_CODE_POINT_ORDER
32/* see also ustring.h and unorm.h */
33/**
34 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
35 * Compare strings in code point order instead of code unit order.
36 * @draft ICU 2.2
37 */
38#define U_COMPARE_CODE_POINT_ORDER 0x8000
39#endif
40
41#ifndef USTRING_H
42/* see ustring.h */
43U_CAPI int32_t U_EXPORT2
44u_strlen(const UChar *s);
45#endif
46
47U_NAMESPACE_BEGIN
48
49class Locale; // unicode/locid.h
50class UCharReference;
51class StringCharacterIterator;
52class BreakIterator; // unicode/brkiter.h
53
54/* The <iostream> include has been moved to unicode/ustream.h */
55
56/**
57 * Unicode String literals in C++.
58 * Dependent on the platform properties, different UnicodeString
59 * constructors should be used to create a UnicodeString object from
60 * a string literal.
61 * The macros are defined for maximum performance.
62 * They work only for strings that contain "invariant characters", i.e.,
63 * only latin letters, digits, and some punctuation.
64 * See utypes.h for details.
65 *
66 * The string parameter must be a C string literal.
67 * The length of the string, not including the terminating
68 * <code>NUL</code>, must be specified as a constant.
69 * The U_STRING_DECL macro should be invoked exactly once for one
70 * such string variable before it is used.
71 * @stable ICU 2.0
72 */
/* Pick the UnicodeString constructor call used for the literal cs, depending on
 * the platform's wchar_t/UChar sizes and its charset family. */
73#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* wchar_t and UChar have the same size: token-paste an L prefix onto the
 * literal to get a wide literal and cast it to const UChar *.
 * NOTE(review): the (TRUE, ptr, length) constructor presumably aliases the
 * buffer read-only instead of copying it -- confirm against its declaration. */
74# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
75#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* UChar is a single byte: the narrow literal itself is cast to const UChar *. */
76# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
77#else
/* Fallback: pass the char literal and its length with "" as third argument.
 * NOTE(review): "" presumably selects invariant-character conversion --
 * confirm against the constructor taking a codepage name. */
78# define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, "")
79#endif
80
81/**
82 * Unicode String literals in C++.
83 * Dependent on the platform properties, different UnicodeString
84 * constructors should be used to create a UnicodeString object from
85 * a string literal.
86 * The macros are defined for improved performance.
87 * They work only for strings that contain "invariant characters", i.e.,
88 * only latin letters, digits, and some punctuation.
89 * See utypes.h for details.
90 *
91 * The string parameter must be a C string literal.
92 * @stable ICU 2.0
93 */
/* Same platform selection as UNICODE_STRING, but without an explicit length. */
94#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* wchar_t and UChar have the same size: token-paste an L prefix onto the
 * literal and cast the wide literal to const UChar *; -1 is passed as length.
 * NOTE(review): -1 presumably means "NUL-terminated, compute the length" --
 * confirm against the (UBool, const UChar *, int32_t) constructor. */
95# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
96#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
/* UChar is a single byte: the narrow literal itself is cast to const UChar *. */
97# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
98#else
/* Fallback: pass the char literal with "" as the second argument.
 * NOTE(review): "" presumably selects invariant-character conversion --
 * confirm against the constructor taking a codepage name. */
99# define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, "")
100#endif
101
102/**
103 * UnicodeString is a string class that stores Unicode characters directly and provides
104 * similar functionality as the Java String and StringBuffer classes.
105 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
106 *
107 * The UnicodeString class is not suitable for subclassing.
108 *
109 * <p>For an overview of Unicode strings in C and C++ see the
110 * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
111 *
112 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
113 * A Unicode character may be stored with either
114 * one code unit &mdash; which is the most common case &mdash; or with a matched pair of
115 * special code units ("surrogates").
116 * The data type for code units is UChar.<br>
117 * For single-character handling, a Unicode character code <em>point</em> is a value
118 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
119 *
120 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
121 * This is the same as with multi-byte char* strings in traditional string handling.
122 * Operations on partial strings typically do not test for code point boundaries.
123 * If necessary, the user needs to take care of such boundaries by testing for the code unit
124 * values or by using functions like
125 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
126 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
127 *
128 * UnicodeString methods are more lenient with regard to input parameter values
129 * than other ICU APIs. In particular:
130 * - If indexes are out of bounds for a UnicodeString object
131 * (<0 or >length()) then they are "pinned" to the nearest boundary.
132 * - If primitive string pointer values (e.g., const UChar * or char *)
133 * for input strings are NULL, then those input string parameters are treated
134 * as if they pointed to an empty string.
135 * However, this is <em>not</em> the case for char * parameters for charset names
136 * or other IDs.
137 * - Most UnicodeString methods do not take a UErrorCode parameter because
138 * there are usually very few opportunities for failure other than a shortage
139 * of memory, error codes in low-level C++ string methods would be inconvenient,
140 * and the error code as the last parameter (ICU convention) would prevent
141 * the use of default parameter values.
142 * Instead, such methods set the UnicodeString into a "bogus" state
143 * (see isBogus()) if an error occurs.
144 *
145 * In string comparisons, two UnicodeString objects that are both "bogus"
146 * compare equal (to be transitive and prevent endless loops in sorting),
147 * and a "bogus" string compares less than any non-"bogus" one.
148 *
149 * Const UnicodeString methods are thread-safe. Multiple threads can use
150 * const methods on the same UnicodeString object simultaneously,
151 * but non-const methods must not be called concurrently (in multiple threads)
152 * with any other (const or non-const) methods.
153 *
154 * Similarly, const UnicodeString & parameters are thread-safe.
155 * One object may be passed in as such a parameter concurrently in multiple threads.
156 * This includes the const UnicodeString & parameters for
157 * copy construction, assignment, and cloning.
158 *
159 * <p>UnicodeString uses several storage methods.
160 * String contents can be stored inside the UnicodeString object itself,
161 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
162 * Most of this is done transparently, but careful aliasing in particular provides
163 * significant performance improvements.
164 * Also, the internal buffer is accessible via special functions.
165 * For details see the
166 * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
167 *
168 * @see utf.h
169 * @see CharacterIterator
170 * @stable ICU 2.0
171 */
172class U_COMMON_API UnicodeString : public Replaceable
173{
174public:
175
176 //========================================
177 // Read-only operations
178 //========================================
179
180 /* Comparison - bitwise only - for international comparison use collation */
181
182 /**
183 * Equality operator. Performs only bitwise comparison.
184 * @param text The UnicodeString to compare to this one.
185 * @return TRUE if <TT>text</TT> contains the same characters as this one,
186 * FALSE otherwise.
187 * @stable ICU 2.0
188 */
189 inline UBool operator== (const UnicodeString& text) const;
190
191 /**
192 * Inequality operator. Performs only bitwise comparison.
193 * @param text The UnicodeString to compare to this one.
194 * @return FALSE if <TT>text</TT> contains the same characters as this one,
195 * TRUE otherwise.
196 * @stable ICU 2.0
197 */
198 inline UBool operator!= (const UnicodeString& text) const;
199
200 /**
201 * Greater than operator. Performs only bitwise comparison.
202 * @param text The UnicodeString to compare to this one.
203 * @return TRUE if the characters in this are bitwise greater
204 * than the characters in <TT>text</TT>, FALSE otherwise
205 * @stable ICU 2.0
206 */
207 inline UBool operator> (const UnicodeString& text) const;
208
209 /**
210 * Less than operator. Performs only bitwise comparison.
211 * @param text The UnicodeString to compare to this one.
212 * @return TRUE if the characters in this are bitwise less
213 * than the characters in <TT>text</TT>, FALSE otherwise
214 * @stable ICU 2.0
215 */
216 inline UBool operator< (const UnicodeString& text) const;
217
218 /**
219 * Greater than or equal operator. Performs only bitwise comparison.
220 * @param text The UnicodeString to compare to this one.
221 * @return TRUE if the characters in this are bitwise greater
222 * than or equal to the characters in <TT>text</TT>, FALSE otherwise
223 * @stable ICU 2.0
224 */
225 inline UBool operator>= (const UnicodeString& text) const;
226
227 /**
228 * Less than or equal operator. Performs only bitwise comparison.
229 * @param text The UnicodeString to compare to this one.
230 * @return TRUE if the characters in this are bitwise less
231 * than or equal to the characters in <TT>text</TT>, FALSE otherwise
232 * @stable ICU 2.0
233 */
234 inline UBool operator<= (const UnicodeString& text) const;
235
236 /**
237 * Compare the characters bitwise in this UnicodeString to
238 * the characters in <TT>text</TT>.
239 * @param text The UnicodeString to compare to this one.
240 * @return The result of bitwise character comparison: 0 if this
241 * contains the same characters as <TT>text</TT>, -1 if the characters in
242 * this are bitwise less than the characters in <TT>text</TT>, +1 if the
243 * characters in this are bitwise greater than the characters
244 * in <TT>text</TT>.
245 * @stable ICU 2.0
246 */
247 inline int8_t compare(const UnicodeString& text) const;
248
249 /**
250 * Compare the characters bitwise in the range
251 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
252 * in <TT>srcText</TT>
253 * @param start the offset at which the compare operation begins
254 * @param length the number of characters of text to compare.
255 * @param srcText the text to be compared
256 * @return The result of bitwise character comparison: 0 if this
257 * contains the same characters as <TT>srcText</TT>, -1 if the characters in
258 * this are bitwise less than the characters in <TT>srcText</TT>, +1 if the
259 * characters in this are bitwise greater than the characters
260 * in <TT>srcText</TT>.
261 * @stable ICU 2.0
262 */
263 inline int8_t compare(int32_t start,
264 int32_t length,
265 const UnicodeString& srcText) const;
266
267 /**
268 * Compare the characters bitwise in the range
269 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
270 * in <TT>srcText</TT> in the range
271 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
272 * @param start the offset at which the compare operation begins
273 * @param length the number of characters in this to compare.
274 * @param srcText the text to be compared
275 * @param srcStart the offset into <TT>srcText</TT> to start comparison
276 * @param srcLength the number of characters in <TT>srcText</TT> to compare
277 * @return The result of bitwise character comparison: 0 if this
278 * contains the same characters as <TT>srcText</TT>, -1 if the characters in
279 * this are bitwise less than the characters in <TT>srcText</TT>, +1 if the
280 * characters in this are bitwise greater than the characters
281 * in <TT>srcText</TT>.
282 * @stable ICU 2.0
283 */
284 inline int8_t compare(int32_t start,
285 int32_t length,
286 const UnicodeString& srcText,
287 int32_t srcStart,
288 int32_t srcLength) const;
289
290 /**
291 * Compare the characters bitwise in this UnicodeString with the first
292 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
293 * @param srcChars The characters to compare to this UnicodeString.
294 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
295 * @return The result of bitwise character comparison: 0 if this
296 * contains the same characters as <TT>srcChars</TT>, -1 if the characters in
297 * this are bitwise less than the characters in <TT>srcChars</TT>, +1 if the
298 * characters in this are bitwise greater than the characters
299 * in <TT>srcChars</TT>.
300 * @stable ICU 2.0
301 */
302 inline int8_t compare(const UChar *srcChars,
303 int32_t srcLength) const;
304
305 /**
306 * Compare the characters bitwise in the range
307 * [<TT>start</TT>, <TT>start + length</TT>) with the first
308 * <TT>length</TT> characters in <TT>srcChars</TT>
309 * @param start the offset at which the compare operation begins
310 * @param length the number of characters to compare.
311 * @param srcChars the characters to be compared
312 * @return The result of bitwise character comparison: 0 if this
313 * contains the same characters as <TT>srcChars</TT>, -1 if the characters in
314 * this are bitwise less than the characters in <TT>srcChars</TT>, +1 if the
315 * characters in this are bitwise greater than the characters
316 * in <TT>srcChars</TT>.
317 * @stable ICU 2.0
318 */
319 inline int8_t compare(int32_t start,
320 int32_t length,
321 const UChar *srcChars) const;
322
323 /**
324 * Compare the characters bitwise in the range
325 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
326 * in <TT>srcChars</TT> in the range
327 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
328 * @param start the offset at which the compare operation begins
329 * @param length the number of characters in this to compare
330 * @param srcChars the characters to be compared
331 * @param srcStart the offset into <TT>srcChars</TT> to start comparison
332 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
333 * @return The result of bitwise character comparison: 0 if this
334 * contains the same characters as <TT>srcChars</TT>, -1 if the characters in
335 * this are bitwise less than the characters in <TT>srcChars</TT>, +1 if the
336 * characters in this are bitwise greater than the characters
337 * in <TT>srcChars</TT>.
338 * @stable ICU 2.0
339 */
340 inline int8_t compare(int32_t start,
341 int32_t length,
342 const UChar *srcChars,
343 int32_t srcStart,
344 int32_t srcLength) const;
345
346 /**
347 * Compare the characters bitwise in the range
348 * [<TT>start</TT>, <TT>limit</TT>) with the characters
349 * in <TT>srcText</TT> in the range
350 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
351 * @param start the offset at which the compare operation begins
352 * @param limit the offset immediately following the compare operation
353 * @param srcText the text to be compared
354 * @param srcStart the offset into <TT>srcText</TT> to start comparison
355 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
356 * @return The result of bitwise character comparison: 0 if this
357 * contains the same characters as <TT>srcText</TT>, -1 if the characters in
358 * this are bitwise less than the characters in <TT>srcText</TT>, +1 if the
359 * characters in this are bitwise greater than the characters
360 * in <TT>srcText</TT>.
361 * @stable ICU 2.0
362 */
363 inline int8_t compareBetween(int32_t start,
364 int32_t limit,
365 const UnicodeString& srcText,
366 int32_t srcStart,
367 int32_t srcLimit) const;
368
369 /**
370 * Compare two Unicode strings in code point order.
371 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
372 * if supplementary characters are present:
373 *
374 * In UTF-16, supplementary characters (with code points U+10000 and above) are
375 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
376 * which means that they compare as less than some other BMP characters like U+feff.
377 * This function compares Unicode strings in code point order.
378 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
379 *
380 * @param text Another string to compare this one to.
381 * @return a negative/zero/positive integer corresponding to whether
382 * this string is less than/equal to/greater than the second one
383 * in code point order
384 * @stable ICU 2.0
385 */
386 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
387
388 /**
389 * Compare two Unicode strings in code point order.
390 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
391 * if supplementary characters are present:
392 *
393 * In UTF-16, supplementary characters (with code points U+10000 and above) are
394 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
395 * which means that they compare as less than some other BMP characters like U+feff.
396 * This function compares Unicode strings in code point order.
397 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
398 *
399 * @param start The start offset in this string at which the compare operation begins.
400 * @param length The number of code units from this string to compare.
401 * @param srcText Another string to compare this one to.
402 * @return a negative/zero/positive integer corresponding to whether
403 * this string is less than/equal to/greater than the second one
404 * in code point order
405 * @stable ICU 2.0
406 */
407 inline int8_t compareCodePointOrder(int32_t start,
408 int32_t length,
409 const UnicodeString& srcText) const;
410
411 /**
412 * Compare two Unicode strings in code point order.
413 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
414 * if supplementary characters are present:
415 *
416 * In UTF-16, supplementary characters (with code points U+10000 and above) are
417 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
418 * which means that they compare as less than some other BMP characters like U+feff.
419 * This function compares Unicode strings in code point order.
420 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
421 *
422 * @param start The start offset in this string at which the compare operation begins.
423 * @param length The number of code units from this string to compare.
424 * @param srcText Another string to compare this one to.
425 * @param srcStart The start offset in that string at which the compare operation begins.
426 * @param srcLength The number of code units from that string to compare.
427 * @return a negative/zero/positive integer corresponding to whether
428 * this string is less than/equal to/greater than the second one
429 * in code point order
430 * @stable ICU 2.0
431 */
432 inline int8_t compareCodePointOrder(int32_t start,
433 int32_t length,
434 const UnicodeString& srcText,
435 int32_t srcStart,
436 int32_t srcLength) const;
437
438 /**
439 * Compare two Unicode strings in code point order.
440 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
441 * if supplementary characters are present:
442 *
443 * In UTF-16, supplementary characters (with code points U+10000 and above) are
444 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
445 * which means that they compare as less than some other BMP characters like U+feff.
446 * This function compares Unicode strings in code point order.
447 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
448 *
449 * @param srcChars A pointer to another string to compare this one to.
450 * @param srcLength The number of code units from that string to compare.
451 * @return a negative/zero/positive integer corresponding to whether
452 * this string is less than/equal to/greater than the second one
453 * in code point order
454 * @stable ICU 2.0
455 */
456 inline int8_t compareCodePointOrder(const UChar *srcChars,
457 int32_t srcLength) const;
458
459 /**
460 * Compare two Unicode strings in code point order.
461 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
462 * if supplementary characters are present:
463 *
464 * In UTF-16, supplementary characters (with code points U+10000 and above) are
465 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
466 * which means that they compare as less than some other BMP characters like U+feff.
467 * This function compares Unicode strings in code point order.
468 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
469 *
470 * @param start The start offset in this string at which the compare operation begins.
471 * @param length The number of code units from this string to compare.
472 * @param srcChars A pointer to another string to compare this one to.
473 * @return a negative/zero/positive integer corresponding to whether
474 * this string is less than/equal to/greater than the second one
475 * in code point order
476 * @stable ICU 2.0
477 */
478 inline int8_t compareCodePointOrder(int32_t start,
479 int32_t length,
480 const UChar *srcChars) const;
481
482 /**
483 * Compare two Unicode strings in code point order.
484 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
485 * if supplementary characters are present:
486 *
487 * In UTF-16, supplementary characters (with code points U+10000 and above) are
488 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
489 * which means that they compare as less than some other BMP characters like U+feff.
490 * This function compares Unicode strings in code point order.
491 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
492 *
493 * @param start The start offset in this string at which the compare operation begins.
494 * @param length The number of code units from this string to compare.
495 * @param srcChars A pointer to another string to compare this one to.
496 * @param srcStart The start offset in that string at which the compare operation begins.
497 * @param srcLength The number of code units from that string to compare.
498 * @return a negative/zero/positive integer corresponding to whether
499 * this string is less than/equal to/greater than the second one
500 * in code point order
501 * @stable ICU 2.0
502 */
503 inline int8_t compareCodePointOrder(int32_t start,
504 int32_t length,
505 const UChar *srcChars,
506 int32_t srcStart,
507 int32_t srcLength) const;
508
509 /**
510 * Compare two Unicode strings in code point order.
511 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
512 * if supplementary characters are present:
513 *
514 * In UTF-16, supplementary characters (with code points U+10000 and above) are
515 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
516 * which means that they compare as less than some other BMP characters like U+feff.
517 * This function compares Unicode strings in code point order.
518 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
519 *
520 * @param start The start offset in this string at which the compare operation begins.
521 * @param limit The offset after the last code unit from this string to compare.
522 * @param srcText Another string to compare this one to.
523 * @param srcStart The start offset in that string at which the compare operation begins.
524 * @param srcLimit The offset after the last code unit from that string to compare.
525 * @return a negative/zero/positive integer corresponding to whether
526 * this string is less than/equal to/greater than the second one
527 * in code point order
528 * @stable ICU 2.0
529 */
530 inline int8_t compareCodePointOrderBetween(int32_t start,
531 int32_t limit,
532 const UnicodeString& srcText,
533 int32_t srcStart,
534 int32_t srcLimit) const;
535
536 /**
537 * Compare two strings case-insensitively using full case folding.
538 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
539 *
540 * @param text Another string to compare this one to.
541 * @param options A bit set of options:
542 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
543 * Comparison in code unit order with default case folding.
544 *
545 * - U_COMPARE_CODE_POINT_ORDER
546 * Set to choose code point order instead of code unit order
547 * (see u_strCompare for details).
548 *
549 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
550 *
551 * @return A negative, zero, or positive integer indicating the comparison result.
552 * @stable ICU 2.0
553 */
554 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
555
556 /**
557 * Compare two strings case-insensitively using full case folding.
558 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
559 *
560 * @param start The start offset in this string at which the compare operation begins.
561 * @param length The number of code units from this string to compare.
562 * @param srcText Another string to compare this one to.
563 * @param options A bit set of options:
564 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
565 * Comparison in code unit order with default case folding.
566 *
567 * - U_COMPARE_CODE_POINT_ORDER
568 * Set to choose code point order instead of code unit order
569 * (see u_strCompare for details).
570 *
571 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
572 *
573 * @return A negative, zero, or positive integer indicating the comparison result.
574 * @stable ICU 2.0
575 */
576 inline int8_t caseCompare(int32_t start,
577 int32_t length,
578 const UnicodeString& srcText,
579 uint32_t options) const;
580
581 /**
582 * Compare two strings case-insensitively using full case folding.
583 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
584 *
585 * @param start The start offset in this string at which the compare operation begins.
586 * @param length The number of code units from this string to compare.
587 * @param srcText Another string to compare this one to.
588 * @param srcStart The start offset in that string at which the compare operation begins.
589 * @param srcLength The number of code units from that string to compare.
590 * @param options A bit set of options:
591 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
592 * Comparison in code unit order with default case folding.
593 *
594 * - U_COMPARE_CODE_POINT_ORDER
595 * Set to choose code point order instead of code unit order
596 * (see u_strCompare for details).
597 *
598 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
599 *
600 * @return A negative, zero, or positive integer indicating the comparison result.
601 * @stable ICU 2.0
602 */
603 inline int8_t caseCompare(int32_t start,
604 int32_t length,
605 const UnicodeString& srcText,
606 int32_t srcStart,
607 int32_t srcLength,
608 uint32_t options) const;
609
610 /**
611 * Compare two strings case-insensitively using full case folding.
612 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
613 *
614 * @param srcChars A pointer to another string to compare this one to.
615 * @param srcLength The number of code units from that string to compare.
616 * @param options A bit set of options:
617 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
618 * Comparison in code unit order with default case folding.
619 *
620 * - U_COMPARE_CODE_POINT_ORDER
621 * Set to choose code point order instead of code unit order
622 * (see u_strCompare for details).
623 *
624 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
625 *
626 * @return A negative, zero, or positive integer indicating the comparison result.
627 * @stable ICU 2.0
628 */
629 inline int8_t caseCompare(const UChar *srcChars,
630 int32_t srcLength,
631 uint32_t options) const;
632
633 /**
634 * Compare two strings case-insensitively using full case folding.
635 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
636 *
637 * @param start The start offset in this string at which the compare operation begins.
638 * @param length The number of code units from this string to compare.
639 * @param srcChars A pointer to another string to compare this one to.
640 * @param options A bit set of options:
641 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
642 * Comparison in code unit order with default case folding.
643 *
644 * - U_COMPARE_CODE_POINT_ORDER
645 * Set to choose code point order instead of code unit order
646 * (see u_strCompare for details).
647 *
648 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
649 *
650 * @return A negative, zero, or positive integer indicating the comparison result.
651 * @stable ICU 2.0
652 */
653 inline int8_t caseCompare(int32_t start,
654 int32_t length,
655 const UChar *srcChars,
656 uint32_t options) const;
657
658 /**
659 * Compare two strings case-insensitively using full case folding.
660 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
661 *
662 * @param start The start offset in this string at which the compare operation begins.
663 * @param length The number of code units from this string to compare.
664 * @param srcChars A pointer to another string to compare this one to.
665 * @param srcStart The start offset in that string at which the compare operation begins.
666 * @param srcLength The number of code units from that string to compare.
667 * @param options A bit set of options:
668 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
669 * Comparison in code unit order with default case folding.
670 *
671 * - U_COMPARE_CODE_POINT_ORDER
672 * Set to choose code point order instead of code unit order
673 * (see u_strCompare for details).
674 *
675 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
676 *
677 * @return A negative, zero, or positive integer indicating the comparison result.
678 * @stable ICU 2.0
679 */
680 inline int8_t caseCompare(int32_t start,
681 int32_t length,
682 const UChar *srcChars,
683 int32_t srcStart,
684 int32_t srcLength,
685 uint32_t options) const;
686
687 /**
688 * Compare two strings case-insensitively using full case folding.
689 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
690 *
691 * @param start The start offset in this string at which the compare operation begins.
692 * @param limit The offset after the last code unit from this string to compare.
693 * @param srcText Another string to compare this one to.
694 * @param srcStart The start offset in that string at which the compare operation begins.
695 * @param srcLimit The offset after the last code unit from that string to compare.
696 * @param options A bit set of options:
697 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
698 * Comparison in code unit order with default case folding.
699 *
700 * - U_COMPARE_CODE_POINT_ORDER
701 * Set to choose code point order instead of code unit order
702 * (see u_strCompare for details).
703 *
704 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
705 *
706 * @return A negative, zero, or positive integer indicating the comparison result.
707 * @stable ICU 2.0
708 */
709 inline int8_t caseCompareBetween(int32_t start,
710 int32_t limit,
711 const UnicodeString& srcText,
712 int32_t srcStart,
713 int32_t srcLimit,
714 uint32_t options) const;
715
716 /**
717 * Determine if this starts with the characters in <TT>text</TT>
718 * @param text The text to match.
719 * @return TRUE if this starts with the characters in <TT>text</TT>,
720 * FALSE otherwise
721 * @stable ICU 2.0
722 */
723 inline UBool startsWith(const UnicodeString& text) const;
724
725 /**
726 * Determine if this starts with the characters in <TT>srcText</TT>
727 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
728 * @param srcText The text to match.
729 * @param srcStart the offset into <TT>srcText</TT> to start matching
730 * @param srcLength the number of characters in <TT>srcText</TT> to match
731 * @return TRUE if this starts with the characters in <TT>srcText</TT>,
732 * FALSE otherwise
733 * @stable ICU 2.0
734 */
735 inline UBool startsWith(const UnicodeString& srcText,
736 int32_t srcStart,
737 int32_t srcLength) const;
738
739 /**
740 * Determine if this starts with the characters in <TT>srcChars</TT>
741 * @param srcChars The characters to match.
742 * @param srcLength the number of characters in <TT>srcChars</TT>
743 * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
744 * FALSE otherwise
745 * @stable ICU 2.0
746 */
747 inline UBool startsWith(const UChar *srcChars,
748 int32_t srcLength) const;
749
750 /**
751 * Determine if this starts with the characters in <TT>srcChars</TT>
752 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
753 * @param srcChars The characters to match.
754 * @param srcStart the offset into <TT>srcChars</TT> to start matching
755 * @param srcLength the number of characters in <TT>srcChars</TT> to match
756 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
757 * @stable ICU 2.0
758 */
759 inline UBool startsWith(const UChar *srcChars,
760 int32_t srcStart,
761 int32_t srcLength) const;
762
763 /**
764 * Determine if this ends with the characters in <TT>text</TT>
765 * @param text The text to match.
766 * @return TRUE if this ends with the characters in <TT>text</TT>,
767 * FALSE otherwise
768 * @stable ICU 2.0
769 */
770 inline UBool endsWith(const UnicodeString& text) const;
771
772 /**
773 * Determine if this ends with the characters in <TT>srcText</TT>
774 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
775 * @param srcText The text to match.
776 * @param srcStart the offset into <TT>srcText</TT> to start matching
777 * @param srcLength the number of characters in <TT>srcText</TT> to match
778 * @return TRUE if this ends with the characters in <TT>srcText</TT>,
779 * FALSE otherwise
780 * @stable ICU 2.0
781 */
782 inline UBool endsWith(const UnicodeString& srcText,
783 int32_t srcStart,
784 int32_t srcLength) const;
785
786 /**
787 * Determine if this ends with the characters in <TT>srcChars</TT>
788 * @param srcChars The characters to match.
789 * @param srcLength the number of characters in <TT>srcChars</TT>
790 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
791 * FALSE otherwise
792 * @stable ICU 2.0
793 */
794 inline UBool endsWith(const UChar *srcChars,
795 int32_t srcLength) const;
796
797 /**
798 * Determine if this ends with the characters in <TT>srcChars</TT>
799 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
800 * @param srcChars The characters to match.
801 * @param srcStart the offset into <TT>srcChars</TT> to start matching
802 * @param srcLength the number of characters in <TT>srcChars</TT> to match
803 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
804 * FALSE otherwise
805 * @stable ICU 2.0
806 */
807 inline UBool endsWith(const UChar *srcChars,
808 int32_t srcStart,
809 int32_t srcLength) const;
810
811
812 /* Searching - bitwise only */
813
814 /**
815 * Locate in this the first occurrence of the characters in <TT>text</TT>,
816 * using bitwise comparison.
817 * @param text The text to search for.
818 * @return The offset into this of the start of <TT>text</TT>,
819 * or -1 if not found.
820 * @stable ICU 2.0
821 */
822 inline int32_t indexOf(const UnicodeString& text) const;
823
824 /**
825 * Locate in this the first occurrence of the characters in <TT>text</TT>
826 * starting at offset <TT>start</TT>, using bitwise comparison.
827 * @param text The text to search for.
828 * @param start The offset at which searching will start.
829 * @return The offset into this of the start of <TT>text</TT>,
830 * or -1 if not found.
831 * @stable ICU 2.0
832 */
833 inline int32_t indexOf(const UnicodeString& text,
834 int32_t start) const;
835
836 /**
837 * Locate in this the first occurrence in the range
838 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
839 * in <TT>text</TT>, using bitwise comparison.
840 * @param text The text to search for.
841 * @param start The offset at which searching will start.
842 * @param length The number of characters to search
843 * @return The offset into this of the start of <TT>text</TT>,
844 * or -1 if not found.
845 * @stable ICU 2.0
846 */
847 inline int32_t indexOf(const UnicodeString& text,
848 int32_t start,
849 int32_t length) const;
850
851 /**
852 * Locate in this the first occurrence in the range
853 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
854 * in <TT>srcText</TT> in the range
855 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
856 * using bitwise comparison.
857 * @param srcText The text to search for.
858 * @param srcStart the offset into <TT>srcText</TT> at which
859 * to start matching
860 * @param srcLength the number of characters in <TT>srcText</TT> to match
861 * @param start the offset into this at which to start matching
862 * @param length the number of characters in this to search
863 * @return The offset into this of the start of <TT>srcText</TT>,
864 * or -1 if not found.
865 * @stable ICU 2.0
866 */
867 inline int32_t indexOf(const UnicodeString& srcText,
868 int32_t srcStart,
869 int32_t srcLength,
870 int32_t start,
871 int32_t length) const;
872
873 /**
874 * Locate in this the first occurrence of the characters in
875 * <TT>srcChars</TT>
876 * starting at offset <TT>start</TT>, using bitwise comparison.
877 * @param srcChars The text to search for.
878 * @param srcLength the number of characters in <TT>srcChars</TT> to match
879 * @param start the offset into this at which to start matching
880 * @return The offset into this of the start of <TT>srcChars</TT>,
881 * or -1 if not found.
882 * @stable ICU 2.0
883 */
884 inline int32_t indexOf(const UChar *srcChars,
885 int32_t srcLength,
886 int32_t start) const;
887
888 /**
889 * Locate in this the first occurrence in the range
890 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
891 * in <TT>srcChars</TT>, using bitwise comparison.
892 * @param srcChars The text to search for.
893 * @param srcLength the number of characters in <TT>srcChars</TT>
894 * @param start The offset at which searching will start.
895 * @param length The number of characters to search
896 * @return The offset into this of the start of <TT>srcChars</TT>,
897 * or -1 if not found.
898 * @stable ICU 2.0
899 */
900 inline int32_t indexOf(const UChar *srcChars,
901 int32_t srcLength,
902 int32_t start,
903 int32_t length) const;
904
905 /**
906 * Locate in this the first occurrence in the range
907 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
908 * in <TT>srcChars</TT> in the range
909 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
910 * using bitwise comparison.
911 * @param srcChars The text to search for.
912 * @param srcStart the offset into <TT>srcChars</TT> at which
913 * to start matching
914 * @param srcLength the number of characters in <TT>srcChars</TT> to match
915 * @param start the offset into this at which to start matching
916 * @param length the number of characters in this to search
917 * @return The offset into this of the start of <TT>srcChars</TT>,
918 * or -1 if not found.
919 * @stable ICU 2.0
920 */
921 int32_t indexOf(const UChar *srcChars,
922 int32_t srcStart,
923 int32_t srcLength,
924 int32_t start,
925 int32_t length) const;
926
927 /**
928 * Locate in this the first occurrence of the BMP code point <code>c</code>,
929 * using bitwise comparison.
930 * @param c The code unit to search for.
931 * @return The offset into this of <TT>c</TT>, or -1 if not found.
932 * @stable ICU 2.0
933 */
934 inline int32_t indexOf(UChar c) const;
935
936 /**
937 * Locate in this the first occurrence of the code point <TT>c</TT>,
938 * using bitwise comparison.
939 *
940 * @param c The code point to search for.
941 * @return The offset into this of <TT>c</TT>, or -1 if not found.
942 * @stable ICU 2.0
943 */
944 inline int32_t indexOf(UChar32 c) const;
945
946 /**
947 * Locate in this the first occurrence of the BMP code point <code>c</code>,
948 * starting at offset <TT>start</TT>, using bitwise comparison.
949 * @param c The code unit to search for.
950 * @param start The offset at which searching will start.
951 * @return The offset into this of <TT>c</TT>, or -1 if not found.
952 * @stable ICU 2.0
953 */
954 inline int32_t indexOf(UChar c,
955 int32_t start) const;
956
957 /**
958 * Locate in this the first occurrence of the code point <TT>c</TT>
959 * starting at offset <TT>start</TT>, using bitwise comparison.
960 *
961 * @param c The code point to search for.
962 * @param start The offset at which searching will start.
963 * @return The offset into this of <TT>c</TT>, or -1 if not found.
964 * @stable ICU 2.0
965 */
966 inline int32_t indexOf(UChar32 c,
967 int32_t start) const;
968
969 /**
970 * Locate in this the first occurrence of the BMP code point <code>c</code>
971 * in the range [<TT>start</TT>, <TT>start + length</TT>),
972 * using bitwise comparison.
973 * @param c The code unit to search for.
974 * @param start the offset into this at which to start matching
975 * @param length the number of characters in this to search
976 * @return The offset into this of <TT>c</TT>, or -1 if not found.
977 * @stable ICU 2.0
978 */
979 inline int32_t indexOf(UChar c,
980 int32_t start,
981 int32_t length) const;
982
983 /**
984 * Locate in this the first occurrence of the code point <TT>c</TT>
985 * in the range [<TT>start</TT>, <TT>start + length</TT>),
986 * using bitwise comparison.
987 *
988 * @param c The code point to search for.
989 * @param start the offset into this at which to start matching
990 * @param length the number of characters in this to search
991 * @return The offset into this of <TT>c</TT>, or -1 if not found.
992 * @stable ICU 2.0
993 */
994 inline int32_t indexOf(UChar32 c,
995 int32_t start,
996 int32_t length) const;
997
998 /**
999 * Locate in this the last occurrence of the characters in <TT>text</TT>,
1000 * using bitwise comparison.
1001 * @param text The text to search for.
1002 * @return The offset into this of the start of <TT>text</TT>,
1003 * or -1 if not found.
1004 * @stable ICU 2.0
1005 */
1006 inline int32_t lastIndexOf(const UnicodeString& text) const;
1007
1008 /**
1009 * Locate in this the last occurrence of the characters in <TT>text</TT>
1010 * starting at offset <TT>start</TT>, using bitwise comparison.
1011 * @param text The text to search for.
1012 * @param start The offset at which searching will start.
1013 * @return The offset into this of the start of <TT>text</TT>,
1014 * or -1 if not found.
1015 * @stable ICU 2.0
1016 */
1017 inline int32_t lastIndexOf(const UnicodeString& text,
1018 int32_t start) const;
1019
1020 /**
1021 * Locate in this the last occurrence in the range
1022 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1023 * in <TT>text</TT>, using bitwise comparison.
1024 * @param text The text to search for.
1025 * @param start The offset at which searching will start.
1026 * @param length The number of characters to search
1027 * @return The offset into this of the start of <TT>text</TT>,
1028 * or -1 if not found.
1029 * @stable ICU 2.0
1030 */
1031 inline int32_t lastIndexOf(const UnicodeString& text,
1032 int32_t start,
1033 int32_t length) const;
1034
1035 /**
1036 * Locate in this the last occurrence in the range
1037 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1038 * in <TT>srcText</TT> in the range
1039 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1040 * using bitwise comparison.
1041 * @param srcText The text to search for.
1042 * @param srcStart the offset into <TT>srcText</TT> at which
1043 * to start matching
1044 * @param srcLength the number of characters in <TT>srcText</TT> to match
1045 * @param start the offset into this at which to start matching
1046 * @param length the number of characters in this to search
1047 * @return The offset into this of the start of <TT>srcText</TT>,
1048 * or -1 if not found.
1049 * @stable ICU 2.0
1050 */
1051 inline int32_t lastIndexOf(const UnicodeString& srcText,
1052 int32_t srcStart,
1053 int32_t srcLength,
1054 int32_t start,
1055 int32_t length) const;
1056
1057 /**
1058 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1059 * starting at offset <TT>start</TT>, using bitwise comparison.
1060 * @param srcChars The text to search for.
1061 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1062 * @param start the offset into this at which to start matching
1063 * @return The offset into this of the start of <TT>srcChars</TT>,
1064 * or -1 if not found.
1065 * @stable ICU 2.0
1066 */
1067 inline int32_t lastIndexOf(const UChar *srcChars,
1068 int32_t srcLength,
1069 int32_t start) const;
1070
1071 /**
1072 * Locate in this the last occurrence in the range
1073 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1074 * in <TT>srcChars</TT>, using bitwise comparison.
1075 * @param srcChars The text to search for.
1076 * @param srcLength the number of characters in <TT>srcChars</TT>
1077 * @param start The offset at which searching will start.
1078 * @param length The number of characters to search
1079 * @return The offset into this of the start of <TT>srcChars</TT>,
1080 * or -1 if not found.
1081 * @stable ICU 2.0
1082 */
1083 inline int32_t lastIndexOf(const UChar *srcChars,
1084 int32_t srcLength,
1085 int32_t start,
1086 int32_t length) const;
1087
1088 /**
1089 * Locate in this the last occurrence in the range
1090 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1091 * in <TT>srcChars</TT> in the range
1092 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1093 * using bitwise comparison.
1094 * @param srcChars The text to search for.
1095 * @param srcStart the offset into <TT>srcChars</TT> at which
1096 * to start matching
1097 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1098 * @param start the offset into this at which to start matching
1099 * @param length the number of characters in this to search
1100 * @return The offset into this of the start of <TT>srcChars</TT>,
1101 * or -1 if not found.
1102 * @stable ICU 2.0
1103 */
1104 int32_t lastIndexOf(const UChar *srcChars,
1105 int32_t srcStart,
1106 int32_t srcLength,
1107 int32_t start,
1108 int32_t length) const;
1109
1110 /**
1111 * Locate in this the last occurrence of the BMP code point <code>c</code>,
1112 * using bitwise comparison.
1113 * @param c The code unit to search for.
1114 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1115 * @stable ICU 2.0
1116 */
1117 inline int32_t lastIndexOf(UChar c) const;
1118
1119 /**
1120 * Locate in this the last occurrence of the code point <TT>c</TT>,
1121 * using bitwise comparison.
1122 *
1123 * @param c The code point to search for.
1124 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1125 * @stable ICU 2.0
1126 */
1127 inline int32_t lastIndexOf(UChar32 c) const;
1128
1129 /**
1130 * Locate in this the last occurrence of the BMP code point <code>c</code>
1131 * starting at offset <TT>start</TT>, using bitwise comparison.
1132 * @param c The code unit to search for.
1133 * @param start The offset at which searching will start.
1134 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1135 * @stable ICU 2.0
1136 */
1137 inline int32_t lastIndexOf(UChar c,
1138 int32_t start) const;
1139
1140 /**
1141 * Locate in this the last occurrence of the code point <TT>c</TT>
1142 * starting at offset <TT>start</TT>, using bitwise comparison.
1143 *
1144 * @param c The code point to search for.
1145 * @param start The offset at which searching will start.
1146 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1147 * @stable ICU 2.0
1148 */
1149 inline int32_t lastIndexOf(UChar32 c,
1150 int32_t start) const;
1151
1152 /**
1153 * Locate in this the last occurrence of the BMP code point <code>c</code>
1154 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1155 * using bitwise comparison.
1156 * @param c The code unit to search for.
1157 * @param start the offset into this at which to start matching
1158 * @param length the number of characters in this to search
1159 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1160 * @stable ICU 2.0
1161 */
1162 inline int32_t lastIndexOf(UChar c,
1163 int32_t start,
1164 int32_t length) const;
1165
1166 /**
1167 * Locate in this the last occurrence of the code point <TT>c</TT>
1168 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1169 * using bitwise comparison.
1170 *
1171 * @param c The code point to search for.
1172 * @param start the offset into this at which to start matching
1173 * @param length the number of characters in this to search
1174 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1175 * @stable ICU 2.0
1176 */
1177 inline int32_t lastIndexOf(UChar32 c,
1178 int32_t start,
1179 int32_t length) const;
1180
1181
1182 /* Character access */
1183
1184 /**
1185 * Return the code unit at offset <tt>offset</tt>.
1186 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1187 * @param offset a valid offset into the text
1188 * @return the code unit at offset <tt>offset</tt>
1189 * or 0xffff if the offset is not valid for this string
1190 * @stable ICU 2.0
1191 */
1192 inline UChar charAt(int32_t offset) const;
1193
1194 /**
1195 * Return the code unit at offset <tt>offset</tt>.
1196 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1197 * @param offset a valid offset into the text
1198 * @return the code unit at offset <tt>offset</tt>, or 0xffff if the offset is not valid
1199 * @stable ICU 2.0
1200 */
1201 inline UChar operator[] (int32_t offset) const;
1202
1203 /**
1204 * Return the code point that contains the code unit
1205 * at offset <tt>offset</tt>.
1206 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1207 * @param offset a valid offset into the text
1208 * that indicates the text offset of any of the code units
1209 * that will be assembled into a code point (21-bit value) and returned
1210 * @return the code point of text at <tt>offset</tt>
1211 * or 0xffff if the offset is not valid for this string
1212 * @stable ICU 2.0
1213 */
1214 inline UChar32 char32At(int32_t offset) const;
1215
1216 /**
1217 * Adjust a random-access offset so that
1218 * it points to the beginning of a Unicode character.
1219 * The offset that is passed in points to
1220 * any code unit of a code point,
1221 * while the returned offset will point to the first code unit
1222 * of the same code point.
1223 * In UTF-16, if the input offset points to a second surrogate
1224 * of a surrogate pair, then the returned offset will point
1225 * to the first surrogate.
1226 * @param offset a valid offset into one code point of the text
1227 * @return offset of the first code unit of the same code point
1228 * @see U16_SET_CP_START
1229 * @stable ICU 2.0
1230 */
1231 inline int32_t getChar32Start(int32_t offset) const;
1232
1233 /**
1234 * Adjust a random-access offset so that
1235 * it points behind a Unicode character.
1236 * The offset that is passed in points behind
1237 * any code unit of a code point,
1238 * while the returned offset will point behind the last code unit
1239 * of the same code point.
1240 * In UTF-16, if the input offset points behind the first surrogate
1241 * (i.e., to the second surrogate)
1242 * of a surrogate pair, then the returned offset will point
1243 * behind the second surrogate (i.e., to the code unit after the surrogate pair).
1244 * @param offset a valid offset after any code unit of a code point of the text
1245 * @return offset of the first code unit after the same code point
1246 * @see U16_SET_CP_LIMIT
1247 * @stable ICU 2.0
1248 */
1249 inline int32_t getChar32Limit(int32_t offset) const;
1250
1251 /**
1252 * Move the code unit index along the string by delta code points.
1253 * Interpret the input index as a code unit-based offset into the string,
1254 * move the index forward or backward by delta code points, and
1255 * return the resulting index.
1256 * The input index should point to the first code unit of a code point,
1257 * if there is more than one.
1258 *
1259 * Both input and output indexes are code unit-based as for all
1260 * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1261 * If delta<0 then the index is moved backward (toward the start of the string).
1262 * If delta>0 then the index is moved forward (toward the end of the string).
1263 *
1264 * This behaves like CharacterIterator::move32(delta, kCurrent).
1265 *
1266 * Behavior for out-of-bounds indexes:
1267 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1268 * if the input index<0 then it is pinned to 0;
1269 * if it is index>length() then it is pinned to length().
1270 * Afterwards, the index is moved by <code>delta</code> code points
1271 * forward or backward,
1272 * but no further backward than to 0 and no further forward than to length().
1273 * The resulting index return value will be in between 0 and length(), inclusively.
1274 *
1275 * Examples:
1276 * <pre>
1277 * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1278 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1279 *
1280 * // initial index: position of U+10000
1281 * int32_t index=1;
1282 *
1283 * // the following examples will all result in index==4, position of U+10ffff
1284 *
1285 * // skip 2 code points from some position in the string
1286 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1287 *
1288 * // go to the 3rd code point from the start of s (0-based)
1289 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1290 *
1291 * // go to the next-to-last code point of s
1292 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1293 * </pre>
1294 *
1295 * @param index input code unit index
1296 * @param delta (signed) code point count to move the index forward or backward
1297 * in the string
1298 * @return the resulting code unit index
1299 * @stable ICU 2.0
1300 */
1301 int32_t moveIndex32(int32_t index, int32_t delta) const;
1302
1303 /* Substring extraction */
1304
1305 /**
1306 * Copy the characters in the range
1307 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1308 * beginning at <tt>dstStart</tt>.
1309 * If the string aliases to <code>dst</code> itself as an external buffer,
1310 * then extract() will not copy the contents.
1311 *
1312 * @param start offset of first character which will be copied into the array
1313 * @param length the number of characters to extract
1314 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1315 * must be at least (<tt>dstStart + length</tt>).
1316 * @param dstStart the offset in <TT>dst</TT> where the first character
1317 * will be extracted
1318 * @stable ICU 2.0
1319 */
1320 inline void extract(int32_t start,
1321 int32_t length,
1322 UChar *dst,
1323 int32_t dstStart = 0) const;
1324
1325 /**
1326 * Copy the contents of the string into dest.
1327 * This is a convenience function that
1328 * checks if there is enough space in dest,
1329 * extracts the entire string if possible,
1330 * and NUL-terminates dest if possible.
1331 *
1332 * If the string fits into dest but cannot be NUL-terminated
1333 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1334 * If the string itself does not fit into dest
1335 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1336 *
1337 * If the string aliases to <code>dest</code> itself as an external buffer,
1338 * then extract() will not copy the contents.
1339 *
1340 * @param dest Destination string buffer.
1341 * @param destCapacity Number of UChars available at dest.
1342 * @param errorCode ICU error code.
1343 * @return length()
1344 * @stable ICU 2.0
1345 */
1346 int32_t
1347 extract(UChar *dest, int32_t destCapacity,
1348 UErrorCode &errorCode) const;
1349
1350 /**
1351 * Copy the characters in the range
1352 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1353 * <tt>target</tt>.
1354 * @param start offset of first character which will be copied
1355 * @param length the number of characters to extract
1356 * @param target UnicodeString into which to copy characters;
1357 * it is the only output, because this function itself returns void.
1358 * @stable ICU 2.0
1359 */
1360 inline void extract(int32_t start,
1361 int32_t length,
1362 UnicodeString& target) const;
1363
1364 /**
1365 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1366 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1367 * @param start offset of first character which will be copied into the array
1368 * @param limit offset immediately following the last character to be copied
1369 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1370 * must be at least (<tt>dstStart + (limit - start)</tt>).
1371 * @param dstStart the offset in <TT>dst</TT> where the first character
1372 * will be extracted
1373 * @stable ICU 2.0
1374 */
1375 inline void extractBetween(int32_t start,
1376 int32_t limit,
1377 UChar *dst,
1378 int32_t dstStart = 0) const;
1379
1380 /**
1381 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1382 * into the UnicodeString <tt>target</tt>. Replaceable API.
1383 * @param start offset of first character which will be copied
1384 * @param limit offset immediately following the last character to be copied
1385 * @param target UnicodeString into which to copy characters;
1386 * it is the only output, because this function itself returns void.
1387 * @stable ICU 2.0
1388 */
1389 virtual void extractBetween(int32_t start,
1390 int32_t limit,
1391 UnicodeString& target) const;
1392
1393 /**
1394 * Copy the characters in the range
1395 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1396 * in a specified codepage.
1397 * The output string is NUL-terminated.
1398 *
1399 * @param start offset of first character which will be copied
1400 * @param startLength the number of characters to extract
1401 * @param target the target buffer for extraction
1402 * @param codepage the desired codepage for the characters. 0 has
1403 * the special meaning of the default codepage
1404 * If <code>codepage</code> is an empty string (<code>""</code>),
1405 * then a simple conversion is performed on the codepage-invariant
1406 * subset ("invariant characters") of the platform encoding. See utypes.h.
1407 * If <TT>target</TT> is NULL, then the number of bytes required for
1408 * <TT>target</TT> is returned. It is assumed that the target is big enough
1409 * to fit all of the characters.
1410 * @return the output string length, not including the terminating NUL
1411 * @stable ICU 2.0
1412 */
1413 inline int32_t extract(int32_t start,
1414 int32_t startLength,
1415 char *target,
1416 const char *codepage = 0) const;
1417
1418 /**
1419 * Copy the characters in the range
1420 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1421 * in a specified codepage.
1422 * This function does not write any more than <code>targetLength</code>
1423 * characters but returns the length of the entire output string
1424 * so that one can allocate a larger buffer and call the function again
1425 * if necessary.
1426 * The output string is NUL-terminated if possible.
1427 *
1428 * @param start offset of first character which will be copied
1429 * @param startLength the number of characters to extract
1430 * @param target the target buffer for extraction
1431 * @param targetLength the length of the target buffer
1432 * @param codepage the desired codepage for the characters. 0 has
1433 * the special meaning of the default codepage
1434 * If <code>codepage</code> is an empty string (<code>""</code>),
1435 * then a simple conversion is performed on the codepage-invariant
1436 * subset ("invariant characters") of the platform encoding. See utypes.h.
1437 * If <TT>target</TT> is NULL, then the number of bytes required for
1438 * <TT>target</TT> is returned.
1439 * @return the output string length, not including the terminating NUL
1440 * @stable ICU 2.0
1441 */
1442 int32_t extract(int32_t start,
1443 int32_t startLength,
1444 char *target,
1445 uint32_t targetLength,
1446 const char *codepage = 0) const;
1447
1448 /**
1449 * Convert the UnicodeString into a codepage string using an existing UConverter.
1450 * The output string is NUL-terminated if possible.
1451 *
1452 * This function avoids the overhead of opening and closing a converter if
1453 * multiple strings are extracted.
1454 *
1455 * @param dest destination string buffer, can be NULL if destCapacity==0
1456 * @param destCapacity the number of chars available at dest
1457 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1458 * or NULL for the default converter
1459 * @param errorCode normal ICU error code
1460 * @return the length of the output string, not counting the terminating NUL;
1461 * if the length is greater than destCapacity, then the string will not fit
1462 * and a buffer of the indicated length would need to be passed in
1463 * @stable ICU 2.0
1464 */
1465 int32_t extract(char *dest, int32_t destCapacity,
1466 UConverter *cnv,
1467 UErrorCode &errorCode) const;
1468
1469 /* Length operations */
1470
1471 /**
1472 * Return the length of the UnicodeString object.
1473 * The length is the number of characters in the text.
1474 * @return the length of the UnicodeString object
1475 * @stable ICU 2.0
1476 */
1477 inline int32_t length(void) const;
1478
1479 /**
1480 * Count Unicode code points in the length UChar code units of the string.
1481 * A code point may occupy either one or two UChar code units.
1482 * Counting code points involves reading all code units.
1483 *
1484 * This function is basically the inverse of moveIndex32().
1485 *
1486 * @param start the index of the first code unit to check
1487 * @param length the number of UChar code units to check
1488 * @return the number of code points in the specified code units
1489 * @stable ICU 2.0
1490 */
1491 int32_t
1492 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1493
1494 /**
1495 * Check if the length UChar code units of the string
1496 * contain more Unicode code points than a certain number.
1497 * This is more efficient than counting all code points in this part of the string
1498 * and comparing that number with a threshold.
1499 * This function may not need to scan the string at all if the length
1500 * falls within a certain range, and
1501 * never needs to count more than 'number+1' code points.
1502 * Logically equivalent to (countChar32(start, length)>number).
1503 * A Unicode code point may occupy either one or two UChar code units.
1504 *
1505 * @param start the index of the first code unit to check (0 for the entire string)
1506 * @param length the number of UChar code units to check
1507 * (use INT32_MAX for the entire string; remember that start/length
1508 * values are pinned)
1509 * @param number The number of code points in the (sub)string is compared against
1510 * the 'number' parameter.
1511 * @return Boolean value for whether the string contains more Unicode code points
1512 * than 'number'. Same as (countChar32(start, length)>number).
1513 * @see countChar32
1514 * @see u_strHasMoreChar32Than
1515 * @draft ICU 2.4
1516 */
1517 UBool
1518 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1519
1520 /**
1521 * Determine if this string is empty.
1522 * @return TRUE if this string contains 0 characters, FALSE otherwise.
1523 * @stable ICU 2.0
1524 */
1525 inline UBool isEmpty(void) const;
1526
1527 /**
1528 * Return the capacity of the internal buffer of the UnicodeString object.
1529 * This is useful together with the getBuffer functions.
1530 * See there for details.
1531 *
1532 * @return the number of UChars available in the internal buffer
1533 * @see getBuffer
1534 * @stable ICU 2.0
1535 */
1536 inline int32_t getCapacity(void) const;
1537
1538 /* Other operations */
1539
1540 /**
1541 * Generate a hash code for this object.
1542 * @return The hash code of this UnicodeString.
1543 * @stable ICU 2.0
1544 */
1545 inline int32_t hashCode(void) const;
1546
1547 /**
1548 * Determine if this string is bogus, that is, if it does not contain a valid string.
1549 * A bogus string has no value. It is different from an empty string.
1550 * It can be used to indicate that no string value is available.
1551 * getBuffer() and getTerminatedBuffer() return NULL, and
1552 * length() returns 0.
1553 *
1554 * @return TRUE if the string is bogus/invalid, FALSE otherwise
1555 * @see setToBogus()
1556 * @stable ICU 2.0
1557 */
1558 inline UBool isBogus(void) const;
1559
1560
1561 //========================================
1562 // Write operations
1563 //========================================
1564
1565 /* Assignment operations */
1566
1567 /**
1568 * Assignment operator. Replace the characters in this UnicodeString
1569 * with the characters from <TT>srcText</TT>.
1570 * @param srcText The text containing the characters to replace
1571 * @return a reference to this
1572 * @stable ICU 2.0
1573 */
1574 UnicodeString &operator=(const UnicodeString &srcText);
1575
1576 /**
1577 * Almost the same as the assignment operator.
1578 * Replace the characters in this UnicodeString
1579 * with the characters from <code>srcText</code>.
1580 *
1581 * This function works the same for all strings except for ones that
1582 * are readonly aliases.
1583 * Starting with ICU 2.4, the assignment operator and the copy constructor
1584 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1585 * This function implements the old, more efficient but less safe behavior
1586 * of making this string also a readonly alias to the same buffer.
1587 * The fastCopyFrom function must be used only if it is known that the lifetime of
1588 * this UnicodeString does not exceed the lifetime of the aliased buffer
1589 * including its contents, for example for strings from resource bundles
1590 * or aliases to string constants.
1591 *
1592 * @param src The text containing the characters to replace.
1593 * @return a reference to this
1594 * @draft ICU 2.4
1595 */
1596 UnicodeString &fastCopyFrom(const UnicodeString &src);
1597
1598 /**
1599 * Assignment operator. Replace the characters in this UnicodeString
1600 * with the code unit <TT>ch</TT>.
1601 * @param ch the code unit to replace
1602 * @return a reference to this
1603 * @stable ICU 2.0
1604 */
1605 inline UnicodeString& operator= (UChar ch);
1606
1607 /**
1608 * Assignment operator. Replace the characters in this UnicodeString
1609 * with the code point <TT>ch</TT>.
1610 * @param ch the code point to replace
1611 * @return a reference to this
1612 * @stable ICU 2.0
1613 */
1614 inline UnicodeString& operator= (UChar32 ch);
1615
1616 /**
1617 * Set the text in the UnicodeString object to the characters
1618 * in <TT>srcText</TT> in the range
1619 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1620 * <TT>srcText</TT> is not modified.
1621 * @param srcText the source for the new characters
1622 * @param srcStart the offset into <TT>srcText</TT> where new characters
1623 * will be obtained
1624 * @return a reference to this
1625 * @draft ICU2.2
1626 */
1627 inline UnicodeString& setTo(const UnicodeString& srcText,
1628 int32_t srcStart);
1629
1630 /**
1631 * Set the text in the UnicodeString object to the characters
1632 * in <TT>srcText</TT> in the range
1633 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1634 * <TT>srcText</TT> is not modified.
1635 * @param srcText the source for the new characters
1636 * @param srcStart the offset into <TT>srcText</TT> where new characters
1637 * will be obtained
1638 * @param srcLength the number of characters in <TT>srcText</TT> in the
1639 * replace string.
1640 * @return a reference to this
1641 * @stable ICU 2.0
1642 */
1643 inline UnicodeString& setTo(const UnicodeString& srcText,
1644 int32_t srcStart,
1645 int32_t srcLength);
1646
1647 /**
1648 * Set the text in the UnicodeString object to the characters in
1649 * <TT>srcText</TT>.
1650 * <TT>srcText</TT> is not modified.
1651 * @param srcText the source for the new characters
1652 * @return a reference to this
1653 * @stable ICU 2.0
1654 */
1655 inline UnicodeString& setTo(const UnicodeString& srcText);
1656
1657 /**
1658 * Set the characters in the UnicodeString object to the characters
1659 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1660 * @param srcChars the source for the new characters
1661 * @param srcLength the number of Unicode characters in srcChars.
1662 * @return a reference to this
1663 * @stable ICU 2.0
1664 */
1665 inline UnicodeString& setTo(const UChar *srcChars,
1666 int32_t srcLength);
1667
1668 /**
1669 * Set the characters in the UnicodeString object to the code unit
1670 * <TT>srcChar</TT>.
1671 * @param srcChar the code unit which becomes the UnicodeString's character
1672 * content
1673 * @return a reference to this
1674 * @stable ICU 2.0
1675 */
1676 UnicodeString& setTo(UChar srcChar);
1677
1678 /**
1679 * Set the characters in the UnicodeString object to the code point
1680 * <TT>srcChar</TT>.
1681 * @param srcChar the code point which becomes the UnicodeString's character
1682 * content
1683 * @return a reference to this
1684 * @stable ICU 2.0
1685 */
1686 UnicodeString& setTo(UChar32 srcChar);
1687
1688 /**
1689 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1690 * The text will be used for the UnicodeString object, but
1691 * it will not be released when the UnicodeString is destroyed.
1692 * This has copy-on-write semantics:
1693 * When the string is modified, then the buffer is first copied into
1694 * newly allocated memory.
1695 * The aliased buffer is never modified.
1696 * In an assignment to another UnicodeString with operator=() or the copy constructor,
1697 * the text is copied (since ICU 2.4); fastCopyFrom() aliases the same readonly text again.
1698 *
1699 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1700 * This must be true if <code>textLength==-1</code>.
1701 * @param text The characters to alias for the UnicodeString.
1702 * @param textLength The number of Unicode characters in <code>text</code> to alias.
1703 * If -1, then this function will determine the length
1704 * by calling <code>u_strlen()</code>.
1705 * @return a reference to this
1706 * @stable ICU 2.0
1707 */
1708 UnicodeString &setTo(UBool isTerminated,
1709 const UChar *text,
1710 int32_t textLength);
1711
1712 /**
1713 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1714 * The text will be used for the UnicodeString object, but
1715 * it will not be released when the UnicodeString is destroyed.
1716 * This has write-through semantics:
1717 * For as long as the capacity of the buffer is sufficient, write operations
1718 * will directly affect the buffer. When more capacity is necessary, then
1719 * a new buffer will be allocated and the contents copied as with regularly
1720 * constructed strings.
1721 * In an assignment to another UnicodeString, the buffer will be copied.
1722 * The extract(UChar *dst) function detects whether the dst pointer is the same
1723 * as the string buffer itself and will in this case not copy the contents.
1724 *
1725 * @param buffer The characters to alias for the UnicodeString.
1726 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1727 * @param buffCapacity The size of <code>buffer</code> in UChars.
1728 * @return a reference to this
1729 * @stable ICU 2.0
1730 */
1731 UnicodeString &setTo(UChar *buffer,
1732 int32_t buffLength,
1733 int32_t buffCapacity);
1734
1735 /**
1736 * Make this UnicodeString object invalid.
1737 * The string will test TRUE with isBogus().
1738 *
1739 * A bogus string has no value. It is different from an empty string.
1740 * It can be used to indicate that no string value is available.
1741 * getBuffer() and getTerminatedBuffer() return NULL, and
1742 * length() returns 0.
1743 *
1744 * This utility function is used throughout the UnicodeString
1745 * implementation to indicate that a UnicodeString operation failed,
1746 * and may be used in other functions,
1747 * especially but not exclusively when such functions do not
1748 * take a UErrorCode for simplicity.
1749 *
1750 * The following methods, and no others, will clear a string object's bogus flag:
1751 * - remove()
1752 * - remove(0, INT32_MAX)
1753 * - truncate(0)
1754 * - operator=() (assignment operator)
1755 * - setTo(...)
1756 *
1757 * The simplest way to turn a bogus string into an empty one
1758 * is to use the remove() function.
1759 * Examples for other functions that are equivalent to "set to empty string":
1760 * \code
1761 * if(s.isBogus()) {
1762 * s.remove(); // set to an empty string (remove all), or
1763 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1764 * s.truncate(0); // set to an empty string (complete truncation), or
1765 * s=UnicodeString(); // assign an empty string, or
1766 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1767 * static const UChar nul=0;
1768 * s.setTo(&nul, 0); // set to an empty C Unicode string
1769 * }
1770 * \endcode
1771 *
1772 * @see isBogus()
1773 * @stable ICU 2.0
1774 */
1775 void setToBogus();
1776
1777 /**
1778 * Set the character at the specified offset to the specified character.
1779 * @param offset A valid offset into the text of the character to set
1780 * @param ch The new character
1781 * @return A reference to this
1782 * @stable ICU 2.0
1783 */
1784 UnicodeString& setCharAt(int32_t offset,
1785 UChar ch);
1786
1787
1788 /* Append operations */
1789
1790 /**
1791 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1792 * object.
1793 * @param ch the code unit to be appended
1794 * @return a reference to this
1795 * @stable ICU 2.0
1796 */
1797 inline UnicodeString& operator+= (UChar ch);
1798
1799 /**
1800 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1801 * object.
1802 * @param ch the code point to be appended
1803 * @return a reference to this
1804 * @stable ICU 2.0
1805 */
1806 inline UnicodeString& operator+= (UChar32 ch);
1807
1808 /**
1809 * Append operator. Append the characters in <TT>srcText</TT> to the
1810 * end of the UnicodeString object. <TT>srcText</TT> is
1811 * not modified.
1812 * @param srcText the source for the new characters
1813 * @return a reference to this
1814 * @stable ICU 2.0
1815 */
1816 inline UnicodeString& operator+= (const UnicodeString& srcText);
1817
1818 /**
1819 * Append the characters
1820 * in <TT>srcText</TT> in the range
1821 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
1822 * end of the UnicodeString object. <TT>srcText</TT>
1823 * is not modified.
1824 * @param srcText the source for the new characters
1825 * @param srcStart the offset into <TT>srcText</TT> where new characters
1826 * will be obtained
1827 * @param srcLength the number of characters in <TT>srcText</TT> in
1828 * the append string
1829 * @return a reference to this
1830 * @stable ICU 2.0
1831 */
1832 inline UnicodeString& append(const UnicodeString& srcText,
1833 int32_t srcStart,
1834 int32_t srcLength);
1835
1836 /**
1837 * Append the characters in <TT>srcText</TT> to the end of the UnicodeString
1838 * object. <TT>srcText</TT> is not modified.
1839 * @param srcText the source for the new characters
1840 * @return a reference to this
1841 * @stable ICU 2.0
1842 */
1843 inline UnicodeString& append(const UnicodeString& srcText);
1844
1845 /**
1846 * Append the characters in <TT>srcChars</TT> in the range
1847 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
1848 * UnicodeString object.
1849 * <TT>srcChars</TT> is not modified.
1850 * @param srcChars the source for the new characters
1851 * @param srcStart the offset into <TT>srcChars</TT> where new characters
1852 * will be obtained
1853 * @param srcLength the number of characters in <TT>srcChars</TT> in
1854 * the append string
1855 * @return a reference to this
1856 * @stable ICU 2.0
1857 */
1858 inline UnicodeString& append(const UChar *srcChars,
1859 int32_t srcStart,
1860 int32_t srcLength);
1861
1862 /**
1863 * Append the characters in <TT>srcChars</TT> to the end of the UnicodeString
1864 * object. <TT>srcChars</TT> is not modified.
1865 * @param srcChars the source for the new characters
1866 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
1867 * @return a reference to this
1868 * @stable ICU 2.0
1869 */
1870 inline UnicodeString& append(const UChar *srcChars,
1871 int32_t srcLength);
1872
1873 /**
1874 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
1875 * @param srcChar the code unit to append
1876 * @return a reference to this
1877 * @stable ICU 2.0
1878 */
1879 inline UnicodeString& append(UChar srcChar);
1880
1881 /**
1882 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
1883 * @param srcChar the code point to append
1884 * @return a reference to this
1885 * @stable ICU 2.0
1886 */
1887 inline UnicodeString& append(UChar32 srcChar);
1888
1889
1890 /* Insert operations */
1891
1892 /**
1893 * Insert the characters in <TT>srcText</TT> in the range
1894 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1895 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1896 * @param start the offset where the insertion begins
1897 * @param srcText the source for the new characters
1898 * @param srcStart the offset into <TT>srcText</TT> where new characters
1899 * will be obtained
1900 * @param srcLength the number of characters in <TT>srcText</TT> in
1901 * the insert string
1902 * @return a reference to this
1903 * @stable ICU 2.0
1904 */
1905 inline UnicodeString& insert(int32_t start,
1906 const UnicodeString& srcText,
1907 int32_t srcStart,
1908 int32_t srcLength);
1909
1910 /**
1911 * Insert the characters in <TT>srcText</TT> into the UnicodeString object
1912 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1913 * @param start the offset where the insertion begins
1914 * @param srcText the source for the new characters
1915 * @return a reference to this
1916 * @stable ICU 2.0
1917 */
1918 inline UnicodeString& insert(int32_t start,
1919 const UnicodeString& srcText);
1920
1921 /**
1922 * Insert the characters in <TT>srcChars</TT> in the range
1923 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1924 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
1925 * @param start the offset at which the insertion begins
1926 * @param srcChars the source for the new characters
1927 * @param srcStart the offset into <TT>srcChars</TT> where new characters
1928 * will be obtained
1929 * @param srcLength the number of characters in <TT>srcChars</TT>
1930 * in the insert string
1931 * @return a reference to this
1932 * @stable ICU 2.0
1933 */
1934 inline UnicodeString& insert(int32_t start,
1935 const UChar *srcChars,
1936 int32_t srcStart,
1937 int32_t srcLength);
1938
1939 /**
1940 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
1941 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
1942 * @param start the offset where the insertion begins
1943 * @param srcChars the source for the new characters
1944 * @param srcLength the number of Unicode characters in srcChars.
1945 * @return a reference to this
1946 * @stable ICU 2.0
1947 */
1948 inline UnicodeString& insert(int32_t start,
1949 const UChar *srcChars,
1950 int32_t srcLength);
1951
1952 /**
1953 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
1954 * offset <TT>start</TT>.
1955 * @param start the offset at which the insertion occurs
1956 * @param srcChar the code unit to insert
1957 * @return a reference to this
1958 * @stable ICU 2.0
1959 */
1960 inline UnicodeString& insert(int32_t start,
1961 UChar srcChar);
1962
1963 /**
1964 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
1965 * offset <TT>start</TT>.
1966 * @param start the offset at which the insertion occurs
1967 * @param srcChar the code point to insert
1968 * @return a reference to this
1969 * @stable ICU 2.0
1970 */
1971 inline UnicodeString& insert(int32_t start,
1972 UChar32 srcChar);
1973
1974
1975 /* Replace operations */
1976
1977 /**
1978 * Replace the characters in the range
1979 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
1980 * <TT>srcText</TT> in the range
1981 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1982 * <TT>srcText</TT> is not modified.
1983 * @param start the offset at which the replace operation begins
1984 * @param length the number of characters to replace. The character at
1985 * <TT>start + length</TT> is not modified.
1986 * @param srcText the source for the new characters
1987 * @param srcStart the offset into <TT>srcText</TT> where new characters
1988 * will be obtained
1989 * @param srcLength the number of characters in <TT>srcText</TT> in
1990 * the replace string
1991 * @return a reference to this
1992 * @stable ICU 2.0
1993 */
1994 UnicodeString& replace(int32_t start,
1995 int32_t length,
1996 const UnicodeString& srcText,
1997 int32_t srcStart,
1998 int32_t srcLength);
1999
2000 /**
2001 * Replace the characters in the range
2002 * [<TT>start</TT>, <TT>start + length</TT>)
2003 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2004 * not modified.
2005 * @param start the offset at which the replace operation begins
2006 * @param length the number of characters to replace. The character at
2007 * <TT>start + length</TT> is not modified.
2008 * @param srcText the source for the new characters
2009 * @return a reference to this
2010 * @stable ICU 2.0
2011 */
2012 UnicodeString& replace(int32_t start,
2013 int32_t length,
2014 const UnicodeString& srcText);
2015
2016 /**
2017 * Replace the characters in the range
2018 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2019 * <TT>srcChars</TT> in the range
2020 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2021 * is not modified.
2022 * @param start the offset at which the replace operation begins
2023 * @param length the number of characters to replace. The character at
2024 * <TT>start + length</TT> is not modified.
2025 * @param srcChars the source for the new characters
2026 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2027 * will be obtained
2028 * @param srcLength the number of characters in <TT>srcChars</TT>
2029 * in the replace string
2030 * @return a reference to this
2031 * @stable ICU 2.0
2032 */
2033 UnicodeString& replace(int32_t start,
2034 int32_t length,
2035 const UChar *srcChars,
2036 int32_t srcStart,
2037 int32_t srcLength);
2038
2039 /**
2040 * Replace the characters in the range
2041 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2042 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2043 * @param start the offset at which the replace operation begins
2044 * @param length number of characters to replace. The character at
2045 * <TT>start + length</TT> is not modified.
2046 * @param srcChars the source for the new characters
2047 * @param srcLength the number of Unicode characters in srcChars
2048 * @return a reference to this
2049 * @stable ICU 2.0
2050 */
2051 inline UnicodeString& replace(int32_t start,
2052 int32_t length,
2053 const UChar *srcChars,
2054 int32_t srcLength);
2055
2056 /**
2057 * Replace the characters in the range
2058 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2059 * <TT>srcChar</TT>.
2060 * @param start the offset at which the replace operation begins
2061 * @param length the number of characters to replace. The character at
2062 * <TT>start + length</TT> is not modified.
2063 * @param srcChar the new code unit
2064 * @return a reference to this
2065 * @stable ICU 2.0
2066 */
2067 inline UnicodeString& replace(int32_t start,
2068 int32_t length,
2069 UChar srcChar);
2070
2071 /**
2072 * Replace the characters in the range
2073 * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2074 * <TT>srcChar</TT>.
2075 * @param start the offset at which the replace operation begins
2076 * @param length the number of characters to replace. The character at
2077 * <TT>start + length</TT> is not modified.
2078 * @param srcChar the new code point
2079 * @return a reference to this
2080 * @stable ICU 2.0
2081 */
2082 inline UnicodeString& replace(int32_t start,
2083 int32_t length,
2084 UChar32 srcChar);
2085
2086 /**
2087 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2088 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2089 * @param start the offset at which the replace operation begins
2090 * @param limit the offset immediately following the replace range
2091 * @param srcText the source for the new characters
2092 * @return a reference to this
2093 * @stable ICU 2.0
2094 */
2095 inline UnicodeString& replaceBetween(int32_t start,
2096 int32_t limit,
2097 const UnicodeString& srcText);
2098
2099 /**
2100 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2101 * with the characters in <TT>srcText</TT> in the range
2102 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2103 * @param start the offset at which the replace operation begins
2104 * @param limit the offset immediately following the replace range
2105 * @param srcText the source for the new characters
2106 * @param srcStart the offset into <TT>srcText</TT> where new characters
2107 * will be obtained
2108 * @param srcLimit the offset immediately following the range to copy
2109 * in <TT>srcText</TT>
2110 * @return a reference to this
2111 * @stable ICU 2.0
2112 */
2113 inline UnicodeString& replaceBetween(int32_t start,
2114 int32_t limit,
2115 const UnicodeString& srcText,
2116 int32_t srcStart,
2117 int32_t srcLimit);
2118
2119 /**
2120 * Replace a substring of this object with the given text.
2121 * @param start the beginning index, inclusive; <code>0 <= start
2122 * <= limit</code>.
2123 * @param limit the ending index, exclusive; <code>start <= limit
2124 * <= length()</code>.
2125 * @param text the text to replace characters <code>start</code>
2126 * to <code>limit - 1</code>
2127 * @stable ICU 2.0
2128 */
2129 virtual void handleReplaceBetween(int32_t start,
2130 int32_t limit,
2131 const UnicodeString& text);
2132
2133 /**
2134 * Replaceable API
2135 * @return TRUE if it has MetaData
2136 * @draft ICU 2.4
2137 */
2138 virtual UBool hasMetaData() const;
2139
2140 /**
2141 * Copy a substring of this object, retaining attribute (out-of-band)
2142 * information. This method is used to duplicate or reorder substrings.
2143 * The destination index must not overlap the source range.
2144 *
2145 * @param start the beginning index, inclusive; <code>0 <= start <=
2146 * limit</code>.
2147 * @param limit the ending index, exclusive; <code>start <= limit <=
2148 * length()</code>.
2149 * @param dest the destination index. The characters from
2150 * <code>start..limit-1</code> will be copied to <code>dest</code>.
2151 * Implementations of this method may assume that <code>dest <= start ||
2152 * dest >= limit</code>.
2153 * @stable ICU 2.0
2154 */
2155 virtual void copy(int32_t start, int32_t limit, int32_t dest);
2156
2157 /* Search and replace operations */
2158
2159 /**
2160 * Replace all occurrences of characters in oldText with the characters
2161 * in newText
2162 * @param oldText the text containing the search text
2163 * @param newText the text containing the replacement text
2164 * @return a reference to this
2165 * @stable ICU 2.0
2166 */
2167 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2168 const UnicodeString& newText);
2169
2170 /**
2171 * Replace all occurrences of characters in oldText with characters
2172 * in newText
2173 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2174 * @param start the start of the range in which replace will be performed
2175 * @param length the length of the range in which replace will be performed
2176 * @param oldText the text containing the search text
2177 * @param newText the text containing the replacement text
2178 * @return a reference to this
2179 * @stable ICU 2.0
2180 */
2181 inline UnicodeString& findAndReplace(int32_t start,
2182 int32_t length,
2183 const UnicodeString& oldText,
2184 const UnicodeString& newText);
2185
2186 /**
2187 * Replace all occurrences of characters in oldText in the range
2188 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2189 * in newText in the range
2190 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2191 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2192 * @param start the start of the range in which replace will be performed
2193 * @param length the length of the range in which replace will be performed
2194 * @param oldText the text containing the search text
2195 * @param oldStart the start of the search range in <TT>oldText</TT>
2196 * @param oldLength the length of the search range in <TT>oldText</TT>
2197 * @param newText the text containing the replacement text
2198 * @param newStart the start of the replacement range in <TT>newText</TT>
2199 * @param newLength the length of the replacement range in <TT>newText</TT>
2200 * @return a reference to this
2201 * @stable ICU 2.0
2202 */
2203 UnicodeString& findAndReplace(int32_t start,
2204 int32_t length,
2205 const UnicodeString& oldText,
2206 int32_t oldStart,
2207 int32_t oldLength,
2208 const UnicodeString& newText,
2209 int32_t newStart,
2210 int32_t newLength);
2211
2212
2213 /* Remove operations */
2214
2215 /**
2216 * Remove all characters from the UnicodeString object.
2217 * @return a reference to this
2218 * @stable ICU 2.0
2219 */
2220 inline UnicodeString& remove(void);
2221
2222 /**
2223 * Remove the characters in the range
2224 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2225 * @param start the offset of the first character to remove
2226 * @param length the number of characters to remove
2227 * @return a reference to this
2228 * @stable ICU 2.0
2229 */
2230 inline UnicodeString& remove(int32_t start,
2231 int32_t length = (int32_t)INT32_MAX);
2232
2233 /**
2234 * Remove the characters in the range
2235 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2236 * @param start the offset of the first character to remove
2237 * @param limit the offset immediately following the range to remove
2238 * @return a reference to this
2239 * @stable ICU 2.0
2240 */
2241 inline UnicodeString& removeBetween(int32_t start,
2242 int32_t limit = (int32_t)INT32_MAX);
2243
2244
2245 /* Length operations */
2246
2247 /**
2248 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2249 * If the length of this UnicodeString is less than targetLength,
2250 * targetLength - length() copies of padChar will be added to the
2251 * beginning of this UnicodeString.
2252 * @param targetLength the desired length of the string
2253 * @param padChar the character to use for padding. Defaults to
2254 * space (U+0020)
2255 * @return TRUE if the text was padded, FALSE otherwise.
2256 * @stable ICU 2.0
2257 */
2258 UBool padLeading(int32_t targetLength,
2259 UChar padChar = 0x0020);
2260
2261 /**
2262 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2263 * If the length of this UnicodeString is less than targetLength,
2264 * targetLength - length() copies of padChar will be added to the
2265 * end of this UnicodeString.
2266 * @param targetLength the desired length of the string
2267 * @param padChar the character to use for padding. Defaults to
2268 * space (U+0020)
2269 * @return TRUE if the text was padded, FALSE otherwise.
2270 * @stable ICU 2.0
2271 */
2272 UBool padTrailing(int32_t targetLength,
2273 UChar padChar = 0x0020);
2274
2275 /**
2276 * Truncate this UnicodeString to the <TT>targetLength</TT>.
2277 * @param targetLength the desired length of this UnicodeString.
2278 * @return TRUE if the text was truncated, FALSE otherwise
2279 * @stable ICU 2.0
2280 */
2281 inline UBool truncate(int32_t targetLength);
2282
2283 /**
2284 * Trims leading and trailing whitespace from this UnicodeString.
2285 * @return a reference to this
2286 * @stable ICU 2.0
2287 */
2288 UnicodeString& trim(void);
2289
2290
2291 /* Miscellaneous operations */
2292
2293 /**
2294 * Reverse this UnicodeString in place.
2295 * @return a reference to this
2296 * @stable ICU 2.0
2297 */
2298 inline UnicodeString& reverse(void);
2299
2300 /**
2301 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2302 * this UnicodeString.
2303 * @param start the start of the range to reverse
2304 * @param length the number of characters to reverse
2305 * @return a reference to this
2306 * @stable ICU 2.0
2307 */
2308 inline UnicodeString& reverse(int32_t start,
2309 int32_t length);
2310
2311 /**
2312 * Convert the characters in this to UPPER CASE following the conventions of
2313 * the default locale.
2314 * @return A reference to this.
2315 * @stable ICU 2.0
2316 */
2317 UnicodeString& toUpper(void);
2318
2319 /**
2320 * Convert the characters in this to UPPER CASE following the conventions of
2321 * a specific locale.
2322 * @param locale The locale containing the conventions to use.
2323 * @return A reference to this.
2324 * @stable ICU 2.0
2325 */
2326 UnicodeString& toUpper(const Locale& locale);
2327
2328 /**
2329 * Convert the characters in this to lower case following the conventions of
2330 * the default locale.
2331 * @return A reference to this.
2332 * @stable ICU 2.0
2333 */
2334 UnicodeString& toLower(void);
2335
2336 /**
2337 * Convert the characters in this to lower case following the conventions of
2338 * a specific locale.
2339 * @param locale The locale containing the conventions to use.
2340 * @return A reference to this.
2341 * @stable ICU 2.0
2342 */
2343 UnicodeString& toLower(const Locale& locale);
2344
2345#if !UCONFIG_NO_BREAK_ITERATION
2346
2347 /**
2348 * Titlecase this string, convenience function using the default locale.
2349 *
2350 * Casing is locale-dependent and context-sensitive.
2351 * Titlecasing uses a break iterator to find the first characters of words
2352 * that are to be titlecased. It titlecases those characters and lowercases
2353 * all others.
2354 *
2355 * The titlecase break iterator can be provided to customize for arbitrary
2356 * styles, using rules and dictionaries beyond the standard iterators.
2357 * It may be more efficient to always provide an iterator to avoid
2358 * opening and closing one for each string.
2359 * The standard titlecase iterator for the root locale implements the
2360 * algorithm of Unicode TR 21.
2361 *
2362 * This function uses only the first() and next() methods of the
2363 * provided break iterator.
2364 *
2365 * @param titleIter A break iterator to find the first characters of words
2366 * that are to be titlecased.
2367 * If none is provided (0), then a standard titlecase
2368 * break iterator is opened.
2369 * @return A reference to this.
2370 * @stable ICU 2.1
2371 */
2372 UnicodeString &toTitle(BreakIterator *titleIter);
2373
2374 /**
2375 * Titlecase this string.
2376 *
2377 * Casing is locale-dependent and context-sensitive.
2378 * Titlecasing uses a break iterator to find the first characters of words
2379 * that are to be titlecased. It titlecases those characters and lowercases
2380 * all others.
2381 *
2382 * The titlecase break iterator can be provided to customize for arbitrary
2383 * styles, using rules and dictionaries beyond the standard iterators.
2384 * It may be more efficient to always provide an iterator to avoid
2385 * opening and closing one for each string.
2386 * The standard titlecase iterator for the root locale implements the
2387 * algorithm of Unicode TR 21.
2388 *
2389 * This function uses only the first() and next() methods of the
2390 * provided break iterator.
2391 *
2392 * @param titleIter A break iterator to find the first characters of words
2393 * that are to be titlecased.
2394 * If none is provided (0), then a standard titlecase
2395 * break iterator is opened.
2396 * @param locale The locale to consider.
2397 * @return A reference to this.
2398 * @stable ICU 2.1
2399 */
2400 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2401
2402#endif
2403
2404 /**
2405 * Case-fold the characters in this string.
2406 * Case-folding is locale-independent and not context-sensitive,
2407 * but there is an option for whether to include or exclude mappings for dotted I
2408 * and dotless i that are marked with 'I' in CaseFolding.txt.
2409 * The result may be longer or shorter than the original.
2410 *
2411 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2412 * @return A reference to this.
2413 * @stable ICU 2.0
2414 */
2415 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2416
2417 //========================================
2418 // Access to the internal buffer
2419 //========================================
2420
2421 /**
2422 * Get a read/write pointer to the internal buffer.
2423 * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2424 * writable, and is still owned by the UnicodeString object.
2425 * Calls to getBuffer(minCapacity) must not be nested, and
2426 * must be matched with calls to releaseBuffer(newLength).
2427 * If the string buffer was read-only or shared,
2428 * then it will be reallocated and copied.
2429 *
2430 * An attempted nested call will return 0, and will not further modify the
2431 * state of the UnicodeString object.
2432 * It also returns 0 if the string is bogus.
2433 *
2434 * The actual capacity of the string buffer may be larger than minCapacity.
2435 * getCapacity() returns the actual capacity.
2436 * For many operations, the full capacity should be used to avoid reallocations.
2437 *
2438 * While the buffer is "open" between getBuffer(minCapacity)
2439 * and releaseBuffer(newLength), the following applies:
2440 * - The string length is set to 0.
2441 * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2442 * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2443 * - You can read from and write to the returned buffer.
2444 * - The previous string contents will still be in the buffer;
2445 * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2446 * If the length() was greater than minCapacity, then any contents after minCapacity
2447 * may be lost.
2448 * The buffer contents is not NUL-terminated by getBuffer().
2449 * If length()<getCapacity() then you can terminate it by writing a NUL
2450 * at index length().
 * - You must call releaseBuffer(newLength) in order to
 * return to normal UnicodeString operation.
2453 *
2454 * @param minCapacity the minimum number of UChars that are to be available
2455 * in the buffer, starting at the returned pointer;
2456 * default to the current string capacity if minCapacity==-1
2457 * @return a writable pointer to the internal string buffer,
2458 * or 0 if an error occurs (nested calls, out of memory)
2459 *
2460 * @see releaseBuffer
2461 * @see getTerminatedBuffer()
2462 * @stable ICU 2.0
2463 */
2464 UChar *getBuffer(int32_t minCapacity);
2465
2466 /**
2467 * Release a read/write buffer on a UnicodeString object with an
2468 * "open" getBuffer(minCapacity).
2469 * This function must be called in a matched pair with getBuffer(minCapacity).
2470 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2471 *
2472 * It will set the string length to newLength, at most to the current capacity.
2473 * If newLength==-1 then it will set the length according to the
2474 * first NUL in the buffer, or to the capacity if there is no NUL.
2475 *
2476 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2477 *
2478 * @param newLength the new length of the UnicodeString object;
2479 * defaults to the current capacity if newLength is greater than that;
2480 * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2481 * the current capacity of the string
2482 *
2483 * @see getBuffer(int32_t minCapacity)
2484 * @stable ICU 2.0
2485 */
2486 void releaseBuffer(int32_t newLength=-1);
2487
2488 /**
2489 * Get a read-only pointer to the internal buffer.
2490 * This can be called at any time on a valid UnicodeString.
2491 *
2492 * It returns 0 if the string is bogus, or
2493 * during an "open" getBuffer(minCapacity).
2494 *
2495 * It can be called as many times as desired.
2496 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2497 * at which time the pointer is semantically invalidated and must not be used any more.
2498 *
2499 * The capacity of the buffer can be determined with getCapacity().
2500 * The part after length() may or may not be initialized and valid,
2501 * depending on the history of the UnicodeString object.
2502 *
2503 * The buffer contents is (probably) not NUL-terminated.
2504 * You can check if it is with
2505 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2506 * (See getTerminatedBuffer().)
2507 *
2508 * The buffer may reside in read-only memory. Its contents must not
2509 * be modified.
2510 *
 * @return a read-only pointer to the internal string buffer,
 * or 0 if the string is bogus or a getBuffer(minCapacity) is "open"
2513 *
2514 * @see getBuffer(int32_t minCapacity)
2515 * @see getTerminatedBuffer()
2516 * @stable ICU 2.0
2517 */
2518 inline const UChar *getBuffer() const;
2519
2520 /**
2521 * Get a read-only pointer to the internal buffer,
2522 * making sure that it is NUL-terminated.
2523 * This can be called at any time on a valid UnicodeString.
2524 *
2525 * It returns 0 if the string is bogus, or
2526 * during an "open" getBuffer(minCapacity), or if the buffer cannot
2527 * be NUL-terminated (because memory allocation failed).
2528 *
2529 * It can be called as many times as desired.
2530 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2531 * at which time the pointer is semantically invalidated and must not be used any more.
2532 *
2533 * The capacity of the buffer can be determined with getCapacity().
2534 * The part after length()+1 may or may not be initialized and valid,
2535 * depending on the history of the UnicodeString object.
2536 *
2537 * The buffer contents is guaranteed to be NUL-terminated.
2538 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2539 * is written.
2540 * For this reason, this function is not const, unlike getBuffer().
2541 * Note that a UnicodeString may also contain NUL characters as part of its contents.
2542 *
2543 * The buffer may reside in read-only memory. Its contents must not
2544 * be modified.
2545 *
2546 * @return a read-only pointer to the internal string buffer,
2547 * or 0 if the string is empty or bogus
2548 *
2549 * @see getBuffer(int32_t minCapacity)
2550 * @see getBuffer()
2551 * @draft ICU 2.2
2552 */
2553 inline const UChar *getTerminatedBuffer();
2554
2555 //========================================
2556 // Constructors
2557 //========================================
2558
2559 /** Construct an empty UnicodeString.
2560 * @stable ICU 2.0
2561 */
2562 UnicodeString();
2563
2564 /**
2565 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2566 * @param capacity the number of UChars this UnicodeString should hold
2567 * before a resize is necessary; if count is greater than 0 and count
2568 * code points c take up more space than capacity, then capacity is adjusted
2569 * accordingly.
2570 * @param c is used to initially fill the string
2571 * @param count specifies how many code points c are to be written in the
2572 * string
2573 * @stable ICU 2.0
2574 */
2575 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2576
2577 /**
2578 * Single UChar (code unit) constructor.
2579 * @param ch the character to place in the UnicodeString
2580 * @stable ICU 2.0
2581 */
2582 UnicodeString(UChar ch);
2583
2584 /**
2585 * Single UChar32 (code point) constructor.
2586 * @param ch the character to place in the UnicodeString
2587 * @stable ICU 2.0
2588 */
2589 UnicodeString(UChar32 ch);
2590
2591 /**
2592 * UChar* constructor.
2593 * @param text The characters to place in the UnicodeString. <TT>text</TT>
 * must be NUL (U+0000) terminated.
2595 * @stable ICU 2.0
2596 */
2597 UnicodeString(const UChar *text);
2598
2599 /**
2600 * UChar* constructor.
2601 * @param text The characters to place in the UnicodeString.
2602 * @param textLength The number of Unicode characters in <TT>text</TT>
2603 * to copy.
2604 * @stable ICU 2.0
2605 */
2606 UnicodeString(const UChar *text,
2607 int32_t textLength);
2608
2609 /**
2610 * Readonly-aliasing UChar* constructor.
2611 * The text will be used for the UnicodeString object, but
2612 * it will not be released when the UnicodeString is destroyed.
2613 * This has copy-on-write semantics:
2614 * When the string is modified, then the buffer is first copied into
2615 * newly allocated memory.
2616 * The aliased buffer is never modified.
2617 * In an assignment to another UnicodeString, the text will be aliased again,
2618 * so that both strings then alias the same readonly-text.
2619 *
2620 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2621 * This must be true if <code>textLength==-1</code>.
2622 * @param text The characters to alias for the UnicodeString.
2623 * @param textLength The number of Unicode characters in <code>text</code> to alias.
2624 * If -1, then this constructor will determine the length
2625 * by calling <code>u_strlen()</code>.
2626 * @stable ICU 2.0
2627 */
2628 UnicodeString(UBool isTerminated,
2629 const UChar *text,
2630 int32_t textLength);
2631
2632 /**
2633 * Writable-aliasing UChar* constructor.
2634 * The text will be used for the UnicodeString object, but
2635 * it will not be released when the UnicodeString is destroyed.
2636 * This has write-through semantics:
2637 * For as long as the capacity of the buffer is sufficient, write operations
2638 * will directly affect the buffer. When more capacity is necessary, then
2639 * a new buffer will be allocated and the contents copied as with regularly
2640 * constructed strings.
2641 * In an assignment to another UnicodeString, the buffer will be copied.
2642 * The extract(UChar *dst) function detects whether the dst pointer is the same
2643 * as the string buffer itself and will in this case not copy the contents.
2644 *
2645 * @param buffer The characters to alias for the UnicodeString.
2646 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2647 * @param buffCapacity The size of <code>buffer</code> in UChars.
2648 * @stable ICU 2.0
2649 */
2650 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2651
2652 /**
2653 * char* constructor.
2654 * @param codepageData an array of bytes, null-terminated
2655 * @param codepage the encoding of <TT>codepageData</TT>. The special
2656 * value 0 for <TT>codepage</TT> indicates that the text is in the
2657 * platform's default codepage.
2658 * If <code>codepage</code> is an empty string (<code>""</code>),
2659 * then a simple conversion is performed on the codepage-invariant
2660 * subset ("invariant characters") of the platform encoding. See utypes.h.
2661 * @stable ICU 2.0
2662 */
2663 UnicodeString(const char *codepageData,
2664 const char *codepage = 0);
2665
2666 /**
2667 * char* constructor.
2668 * @param codepageData an array of bytes.
2669 * @param dataLength The number of bytes in <TT>codepageData</TT>.
2670 * @param codepage the encoding of <TT>codepageData</TT>. The special
2671 * value 0 for <TT>codepage</TT> indicates that the text is in the
2672 * platform's default codepage.
2673 * If <code>codepage</code> is an empty string (<code>""</code>),
2674 * then a simple conversion is performed on the codepage-invariant
2675 * subset ("invariant characters") of the platform encoding. See utypes.h.
2676 * @stable ICU 2.0
2677 */
2678 UnicodeString(const char *codepageData,
2679 int32_t dataLength,
2680 const char *codepage = 0);
2681
2682 /**
2683 * char * / UConverter constructor.
2684 * This constructor uses an existing UConverter object to
2685 * convert the codepage string to Unicode and construct a UnicodeString
2686 * from that.
2687 *
2688 * The converter is reset at first.
2689 * If the error code indicates a failure before this constructor is called,
2690 * or if an error occurs during conversion or construction,
2691 * then the string will be bogus.
2692 *
2693 * This function avoids the overhead of opening and closing a converter if
2694 * multiple strings are constructed.
2695 *
2696 * @param src input codepage string
2697 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
2698 * @param cnv converter object (ucnv_resetToUnicode() will be called),
2699 * can be NULL for the default converter
2700 * @param errorCode normal ICU error code
2701 * @stable ICU 2.0
2702 */
2703 UnicodeString(
2704 const char *src, int32_t srcLength,
2705 UConverter *cnv,
2706 UErrorCode &errorCode);
2707
2708
2709 /**
2710 * Copy constructor.
2711 * @param that The UnicodeString object to copy.
2712 * @stable ICU 2.0
2713 */
2714 UnicodeString(const UnicodeString& that);
2715
2716 /**
2717 * 'Substring' constructor from tail of source string.
2718 * @param src The UnicodeString object to copy.
2719 * @param srcStart The offset into <tt>src</tt> at which to start copying.
 * @draft ICU 2.2
2721 */
2722 UnicodeString(const UnicodeString& src, int32_t srcStart);
2723
2724 /**
2725 * 'Substring' constructor from subrange of source string.
2726 * @param src The UnicodeString object to copy.
2727 * @param srcStart The offset into <tt>src</tt> at which to start copying.
2728 * @param srcLength The number of characters from <tt>src</tt> to copy.
 * @draft ICU 2.2
2730 */
2731 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
2732
2733 /**
2734 * Clone this object, an instance of a subclass of Replaceable.
2735 * Clones can be used concurrently in multiple threads.
2736 * If a subclass does not implement clone(), or if an error occurs,
2737 * then NULL is returned.
2738 * The clone functions in all subclasses return a pointer to a Replaceable
2739 * because some compilers do not support covariant (same-as-this)
2740 * return types; cast to the appropriate subclass if necessary.
2741 * The caller must delete the clone.
2742 *
2743 * @return a clone of this object
2744 *
2745 * @see Replaceable::clone
2746 * @see getDynamicClassID
2747 * @draft ICU 2.6
2748 */
2749 virtual Replaceable *clone() const;
2750
2751 /** Destructor.
2752 * @stable ICU 2.0
2753 */
2754 ~UnicodeString();
2755
2756
2757 /* Miscellaneous operations */
2758
2759 /**
2760 * Unescape a string of characters and return a string containing
2761 * the result. The following escape sequences are recognized:
2762 *
2763 * \uhhhh 4 hex digits; h in [0-9A-Fa-f]
2764 * \Uhhhhhhhh 8 hex digits
2765 * \xhh 1-2 hex digits
2766 * \ooo 1-3 octal digits; o in [0-7]
2767 * \cX control-X; X is masked with 0x1F
2768 *
2769 * as well as the standard ANSI C escapes:
2770 *
2771 * \a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A,
2772 * \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B,
2773 * \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C
2774 *
2775 * Anything else following a backslash is generically escaped. For
2776 * example, "[a\-z]" returns "[a-z]".
2777 *
2778 * If an escape sequence is ill-formed, this method returns an empty
2779 * string. An example of an ill-formed sequence is "\u" followed by
2780 * fewer than 4 hex digits.
2781 *
2782 * This function is similar to u_unescape() but not identical to it.
2783 * The latter takes a source char*, so it does escape recognition
2784 * and also invariant conversion.
2785 *
2786 * @return a string with backslash escapes interpreted, or an
2787 * empty string on error.
2788 * @see UnicodeString#unescapeAt()
2789 * @see u_unescape()
2790 * @see u_unescapeAt()
2791 * @stable ICU 2.0
2792 */
2793 UnicodeString unescape() const;
2794
2795 /**
2796 * Unescape a single escape sequence and return the represented
2797 * character. See unescape() for a listing of the recognized escape
2798 * sequences. The character at offset-1 is assumed (without
2799 * checking) to be a backslash. If the escape sequence is
2800 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
2801 * returned.
2802 *
2803 * @param offset an input output parameter. On input, it is the
2804 * offset into this string where the escape sequence is located,
2805 * after the initial backslash. On output, it is advanced after the
2806 * last character parsed. On error, it is not advanced at all.
2807 * @return the character represented by the escape sequence at
2808 * offset, or (UChar32)0xFFFFFFFF on error.
2809 * @see UnicodeString#unescape()
2810 * @see u_unescape()
2811 * @see u_unescapeAt()
2812 * @stable ICU 2.0
2813 */
2814 UChar32 unescapeAt(int32_t &offset) const;
2815
2816 /**
2817 * ICU "poor man's RTTI", returns a UClassID for the actual class.
2818 *
2819 * @draft ICU 2.2
2820 */
2821 virtual inline UClassID getDynamicClassID() const;
2822
2823 /**
2824 * ICU "poor man's RTTI", returns a UClassID for this class.
2825 *
2826 * @draft ICU 2.2
2827 */
2828 static inline UClassID getStaticClassID();
2829
2830 //========================================
2831 // Implementation methods
2832 //========================================
2833
2834protected:
2835 /**
2836 * Implement Replaceable::getLength() (see jitterbug 1027).
2837 * @draft ICU 2.4
2838 */
2839 virtual int32_t getLength() const;
2840
2841 /**
2842 * The change in Replaceable to use virtual getCharAt() allows
2843 * UnicodeString::charAt() to be inline again (see jitterbug 709).
2844 * @draft ICU 2.4
2845 */
2846 virtual UChar getCharAt(int32_t offset) const;
2847
2848 /**
2849 * The change in Replaceable to use virtual getChar32At() allows
2850 * UnicodeString::char32At() to be inline again (see jitterbug 709).
2851 * @draft ICU 2.4
2852 */
2853 virtual UChar32 getChar32At(int32_t offset) const;
2854
2855private:
2856
2857 inline int8_t
2858 doCompare(int32_t start,
2859 int32_t length,
2860 const UnicodeString& srcText,
2861 int32_t srcStart,
2862 int32_t srcLength) const;
2863
2864 int8_t doCompare(int32_t start,
2865 int32_t length,
2866 const UChar *srcChars,
2867 int32_t srcStart,
2868 int32_t srcLength) const;
2869
2870 inline int8_t
2871 doCompareCodePointOrder(int32_t start,
2872 int32_t length,
2873 const UnicodeString& srcText,
2874 int32_t srcStart,
2875 int32_t srcLength) const;
2876
2877 int8_t doCompareCodePointOrder(int32_t start,
2878 int32_t length,
2879 const UChar *srcChars,
2880 int32_t srcStart,
2881 int32_t srcLength) const;
2882
2883 inline int8_t
2884 doCaseCompare(int32_t start,
2885 int32_t length,
2886 const UnicodeString &srcText,
2887 int32_t srcStart,
2888 int32_t srcLength,
2889 uint32_t options) const;
2890
2891 int8_t
2892 doCaseCompare(int32_t start,
2893 int32_t length,
2894 const UChar *srcChars,
2895 int32_t srcStart,
2896 int32_t srcLength,
2897 uint32_t options) const;
2898
2899 int32_t doIndexOf(UChar c,
2900 int32_t start,
2901 int32_t length) const;
2902
2903 int32_t doIndexOf(UChar32 c,
2904 int32_t start,
2905 int32_t length) const;
2906
2907 int32_t doLastIndexOf(UChar c,
2908 int32_t start,
2909 int32_t length) const;
2910
2911 int32_t doLastIndexOf(UChar32 c,
2912 int32_t start,
2913 int32_t length) const;
2914
2915 void doExtract(int32_t start,
2916 int32_t length,
2917 UChar *dst,
2918 int32_t dstStart) const;
2919
2920 inline void doExtract(int32_t start,
2921 int32_t length,
2922 UnicodeString& target) const;
2923
2924 inline UChar doCharAt(int32_t offset) const;
2925
2926 UnicodeString& doReplace(int32_t start,
2927 int32_t length,
2928 const UnicodeString& srcText,
2929 int32_t srcStart,
2930 int32_t srcLength);
2931
2932 UnicodeString& doReplace(int32_t start,
2933 int32_t length,
2934 const UChar *srcChars,
2935 int32_t srcStart,
2936 int32_t srcLength);
2937
2938 UnicodeString& doReverse(int32_t start,
2939 int32_t length);
2940
2941 // calculate hash code
2942 int32_t doHashCode(void) const;
2943
2944 // get pointer to start of array
2945 inline UChar* getArrayStart(void);
2946 inline const UChar* getArrayStart(void) const;
2947
2948 // allocate the array; result may be fStackBuffer
2949 // sets refCount to 1 if appropriate
2950 // sets fArray, fCapacity, and fFlags
2951 // returns boolean for success or failure
2952 UBool allocate(int32_t capacity);
2953
2954 // release the array if owned
2955 void releaseArray(void);
2956
2957 // turn a bogus string into an empty one
2958 void unBogus();
2959
 // implements assignment operator, copy constructor, and fastCopyFrom()
2961 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
2962
2963 // Pin start and limit to acceptable values.
2964 inline void pinIndex(int32_t& start) const;
2965 inline void pinIndices(int32_t& start,
2966 int32_t& length) const;
2967
2968 /* Internal extract() using UConverter. */
2969 int32_t doExtract(int32_t start, int32_t length,
2970 char *dest, int32_t destCapacity,
2971 UConverter *cnv,
2972 UErrorCode &errorCode) const;
2973
2974 /*
2975 * Real constructor for converting from codepage data.
2976 * It assumes that it is called with !fRefCounted.
2977 *
2978 * If <code>codepage==0</code>, then the default converter
2979 * is used for the platform encoding.
2980 * If <code>codepage</code> is an empty string (<code>""</code>),
2981 * then a simple conversion is performed on the codepage-invariant
2982 * subset ("invariant characters") of the platform encoding. See utypes.h.
2983 */
2984 void doCodepageCreate(const char *codepageData,
2985 int32_t dataLength,
2986 const char *codepage);
2987
2988 /*
2989 * Worker function for creating a UnicodeString from
2990 * a codepage string using a UConverter.
2991 */
2992 void
2993 doCodepageCreate(const char *codepageData,
2994 int32_t dataLength,
2995 UConverter *converter,
2996 UErrorCode &status);
2997 /*
2998 * This function is called when write access to the array
2999 * is necessary.
3000 *
3001 * We need to make a copy of the array if
3002 * the buffer is read-only, or
3003 * the buffer is refCounted (shared), and refCount>1, or
3004 * the buffer is too small.
3005 *
3006 * Return FALSE if memory could not be allocated.
3007 */
3008 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3009 int32_t growCapacity = -1,
3010 UBool doCopyArray = TRUE,
3011 int32_t **pBufferToDelete = 0,
3012 UBool forceClone = FALSE);
3013
3014 // common function for case mappings
3015 UnicodeString &
3016 caseMap(BreakIterator *titleIter,
3017 const Locale& locale,
3018 uint32_t options,
3019 int32_t toWhichCase);
3020
3021 // ref counting
3022 void addRef(void);
3023 int32_t removeRef(void);
3024 int32_t refCount(void) const;
3025
3026 // constants
3027 enum {
3028 US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
3029 kInvalidUChar=0xffff, // invalid UChar index
3030 kGrowSize=128, // grow size for this buffer
3031 kInvalidHashCode=0, // invalid hash code
3032 kEmptyHashCode=1, // hash code for empty string
3033
3034 // bit flag values for fFlags
3035 kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3036 kUsingStackBuffer=2,// fArray==fStackBuffer
3037 kRefCounted=4, // there is a refCount field before the characters in fArray
3038 kBufferIsReadonly=8,// do not write to this buffer
3039 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3040 // and releaseBuffer(newLength) must be called
3041
3042 // combined values for convenience
3043 kShortString=kUsingStackBuffer,
3044 kLongString=kRefCounted,
3045 kReadonlyAlias=kBufferIsReadonly,
3046 kWritableAlias=0
3047 };
3048
3049 friend class StringCharacterIterator;
3050 friend class StringThreadTest;
3051
3052 /*
3053 * The following are all the class fields that are stored
3054 * in each UnicodeString object.
3055 * Note that UnicodeString has virtual functions,
3056 * therefore there is an implicit vtable pointer
3057 * as the first real field.
3058 * The fields should be aligned such that no padding is
3059 * necessary, mostly by having larger types first.
3060 * On 32-bit machines, the size should be 32 bytes,
3061 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3062 */
3063 // (implicit) *vtable;
3064 int32_t fLength; // number of characters in fArray
3065 int32_t fCapacity; // sizeof fArray
3066 UChar *fArray; // the Unicode data
3067 uint16_t fFlags; // bit flags: see constants above
3068 UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
3069
3070 /**
3071 * The address of this static class variable serves as this class's ID
3072 * for ICU "poor man's RTTI".
3073 */
3074 static const char fgClassID;
3075};
3076
3077U_NAMESPACE_END
3078
3079//========================================
3080// Array copying
3081//========================================
3082/**
3083 * Copy an array of UnicodeString OBJECTS (not pointers).
3084 * @internal
3085 */
3086inline void
3087uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
3088{ while(count-- > 0) *dst++ = *src++; }
3089
3090/**
3091 * Copy an array of UnicodeString OBJECTS (not pointers).
3092 * @internal
3093 */
3094inline void
3095uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
3096 U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
3097{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
3098
3099U_NAMESPACE_BEGIN
3100
3101//========================================
3102// Inline members
3103//========================================
3104
3105//========================================
3106// Privates
3107//========================================
3108
3109inline void
3110UnicodeString::pinIndex(int32_t& start) const
3111{
3112 // pin index
3113 if(start < 0) {
3114 start = 0;
3115 } else if(start > fLength) {
3116 start = fLength;
3117 }
3118}
3119
3120inline void
3121UnicodeString::pinIndices(int32_t& start,
3122 int32_t& _length) const
3123{
3124 // pin indices
3125 if(start < 0) {
3126 start = 0;
3127 } else if(start > fLength) {
3128 start = fLength;
3129 }
3130 if(_length < 0) {
3131 _length = 0;
3132 } else if(_length > (fLength - start)) {
3133 _length = (fLength - start);
3134 }
3135}
3136
3137inline UChar*
3138UnicodeString::getArrayStart()
3139{ return fArray; }
3140
3141inline const UChar*
3142UnicodeString::getArrayStart() const
3143{ return fArray; }
3144
3145//========================================
3146// Read-only implementation methods
3147//========================================
3148inline UClassID
3149UnicodeString::getStaticClassID()
3150{ return (UClassID)&fgClassID; }
3151
3152inline UClassID
3153UnicodeString::getDynamicClassID() const
3154{ return UnicodeString::getStaticClassID(); }
3155
3156inline int32_t
3157UnicodeString::length() const
3158{ return fLength; }
3159
3160inline int32_t
3161UnicodeString::getCapacity() const
3162{ return fCapacity; }
3163
3164inline int32_t
3165UnicodeString::hashCode() const
3166{ return doHashCode(); }
3167
3168inline UBool
3169UnicodeString::isBogus() const
3170{ return (UBool)(fFlags & kIsBogus); }
3171
3172inline const UChar *
3173UnicodeString::getBuffer() const {
3174 if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
3175 return fArray;
3176 } else {
3177 return 0;
3178 }
3179}
3180
3181//========================================
3182// Read-only alias methods
3183//========================================
3184inline int8_t
3185UnicodeString::doCompare(int32_t start,
3186 int32_t length,
3187 const UnicodeString& srcText,
3188 int32_t srcStart,
3189 int32_t srcLength) const
3190{
3191 if(srcText.isBogus()) {
3192 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3193 } else {
3194 srcText.pinIndices(srcStart, srcLength);
3195 return doCompare(start, length, srcText.fArray, srcStart, srcLength);
3196 }
3197}
3198
3199inline UBool
3200UnicodeString::operator== (const UnicodeString& text) const
3201{
3202 if(isBogus()) {
3203 return text.isBogus();
3204 } else {
3205 return
3206 !text.isBogus() &&
3207 fLength == text.fLength &&
3208 doCompare(0, fLength, text, 0, text.fLength) == 0;
3209 }
3210}
3211
3212inline UBool
3213UnicodeString::operator!= (const UnicodeString& text) const
3214{ return (! operator==(text)); }
3215
3216inline UBool
3217UnicodeString::operator> (const UnicodeString& text) const
3218{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
3219
3220inline UBool
3221UnicodeString::operator< (const UnicodeString& text) const
3222{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
3223
3224inline UBool
3225UnicodeString::operator>= (const UnicodeString& text) const
3226{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
3227
3228inline UBool
3229UnicodeString::operator<= (const UnicodeString& text) const
3230{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
3231
3232inline int8_t
3233UnicodeString::compare(const UnicodeString& text) const
3234{ return doCompare(0, fLength, text, 0, text.fLength); }
3235
3236inline int8_t
3237UnicodeString::compare(int32_t start,
3238 int32_t _length,
3239 const UnicodeString& srcText) const
3240{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
3241
3242inline int8_t
3243UnicodeString::compare(const UChar *srcChars,
3244 int32_t srcLength) const
3245{ return doCompare(0, fLength, srcChars, 0, srcLength); }
3246
3247inline int8_t
3248UnicodeString::compare(int32_t start,
3249 int32_t _length,
3250 const UnicodeString& srcText,
3251 int32_t srcStart,
3252 int32_t srcLength) const
3253{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3254
3255inline int8_t
3256UnicodeString::compare(int32_t start,
3257 int32_t _length,
3258 const UChar *srcChars) const
3259{ return doCompare(start, _length, srcChars, 0, _length); }
3260
3261inline int8_t
3262UnicodeString::compare(int32_t start,
3263 int32_t _length,
3264 const UChar *srcChars,
3265 int32_t srcStart,
3266 int32_t srcLength) const
3267{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3268
3269inline int8_t
3270UnicodeString::compareBetween(int32_t start,
3271 int32_t limit,
3272 const UnicodeString& srcText,
3273 int32_t srcStart,
3274 int32_t srcLimit) const
3275{ return doCompare(start, limit - start,
3276 srcText, srcStart, srcLimit - srcStart); }
3277
3278inline int8_t
3279UnicodeString::doCompareCodePointOrder(int32_t start,
3280 int32_t length,
3281 const UnicodeString& srcText,
3282 int32_t srcStart,
3283 int32_t srcLength) const
3284{
3285 if(srcText.isBogus()) {
3286 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3287 } else {
3288 srcText.pinIndices(srcStart, srcLength);
3289 return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
3290 }
3291}
3292
3293inline int8_t
3294UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3295{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
3296
3297inline int8_t
3298UnicodeString::compareCodePointOrder(int32_t start,
3299 int32_t _length,
3300 const UnicodeString& srcText) const
3301{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
3302
3303inline int8_t
3304UnicodeString::compareCodePointOrder(const UChar *srcChars,
3305 int32_t srcLength) const
3306{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
3307
3308inline int8_t
3309UnicodeString::compareCodePointOrder(int32_t start,
3310 int32_t _length,
3311 const UnicodeString& srcText,
3312 int32_t srcStart,
3313 int32_t srcLength) const
3314{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3315
3316inline int8_t
3317UnicodeString::compareCodePointOrder(int32_t start,
3318 int32_t _length,
3319 const UChar *srcChars) const
3320{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3321
3322inline int8_t
3323UnicodeString::compareCodePointOrder(int32_t start,
3324 int32_t _length,
3325 const UChar *srcChars,
3326 int32_t srcStart,
3327 int32_t srcLength) const
3328{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3329
3330inline int8_t
3331UnicodeString::compareCodePointOrderBetween(int32_t start,
3332 int32_t limit,
3333 const UnicodeString& srcText,
3334 int32_t srcStart,
3335 int32_t srcLimit) const
3336{ return doCompareCodePointOrder(start, limit - start,
3337 srcText, srcStart, srcLimit - srcStart); }
3338
3339inline int8_t
3340UnicodeString::doCaseCompare(int32_t start,
3341 int32_t length,
3342 const UnicodeString &srcText,
3343 int32_t srcStart,
3344 int32_t srcLength,
3345 uint32_t options) const
3346{
3347 if(srcText.isBogus()) {
3348 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3349 } else {
3350 srcText.pinIndices(srcStart, srcLength);
3351 return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
3352 }
3353}
3354
3355inline int8_t
3356UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3357 return doCaseCompare(0, fLength, text, 0, text.fLength, options);
3358}
3359
3360inline int8_t
3361UnicodeString::caseCompare(int32_t start,
3362 int32_t _length,
3363 const UnicodeString &srcText,
3364 uint32_t options) const {
3365 return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
3366}
3367
3368inline int8_t
3369UnicodeString::caseCompare(const UChar *srcChars,
3370 int32_t srcLength,
3371 uint32_t options) const {
3372 return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
3373}
3374
3375inline int8_t
3376UnicodeString::caseCompare(int32_t start,
3377 int32_t _length,
3378 const UnicodeString &srcText,
3379 int32_t srcStart,
3380 int32_t srcLength,
3381 uint32_t options) const {
3382 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3383}
3384
3385inline int8_t
3386UnicodeString::caseCompare(int32_t start,
3387 int32_t _length,
3388 const UChar *srcChars,
3389 uint32_t options) const {
3390 return doCaseCompare(start, _length, srcChars, 0, _length, options);
3391}
3392
3393inline int8_t
3394UnicodeString::caseCompare(int32_t start,
3395 int32_t _length,
3396 const UChar *srcChars,
3397 int32_t srcStart,
3398 int32_t srcLength,
3399 uint32_t options) const {
3400 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3401}
3402
3403inline int8_t
3404UnicodeString::caseCompareBetween(int32_t start,
3405 int32_t limit,
3406 const UnicodeString &srcText,
3407 int32_t srcStart,
3408 int32_t srcLimit,
3409 uint32_t options) const {
3410 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3411}
3412
3413inline int32_t
3414UnicodeString::indexOf(const UnicodeString& srcText,
3415 int32_t srcStart,
3416 int32_t srcLength,
3417 int32_t start,
3418 int32_t _length) const
3419{
3420 if(!srcText.isBogus()) {
3421 srcText.pinIndices(srcStart, srcLength);
3422 if(srcLength > 0) {
3423 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3424 }
3425 }
3426 return -1;
3427}
3428
3429inline int32_t
3430UnicodeString::indexOf(const UnicodeString& text) const
3431{ return indexOf(text, 0, text.fLength, 0, fLength); }
3432
3433inline int32_t
3434UnicodeString::indexOf(const UnicodeString& text,
3435 int32_t start) const {
3436 pinIndex(start);
3437 return indexOf(text, 0, text.fLength, start, fLength - start);
3438}
3439
3440inline int32_t
3441UnicodeString::indexOf(const UnicodeString& text,
3442 int32_t start,
3443 int32_t _length) const
3444{ return indexOf(text, 0, text.fLength, start, _length); }
3445
3446inline int32_t
3447UnicodeString::indexOf(const UChar *srcChars,
3448 int32_t srcLength,
3449 int32_t start) const {
3450 pinIndex(start);
3451 return indexOf(srcChars, 0, srcLength, start, fLength - start);
3452}
3453
3454inline int32_t
3455UnicodeString::indexOf(const UChar *srcChars,
3456 int32_t srcLength,
3457 int32_t start,
3458 int32_t _length) const
3459{ return indexOf(srcChars, 0, srcLength, start, _length); }
3460
3461inline int32_t
3462UnicodeString::indexOf(UChar c,
3463 int32_t start,
3464 int32_t _length) const
3465{ return doIndexOf(c, start, _length); }
3466
3467inline int32_t
3468UnicodeString::indexOf(UChar32 c,
3469 int32_t start,
3470 int32_t _length) const
3471{ return doIndexOf(c, start, _length); }
3472
3473inline int32_t
3474UnicodeString::indexOf(UChar c) const
3475{ return doIndexOf(c, 0, fLength); }
3476
3477inline int32_t
3478UnicodeString::indexOf(UChar32 c) const
3479{ return indexOf(c, 0, fLength); }
3480
3481inline int32_t
3482UnicodeString::indexOf(UChar c,
3483 int32_t start) const {
3484 pinIndex(start);
3485 return doIndexOf(c, start, fLength - start);
3486}
3487
3488inline int32_t
3489UnicodeString::indexOf(UChar32 c,
3490 int32_t start) const {
3491 pinIndex(start);
3492 return indexOf(c, start, fLength - start);
3493}
3494
3495inline int32_t
3496UnicodeString::lastIndexOf(const UChar *srcChars,
3497 int32_t srcLength,
3498 int32_t start,
3499 int32_t _length) const
3500{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3501
3502inline int32_t
3503UnicodeString::lastIndexOf(const UChar *srcChars,
3504 int32_t srcLength,
3505 int32_t start) const {
3506 pinIndex(start);
3507 return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
3508}
3509
3510inline int32_t
3511UnicodeString::lastIndexOf(const UnicodeString& srcText,
3512 int32_t srcStart,
3513 int32_t srcLength,
3514 int32_t start,
3515 int32_t _length) const
3516{
3517 if(!srcText.isBogus()) {
3518 srcText.pinIndices(srcStart, srcLength);
3519 if(srcLength > 0) {
3520 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3521 }
3522 }
3523 return -1;
3524}
3525
3526inline int32_t
3527UnicodeString::lastIndexOf(const UnicodeString& text,
3528 int32_t start,
3529 int32_t _length) const
3530{ return lastIndexOf(text, 0, text.fLength, start, _length); }
3531
3532inline int32_t
3533UnicodeString::lastIndexOf(const UnicodeString& text,
3534 int32_t start) const {
3535 pinIndex(start);
3536 return lastIndexOf(text, 0, text.fLength, start, fLength - start);
3537}
3538
3539inline int32_t
3540UnicodeString::lastIndexOf(const UnicodeString& text) const
3541{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
3542
3543inline int32_t
3544UnicodeString::lastIndexOf(UChar c,
3545 int32_t start,
3546 int32_t _length) const
3547{ return doLastIndexOf(c, start, _length); }
3548
3549inline int32_t
3550UnicodeString::lastIndexOf(UChar32 c,
3551 int32_t start,
3552 int32_t _length) const {
3553 return doLastIndexOf(c, start, _length);
3554}
3555
3556inline int32_t
3557UnicodeString::lastIndexOf(UChar c) const
3558{ return doLastIndexOf(c, 0, fLength); }
3559
3560inline int32_t
3561UnicodeString::lastIndexOf(UChar32 c) const {
3562 return lastIndexOf(c, 0, fLength);
3563}
3564
3565inline int32_t
3566UnicodeString::lastIndexOf(UChar c,
3567 int32_t start) const {
3568 pinIndex(start);
3569 return doLastIndexOf(c, start, fLength - start);
3570}
3571
3572inline int32_t
3573UnicodeString::lastIndexOf(UChar32 c,
3574 int32_t start) const {
3575 pinIndex(start);
3576 return lastIndexOf(c, start, fLength - start);
3577}
3578
3579inline UBool
3580UnicodeString::startsWith(const UnicodeString& text) const
3581{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
3582
3583inline UBool
3584UnicodeString::startsWith(const UnicodeString& srcText,
3585 int32_t srcStart,
3586 int32_t srcLength) const
3587{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3588
3589inline UBool
3590UnicodeString::startsWith(const UChar *srcChars,
3591 int32_t srcLength) const
3592{ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
3593
3594inline UBool
3595UnicodeString::startsWith(const UChar *srcChars,
3596 int32_t srcStart,
3597 int32_t srcLength) const
3598{ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
3599
3600inline UBool
3601UnicodeString::endsWith(const UnicodeString& text) const
3602{ return doCompare(fLength - text.fLength, text.fLength,
3603 text, 0, text.fLength) == 0; }
3604
3605inline UBool
3606UnicodeString::endsWith(const UnicodeString& srcText,
3607 int32_t srcStart,
3608 int32_t srcLength) const {
3609 srcText.pinIndices(srcStart, srcLength);
3610 return doCompare(fLength - srcLength, srcLength,
3611 srcText, srcStart, srcLength) == 0;
3612}
3613
3614inline UBool
3615UnicodeString::endsWith(const UChar *srcChars,
3616 int32_t srcLength) const {
3617 if(srcLength < 0) {
3618 srcLength = u_strlen(srcChars);
3619 }
3620 return doCompare(fLength - srcLength, srcLength,
3621 srcChars, 0, srcLength) == 0;
3622}
3623
3624inline UBool
3625UnicodeString::endsWith(const UChar *srcChars,
3626 int32_t srcStart,
3627 int32_t srcLength) const {
3628 if(srcLength < 0) {
3629 srcLength = u_strlen(srcChars + srcStart);
3630 }
3631 return doCompare(fLength - srcLength, srcLength,
3632 srcChars, srcStart, srcLength) == 0;
3633}
3634
3635//========================================
3636// replace
3637//========================================
3638inline UnicodeString&
3639UnicodeString::replace(int32_t start,
3640 int32_t _length,
3641 const UnicodeString& srcText)
3642{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
3643
3644inline UnicodeString&
3645UnicodeString::replace(int32_t start,
3646 int32_t _length,
3647 const UnicodeString& srcText,
3648 int32_t srcStart,
3649 int32_t srcLength)
3650{ return doReplace(start, _length, srcText, srcStart, srcLength); }
3651
3652inline UnicodeString&
3653UnicodeString::replace(int32_t start,
3654 int32_t _length,
3655 const UChar *srcChars,
3656 int32_t srcLength)
3657{ return doReplace(start, _length, srcChars, 0, srcLength); }
3658
3659inline UnicodeString&
3660UnicodeString::replace(int32_t start,
3661 int32_t _length,
3662 const UChar *srcChars,
3663 int32_t srcStart,
3664 int32_t srcLength)
3665{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
3666
3667inline UnicodeString&
3668UnicodeString::replace(int32_t start,
3669 int32_t _length,
3670 UChar srcChar)
3671{ return doReplace(start, _length, &srcChar, 0, 1); }
3672
3673inline UnicodeString&
3674UnicodeString::replace(int32_t start,
3675 int32_t _length,
3676 UChar32 srcChar) {
3677 UChar buffer[U16_MAX_LENGTH];
3678 int32_t count = 0;
3679 UBool isError = FALSE;
3680 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
3681 return doReplace(start, _length, buffer, 0, count);
3682}
3683
3684inline UnicodeString&
3685UnicodeString::replaceBetween(int32_t start,
3686 int32_t limit,
3687 const UnicodeString& srcText)
3688{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
3689
3690inline UnicodeString&
3691UnicodeString::replaceBetween(int32_t start,
3692 int32_t limit,
3693 const UnicodeString& srcText,
3694 int32_t srcStart,
3695 int32_t srcLimit)
3696{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
3697
3698inline UnicodeString&
3699UnicodeString::findAndReplace(const UnicodeString& oldText,
3700 const UnicodeString& newText)
3701{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
3702 newText, 0, newText.fLength); }
3703
3704inline UnicodeString&
3705UnicodeString::findAndReplace(int32_t start,
3706 int32_t _length,
3707 const UnicodeString& oldText,
3708 const UnicodeString& newText)
3709{ return findAndReplace(start, _length, oldText, 0, oldText.fLength,
3710 newText, 0, newText.fLength); }
3711
3712// ============================
3713// extract
3714// ============================
3715inline void
3716UnicodeString::doExtract(int32_t start,
3717 int32_t _length,
3718 UnicodeString& target) const
3719{ target.replace(0, target.fLength, *this, start, _length); }
3720
3721inline void
3722UnicodeString::extract(int32_t start,
3723 int32_t _length,
3724 UChar *target,
3725 int32_t targetStart) const
3726{ doExtract(start, _length, target, targetStart); }
3727
3728inline void
3729UnicodeString::extract(int32_t start,
3730 int32_t _length,
3731 UnicodeString& target) const
3732{ doExtract(start, _length, target); }
3733
// Extracts the range (start, _length) into the char buffer dst for the
// given codepage, without a caller-supplied destination size.
// Forwards to the five-argument extract(), passing 0xffffffff as the
// destination size when dst is non-null and 0 when dst is null.
inline int32_t
UnicodeString::extract(int32_t start,
                       int32_t _length,
                       char *dst,
                       const char *codepage) const

{
  // This dstSize value will be checked explicitly
  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
}
3744
3745inline void
3746UnicodeString::extractBetween(int32_t start,
3747 int32_t limit,
3748 UChar *dst,
3749 int32_t dstStart) const
3750{ doExtract(start, limit - start, dst, dstStart); }
3751
3752inline UChar
3753UnicodeString::doCharAt(int32_t offset) const
3754{
3755 if((uint32_t)offset < (uint32_t)fLength) {
3756 return fArray[offset];
3757 } else {
3758 return kInvalidUChar;
3759 }
3760}
3761
3762inline UChar
3763UnicodeString::charAt(int32_t offset) const
3764{ return doCharAt(offset); }
3765
3766inline UChar
3767UnicodeString::operator[] (int32_t offset) const
3768{ return doCharAt(offset); }
3769
3770inline UChar32
3771UnicodeString::char32At(int32_t offset) const
3772{
3773 if((uint32_t)offset < (uint32_t)fLength) {
3774 UChar32 c;
3775 U16_GET(fArray, 0, offset, fLength, c);
3776 return c;
3777 } else {
3778 return kInvalidUChar;
3779 }
3780}
3781
3782inline int32_t
3783UnicodeString::getChar32Start(int32_t offset) const {
3784 if((uint32_t)offset < (uint32_t)fLength) {
3785 U16_SET_CP_START(fArray, 0, offset);
3786 return offset;
3787 } else {
3788 return 0;
3789 }
3790}
3791
3792inline int32_t
3793UnicodeString::getChar32Limit(int32_t offset) const {
3794 if((uint32_t)offset < (uint32_t)fLength) {
3795 U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
3796 return offset;
3797 } else {
3798 return fLength;
3799 }
3800}
3801
3802inline UBool
3803UnicodeString::isEmpty() const {
3804 return fLength == 0;
3805}
3806
3807//========================================
3808// Write implementation methods
3809//========================================
3810inline const UChar *
3811UnicodeString::getTerminatedBuffer() {
3812 if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3813 return 0;
3814 } else if(fLength<fCapacity && fArray[fLength]==0) {
3815 return fArray;
3816 } else if(cloneArrayIfNeeded(fLength+1)) {
3817 fArray[fLength]=0;
3818 return fArray;
3819 } else {
3820 return 0;
3821 }
3822}
3823
3824inline UnicodeString&
3825UnicodeString::operator= (UChar ch)
3826{ return doReplace(0, fLength, &ch, 0, 1); }
3827
3828inline UnicodeString&
3829UnicodeString::operator= (UChar32 ch)
3830{ return replace(0, fLength, ch); }
3831
3832inline UnicodeString&
3833UnicodeString::setTo(const UnicodeString& srcText,
3834 int32_t srcStart,
3835 int32_t srcLength)
3836{
3837 unBogus();
3838 return doReplace(0, fLength, srcText, srcStart, srcLength);
3839}
3840
3841inline UnicodeString&
3842UnicodeString::setTo(const UnicodeString& srcText,
3843 int32_t srcStart)
3844{
3845 unBogus();
3846 srcText.pinIndex(srcStart);
3847 return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
3848}
3849
3850inline UnicodeString&
3851UnicodeString::setTo(const UnicodeString& srcText)
3852{
3853 unBogus();
3854 return doReplace(0, fLength, srcText, 0, srcText.fLength);
3855}
3856
3857inline UnicodeString&
3858UnicodeString::setTo(const UChar *srcChars,
3859 int32_t srcLength)
3860{
3861 unBogus();
3862 return doReplace(0, fLength, srcChars, 0, srcLength);
3863}
3864
3865inline UnicodeString&
3866UnicodeString::setTo(UChar srcChar)
3867{
3868 unBogus();
3869 return doReplace(0, fLength, &srcChar, 0, 1);
3870}
3871
3872inline UnicodeString&
3873UnicodeString::setTo(UChar32 srcChar)
3874{
3875 unBogus();
3876 return replace(0, fLength, srcChar);
3877}
3878
3879inline UnicodeString&
3880UnicodeString::operator+= (UChar ch)
3881{ return doReplace(fLength, 0, &ch, 0, 1); }
3882
3883inline UnicodeString&
3884UnicodeString::operator+= (UChar32 ch) {
3885 UChar buffer[U16_MAX_LENGTH];
3886 int32_t _length = 0;
3887 UBool isError = FALSE;
3888 U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
3889 return doReplace(fLength, 0, buffer, 0, _length);
3890}
3891
3892inline UnicodeString&
3893UnicodeString::operator+= (const UnicodeString& srcText)
3894{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
3895
3896inline UnicodeString&
3897UnicodeString::append(const UnicodeString& srcText,
3898 int32_t srcStart,
3899 int32_t srcLength)
3900{ return doReplace(fLength, 0, srcText, srcStart, srcLength); }
3901
3902inline UnicodeString&
3903UnicodeString::append(const UnicodeString& srcText)
3904{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
3905
3906inline UnicodeString&
3907UnicodeString::append(const UChar *srcChars,
3908 int32_t srcStart,
3909 int32_t srcLength)
3910{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
3911
3912inline UnicodeString&
3913UnicodeString::append(const UChar *srcChars,
3914 int32_t srcLength)
3915{ return doReplace(fLength, 0, srcChars, 0, srcLength); }
3916
3917inline UnicodeString&
3918UnicodeString::append(UChar srcChar)
3919{ return doReplace(fLength, 0, &srcChar, 0, 1); }
3920
3921inline UnicodeString&
3922UnicodeString::append(UChar32 srcChar) {
3923 UChar buffer[U16_MAX_LENGTH];
3924 int32_t _length = 0;
3925 UBool isError = FALSE;
3926 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
3927 return doReplace(fLength, 0, buffer, 0, _length);
3928}
3929
3930inline UnicodeString&
3931UnicodeString::insert(int32_t start,
3932 const UnicodeString& srcText,
3933 int32_t srcStart,
3934 int32_t srcLength)
3935{ return doReplace(start, 0, srcText, srcStart, srcLength); }
3936
3937inline UnicodeString&
3938UnicodeString::insert(int32_t start,
3939 const UnicodeString& srcText)
3940{ return doReplace(start, 0, srcText, 0, srcText.fLength); }
3941
3942inline UnicodeString&
3943UnicodeString::insert(int32_t start,
3944 const UChar *srcChars,
3945 int32_t srcStart,
3946 int32_t srcLength)
3947{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
3948
3949inline UnicodeString&
3950UnicodeString::insert(int32_t start,
3951 const UChar *srcChars,
3952 int32_t srcLength)
3953{ return doReplace(start, 0, srcChars, 0, srcLength); }
3954
3955inline UnicodeString&
3956UnicodeString::insert(int32_t start,
3957 UChar srcChar)
3958{ return doReplace(start, 0, &srcChar, 0, 1); }
3959
3960inline UnicodeString&
3961UnicodeString::insert(int32_t start,
3962 UChar32 srcChar)
3963{ return replace(start, 0, srcChar); }
3964
3965
3966inline UnicodeString&
3967UnicodeString::remove()
3968{
3969 // remove() of a bogus string makes the string empty and non-bogus
3970 if(isBogus()) {
3971 unBogus();
3972 } else {
3973 fLength = 0;
3974 }
3975 return *this;
3976}
3977
3978inline UnicodeString&
3979UnicodeString::remove(int32_t start,
3980 int32_t _length)
3981{
3982 if(start <= 0 && _length == INT32_MAX) {
3983 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
3984 return remove();
3985 } else {
3986 return doReplace(start, _length, NULL, 0, 0);
3987 }
3988}
3989
3990inline UnicodeString&
3991UnicodeString::removeBetween(int32_t start,
3992 int32_t limit)
3993{ return doReplace(start, limit - start, NULL, 0, 0); }
3994
3995inline UBool
3996UnicodeString::truncate(int32_t targetLength)
3997{
3998 if(isBogus() && targetLength == 0) {
3999 // truncate(0) of a bogus string makes the string empty and non-bogus
4000 unBogus();
4001 return FALSE;
4002 } else if((uint32_t)targetLength < (uint32_t)fLength) {
4003 fLength = targetLength;
4004 return TRUE;
4005 } else {
4006 return FALSE;
4007 }
4008}
4009
4010inline UnicodeString&
4011UnicodeString::reverse()
4012{ return doReverse(0, fLength); }
4013
4014inline UnicodeString&
4015UnicodeString::reverse(int32_t start,
4016 int32_t _length)
4017{ return doReverse(start, _length); }
4018
4019
4020U_NAMESPACE_END
4021
4022#endif