2 **********************************************************************
3 * Copyright (C) 1998-2004, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
9 * Modification History:
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
24 #include "unicode/rep.h"
// Forward declarations: only the type names are introduced here.
// The trailing comment presumably names the header holding the full definition.
26 struct UConverter
; // unicode/ucnv.h
27 class StringThreadTest
;
29 #ifndef U_COMPARE_CODE_POINT_ORDER
30 /* see also ustring.h and unorm.h */
32 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
33 * Compare strings in code point order instead of code unit order.
// Defined here only when no earlier header has already provided it
// (see the #ifndef U_COMPARE_CODE_POINT_ORDER guard above).
36 #define U_COMPARE_CODE_POINT_ORDER 0x8000
// Declares the C function u_strlen(const UChar *), returning int32_t.
// NOTE(review): presumably repeated here so this header need not include
// ustring.h - confirm against the include structure.
41 U_STABLE
int32_t U_EXPORT2
42 u_strlen(const UChar
*s
);
// Forward declarations: only the class names are introduced here.
// The trailing comments presumably name the headers holding the full definitions.
47 class Locale
; // unicode/locid.h
48 class StringCharacterIterator
;
49 class BreakIterator
; // unicode/brkiter.h
51 /* The <iostream> include has been moved to unicode/ustream.h */
54 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
55 * which constructs a Unicode string from an invariant-character char * string.
56 * About invariant characters see utypes.h.
57 * This constructor has no runtime dependency on conversion code and is
58 * therefore recommended over ones taking a charset name string
59 * (where the empty string "" indicates invariant-character conversion).
// Shorthand for UnicodeString::kInvariant, the tag value passed as the
// EInvariant argument of UnicodeString(char *, int32_t, EInvariant).
63 #define US_INV UnicodeString::kInvariant
66 * Unicode String literals in C++.
67 * Dependent on the platform properties, different UnicodeString
68 * constructors should be used to create a UnicodeString object from a string literal.
70 * The macros are defined for maximum performance.
71 * They work only for strings that contain "invariant characters", i.e.,
72 * only latin letters, digits, and some punctuation.
73 * See utypes.h for details.
75 * The string parameter must be a C string literal.
76 * The length of the string, not including the terminating
77 * <code>NUL</code>, must be specified as a constant.
78 * The U_STRING_DECL macro should be invoked exactly once for one
79 * such string variable before it is used.
// UNICODE_STRING(cs, _length): build a UnicodeString from the literal cs with
// an explicit length, choosing the constructor by platform properties.
82 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
// wchar_t and UChar have the same size on an ASCII-family platform:
// token-paste an L prefix onto cs and pass the wide literal as const UChar *.
83 # define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
84 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
// UChar is one byte wide: the narrow literal itself is cast to const UChar *.
85 # define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
// Presumably the fallback branch: use the invariant-character constructor (US_INV).
// NOTE(review): no #else / #endif is visible around this definition in this
// view - confirm they are present in the actual file.
87 # define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
91 * Unicode String literals in C++.
92 * Dependent on the platform properties, different UnicodeString
93 * constructors should be used to create a UnicodeString object from a string literal.
95 * The macros are defined for improved performance.
96 * They work only for strings that contain "invariant characters", i.e.,
97 * only latin letters, digits, and some punctuation.
98 * See utypes.h for details.
100 * The string parameter must be a C string literal.
// UNICODE_STRING_SIMPLE(cs): same platform selection as UNICODE_STRING, but the
// caller gives no length; -1 is passed as the length argument instead
// (presumably meaning "NUL-terminated" - confirm against the constructor docs).
103 #if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
// wchar_t and UChar have the same size on an ASCII-family platform:
// token-paste an L prefix onto cs and pass the wide literal as const UChar *.
104 # define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
105 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
// UChar is one byte wide: the narrow literal itself is cast to const UChar *.
106 # define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
// Presumably the fallback branch: use the invariant-character constructor (US_INV).
// NOTE(review): no #else / #endif is visible around this definition in this
// view - confirm they are present in the actual file.
108 # define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
112 * UnicodeString is a string class that stores Unicode characters directly and provides
113 * similar functionality as the Java String and StringBuffer classes.
114 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
116 * The UnicodeString class is not suitable for subclassing.
118 * <p>For an overview of Unicode strings in C and C++ see the
119 * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
121 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
122 * A Unicode character may be stored with either
123 * one code unit — which is the most common case — or with a matched pair of
124 * special code units ("surrogates").
125 * The data type for code units is UChar.<br>
126 * For single-character handling, a Unicode character code <em>point</em> is a value
127 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
129 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
130 * This is the same as with multi-byte char* strings in traditional string handling.
131 * Operations on partial strings typically do not test for code point boundaries.
132 * If necessary, the user needs to take care of such boundaries by testing for the code unit
133 * values or by using functions like
134 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
135 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
137 * UnicodeString methods are more lenient with regard to input parameter values
138 * than other ICU APIs. In particular:
139 * - If indexes are out of bounds for a UnicodeString object
140 * (<0 or >length()) then they are "pinned" to the nearest boundary.
141 * - If primitive string pointer values (e.g., const UChar * or char *)
142 * for input strings are NULL, then those input string parameters are treated
143 * as if they pointed to an empty string.
144 * However, this is <em>not</em> the case for char * parameters for charset names
146 * - Most UnicodeString methods do not take a UErrorCode parameter because
147 * there are usually very few opportunities for failure other than a shortage
148 * of memory, error codes in low-level C++ string methods would be inconvenient,
149 * and the error code as the last parameter (ICU convention) would prevent
150 * the use of default parameter values.
151 * Instead, such methods set the UnicodeString into a "bogus" state
152 * (see isBogus()) if an error occurs.
154 * In string comparisons, two UnicodeString objects that are both "bogus"
155 * compare equal (to be transitive and prevent endless loops in sorting),
156 * and a "bogus" string compares less than any non-"bogus" one.
158 * Const UnicodeString methods are thread-safe. Multiple threads can use
159 * const methods on the same UnicodeString object simultaneously,
160 * but non-const methods must not be called concurrently (in multiple threads)
161 * with any other (const or non-const) methods.
163 * Similarly, const UnicodeString & parameters are thread-safe.
164 * One object may be passed in as such a parameter concurrently in multiple threads.
165 * This includes the const UnicodeString & parameters for
166 * copy construction, assignment, and cloning.
168 * <p>UnicodeString uses several storage methods.
169 * String contents can be stored inside the UnicodeString object itself,
170 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
171 * Most of this is done transparently, but careful aliasing in particular provides
172 * significant performance improvements.
173 * Also, the internal buffer is accessible via special functions.
174 * For details see the
175 * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
178 * @see CharacterIterator
181 class U_COMMON_API UnicodeString
: public Replaceable
186 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
187 * which constructs a Unicode string from an invariant-character char * string.
188 * Use the macro US_INV instead of the full qualification for this value.
201 //========================================
202 // Read-only operations
203 //========================================
205 /* Comparison - bitwise only - for international comparison use collation */
208 * Equality operator. Performs only bitwise comparison.
209 * @param text The UnicodeString to compare to this one.
210 * @return TRUE if <TT>text</TT> contains the same characters as this one, FALSE otherwise
214 inline UBool
operator== (const UnicodeString
& text
) const;
217 * Inequality operator. Performs only bitwise comparison.
218 * @param text The UnicodeString to compare to this one.
219 * @return FALSE if <TT>text</TT> contains the same characters as this one, TRUE otherwise
223 inline UBool
operator!= (const UnicodeString
& text
) const;
226 * Greater than operator. Performs only bitwise comparison.
227 * @param text The UnicodeString to compare to this one.
228 * @return TRUE if the characters in this are bitwise
229 * greater than the characters in <code>text</code>, FALSE otherwise
232 inline UBool
operator> (const UnicodeString
& text
) const;
235 * Less than operator. Performs only bitwise comparison.
236 * @param text The UnicodeString to compare to this one.
237 * @return TRUE if the characters in this are bitwise
238 * less than the characters in <code>text</code>, FALSE otherwise
241 inline UBool
operator< (const UnicodeString
& text
) const;
244 * Greater than or equal operator. Performs only bitwise comparison.
245 * @param text The UnicodeString to compare to this one.
246 * @return TRUE if the characters in this are bitwise
247 * greater than or equal to the characters in <code>text</code>, FALSE otherwise
250 inline UBool
operator>= (const UnicodeString
& text
) const;
253 * Less than or equal operator. Performs only bitwise comparison.
254 * @param text The UnicodeString to compare to this one.
255 * @return TRUE if the characters in this are bitwise
256 * less than or equal to the characters in <code>text</code>, FALSE otherwise
259 inline UBool
operator<= (const UnicodeString
& text
) const;
262 * Compare the characters bitwise in this UnicodeString to
263 * the characters in <code>text</code>.
264 * @param text The UnicodeString to compare to this one.
265 * @return The result of bitwise character comparison: 0 if this
266 * contains the same characters as <code>text</code>, -1 if the characters in
267 * this are bitwise less than the characters in <code>text</code>, +1 if the
268 * characters in this are bitwise greater than the characters
269 * in <code>text</code>.
272 inline int8_t compare(const UnicodeString
& text
) const;
275 * Compare the characters bitwise in the range
276 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in <TT>text</TT>
278 * @param start the offset at which the compare operation begins
279 * @param length the number of characters of text to compare.
280 * @param text the other text to be compared against this string.
281 * @return The result of bitwise character comparison: 0 if this
282 * contains the same characters as <code>text</code>, -1 if the characters in
283 * this are bitwise less than the characters in <code>text</code>, +1 if the
284 * characters in this are bitwise greater than the characters
285 * in <code>text</code>.
288 inline int8_t compare(int32_t start
,
290 const UnicodeString
& text
) const;
293 * Compare the characters bitwise in the range
294 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
295 * in <TT>srcText</TT> in the range
296 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
297 * @param start the offset at which the compare operation begins
298 * @param length the number of characters in this to compare.
299 * @param srcText the text to be compared
300 * @param srcStart the offset into <TT>srcText</TT> to start comparison
301 * @param srcLength the number of characters in <TT>srcText</TT> to compare
302 * @return The result of bitwise character comparison: 0 if this
303 * contains the same characters as <code>srcText</code>, -1 if the characters in
304 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
305 * characters in this are bitwise greater than the characters
306 * in <code>srcText</code>.
309 inline int8_t compare(int32_t start
,
311 const UnicodeString
& srcText
,
313 int32_t srcLength
) const;
316 * Compare the characters bitwise in this UnicodeString with the first
317 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
318 * @param srcChars The characters to compare to this UnicodeString.
319 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
320 * @return The result of bitwise character comparison: 0 if this
321 * contains the same characters as <code>srcChars</code>, -1 if the characters in
322 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
323 * characters in this are bitwise greater than the characters
324 * in <code>srcChars</code>.
327 inline int8_t compare(const UChar
*srcChars
,
328 int32_t srcLength
) const;
331 * Compare the characters bitwise in the range
332 * [<TT>start</TT>, <TT>start + length</TT>) with the first
333 * <TT>length</TT> characters in <TT>srcChars</TT>
334 * @param start the offset at which the compare operation begins
335 * @param length the number of characters to compare.
336 * @param srcChars the characters to be compared
337 * @return The result of bitwise character comparison: 0 if this
338 * contains the same characters as <code>srcChars</code>, -1 if the characters in
339 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
340 * characters in this are bitwise greater than the characters
341 * in <code>srcChars</code>.
344 inline int8_t compare(int32_t start
,
346 const UChar
*srcChars
) const;
349 * Compare the characters bitwise in the range
350 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
351 * in <TT>srcChars</TT> in the range
352 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
353 * @param start the offset at which the compare operation begins
354 * @param length the number of characters in this to compare
355 * @param srcChars the characters to be compared
356 * @param srcStart the offset into <TT>srcChars</TT> to start comparison
357 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
358 * @return The result of bitwise character comparison: 0 if this
359 * contains the same characters as <code>srcChars</code>, -1 if the characters in
360 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
361 * characters in this are bitwise greater than the characters
362 * in <code>srcChars</code>.
365 inline int8_t compare(int32_t start
,
367 const UChar
*srcChars
,
369 int32_t srcLength
) const;
372 * Compare the characters bitwise in the range
373 * [<TT>start</TT>, <TT>limit</TT>) with the characters
374 * in <TT>srcText</TT> in the range
375 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
376 * @param start the offset at which the compare operation begins
377 * @param limit the offset immediately following the compare operation
378 * @param srcText the text to be compared
379 * @param srcStart the offset into <TT>srcText</TT> to start comparison
380 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
381 * @return The result of bitwise character comparison: 0 if this
382 * contains the same characters as <code>srcText</code>, -1 if the characters in
383 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
384 * characters in this are bitwise greater than the characters
385 * in <code>srcText</code>.
388 inline int8_t compareBetween(int32_t start
,
390 const UnicodeString
& srcText
,
392 int32_t srcLimit
) const;
395 * Compare two Unicode strings in code point order.
396 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
397 * if supplementary characters are present:
399 * In UTF-16, supplementary characters (with code points U+10000 and above) are
400 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
401 * which means that they compare as less than some other BMP characters like U+feff.
402 * This function compares Unicode strings in code point order.
403 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
405 * @param text Another string to compare this one to.
406 * @return a negative/zero/positive integer corresponding to whether
407 * this string is less than/equal to/greater than the second one
408 * in code point order
411 inline int8_t compareCodePointOrder(const UnicodeString
& text
) const;
414 * Compare two Unicode strings in code point order.
415 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
416 * if supplementary characters are present:
418 * In UTF-16, supplementary characters (with code points U+10000 and above) are
419 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
420 * which means that they compare as less than some other BMP characters like U+feff.
421 * This function compares Unicode strings in code point order.
422 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
424 * @param start The start offset in this string at which the compare operation begins.
425 * @param length The number of code units from this string to compare.
426 * @param srcText Another string to compare this one to.
427 * @return a negative/zero/positive integer corresponding to whether
428 * this string is less than/equal to/greater than the second one
429 * in code point order
432 inline int8_t compareCodePointOrder(int32_t start
,
434 const UnicodeString
& srcText
) const;
437 * Compare two Unicode strings in code point order.
438 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
439 * if supplementary characters are present:
441 * In UTF-16, supplementary characters (with code points U+10000 and above) are
442 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
443 * which means that they compare as less than some other BMP characters like U+feff.
444 * This function compares Unicode strings in code point order.
445 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
447 * @param start The start offset in this string at which the compare operation begins.
448 * @param length The number of code units from this string to compare.
449 * @param srcText Another string to compare this one to.
450 * @param srcStart The start offset in that string at which the compare operation begins.
451 * @param srcLength The number of code units from that string to compare.
452 * @return a negative/zero/positive integer corresponding to whether
453 * this string is less than/equal to/greater than the second one
454 * in code point order
457 inline int8_t compareCodePointOrder(int32_t start
,
459 const UnicodeString
& srcText
,
461 int32_t srcLength
) const;
464 * Compare two Unicode strings in code point order.
465 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
466 * if supplementary characters are present:
468 * In UTF-16, supplementary characters (with code points U+10000 and above) are
469 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
470 * which means that they compare as less than some other BMP characters like U+feff.
471 * This function compares Unicode strings in code point order.
472 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
474 * @param srcChars A pointer to another string to compare this one to.
475 * @param srcLength The number of code units from that string to compare.
476 * @return a negative/zero/positive integer corresponding to whether
477 * this string is less than/equal to/greater than the second one
478 * in code point order
481 inline int8_t compareCodePointOrder(const UChar
*srcChars
,
482 int32_t srcLength
) const;
485 * Compare two Unicode strings in code point order.
486 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
487 * if supplementary characters are present:
489 * In UTF-16, supplementary characters (with code points U+10000 and above) are
490 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
491 * which means that they compare as less than some other BMP characters like U+feff.
492 * This function compares Unicode strings in code point order.
493 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
495 * @param start The start offset in this string at which the compare operation begins.
496 * @param length The number of code units from this string to compare.
497 * @param srcChars A pointer to another string to compare this one to.
498 * @return a negative/zero/positive integer corresponding to whether
499 * this string is less than/equal to/greater than the second one
500 * in code point order
503 inline int8_t compareCodePointOrder(int32_t start
,
505 const UChar
*srcChars
) const;
508 * Compare two Unicode strings in code point order.
509 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
510 * if supplementary characters are present:
512 * In UTF-16, supplementary characters (with code points U+10000 and above) are
513 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
514 * which means that they compare as less than some other BMP characters like U+feff.
515 * This function compares Unicode strings in code point order.
516 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
518 * @param start The start offset in this string at which the compare operation begins.
519 * @param length The number of code units from this string to compare.
520 * @param srcChars A pointer to another string to compare this one to.
521 * @param srcStart The start offset in that string at which the compare operation begins.
522 * @param srcLength The number of code units from that string to compare.
523 * @return a negative/zero/positive integer corresponding to whether
524 * this string is less than/equal to/greater than the second one
525 * in code point order
528 inline int8_t compareCodePointOrder(int32_t start
,
530 const UChar
*srcChars
,
532 int32_t srcLength
) const;
535 * Compare two Unicode strings in code point order.
536 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
537 * if supplementary characters are present:
539 * In UTF-16, supplementary characters (with code points U+10000 and above) are
540 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
541 * which means that they compare as less than some other BMP characters like U+feff.
542 * This function compares Unicode strings in code point order.
543 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
545 * @param start The start offset in this string at which the compare operation begins.
546 * @param limit The offset after the last code unit from this string to compare.
547 * @param srcText Another string to compare this one to.
548 * @param srcStart The start offset in that string at which the compare operation begins.
549 * @param srcLimit The offset after the last code unit from that string to compare.
550 * @return a negative/zero/positive integer corresponding to whether
551 * this string is less than/equal to/greater than the second one
552 * in code point order
555 inline int8_t compareCodePointOrderBetween(int32_t start
,
557 const UnicodeString
& srcText
,
559 int32_t srcLimit
) const;
562 * Compare two strings case-insensitively using full case folding.
563 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
565 * @param text Another string to compare this one to.
566 * @param options A bit set of options:
567 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
568 * Comparison in code unit order with default case folding.
570 * - U_COMPARE_CODE_POINT_ORDER
571 * Set to choose code point order instead of code unit order
572 * (see u_strCompare for details).
574 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
576 * @return A negative, zero, or positive integer indicating the comparison result.
579 inline int8_t caseCompare(const UnicodeString
& text
, uint32_t options
) const;
582 * Compare two strings case-insensitively using full case folding.
583 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
585 * @param start The start offset in this string at which the compare operation begins.
586 * @param length The number of code units from this string to compare.
587 * @param srcText Another string to compare this one to.
588 * @param options A bit set of options:
589 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
590 * Comparison in code unit order with default case folding.
592 * - U_COMPARE_CODE_POINT_ORDER
593 * Set to choose code point order instead of code unit order
594 * (see u_strCompare for details).
596 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
598 * @return A negative, zero, or positive integer indicating the comparison result.
601 inline int8_t caseCompare(int32_t start
,
603 const UnicodeString
& srcText
,
604 uint32_t options
) const;
607 * Compare two strings case-insensitively using full case folding.
608 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
610 * @param start The start offset in this string at which the compare operation begins.
611 * @param length The number of code units from this string to compare.
612 * @param srcText Another string to compare this one to.
613 * @param srcStart The start offset in that string at which the compare operation begins.
614 * @param srcLength The number of code units from that string to compare.
615 * @param options A bit set of options:
616 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
617 * Comparison in code unit order with default case folding.
619 * - U_COMPARE_CODE_POINT_ORDER
620 * Set to choose code point order instead of code unit order
621 * (see u_strCompare for details).
623 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
625 * @return A negative, zero, or positive integer indicating the comparison result.
628 inline int8_t caseCompare(int32_t start
,
630 const UnicodeString
& srcText
,
633 uint32_t options
) const;
636 * Compare two strings case-insensitively using full case folding.
637 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
639 * @param srcChars A pointer to another string to compare this one to.
640 * @param srcLength The number of code units from that string to compare.
641 * @param options A bit set of options:
642 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
643 * Comparison in code unit order with default case folding.
645 * - U_COMPARE_CODE_POINT_ORDER
646 * Set to choose code point order instead of code unit order
647 * (see u_strCompare for details).
649 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
651 * @return A negative, zero, or positive integer indicating the comparison result.
654 inline int8_t caseCompare(const UChar
*srcChars
,
656 uint32_t options
) const;
659 * Compare two strings case-insensitively using full case folding.
660 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
662 * @param start The start offset in this string at which the compare operation begins.
663 * @param length The number of code units from this string to compare.
664 * @param srcChars A pointer to another string to compare this one to.
665 * @param options A bit set of options:
666 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
667 * Comparison in code unit order with default case folding.
669 * - U_COMPARE_CODE_POINT_ORDER
670 * Set to choose code point order instead of code unit order
671 * (see u_strCompare for details).
673 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
675 * @return A negative, zero, or positive integer indicating the comparison result.
678 inline int8_t caseCompare(int32_t start
,
680 const UChar
*srcChars
,
681 uint32_t options
) const;
684 * Compare two strings case-insensitively using full case folding.
685 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
687 * @param start The start offset in this string at which the compare operation begins.
688 * @param length The number of code units from this string to compare.
689 * @param srcChars A pointer to another string to compare this one to.
690 * @param srcStart The start offset in that string at which the compare operation begins.
691 * @param srcLength The number of code units from that string to compare.
692 * @param options A bit set of options:
693 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
694 * Comparison in code unit order with default case folding.
696 * - U_COMPARE_CODE_POINT_ORDER
697 * Set to choose code point order instead of code unit order
698 * (see u_strCompare for details).
700 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
702 * @return A negative, zero, or positive integer indicating the comparison result.
705 inline int8_t caseCompare(int32_t start
,
707 const UChar
*srcChars
,
710 uint32_t options
) const;
713 * Compare two strings case-insensitively using full case folding.
714 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
716 * @param start The start offset in this string at which the compare operation begins.
717 * @param limit The offset after the last code unit from this string to compare.
718 * @param srcText Another string to compare this one to.
719 * @param srcStart The start offset in that string at which the compare operation begins.
720 * @param srcLimit The offset after the last code unit from that string to compare.
721 * @param options A bit set of options:
722 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
723 * Comparison in code unit order with default case folding.
725 * - U_COMPARE_CODE_POINT_ORDER
726 * Set to choose code point order instead of code unit order
727 * (see u_strCompare for details).
729 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
731 * @return A negative, zero, or positive integer indicating the comparison result.
734 inline int8_t caseCompareBetween(int32_t start
,
736 const UnicodeString
& srcText
,
739 uint32_t options
) const;
742 * Determine if this starts with the characters in <TT>text</TT>
743 * @param text The text to match.
744 * @return TRUE if this starts with the characters in <TT>text</TT>, FALSE otherwise
748 inline UBool
startsWith(const UnicodeString
& text
) const;
751 * Determine if this starts with the characters in <TT>srcText</TT>
752 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
753 * @param srcText The text to match.
754 * @param srcStart the offset into <TT>srcText</TT> to start matching
755 * @param srcLength the number of characters in <TT>srcText</TT> to match
756 * @return TRUE if this starts with the characters in <TT>srcText</TT>, FALSE otherwise
760 inline UBool
startsWith(const UnicodeString
& srcText
,
762 int32_t srcLength
) const;
765 * Determine if this starts with the characters in <TT>srcChars</TT>
766 * @param srcChars The characters to match.
767 * @param srcLength the number of characters in <TT>srcChars</TT>
768 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
772 inline UBool
startsWith(const UChar
*srcChars
,
773 int32_t srcLength
) const;
776 * Determine if this starts with the characters in <TT>srcChars</TT>
777 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
778 * @param srcChars The characters to match.
779 * @param srcStart the offset into <TT>srcChars</TT> to start matching
780 * @param srcLength the number of characters in <TT>srcChars</TT> to match
781 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
784 inline UBool
startsWith(const UChar
*srcChars
,
786 int32_t srcLength
) const;
789 * Determine if this ends with the characters in <TT>text</TT>
790 * @param text The text to match.
791 * @return TRUE if this ends with the characters in <TT>text</TT>,
795 inline UBool
endsWith(const UnicodeString
& text
) const;
798 * Determine if this ends with the characters in <TT>srcText</TT>
799 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
800 * @param srcText The text to match.
801 * @param srcStart the offset into <TT>srcText</TT> to start matching
802 * @param srcLength the number of characters in <TT>srcText</TT> to match
803 * @return TRUE if this ends with the characters in <TT>srcText</TT>,
807 inline UBool
endsWith(const UnicodeString
& srcText
,
809 int32_t srcLength
) const;
812 * Determine if this ends with the characters in <TT>srcChars</TT>
813 * @param srcChars The characters to match.
814 * @param srcLength the number of characters in <TT>srcChars</TT>
815 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
819 inline UBool
endsWith(const UChar
*srcChars
,
820 int32_t srcLength
) const;
823 * Determine if this ends with the characters in <TT>srcChars</TT>
824 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
825 * @param srcChars The characters to match.
826 * @param srcStart the offset into <TT>srcChars</TT> to start matching
827 * @param srcLength the number of characters in <TT>srcChars</TT> to match
828 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
832 inline UBool
endsWith(const UChar
*srcChars
,
834 int32_t srcLength
) const;
837 /* Searching - bitwise only */
840 * Locate in this the first occurrence of the characters in <TT>text</TT>,
841 * using bitwise comparison.
842 * @param text The text to search for.
843 * @return The offset into this of the start of <TT>text</TT>,
844 * or -1 if not found.
847 inline int32_t indexOf(const UnicodeString
& text
) const;
850 * Locate in this the first occurrence of the characters in <TT>text</TT>
851 * starting at offset <TT>start</TT>, using bitwise comparison.
852 * @param text The text to search for.
853 * @param start The offset at which searching will start.
854 * @return The offset into this of the start of <TT>text</TT>,
855 * or -1 if not found.
858 inline int32_t indexOf(const UnicodeString
& text
,
859 int32_t start
) const;
862 * Locate in this the first occurrence in the range
863 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
864 * in <TT>text</TT>, using bitwise comparison.
865 * @param text The text to search for.
866 * @param start The offset at which searching will start.
867 * @param length The number of characters to search
868 * @return The offset into this of the start of <TT>text</TT>,
869 * or -1 if not found.
872 inline int32_t indexOf(const UnicodeString
& text
,
874 int32_t length
) const;
877 * Locate in this the first occurrence in the range
878 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
879 * in <TT>srcText</TT> in the range
880 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
881 * using bitwise comparison.
882 * @param srcText The text to search for.
883 * @param srcStart the offset into <TT>srcText</TT> at which
885 * @param srcLength the number of characters in <TT>srcText</TT> to match
886 * @param start the offset into this at which to start matching
887 * @param length the number of characters in this to search
888 * @return The offset into this of the start of <TT>srcText</TT>,
889 * or -1 if not found.
892 inline int32_t indexOf(const UnicodeString
& srcText
,
896 int32_t length
) const;
899 * Locate in this the first occurrence of the characters in
901 * starting at offset <TT>start</TT>, using bitwise comparison.
902 * @param srcChars The text to search for.
903 * @param srcLength the number of characters in <TT>srcChars</TT> to match
904 * @param start the offset into this at which to start matching
905 * @return The offset into this of the start of <TT>srcChars</TT>,
906 * or -1 if not found.
909 inline int32_t indexOf(const UChar
*srcChars
,
911 int32_t start
) const;
914 * Locate in this the first occurrence in the range
915 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
916 * in <TT>srcChars</TT>, using bitwise comparison.
917 * @param srcChars The text to search for.
918 * @param srcLength the number of characters in <TT>srcChars</TT>
919 * @param start The offset at which searching will start.
920 * @param length The number of characters to search
921 * @return The offset into this of the start of <TT>srcChars</TT>,
922 * or -1 if not found.
925 inline int32_t indexOf(const UChar
*srcChars
,
928 int32_t length
) const;
931 * Locate in this the first occurrence in the range
932 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
933 * in <TT>srcChars</TT> in the range
934 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
935 * using bitwise comparison.
936 * @param srcChars The text to search for.
937 * @param srcStart the offset into <TT>srcChars</TT> at which
939 * @param srcLength the number of characters in <TT>srcChars</TT> to match
940 * @param start the offset into this at which to start matching
941 * @param length the number of characters in this to search
942 * @return The offset into this of the start of <TT>srcChars</TT>,
943 * or -1 if not found.
946 int32_t indexOf(const UChar
*srcChars
,
950 int32_t length
) const;
953 * Locate in this the first occurrence of the BMP code point <code>c</code>,
954 * using bitwise comparison.
955 * @param c The code unit to search for.
956 * @return The offset into this of <TT>c</TT>, or -1 if not found.
959 inline int32_t indexOf(UChar c
) const;
962 * Locate in this the first occurrence of the code point <TT>c</TT>,
963 * using bitwise comparison.
965 * @param c The code point to search for.
966 * @return The offset into this of <TT>c</TT>, or -1 if not found.
969 inline int32_t indexOf(UChar32 c
) const;
972 * Locate in this the first occurrence of the BMP code point <code>c</code>,
973 * starting at offset <TT>start</TT>, using bitwise comparison.
974 * @param c The code unit to search for.
975 * @param start The offset at which searching will start.
976 * @return The offset into this of <TT>c</TT>, or -1 if not found.
979 inline int32_t indexOf(UChar c
,
980 int32_t start
) const;
983 * Locate in this the first occurrence of the code point <TT>c</TT>
984 * starting at offset <TT>start</TT>, using bitwise comparison.
986 * @param c The code point to search for.
987 * @param start The offset at which searching will start.
988 * @return The offset into this of <TT>c</TT>, or -1 if not found.
991 inline int32_t indexOf(UChar32 c
,
992 int32_t start
) const;
995 * Locate in this the first occurrence of the BMP code point <code>c</code>
996 * in the range [<TT>start</TT>, <TT>start + length</TT>),
997 * using bitwise comparison.
998 * @param c The code unit to search for.
999 * @param start the offset into this at which to start matching
1000 * @param length the number of characters in this to search
1001 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1004 inline int32_t indexOf(UChar c
,
1006 int32_t length
) const;
1009 * Locate in this the first occurrence of the code point <TT>c</TT>
1010 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1011 * using bitwise comparison.
1013 * @param c The code point to search for.
1014 * @param start the offset into this at which to start matching
1015 * @param length the number of characters in this to search
1016 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1019 inline int32_t indexOf(UChar32 c
,
1021 int32_t length
) const;
1024 * Locate in this the last occurrence of the characters in <TT>text</TT>,
1025 * using bitwise comparison.
1026 * @param text The text to search for.
1027 * @return The offset into this of the start of <TT>text</TT>,
1028 * or -1 if not found.
1031 inline int32_t lastIndexOf(const UnicodeString
& text
) const;
1034 * Locate in this the last occurrence of the characters in <TT>text</TT>
1035 * starting at offset <TT>start</TT>, using bitwise comparison.
1036 * @param text The text to search for.
1037 * @param start The offset at which searching will start.
1038 * @return The offset into this of the start of <TT>text</TT>,
1039 * or -1 if not found.
1042 inline int32_t lastIndexOf(const UnicodeString
& text
,
1043 int32_t start
) const;
1046 * Locate in this the last occurrence in the range
1047 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1048 * in <TT>text</TT>, using bitwise comparison.
1049 * @param text The text to search for.
1050 * @param start The offset at which searching will start.
1051 * @param length The number of characters to search
1052 * @return The offset into this of the start of <TT>text</TT>,
1053 * or -1 if not found.
1056 inline int32_t lastIndexOf(const UnicodeString
& text
,
1058 int32_t length
) const;
1061 * Locate in this the last occurrence in the range
1062 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1063 * in <TT>srcText</TT> in the range
1064 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1065 * using bitwise comparison.
1066 * @param srcText The text to search for.
1067 * @param srcStart the offset into <TT>srcText</TT> at which
1069 * @param srcLength the number of characters in <TT>srcText</TT> to match
1070 * @param start the offset into this at which to start matching
1071 * @param length the number of characters in this to search
1072 * @return The offset into this of the start of <TT>srcText</TT>,
1073 * or -1 if not found.
1076 inline int32_t lastIndexOf(const UnicodeString
& srcText
,
1080 int32_t length
) const;
1083 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1084 * starting at offset <TT>start</TT>, using bitwise comparison.
1085 * @param srcChars The text to search for.
1086 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1087 * @param start the offset into this at which to start matching
1088 * @return The offset into this of the start of <TT>srcChars</TT>,
1089 * or -1 if not found.
1092 inline int32_t lastIndexOf(const UChar
*srcChars
,
1094 int32_t start
) const;
1097 * Locate in this the last occurrence in the range
1098 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1099 * in <TT>srcChars</TT>, using bitwise comparison.
1100 * @param srcChars The text to search for.
1101 * @param srcLength the number of characters in <TT>srcChars</TT>
1102 * @param start The offset at which searching will start.
1103 * @param length The number of characters to search
1104 * @return The offset into this of the start of <TT>srcChars</TT>,
1105 * or -1 if not found.
1108 inline int32_t lastIndexOf(const UChar
*srcChars
,
1111 int32_t length
) const;
1114 * Locate in this the last occurrence in the range
1115 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1116 * in <TT>srcChars</TT> in the range
1117 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1118 * using bitwise comparison.
1119 * @param srcChars The text to search for.
1120 * @param srcStart the offset into <TT>srcChars</TT> at which
1122 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1123 * @param start the offset into this at which to start matching
1124 * @param length the number of characters in this to search
1125 * @return The offset into this of the start of <TT>srcChars</TT>,
1126 * or -1 if not found.
1129 int32_t lastIndexOf(const UChar
*srcChars
,
1133 int32_t length
) const;
1136 * Locate in this the last occurrence of the BMP code point <code>c</code>,
1137 * using bitwise comparison.
1138 * @param c The code unit to search for.
1139 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1142 inline int32_t lastIndexOf(UChar c
) const;
1145 * Locate in this the last occurrence of the code point <TT>c</TT>,
1146 * using bitwise comparison.
1148 * @param c The code point to search for.
1149 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1152 inline int32_t lastIndexOf(UChar32 c
) const;
1155 * Locate in this the last occurrence of the BMP code point <code>c</code>
1156 * starting at offset <TT>start</TT>, using bitwise comparison.
1157 * @param c The code unit to search for.
1158 * @param start The offset at which searching will start.
1159 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1162 inline int32_t lastIndexOf(UChar c
,
1163 int32_t start
) const;
1166 * Locate in this the last occurrence of the code point <TT>c</TT>
1167 * starting at offset <TT>start</TT>, using bitwise comparison.
1169 * @param c The code point to search for.
1170 * @param start The offset at which searching will start.
1171 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1174 inline int32_t lastIndexOf(UChar32 c
,
1175 int32_t start
) const;
1178 * Locate in this the last occurrence of the BMP code point <code>c</code>
1179 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1180 * using bitwise comparison.
1181 * @param c The code unit to search for.
1182 * @param start the offset into this at which to start matching
1183 * @param length the number of characters in this to search
1184 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1187 inline int32_t lastIndexOf(UChar c
,
1189 int32_t length
) const;
1192 * Locate in this the last occurrence of the code point <TT>c</TT>
1193 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1194 * using bitwise comparison.
1196 * @param c The code point to search for.
1197 * @param start the offset into this at which to start matching
1198 * @param length the number of characters in this to search
1199 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1202 inline int32_t lastIndexOf(UChar32 c
,
1204 int32_t length
) const;
1207 /* Character access */
1210 * Return the code unit at offset <tt>offset</tt>.
1211 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1212 * @param offset a valid offset into the text
1213 * @return the code unit at offset <tt>offset</tt>
1214 * or 0xffff if the offset is not valid for this string
1217 inline UChar
charAt(int32_t offset
) const;
1220 * Return the code unit at offset <tt>offset</tt>.
1221 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1222 * @param offset a valid offset into the text
1223 * @return the code unit at offset <tt>offset</tt>
1226 inline UChar
operator[] (int32_t offset
) const;
1229 * Return the code point that contains the code unit
1230 * at offset <tt>offset</tt>.
1231 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1232 * @param offset a valid offset into the text
1233 * that indicates the text offset of any of the code units
1234 * that will be assembled into a code point (21-bit value) and returned
1235 * @return the code point of text at <tt>offset</tt>
1236 * or 0xffff if the offset is not valid for this string
1239 inline UChar32
char32At(int32_t offset
) const;
1242 * Adjust a random-access offset so that
1243 * it points to the beginning of a Unicode character.
1244 * The offset that is passed in points to
1245 * any code unit of a code point,
1246 * while the returned offset will point to the first code unit
1247 * of the same code point.
1248 * In UTF-16, if the input offset points to a second surrogate
1249 * of a surrogate pair, then the returned offset will point
1250 * to the first surrogate.
1251 * @param offset a valid offset into one code point of the text
1252 * @return offset of the first code unit of the same code point
1253 * @see U16_SET_CP_START
1256 inline int32_t getChar32Start(int32_t offset
) const;
1259 * Adjust a random-access offset so that
1260 * it points behind a Unicode character.
1261 * The offset that is passed in points behind
1262 * any code unit of a code point,
1263 * while the returned offset will point behind the last code unit
1264 * of the same code point.
1265 * In UTF-16, if the input offset points behind the first surrogate
1266 * (i.e., to the second surrogate)
1267 * of a surrogate pair, then the returned offset will point
1268 * behind the second surrogate (i.e., to the first code unit after the pair).
1269 * @param offset a valid offset after any code unit of a code point of the text
1270 * @return offset of the first code unit after the same code point
1271 * @see U16_SET_CP_LIMIT
1274 inline int32_t getChar32Limit(int32_t offset
) const;
1277 * Move the code unit index along the string by delta code points.
1278 * Interpret the input index as a code unit-based offset into the string,
1279 * move the index forward or backward by delta code points, and
1280 * return the resulting index.
1281 * The input index should point to the first code unit of a code point,
1282 * if there is more than one.
1284 * Both input and output indexes are code unit-based as for all
1285 * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1286 * If delta<0 then the index is moved backward (toward the start of the string).
1287 * If delta>0 then the index is moved forward (toward the end of the string).
1289 * This behaves like CharacterIterator::move32(delta, kCurrent).
1291 * Behavior for out-of-bounds indexes:
1292 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1293 * if the input index<0 then it is pinned to 0;
1294 * if it is index>length() then it is pinned to length().
1295 * Afterwards, the index is moved by <code>delta</code> code points
1296 * forward or backward,
1297 * but no further backward than to 0 and no further forward than to length().
1298 * The resulting index return value will be in between 0 and length(), inclusively.
1302 * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1303 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1305 * // initial index: position of U+10000
1308 * // the following examples will all result in index==4, position of U+10ffff
1310 * // skip 2 code points from some position in the string
1311 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1313 * // go to the 3rd code point from the start of s (0-based)
1314 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1316 * // go to the next-to-last code point of s
1317 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1320 * @param index input code unit index
1321 * @param delta (signed) code point count to move the index forward or backward
1323 * @return the resulting code unit index
1326 int32_t moveIndex32(int32_t index
, int32_t delta
) const;
1328 /* Substring extraction */
1331 * Copy the characters in the range
1332 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1333 * beginning at <tt>dstStart</tt>.
1334 * If the string aliases to <code>dst</code> itself as an external buffer,
1335 * then extract() will not copy the contents.
1337 * @param start offset of first character which will be copied into the array
1338 * @param length the number of characters to extract
1339 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1340 * must be at least (<tt>dstStart + length</tt>).
1341 * @param dstStart the offset in <TT>dst</TT> where the first character
1345 inline void extract(int32_t start
,
1348 int32_t dstStart
= 0) const;
1351 * Copy the contents of the string into dest.
1352 * This is a convenience function that
1353 * checks if there is enough space in dest,
1354 * extracts the entire string if possible,
1355 * and NUL-terminates dest if possible.
1357 * If the string fits into dest but cannot be NUL-terminated
1358 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1359 * If the string itself does not fit into dest
1360 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1362 * If the string aliases to <code>dest</code> itself as an external buffer,
1363 * then extract() will not copy the contents.
1365 * @param dest Destination string buffer.
1366 * @param destCapacity Number of UChars available at dest.
1367 * @param errorCode ICU error code.
1372 extract(UChar
*dest
, int32_t destCapacity
,
1373 UErrorCode
&errorCode
) const;
1376 * Copy the characters in the range
1377 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1379 * @param start offset of first character which will be copied
1380 * @param length the number of characters to extract
1381 * @param target UnicodeString into which to copy characters.
1382 * The extracted characters are returned in <TT>target</TT>; the function itself returns no value.
1385 inline void extract(int32_t start
,
1387 UnicodeString
& target
) const;
1390 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1391 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1392 * @param start offset of first character which will be copied into the array
1393 * @param limit offset immediately following the last character to be copied
1394 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1395 * must be at least (<tt>dstStart + (limit - start)</tt>).
1396 * @param dstStart the offset in <TT>dst</TT> where the first character
1400 inline void extractBetween(int32_t start
,
1403 int32_t dstStart
= 0) const;
1406 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
1407 * into the UnicodeString <tt>target</tt>. Replaceable API.
1408 * @param start offset of first character which will be copied
1409 * @param limit offset immediately following the last character to be copied
1410 * @param target UnicodeString into which to copy characters.
1411 * The extracted characters are returned in <TT>target</TT>; the function itself returns no value.
1414 virtual void extractBetween(int32_t start
,
1416 UnicodeString
& target
) const;
1419 * Copy the characters in the range
1420 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters.
1421 * All characters must be invariant (see utypes.h).
1422 * Use US_INV as the last, signature-distinguishing parameter.
1424 * This function does not write any more than <code>targetCapacity</code>
1425 * characters but returns the length of the entire output string
1426 * so that one can allocate a larger buffer and call the function again
1428 * The output string is NUL-terminated if possible.
1430 * @param start offset of first character which will be copied
1431 * @param startLength the number of characters to extract
1432 * @param target the target buffer for extraction, can be NULL
1433 * if targetCapacity is 0
1434 * @param targetCapacity the length of the target buffer
1435 * @param inv Signature-distinguishing parameter, use US_INV.
1436 * @return the output string length, not including the terminating NUL
1439 int32_t extract(int32_t start
,
1440 int32_t startLength
,
1442 int32_t targetCapacity
,
1443 enum EInvariant inv
) const;
1445 #if !UCONFIG_NO_CONVERSION
1448 * Copy the characters in the range
1449 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
1450 * in a specified codepage.
1451 * The output string is NUL-terminated.
1453 * Recommendation: For invariant-character strings use
1454 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1455 * because it avoids object code dependencies of UnicodeString on
1456 * the conversion code.
1458 * @param start offset of first character which will be copied
1459 * @param startLength the number of characters to extract
1460 * @param target the target buffer for extraction
1461 * @param codepage the desired codepage for the characters. 0 has
1462 * the special meaning of the default codepage
1463 * If <code>codepage</code> is an empty string (<code>""</code>),
1464 * then a simple conversion is performed on the codepage-invariant
1465 * subset ("invariant characters") of the platform encoding. See utypes.h.
1466 * If <TT>target</TT> is NULL, then the number of bytes required for
1467 * <TT>target</TT> is returned. It is assumed that the target is big enough
1468 * to fit all of the characters.
1469 * @return the output string length, not including the terminating NUL
1472 inline int32_t extract(int32_t start
,
1473 int32_t startLength
,
1475 const char *codepage
= 0) const;
1478 * Copy the characters in the range
1479 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
1480 * in a specified codepage.
1481 * This function does not write any more than <code>targetLength</code>
1482 * characters but returns the length of the entire output string
1483 * so that one can allocate a larger buffer and call the function again
1485 * The output string is NUL-terminated if possible.
1487 * Recommendation: For invariant-character strings use
1488 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1489 * because it avoids object code dependencies of UnicodeString on
1490 * the conversion code.
1492 * @param start offset of first character which will be copied
1493 * @param startLength the number of characters to extract
1494 * @param target the target buffer for extraction
1495 * @param targetLength the length of the target buffer
1496 * @param codepage the desired codepage for the characters. 0 has
1497 * the special meaning of the default codepage
1498 * If <code>codepage</code> is an empty string (<code>""</code>),
1499 * then a simple conversion is performed on the codepage-invariant
1500 * subset ("invariant characters") of the platform encoding. See utypes.h.
1501 * If <TT>target</TT> is NULL, then the number of bytes required for
1502 * <TT>target</TT> is returned.
1503 * @return the output string length, not including the terminating NUL
1506 int32_t extract(int32_t start
,
1507 int32_t startLength
,
1509 uint32_t targetLength
,
1510 const char *codepage
= 0) const;
1513 * Convert the UnicodeString into a codepage string using an existing UConverter.
1514 * The output string is NUL-terminated if possible.
1516 * This function avoids the overhead of opening and closing a converter if
1517 * multiple strings are extracted.
1519 * @param dest destination string buffer, can be NULL if destCapacity==0
1520 * @param destCapacity the number of chars available at dest
1521 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1522 * or NULL for the default converter
1523 * @param errorCode normal ICU error code
1524 * @return the length of the output string, not counting the terminating NUL;
1525 * if the length is greater than destCapacity, then the string will not fit
1526 * and a buffer of the indicated length would need to be passed in
1529 int32_t extract(char *dest
, int32_t destCapacity
,
1531 UErrorCode
&errorCode
) const;
1535 /* Length operations */
1538 * Return the length of the UnicodeString object.
1539 * The length is the number of UChar code units in the UnicodeString.
1540 * If you want the number of code points, please use countChar32().
1541 * @return the length of the UnicodeString object
1545 inline int32_t length(void) const;
1548 * Count Unicode code points in the length UChar code units of the string.
1549 * A code point may occupy either one or two UChar code units.
1550 * Counting code points involves reading all code units.
1552 * This function is basically the inverse of moveIndex32().
1554 * @param start the index of the first code unit to check
1555 * @param length the number of UChar code units to check
1556 * @return the number of code points in the specified code units
1561 countChar32(int32_t start
=0, int32_t length
=INT32_MAX
) const;
1564 * Check if the length UChar code units of the string
1565 * contain more Unicode code points than a certain number.
1566 * This is more efficient than counting all code points in this part of the string
1567 * and comparing that number with a threshold.
1568 * This function may not need to scan the string at all if the length
1569 * falls within a certain range, and
1570 * never needs to count more than 'number+1' code points.
1571 * Logically equivalent to (countChar32(start, length)>number).
1572 * A Unicode code point may occupy either one or two UChar code units.
1574 * @param start the index of the first code unit to check (0 for the entire string)
1575 * @param length the number of UChar code units to check
1576 * (use INT32_MAX for the entire string; remember that start/length
1577 * values are pinned)
1578 * @param number The number of code points in the (sub)string is compared against
1579 * the 'number' parameter.
1580 * @return Boolean value for whether the string contains more Unicode code points
1581 * than 'number'. Same as (u_countChar32(s, length)>number).
1583 * @see u_strHasMoreChar32Than
1587 hasMoreChar32Than(int32_t start
, int32_t length
, int32_t number
) const;
1590 * Determine if this string is empty.
1591 * @return TRUE if this string contains 0 characters, FALSE otherwise.
1594 inline UBool
isEmpty(void) const;
1597 * Return the capacity of the internal buffer of the UnicodeString object.
1598 * This is useful together with the getBuffer functions.
1599 * See there for details.
1601 * @return the number of UChars available in the internal buffer
1605 inline int32_t getCapacity(void) const;
1607 /* Other operations */
1610 * Generate a hash code for this object.
1611 * @return The hash code of this UnicodeString.
1614 inline int32_t hashCode(void) const;
1617 * Determine if this object contains a valid string.
1618 * A bogus string has no value. It is different from an empty string.
1619 * It can be used to indicate that no string value is available.
1620 * getBuffer() and getTerminatedBuffer() return NULL, and
1621 * length() returns 0.
1623 * @return TRUE if the string is valid, FALSE otherwise
1627 inline UBool
isBogus(void) const;
1630 //========================================
1632 //========================================
1634 /* Assignment operations */
1637 * Assignment operator. Replace the characters in this UnicodeString
1638 * with the characters from <TT>srcText</TT>.
1639 * @param srcText The text containing the characters to replace
1640 * @return a reference to this
1643 UnicodeString
&operator=(const UnicodeString
&srcText
);
1646 * Almost the same as the assignment operator.
1647 * Replace the characters in this UnicodeString
1648 * with the characters from <code>srcText</code>.
1650 * This function works the same for all strings except for ones that
1651 * are readonly aliases.
1652 * Starting with ICU 2.4, the assignment operator and the copy constructor
1653 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1654 * This function implements the old, more efficient but less safe behavior
1655 * of making this string also a readonly alias to the same buffer.
1656 * The fastCopyFrom function must be used only if it is known that the lifetime of
1657 * this UnicodeString is at least as long as the lifetime of the aliased buffer
1658 * including its contents, for example for strings from resource bundles
1659 * or aliases to string contents.
1661 * @param src The text containing the characters to replace.
1662 * @return a reference to this
1665 UnicodeString
&fastCopyFrom(const UnicodeString
&src
);
1668 * Assignment operator. Replace the characters in this UnicodeString
1669 * with the code unit <TT>ch</TT>.
1670 * @param ch the code unit to replace
1671 * @return a reference to this
1674 inline UnicodeString
& operator= (UChar ch
);
1677 * Assignment operator. Replace the characters in this UnicodeString
1678 * with the code point <TT>ch</TT>.
1679 * @param ch the code point to replace
1680 * @return a reference to this
1683 inline UnicodeString
& operator= (UChar32 ch
);
1686 * Set the text in the UnicodeString object to the characters
1687 * in <TT>srcText</TT> in the range
1688 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1689 * <TT>srcText</TT> is not modified.
1690 * @param srcText the source for the new characters
1691 * @param srcStart the offset into <TT>srcText</TT> where new characters
1693 * @return a reference to this
1696 inline UnicodeString
& setTo(const UnicodeString
& srcText
,
1700 * Set the text in the UnicodeString object to the characters
1701 * in <TT>srcText</TT> in the range
1702 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1703 * <TT>srcText</TT> is not modified.
1704 * @param srcText the source for the new characters
1705 * @param srcStart the offset into <TT>srcText</TT> where new characters
1707 * @param srcLength the number of characters in <TT>srcText</TT> in the
1709 * @return a reference to this
1712 inline UnicodeString
& setTo(const UnicodeString
& srcText
,
1717 * Set the text in the UnicodeString object to the characters in
1719 * <TT>srcText</TT> is not modified.
1720 * @param srcText the source for the new characters
1721 * @return a reference to this
1724 inline UnicodeString
& setTo(const UnicodeString
& srcText
);
1727 * Set the characters in the UnicodeString object to the characters
1728 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1729 * @param srcChars the source for the new characters
1730 * @param srcLength the number of Unicode characters in srcChars.
1731 * @return a reference to this
1734 inline UnicodeString
& setTo(const UChar
*srcChars
,
1738 * Set the characters in the UnicodeString object to the code unit
1740 * @param srcChar the code unit which becomes the UnicodeString's character
1742 * @return a reference to this
1745 UnicodeString
& setTo(UChar srcChar
);
1748 * Set the characters in the UnicodeString object to the code point
1750 * @param srcChar the code point which becomes the UnicodeString's character
1752 * @return a reference to this
1755 UnicodeString
& setTo(UChar32 srcChar
);
1758 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1759 * The text will be used for the UnicodeString object, but
1760 * it will not be released when the UnicodeString is destroyed.
1761 * This has copy-on-write semantics:
1762 * When the string is modified, then the buffer is first copied into
1763 * newly allocated memory.
1764 * The aliased buffer is never modified.
1765 * In an assignment to another UnicodeString, the copy constructor and operator=() copy the text;
1766 * only fastCopyFrom() aliases it again, so that both strings then alias the same readonly-text.
1768 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1769 * This must be true if <code>textLength==-1</code>.
1770 * @param text The characters to alias for the UnicodeString.
1771 * @param textLength The number of Unicode characters in <code>text</code> to alias.
1772 * If -1, then this function will determine the length
1773 * by calling <code>u_strlen()</code>.
1774 * @return a reference to this
1777 UnicodeString
&setTo(UBool isTerminated
,
1779 int32_t textLength
);
1782 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1783 * The text will be used for the UnicodeString object, but
1784 * it will not be released when the UnicodeString is destroyed.
1785 * This has write-through semantics:
1786 * For as long as the capacity of the buffer is sufficient, write operations
1787 * will directly affect the buffer. When more capacity is necessary, then
1788 * a new buffer will be allocated and the contents copied as with regularly
1789 * constructed strings.
1790 * In an assignment to another UnicodeString, the buffer will be copied.
1791 * The extract(UChar *dst) function detects whether the dst pointer is the same
1792 * as the string buffer itself and will in this case not copy the contents.
1794 * @param buffer The characters to alias for the UnicodeString.
1795 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1796 * @param buffCapacity The size of <code>buffer</code> in UChars.
1797 * @return a reference to this
1800 UnicodeString
&setTo(UChar
*buffer
,
1802 int32_t buffCapacity
);
1805 * Make this UnicodeString object invalid.
1806 * The string will test TRUE with isBogus().
1808 * A bogus string has no value. It is different from an empty string.
1809 * It can be used to indicate that no string value is available.
1810 * getBuffer() and getTerminatedBuffer() return NULL, and
1811 * length() returns 0.
1813 * This utility function is used throughout the UnicodeString
1814 * implementation to indicate that a UnicodeString operation failed,
1815 * and may be used in other functions,
1816 * especially but not exclusively when such functions do not
1817 * take a UErrorCode for simplicity.
1819 * The following methods, and no others, will clear a string object's bogus flag:
1821 * - remove(0, INT32_MAX)
1823 * - operator=() (assignment operator)
1826 * The simplest way to turn a bogus string into an empty one
1827 * is to use the remove() function.
1828 * Examples for other functions that are equivalent to "set to empty string":
1831 * s.remove(); // set to an empty string (remove all), or
1832 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1833 * s.truncate(0); // set to an empty string (complete truncation), or
1834 * s=UnicodeString(); // assign an empty string, or
1835 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1836 * static const UChar nul=0;
1837 * s.setTo(&nul, 0); // set to an empty C Unicode string
1847 * Set the character at the specified offset to the specified character.
1848 * @param offset A valid offset into the text of the character to set
1849 * @param ch The new character
1850 * @return A reference to this
1853 UnicodeString
& setCharAt(int32_t offset
,
1857 /* Append operations */
1860 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1862 * @param ch the code unit to be appended
1863 * @return a reference to this
1866 inline UnicodeString
& operator+= (UChar ch
);
1869 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1871 * @param ch the code point to be appended
1872 * @return a reference to this
1875 inline UnicodeString
& operator+= (UChar32 ch
);
1878 * Append operator. Append the characters in <TT>srcText</TT> to the end of the
1879 * UnicodeString object. <TT>srcText</TT> is
1881 * @param srcText the source for the new characters
1882 * @return a reference to this
1885 inline UnicodeString
& operator+= (const UnicodeString
& srcText
);
1888 * Append the characters
1889 * in <TT>srcText</TT> in the range
1890 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the end of the
1891 * UnicodeString object. <TT>srcText</TT>
1893 * @param srcText the source for the new characters
1894 * @param srcStart the offset into <TT>srcText</TT> where new characters
1896 * @param srcLength the number of characters in <TT>srcText</TT> in
1898 * @return a reference to this
1901 inline UnicodeString
& append(const UnicodeString
& srcText
,
1906 * Append the characters in <TT>srcText</TT> to the end of the UnicodeString object.
1907 * <TT>srcText</TT> is not modified.
1908 * @param srcText the source for the new characters
1909 * @return a reference to this
1912 inline UnicodeString
& append(const UnicodeString
& srcText
);
1915 * Append the characters in <TT>srcChars</TT> in the range
1916 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
1918 * <TT>start</TT>. <TT>srcChars</TT> is not modified.
1919 * @param srcChars the source for the new characters
1920 * @param srcStart the offset into <TT>srcChars</TT> where new characters
1922 * @param srcLength the number of characters in <TT>srcChars</TT> in
1924 * @return a reference to this
1927 inline UnicodeString
& append(const UChar
*srcChars
,
1932 * Append the characters in <TT>srcChars</TT> to the end of the UnicodeString object.
1933 * <TT>srcChars</TT> is not modified.
1934 * @param srcChars the source for the new characters
1935 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
1936 * @return a reference to this
1939 inline UnicodeString
& append(const UChar
*srcChars
,
1943 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
1944 * @param srcChar the code unit to append
1945 * @return a reference to this
1948 inline UnicodeString
& append(UChar srcChar
);
1951 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
1952 * @param srcChar the code point to append
1953 * @return a reference to this
1956 inline UnicodeString
& append(UChar32 srcChar
);
1959 /* Insert operations */
1962 * Insert the characters in <TT>srcText</TT> in the range
1963 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1964 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1965 * @param start the offset where the insertion begins
1966 * @param srcText the source for the new characters
1967 * @param srcStart the offset into <TT>srcText</TT> where new characters
1969 * @param srcLength the number of characters in <TT>srcText</TT> in
1971 * @return a reference to this
1974 inline UnicodeString
& insert(int32_t start
,
1975 const UnicodeString
& srcText
,
1980 * Insert the characters in <TT>srcText</TT> into the UnicodeString object
1981 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1982 * @param start the offset where the insertion begins
1983 * @param srcText the source for the new characters
1984 * @return a reference to this
1987 inline UnicodeString
& insert(int32_t start
,
1988 const UnicodeString
& srcText
);
1991 * Insert the characters in <TT>srcChars</TT> in the range
1992 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1993 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
1994 * @param start the offset at which the insertion begins
1995 * @param srcChars the source for the new characters
1996 * @param srcStart the offset into <TT>srcChars</TT> where new characters
1998 * @param srcLength the number of characters in <TT>srcChars</TT>
1999 * in the insert string
2000 * @return a reference to this
2003 inline UnicodeString
& insert(int32_t start
,
2004 const UChar
*srcChars
,
2009 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
2010 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2011 * @param start the offset where the insertion begins
2012 * @param srcChars the source for the new characters
2013 * @param srcLength the number of Unicode characters in srcChars.
2014 * @return a reference to this
2017 inline UnicodeString
& insert(int32_t start
,
2018 const UChar
*srcChars
,
2022 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
2023 * offset <TT>start</TT>.
2024 * @param start the offset at which the insertion occurs
2025 * @param srcChar the code unit to insert
2026 * @return a reference to this
2029 inline UnicodeString
& insert(int32_t start
,
2033 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
2034 * offset <TT>start</TT>.
2035 * @param start the offset at which the insertion occurs
2036 * @param srcChar the code point to insert
2037 * @return a reference to this
2040 inline UnicodeString
& insert(int32_t start
,
2044 /* Replace operations */
2047 * Replace the characters in the range
2048 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2049 * <TT>srcText</TT> in the range
2050 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
2051 * <TT>srcText</TT> is not modified.
2052 * @param start the offset at which the replace operation begins
2053 * @param length the number of characters to replace. The character at
2054 * <TT>start + length</TT> is not modified.
2055 * @param srcText the source for the new characters
2056 * @param srcStart the offset into <TT>srcText</TT> where new characters
2058 * @param srcLength the number of characters in <TT>srcText</TT> in
2059 * the replace string
2060 * @return a reference to this
2063 UnicodeString
& replace(int32_t start
,
2065 const UnicodeString
& srcText
,
2070 * Replace the characters in the range
2071 * [<TT>start</TT>, <TT>start + length</TT>)
2072 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2074 * @param start the offset at which the replace operation begins
2075 * @param length the number of characters to replace. The character at
2076 * <TT>start + length</TT> is not modified.
2077 * @param srcText the source for the new characters
2078 * @return a reference to this
2081 UnicodeString
& replace(int32_t start
,
2083 const UnicodeString
& srcText
);
2086 * Replace the characters in the range
2087 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2088 * <TT>srcChars</TT> in the range
2089 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
2091 * @param start the offset at which the replace operation begins
2092 * @param length the number of characters to replace. The character at
2093 * <TT>start + length</TT> is not modified.
2094 * @param srcChars the source for the new characters
2095 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2097 * @param srcLength the number of characters in <TT>srcChars</TT>
2098 * in the replace string
2099 * @return a reference to this
2102 UnicodeString
& replace(int32_t start
,
2104 const UChar
*srcChars
,
2109 * Replace the characters in the range
2110 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2111 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2112 * @param start the offset at which the replace operation begins
2113 * @param length number of characters to replace. The character at
2114 * <TT>start + length</TT> is not modified.
2115 * @param srcChars the source for the new characters
2116 * @param srcLength the number of Unicode characters in srcChars
2117 * @return a reference to this
2120 inline UnicodeString
& replace(int32_t start
,
2122 const UChar
*srcChars
,
2126 * Replace the characters in the range
2127 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2129 * @param start the offset at which the replace operation begins
2130 * @param length the number of characters to replace. The character at
2131 * <TT>start + length</TT> is not modified.
2132 * @param srcChar the new code unit
2133 * @return a reference to this
2136 inline UnicodeString
& replace(int32_t start
,
2141 * Replace the characters in the range
2142 * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2144 * @param start the offset at which the replace operation begins
2145 * @param length the number of characters to replace. The character at
2146 * <TT>start + length</TT> is not modified.
2147 * @param srcChar the new code point
2148 * @return a reference to this
2151 inline UnicodeString
& replace(int32_t start
,
2156 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2157 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2158 * @param start the offset at which the replace operation begins
2159 * @param limit the offset immediately following the replace range
2160 * @param srcText the source for the new characters
2161 * @return a reference to this
2164 inline UnicodeString
& replaceBetween(int32_t start
,
2166 const UnicodeString
& srcText
);
2169 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2170 * with the characters in <TT>srcText</TT> in the range
2171 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2172 * @param start the offset at which the replace operation begins
2173 * @param limit the offset immediately following the replace range
2174 * @param srcText the source for the new characters
2175 * @param srcStart the offset into <TT>srcText</TT> where new characters
2177 * @param srcLimit the offset immediately following the range to copy
2178 * in <TT>srcText</TT>
2179 * @return a reference to this
2182 inline UnicodeString
& replaceBetween(int32_t start
,
2184 const UnicodeString
& srcText
,
2189 * Replace a substring of this object with the given text.
2190 * @param start the beginning index, inclusive; <code>0 <= start
2192 * @param limit the ending index, exclusive; <code>start <= limit
2193 * <= length()</code>.
2194 * @param text the text to replace characters <code>start</code>
2195 * to <code>limit - 1</code>
2198 virtual void handleReplaceBetween(int32_t start
,
2200 const UnicodeString
& text
);
2204 * @return TRUE if it has MetaData
2207 virtual UBool
hasMetaData() const;
2210 * Copy a substring of this object, retaining attribute (out-of-band)
2211 * information. This method is used to duplicate or reorder substrings.
2212 * The destination index must not overlap the source range.
2214 * @param start the beginning index, inclusive; <code>0 <= start <=
2216 * @param limit the ending index, exclusive; <code>start <= limit <=
2218 * @param dest the destination index. The characters from
2219 * <code>start..limit-1</code> will be copied to <code>dest</code>.
2220 * Implementations of this method may assume that <code>dest <= start ||
2221 * dest >= limit</code>.
2224 virtual void copy(int32_t start
, int32_t limit
, int32_t dest
);
2226 /* Search and replace operations */
2229 * Replace all occurrences of characters in oldText with the characters
2231 * @param oldText the text containing the search text
2232 * @param newText the text containing the replacement text
2233 * @return a reference to this
2236 inline UnicodeString
& findAndReplace(const UnicodeString
& oldText
,
2237 const UnicodeString
& newText
);
2240 * Replace all occurrences of characters in oldText with characters
2242 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2243 * @param start the start of the range in which replace will be performed
2244 * @param length the length of the range in which replace will be performed
2245 * @param oldText the text containing the search text
2246 * @param newText the text containing the replacement text
2247 * @return a reference to this
2250 inline UnicodeString
& findAndReplace(int32_t start
,
2252 const UnicodeString
& oldText
,
2253 const UnicodeString
& newText
);
2256 * Replace all occurrences of characters in oldText in the range
2257 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2258 * in newText in the range
2259 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
2260 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2261 * @param start the start of the range in which replace will be performed
2262 * @param length the length of the range in which replace will be performed
2263 * @param oldText the text containing the search text
2264 * @param oldStart the start of the search range in <TT>oldText</TT>
2265 * @param oldLength the length of the search range in <TT>oldText</TT>
2266 * @param newText the text containing the replacement text
2267 * @param newStart the start of the replacement range in <TT>newText</TT>
2268 * @param newLength the length of the replacement range in <TT>newText</TT>
2269 * @return a reference to this
2272 UnicodeString
& findAndReplace(int32_t start
,
2274 const UnicodeString
& oldText
,
2277 const UnicodeString
& newText
,
2282 /* Remove operations */
2285 * Remove all characters from the UnicodeString object.
2286 * @return a reference to this
2289 inline UnicodeString
& remove(void);
2292 * Remove the characters in the range
2293 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2294 * @param start the offset of the first character to remove
2295 * @param length the number of characters to remove
2296 * @return a reference to this
2299 inline UnicodeString
& remove(int32_t start
,
2300 int32_t length
= (int32_t)INT32_MAX
);
2303 * Remove the characters in the range
2304 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2305 * @param start the offset of the first character to remove
2306 * @param limit the offset immediately following the range to remove
2307 * @return a reference to this
2310 inline UnicodeString
& removeBetween(int32_t start
,
2311 int32_t limit
= (int32_t)INT32_MAX
);
2314 /* Length operations */
2317 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2318 * If the length of this UnicodeString is less than targetLength,
2319 * targetLength - length() copies of padChar will be added to the
2320 * beginning of this UnicodeString.
2321 * @param targetLength the desired length of the string
2322 * @param padChar the character to use for padding. Defaults to
2324 * @return TRUE if the text was padded, FALSE otherwise.
2327 UBool
padLeading(int32_t targetLength
,
2328 UChar padChar
= 0x0020);
2331 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2332 * If the length of this UnicodeString is less than targetLength,
2333 * targetLength - length() copies of padChar will be added to the
2334 * end of this UnicodeString.
2335 * @param targetLength the desired length of the string
2336 * @param padChar the character to use for padding. Defaults to
2338 * @return TRUE if the text was padded, FALSE otherwise.
2341 UBool
padTrailing(int32_t targetLength
,
2342 UChar padChar
= 0x0020);
2345 * Truncate this UnicodeString to the <TT>targetLength</TT>.
2346 * @param targetLength the desired length of this UnicodeString.
2347 * @return TRUE if the text was truncated, FALSE otherwise
2350 inline UBool
truncate(int32_t targetLength
);
2353 * Trims leading and trailing whitespace from this UnicodeString.
2354 * @return a reference to this
2357 UnicodeString
& trim(void);
2360 /* Miscellaneous operations */
2363 * Reverse this UnicodeString in place.
2364 * @return a reference to this
2367 inline UnicodeString
& reverse(void);
2370 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2371 * this UnicodeString.
2372 * @param start the start of the range to reverse
2373 * @param length the number of characters to reverse
2374 * @return a reference to this
2377 inline UnicodeString
& reverse(int32_t start
,
2381 * Convert the characters in this to UPPER CASE following the conventions of
2382 * the default locale.
2383 * @return A reference to this.
2386 UnicodeString
& toUpper(void);
2389 * Convert the characters in this to UPPER CASE following the conventions of
2390 * a specific locale.
2391 * @param locale The locale containing the conventions to use.
2392 * @return A reference to this.
2395 UnicodeString
& toUpper(const Locale
& locale
);
2398 * Convert the characters in this to lower case following the conventions of
2399 * the default locale.
2400 * @return A reference to this.
2403 UnicodeString
& toLower(void);
2406 * Convert the characters in this to lower case following the conventions of
2407 * a specific locale.
2408 * @param locale The locale containing the conventions to use.
2409 * @return A reference to this.
2412 UnicodeString
& toLower(const Locale
& locale
);
2414 #if !UCONFIG_NO_BREAK_ITERATION
2417 * Titlecase this string, convenience function using the default locale.
2419 * Casing is locale-dependent and context-sensitive.
2420 * Titlecasing uses a break iterator to find the first characters of words
2421 * that are to be titlecased. It titlecases those characters and lowercases
2424 * The titlecase break iterator can be provided to customize for arbitrary
2425 * styles, using rules and dictionaries beyond the standard iterators.
2426 * It may be more efficient to always provide an iterator to avoid
2427 * opening and closing one for each string.
2428 * The standard titlecase iterator for the root locale implements the
2429 * algorithm of Unicode TR 21.
2431 * This function uses only the first() and next() methods of the
2432 * provided break iterator.
2434 * @param titleIter A break iterator to find the first characters of words
2435 * that are to be titlecased.
2436 * If none is provided (0), then a standard titlecase
2437 * break iterator is opened.
2438 * Otherwise the provided iterator is set to the string's text.
2439 * @return A reference to this.
2442 UnicodeString
&toTitle(BreakIterator
*titleIter
);
2445 * Titlecase this string.
2447 * Casing is locale-dependent and context-sensitive.
2448 * Titlecasing uses a break iterator to find the first characters of words
2449 * that are to be titlecased. It titlecases those characters and lowercases
2452 * The titlecase break iterator can be provided to customize for arbitrary
2453 * styles, using rules and dictionaries beyond the standard iterators.
2454 * It may be more efficient to always provide an iterator to avoid
2455 * opening and closing one for each string.
2456 * The standard titlecase iterator for the root locale implements the
2457 * algorithm of Unicode TR 21.
2459 * This function uses only the first() and next() methods of the
2460 * provided break iterator.
2462 * @param titleIter A break iterator to find the first characters of words
2463 * that are to be titlecased.
2464 * If none is provided (0), then a standard titlecase
2465 * break iterator is opened.
2466 * Otherwise the provided iterator is set to the string's text.
2467 * @param locale The locale to consider.
2468 * @return A reference to this.
2471 UnicodeString
&toTitle(BreakIterator
*titleIter
, const Locale
&locale
);
2476 * Case-fold the characters in this string.
2477 * Case-folding is locale-independent and not context-sensitive,
2478 * but there is an option for whether to include or exclude mappings for dotted I
2479 * and dotless i that are marked with 'I' in CaseFolding.txt.
2480 * The result may be longer or shorter than the original.
2482 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2483 * @return A reference to this.
2486 UnicodeString
&foldCase(uint32_t options
=0 /*U_FOLD_CASE_DEFAULT*/);
2488 //========================================
2489 // Access to the internal buffer
2490 //========================================
2493 * Get a read/write pointer to the internal buffer.
2494 * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2495 * writable, and is still owned by the UnicodeString object.
2496 * Calls to getBuffer(minCapacity) must not be nested, and
2497 * must be matched with calls to releaseBuffer(newLength).
2498 * If the string buffer was read-only or shared,
2499 * then it will be reallocated and copied.
2501 * An attempted nested call will return 0, and will not further modify the
2502 * state of the UnicodeString object.
2503 * It also returns 0 if the string is bogus.
2505 * The actual capacity of the string buffer may be larger than minCapacity.
2506 * getCapacity() returns the actual capacity.
2507 * For many operations, the full capacity should be used to avoid reallocations.
2509 * While the buffer is "open" between getBuffer(minCapacity)
2510 * and releaseBuffer(newLength), the following applies:
2511 * - The string length is set to 0.
2512 * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2513 * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2514 * - You can read from and write to the returned buffer.
2515 * - The previous string contents will still be in the buffer;
2516 * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2517 * If the length() was greater than minCapacity, then any contents after minCapacity
2519 * The buffer contents are not NUL-terminated by getBuffer().
2520 * If length()<getCapacity() then you can terminate it by writing a NUL
2521 * at index length().
2522 * - You must call releaseBuffer(newLength) in order to
2523 * return to normal UnicodeString operation.
2525 * @param minCapacity the minimum number of UChars that are to be available
2526 * in the buffer, starting at the returned pointer;
2527 * default to the current string capacity if minCapacity==-1
2528 * @return a writable pointer to the internal string buffer,
2529 * or 0 if an error occurs (nested calls, out of memory)
2531 * @see releaseBuffer
2532 * @see getTerminatedBuffer()
2535 UChar
*getBuffer(int32_t minCapacity
);
2538 * Release a read/write buffer on a UnicodeString object with an
2539 * "open" getBuffer(minCapacity).
2540 * This function must be called in a matched pair with getBuffer(minCapacity).
2541 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2543 * It will set the string length to newLength, at most to the current capacity.
2544 * If newLength==-1 then it will set the length according to the
2545 * first NUL in the buffer, or to the capacity if there is no NUL.
2547 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2549 * @param newLength the new length of the UnicodeString object;
2550 * defaults to the current capacity if newLength is greater than that;
2551 * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2552 * the current capacity of the string
2554 * @see getBuffer(int32_t minCapacity)
2557 void releaseBuffer(int32_t newLength
=-1);
2560 * Get a read-only pointer to the internal buffer.
2561 * This can be called at any time on a valid UnicodeString.
2563 * It returns 0 if the string is bogus, or
2564 * during an "open" getBuffer(minCapacity).
2566 * It can be called as many times as desired.
2567 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2568 * at which time the pointer is semantically invalidated and must not be used any more.
2570 * The capacity of the buffer can be determined with getCapacity().
2571 * The part after length() may or may not be initialized and valid,
2572 * depending on the history of the UnicodeString object.
2574 * The buffer contents are (probably) not NUL-terminated.
2575 * You can check if it is with
2576 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2577 * (See getTerminatedBuffer().)
2579 * The buffer may reside in read-only memory. Its contents must not
2582 * @return a read-only pointer to the internal string buffer,
2583 * or 0 if the string is empty or bogus
2585 * @see getBuffer(int32_t minCapacity)
2586 * @see getTerminatedBuffer()
2589 inline const UChar
*getBuffer() const;
2592 * Get a read-only pointer to the internal buffer,
2593 * making sure that it is NUL-terminated.
2594 * This can be called at any time on a valid UnicodeString.
2596 * It returns 0 if the string is bogus, or
2597 * during an "open" getBuffer(minCapacity), or if the buffer cannot
2598 * be NUL-terminated (because memory allocation failed).
2600 * It can be called as many times as desired.
2601 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2602 * at which time the pointer is semantically invalidated and must not be used any more.
2604 * The capacity of the buffer can be determined with getCapacity().
2605 * The part after length()+1 may or may not be initialized and valid,
2606 * depending on the history of the UnicodeString object.
2608 * The buffer contents are guaranteed to be NUL-terminated.
2609 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2611 * For this reason, this function is not const, unlike getBuffer().
2612 * Note that a UnicodeString may also contain NUL characters as part of its contents.
2614 * The buffer may reside in read-only memory. Its contents must not
2617 * @return a read-only pointer to the internal string buffer,
2618 * or 0 if the string is empty or bogus
2620 * @see getBuffer(int32_t minCapacity)
2624 inline const UChar
*getTerminatedBuffer();
2626 //========================================
2628 //========================================
2630 /** Construct an empty UnicodeString.
2636 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2637 * @param capacity the number of UChars this UnicodeString should hold
2638 * before a resize is necessary; if count is greater than 0 and count
2639 * code points c take up more space than capacity, then capacity is adjusted
2641 * @param c is used to initially fill the string
2642 * @param count specifies how many code points c are to be written in the
2646 UnicodeString(int32_t capacity
, UChar32 c
, int32_t count
);
2649 * Single UChar (code unit) constructor.
2650 * @param ch the character to place in the UnicodeString
2653 UnicodeString(UChar ch
);
2656 * Single UChar32 (code point) constructor.
2657 * @param ch the character to place in the UnicodeString
2660 UnicodeString(UChar32 ch
);
2663 * UChar* constructor.
2664 * @param text The characters to place in the UnicodeString. <TT>text</TT>
2665 * must be NUL (U+0000) terminated.
2668 UnicodeString(const UChar
*text
);
2671 * UChar* constructor.
2672 * @param text The characters to place in the UnicodeString.
2673 * @param textLength The number of Unicode characters in <TT>text</TT>
2677 UnicodeString(const UChar
*text
,
2678 int32_t textLength
);
2681 * Readonly-aliasing UChar* constructor.
2682 * The text will be used for the UnicodeString object, but
2683 * it will not be released when the UnicodeString is destroyed.
2684 * This has copy-on-write semantics:
2685 * When the string is modified, then the buffer is first copied into
2686 * newly allocated memory.
2687 * The aliased buffer is never modified.
2688 * In an assignment to another UnicodeString, the text will be aliased again,
2689 * so that both strings then alias the same readonly-text.
2691 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2692 * This must be true if <code>textLength==-1</code>.
2693 * @param text The characters to alias for the UnicodeString.
2694 * @param textLength The number of Unicode characters in <code>text</code> to alias.
2695 * If -1, then this constructor will determine the length
2696 * by calling <code>u_strlen()</code>.
2699 UnicodeString(UBool isTerminated
,
2701 int32_t textLength
);
2704 * Writable-aliasing UChar* constructor.
2705 * The text will be used for the UnicodeString object, but
2706 * it will not be released when the UnicodeString is destroyed.
2707 * This has write-through semantics:
2708 * For as long as the capacity of the buffer is sufficient, write operations
2709 * will directly affect the buffer. When more capacity is necessary, then
2710 * a new buffer will be allocated and the contents copied as with regularly
2711 * constructed strings.
2712 * In an assignment to another UnicodeString, the buffer will be copied.
2713 * The extract(UChar *dst) function detects whether the dst pointer is the same
2714 * as the string buffer itself and will in this case not copy the contents.
2716 * @param buffer The characters to alias for the UnicodeString.
2717 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2718 * @param buffCapacity The size of <code>buffer</code> in UChars.
2721 UnicodeString(UChar
*buffer
, int32_t buffLength
, int32_t buffCapacity
);
2723 #if !UCONFIG_NO_CONVERSION
2726 * char* constructor.
2727 * @param codepageData an array of bytes, null-terminated
2728 * @param codepage the encoding of <TT>codepageData</TT>. The special
2729 * value 0 for <TT>codepage</TT> indicates that the text is in the
2730 * platform's default codepage.
2732 * If <code>codepage</code> is an empty string (<code>""</code>),
2733 * then a simple conversion is performed on the codepage-invariant
2734 * subset ("invariant characters") of the platform encoding. See utypes.h.
2735 * Recommendation: For invariant-character strings use the constructor
2736 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2737 * because it avoids object code dependencies of UnicodeString on
2738 * the conversion code.
2742 UnicodeString(const char *codepageData
,
2743 const char *codepage
= 0);
2746 * char* constructor.
2747 * @param codepageData an array of bytes.
2748 * @param dataLength The number of bytes in <TT>codepageData</TT>.
2749 * @param codepage the encoding of <TT>codepageData</TT>. The special
2750 * value 0 for <TT>codepage</TT> indicates that the text is in the
2751 * platform's default codepage.
2752 * If <code>codepage</code> is an empty string (<code>""</code>),
2753 * then a simple conversion is performed on the codepage-invariant
2754 * subset ("invariant characters") of the platform encoding. See utypes.h.
2755 * Recommendation: For invariant-character strings use the constructor
2756 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2757 * because it avoids object code dependencies of UnicodeString on
2758 * the conversion code.
2762 UnicodeString(const char *codepageData
,
2764 const char *codepage
= 0);
2767 * char * / UConverter constructor.
2768 * This constructor uses an existing UConverter object to
2769 * convert the codepage string to Unicode and construct a UnicodeString
2772 * The converter is reset at first.
2773 * If the error code indicates a failure before this constructor is called,
2774 * or if an error occurs during conversion or construction,
2775 * then the string will be bogus.
2777 * This function avoids the overhead of opening and closing a converter if
2778 * multiple strings are constructed.
2780 * @param src input codepage string
2781 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
2782 * @param cnv converter object (ucnv_resetToUnicode() will be called),
2783 * can be NULL for the default converter
2784 * @param errorCode normal ICU error code
2788 const char *src
, int32_t srcLength
,
2790 UErrorCode
&errorCode
);
2795 * Constructs a Unicode string from an invariant-character char * string.
2796 * About invariant characters see utypes.h.
2797 * This constructor has no runtime dependency on conversion code and is
2798 * therefore recommended over ones taking a charset name string
2799 * (where the empty string "" indicates invariant-character conversion).
2801 * Use the macro US_INV as the third, signature-distinguishing parameter.
2805 * void fn(const char *s) {
2806 * UnicodeString ustr(s, -1, US_INV);
2811 * @param src String using only invariant characters.
2812 * @param length Length of src, or -1 if NUL-terminated.
2813 * @param inv Signature-distinguishing parameter, use US_INV.
2818 UnicodeString(const char *src
, int32_t length
, enum EInvariant inv
);
2823 * @param that The UnicodeString object to copy.
2826 UnicodeString(const UnicodeString
& that
);
2829 * 'Substring' constructor from tail of source string.
2830 * @param src The UnicodeString object to copy.
2831 * @param srcStart The offset into <tt>src</tt> at which to start copying.
2834 UnicodeString(const UnicodeString
& src
, int32_t srcStart
);
2837 * 'Substring' constructor from subrange of source string.
2838 * @param src The UnicodeString object to copy.
2839 * @param srcStart The offset into <tt>src</tt> at which to start copying.
2840 * @param srcLength The number of characters from <tt>src</tt> to copy.
2843 UnicodeString(const UnicodeString
& src
, int32_t srcStart
, int32_t srcLength
);
2846 * Clone this object, an instance of a subclass of Replaceable.
2847 * Clones can be used concurrently in multiple threads.
2848 * If a subclass does not implement clone(), or if an error occurs,
2849 * then NULL is returned.
2850 * The clone functions in all subclasses return a pointer to a Replaceable
2851 * because some compilers do not support covariant (same-as-this)
2852 * return types; cast to the appropriate subclass if necessary.
2853 * The caller must delete the clone.
2855 * @return a clone of this object
2857 * @see Replaceable::clone
2858 * @see getDynamicClassID
2861 virtual Replaceable
*clone() const;
2866 virtual ~UnicodeString();
2869 /* Miscellaneous operations */
2872 * Unescape a string of characters and return a string containing
2873 * the result. The following escape sequences are recognized:
2875 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
2876 * \\Uhhhhhhhh 8 hex digits
2877 * \\xhh 1-2 hex digits
2878 * \\ooo 1-3 octal digits; o in [0-7]
2879 * \\cX control-X; X is masked with 0x1F
2881 * as well as the standard ANSI C escapes:
2883 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
2884 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
2885 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
2887 * Anything else following a backslash is generically escaped. For
2888 * example, "[a\\-z]" returns "[a-z]".
2890 * If an escape sequence is ill-formed, this method returns an empty
2891 * string. An example of an ill-formed sequence is "\\u" followed by
2892 * fewer than 4 hex digits.
2894 * This function is similar to u_unescape() but not identical to it.
2895 * The latter takes a source char*, so it does escape recognition
2896 * and also invariant conversion.
2898 * @return a string with backslash escapes interpreted, or an
2899 * empty string on error.
2900 * @see UnicodeString#unescapeAt()
2902 * @see u_unescapeAt()
2905 UnicodeString
unescape() const;
2908 * Unescape a single escape sequence and return the represented
2909 * character. See unescape() for a listing of the recognized escape
2910 * sequences. The character at offset-1 is assumed (without
2911 * checking) to be a backslash. If the escape sequence is
2912 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
2915 * @param offset an input output parameter. On input, it is the
2916 * offset into this string where the escape sequence is located,
2917 * after the initial backslash. On output, it is advanced after the
2918 * last character parsed. On error, it is not advanced at all.
2919 * @return the character represented by the escape sequence at
2920 * offset, or (UChar32)0xFFFFFFFF on error.
2921 * @see UnicodeString#unescape()
2923 * @see u_unescapeAt()
2926 UChar32
unescapeAt(int32_t &offset
) const;
2929 * ICU "poor man's RTTI", returns a UClassID for this class.
2933 static UClassID U_EXPORT2
getStaticClassID();
2936 * ICU "poor man's RTTI", returns a UClassID for the actual class.
2940 virtual UClassID
getDynamicClassID() const;
2942 //========================================
2943 // Implementation methods
2944 //========================================
2948 * Implement Replaceable::getLength() (see jitterbug 1027).
2951 virtual int32_t getLength() const;
2954 * The change in Replaceable to use virtual getCharAt() allows
2955 * UnicodeString::charAt() to be inline again (see jitterbug 709).
2958 virtual UChar
getCharAt(int32_t offset
) const;
2961 * The change in Replaceable to use virtual getChar32At() allows
2962 * UnicodeString::char32At() to be inline again (see jitterbug 709).
2965 virtual UChar32
getChar32At(int32_t offset
) const;
2970 doCompare(int32_t start
,
2972 const UnicodeString
& srcText
,
2974 int32_t srcLength
) const;
2976 int8_t doCompare(int32_t start
,
2978 const UChar
*srcChars
,
2980 int32_t srcLength
) const;
2983 doCompareCodePointOrder(int32_t start
,
2985 const UnicodeString
& srcText
,
2987 int32_t srcLength
) const;
2989 int8_t doCompareCodePointOrder(int32_t start
,
2991 const UChar
*srcChars
,
2993 int32_t srcLength
) const;
2996 doCaseCompare(int32_t start
,
2998 const UnicodeString
&srcText
,
3001 uint32_t options
) const;
3004 doCaseCompare(int32_t start
,
3006 const UChar
*srcChars
,
3009 uint32_t options
) const;
3011 int32_t doIndexOf(UChar c
,
3013 int32_t length
) const;
3015 int32_t doIndexOf(UChar32 c
,
3017 int32_t length
) const;
3019 int32_t doLastIndexOf(UChar c
,
3021 int32_t length
) const;
3023 int32_t doLastIndexOf(UChar32 c
,
3025 int32_t length
) const;
3027 void doExtract(int32_t start
,
3030 int32_t dstStart
) const;
3032 inline void doExtract(int32_t start
,
3034 UnicodeString
& target
) const;
3036 inline UChar
doCharAt(int32_t offset
) const;
3038 UnicodeString
& doReplace(int32_t start
,
3040 const UnicodeString
& srcText
,
3044 UnicodeString
& doReplace(int32_t start
,
3046 const UChar
*srcChars
,
3050 UnicodeString
& doReverse(int32_t start
,
3053 // calculate hash code
3054 int32_t doHashCode(void) const;
3056 // get pointer to start of array
3057 inline UChar
* getArrayStart(void);
3058 inline const UChar
* getArrayStart(void) const;
3060 // allocate the array; result may be fStackBuffer
3061 // sets refCount to 1 if appropriate
3062 // sets fArray, fCapacity, and fFlags
3063 // returns boolean for success or failure
3064 UBool
allocate(int32_t capacity
);
3066 // release the array if owned
3067 void releaseArray(void);
3069 // turn a bogus string into an empty one
3072 // implements assignment operator, copy constructor, and fastCopyFrom()
3073 UnicodeString
©From(const UnicodeString
&src
, UBool fastCopy
=FALSE
);
3075 // Pin start and limit to acceptable values.
3076 inline void pinIndex(int32_t& start
) const;
3077 inline void pinIndices(int32_t& start
,
3078 int32_t& length
) const;
3080 #if !UCONFIG_NO_CONVERSION
3082 /* Internal extract() using UConverter. */
3083 int32_t doExtract(int32_t start
, int32_t length
,
3084 char *dest
, int32_t destCapacity
,
3086 UErrorCode
&errorCode
) const;
3089 * Real constructor for converting from codepage data.
3090 * It assumes that it is called with !fRefCounted.
3092 * If <code>codepage==0</code>, then the default converter
3093 * is used for the platform encoding.
3094 * If <code>codepage</code> is an empty string (<code>""</code>),
3095 * then a simple conversion is performed on the codepage-invariant
3096 * subset ("invariant characters") of the platform encoding. See utypes.h.
3098 void doCodepageCreate(const char *codepageData
,
3100 const char *codepage
);
3103 * Worker function for creating a UnicodeString from
3104 * a codepage string using a UConverter.
3107 doCodepageCreate(const char *codepageData
,
3109 UConverter
*converter
,
3110 UErrorCode
&status
);
3115 * This function is called when write access to the array
3118 * We need to make a copy of the array if
3119 * the buffer is read-only, or
3120 * the buffer is refCounted (shared), and refCount>1, or
3121 * the buffer is too small.
3123 * Return FALSE if memory could not be allocated.
3125 UBool
cloneArrayIfNeeded(int32_t newCapacity
= -1,
3126 int32_t growCapacity
= -1,
3127 UBool doCopyArray
= TRUE
,
3128 int32_t **pBufferToDelete
= 0,
3129 UBool forceClone
= FALSE
);
3131 // common function for case mappings
3133 caseMap(BreakIterator
*titleIter
,
3136 int32_t toWhichCase
);
3140 int32_t removeRef(void);
3141 int32_t refCount(void) const;
3145 US_STACKBUF_SIZE
=7, // Size of stack buffer for small strings
3146 kInvalidUChar
=0xffff, // invalid UChar index
3147 kGrowSize
=128, // grow size for this buffer
3148 kInvalidHashCode
=0, // invalid hash code
3149 kEmptyHashCode
=1, // hash code for empty string
3151 // bit flag values for fFlags
3152 kIsBogus
=1, // this string is bogus, i.e., not valid or NULL
3153 kUsingStackBuffer
=2,// fArray==fStackBuffer
3154 kRefCounted
=4, // there is a refCount field before the characters in fArray
3155 kBufferIsReadonly
=8,// do not write to this buffer
3156 kOpenGetBuffer
=16, // getBuffer(minCapacity) was called (is "open"),
3157 // and releaseBuffer(newLength) must be called
3159 // combined values for convenience
3160 kShortString
=kUsingStackBuffer
,
3161 kLongString
=kRefCounted
,
3162 kReadonlyAlias
=kBufferIsReadonly
,
3166 friend class StringCharacterIterator
;
3167 friend class StringThreadTest
;
3170 * The following are all the class fields that are stored
3171 * in each UnicodeString object.
3172 * Note that UnicodeString has virtual functions,
3173 * therefore there is an implicit vtable pointer
3174 * as the first real field.
3175 * The fields should be aligned such that no padding is
3176 * necessary, mostly by having larger types first.
3177 * On 32-bit machines, the size should be 32 bytes,
3178 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3180 // (implicit) *vtable;
3181 int32_t fLength
; // number of UChars (code units) currently used in fArray
3182 int32_t fCapacity
; // capacity of fArray, counted in UChars (not bytes)
3183 UChar
*fArray
; // the Unicode data; equals fStackBuffer when kUsingStackBuffer is set
3184 uint16_t fFlags
; // bit flags (kIsBogus, kUsingStackBuffer, ...): see constants above
3185 UChar fStackBuffer
[ US_STACKBUF_SIZE
]; // in-object buffer for small strings, US_STACKBUF_SIZE UChars long
3190 * Create a new UnicodeString with the concatenation of two others.
3192 * @param s1 The first string to be copied to the new one.
3193 * @param s2 The second string to be copied to the new one, after s1.
3194 * @return UnicodeString(s1).append(s2)
3197 U_COMMON_API UnicodeString U_EXPORT2
3198 operator+ (const UnicodeString
&s1
, const UnicodeString
&s2
);
3202 // inline implementations -------------------------------------------------- ***
3204 //========================================
3206 //========================================
3208 * Copy an array of UnicodeString OBJECTS (not pointers).
3212 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString
*src
, U_NAMESPACE_QUALIFIER UnicodeString
*dst
, int32_t count
)
3213 { while(count
-- > 0) *dst
++ = *src
++; }
3216 * Copy an array of UnicodeString OBJECTS (not pointers).
3220 uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString
*src
, int32_t srcStart
,
3221 U_NAMESPACE_QUALIFIER UnicodeString
*dst
, int32_t dstStart
, int32_t count
)
3222 { uprv_arrayCopy(src
+srcStart
, dst
+dstStart
, count
); }
3226 //========================================
3228 //========================================
3230 //========================================
3232 //========================================
3235 UnicodeString::pinIndex(int32_t& start
) const
3240 } else if(start
> fLength
) {
3246 UnicodeString::pinIndices(int32_t& start
,
3247 int32_t& _length
) const
3252 } else if(start
> fLength
) {
3257 } else if(_length
> (fLength
- start
)) {
3258 _length
= (fLength
- start
);
3263 UnicodeString::getArrayStart()
3267 UnicodeString::getArrayStart() const
3270 //========================================
3271 // Read-only implementation methods
3272 //========================================
3274 UnicodeString::length() const
3278 UnicodeString::getCapacity() const
3279 { return fCapacity
; }
3282 UnicodeString::hashCode() const
3283 { return doHashCode(); }
3286 UnicodeString::isBogus() const
3287 { return (UBool
)(fFlags
& kIsBogus
); }
3289 inline const UChar
*
3290 UnicodeString::getBuffer() const {
3291 if(!(fFlags
&(kIsBogus
|kOpenGetBuffer
))) {
3298 //========================================
3299 // Read-only alias methods
3300 //========================================
3302 UnicodeString::doCompare(int32_t start
,
3304 const UnicodeString
& srcText
,
3306 int32_t srcLength
) const
3308 if(srcText
.isBogus()) {
3309 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3311 srcText
.pinIndices(srcStart
, srcLength
);
3312 return doCompare(start
, length
, srcText
.fArray
, srcStart
, srcLength
);
3317 UnicodeString::operator== (const UnicodeString
& text
) const
3320 return text
.isBogus();
3324 fLength
== text
.fLength
&&
3325 doCompare(0, fLength
, text
, 0, text
.fLength
) == 0;
3330 UnicodeString::operator!= (const UnicodeString
& text
) const
3331 { return (! operator==(text
)); }
3334 UnicodeString::operator> (const UnicodeString
& text
) const
3335 { return doCompare(0, fLength
, text
, 0, text
.fLength
) == 1; }
3338 UnicodeString::operator< (const UnicodeString
& text
) const
3339 { return doCompare(0, fLength
, text
, 0, text
.fLength
) == -1; }
3342 UnicodeString::operator>= (const UnicodeString
& text
) const
3343 { return doCompare(0, fLength
, text
, 0, text
.fLength
) != -1; }
3346 UnicodeString::operator<= (const UnicodeString
& text
) const
3347 { return doCompare(0, fLength
, text
, 0, text
.fLength
) != 1; }
3350 UnicodeString::compare(const UnicodeString
& text
) const
3351 { return doCompare(0, fLength
, text
, 0, text
.fLength
); }
3354 UnicodeString::compare(int32_t start
,
3356 const UnicodeString
& srcText
) const
3357 { return doCompare(start
, _length
, srcText
, 0, srcText
.fLength
); }
3360 UnicodeString::compare(const UChar
*srcChars
,
3361 int32_t srcLength
) const
3362 { return doCompare(0, fLength
, srcChars
, 0, srcLength
); }
3365 UnicodeString::compare(int32_t start
,
3367 const UnicodeString
& srcText
,
3369 int32_t srcLength
) const
3370 { return doCompare(start
, _length
, srcText
, srcStart
, srcLength
); }
3373 UnicodeString::compare(int32_t start
,
3375 const UChar
*srcChars
) const
3376 { return doCompare(start
, _length
, srcChars
, 0, _length
); }
3379 UnicodeString::compare(int32_t start
,
3381 const UChar
*srcChars
,
3383 int32_t srcLength
) const
3384 { return doCompare(start
, _length
, srcChars
, srcStart
, srcLength
); }
3387 UnicodeString::compareBetween(int32_t start
,
3389 const UnicodeString
& srcText
,
3391 int32_t srcLimit
) const
3392 { return doCompare(start
, limit
- start
,
3393 srcText
, srcStart
, srcLimit
- srcStart
); }
3396 UnicodeString::doCompareCodePointOrder(int32_t start
,
3398 const UnicodeString
& srcText
,
3400 int32_t srcLength
) const
3402 if(srcText
.isBogus()) {
3403 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3405 srcText
.pinIndices(srcStart
, srcLength
);
3406 return doCompareCodePointOrder(start
, length
, srcText
.fArray
, srcStart
, srcLength
);
3411 UnicodeString::compareCodePointOrder(const UnicodeString
& text
) const
3412 { return doCompareCodePointOrder(0, fLength
, text
, 0, text
.fLength
); }
3415 UnicodeString::compareCodePointOrder(int32_t start
,
3417 const UnicodeString
& srcText
) const
3418 { return doCompareCodePointOrder(start
, _length
, srcText
, 0, srcText
.fLength
); }
3421 UnicodeString::compareCodePointOrder(const UChar
*srcChars
,
3422 int32_t srcLength
) const
3423 { return doCompareCodePointOrder(0, fLength
, srcChars
, 0, srcLength
); }
3426 UnicodeString::compareCodePointOrder(int32_t start
,
3428 const UnicodeString
& srcText
,
3430 int32_t srcLength
) const
3431 { return doCompareCodePointOrder(start
, _length
, srcText
, srcStart
, srcLength
); }
3434 UnicodeString::compareCodePointOrder(int32_t start
,
3436 const UChar
*srcChars
) const
3437 { return doCompareCodePointOrder(start
, _length
, srcChars
, 0, _length
); }
3440 UnicodeString::compareCodePointOrder(int32_t start
,
3442 const UChar
*srcChars
,
3444 int32_t srcLength
) const
3445 { return doCompareCodePointOrder(start
, _length
, srcChars
, srcStart
, srcLength
); }
3448 UnicodeString::compareCodePointOrderBetween(int32_t start
,
3450 const UnicodeString
& srcText
,
3452 int32_t srcLimit
) const
3453 { return doCompareCodePointOrder(start
, limit
- start
,
3454 srcText
, srcStart
, srcLimit
- srcStart
); }
3457 UnicodeString::doCaseCompare(int32_t start
,
3459 const UnicodeString
&srcText
,
3462 uint32_t options
) const
3464 if(srcText
.isBogus()) {
3465 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3467 srcText
.pinIndices(srcStart
, srcLength
);
3468 return doCaseCompare(start
, length
, srcText
.fArray
, srcStart
, srcLength
, options
);
3473 UnicodeString::caseCompare(const UnicodeString
&text
, uint32_t options
) const {
3474 return doCaseCompare(0, fLength
, text
, 0, text
.fLength
, options
);
3478 UnicodeString::caseCompare(int32_t start
,
3480 const UnicodeString
&srcText
,
3481 uint32_t options
) const {
3482 return doCaseCompare(start
, _length
, srcText
, 0, srcText
.fLength
, options
);
3486 UnicodeString::caseCompare(const UChar
*srcChars
,
3488 uint32_t options
) const {
3489 return doCaseCompare(0, fLength
, srcChars
, 0, srcLength
, options
);
3493 UnicodeString::caseCompare(int32_t start
,
3495 const UnicodeString
&srcText
,
3498 uint32_t options
) const {
3499 return doCaseCompare(start
, _length
, srcText
, srcStart
, srcLength
, options
);
3503 UnicodeString::caseCompare(int32_t start
,
3505 const UChar
*srcChars
,
3506 uint32_t options
) const {
3507 return doCaseCompare(start
, _length
, srcChars
, 0, _length
, options
);
3511 UnicodeString::caseCompare(int32_t start
,
3513 const UChar
*srcChars
,
3516 uint32_t options
) const {
3517 return doCaseCompare(start
, _length
, srcChars
, srcStart
, srcLength
, options
);
3521 UnicodeString::caseCompareBetween(int32_t start
,
3523 const UnicodeString
&srcText
,
3526 uint32_t options
) const {
3527 return doCaseCompare(start
, limit
- start
, srcText
, srcStart
, srcLimit
- srcStart
, options
);
3531 UnicodeString::indexOf(const UnicodeString
& srcText
,
3535 int32_t _length
) const
3537 if(!srcText
.isBogus()) {
3538 srcText
.pinIndices(srcStart
, srcLength
);
3540 return indexOf(srcText
.getArrayStart(), srcStart
, srcLength
, start
, _length
);
3547 UnicodeString::indexOf(const UnicodeString
& text
) const
3548 { return indexOf(text
, 0, text
.fLength
, 0, fLength
); }
3551 UnicodeString::indexOf(const UnicodeString
& text
,
3552 int32_t start
) const {
3554 return indexOf(text
, 0, text
.fLength
, start
, fLength
- start
);
3558 UnicodeString::indexOf(const UnicodeString
& text
,
3560 int32_t _length
) const
3561 { return indexOf(text
, 0, text
.fLength
, start
, _length
); }
3564 UnicodeString::indexOf(const UChar
*srcChars
,
3566 int32_t start
) const {
3568 return indexOf(srcChars
, 0, srcLength
, start
, fLength
- start
);
3572 UnicodeString::indexOf(const UChar
*srcChars
,
3575 int32_t _length
) const
3576 { return indexOf(srcChars
, 0, srcLength
, start
, _length
); }
3579 UnicodeString::indexOf(UChar c
,
3581 int32_t _length
) const
3582 { return doIndexOf(c
, start
, _length
); }
3585 UnicodeString::indexOf(UChar32 c
,
3587 int32_t _length
) const
3588 { return doIndexOf(c
, start
, _length
); }
3591 UnicodeString::indexOf(UChar c
) const
3592 { return doIndexOf(c
, 0, fLength
); }
3595 UnicodeString::indexOf(UChar32 c
) const
3596 { return indexOf(c
, 0, fLength
); }
3599 UnicodeString::indexOf(UChar c
,
3600 int32_t start
) const {
3602 return doIndexOf(c
, start
, fLength
- start
);
3606 UnicodeString::indexOf(UChar32 c
,
3607 int32_t start
) const {
3609 return indexOf(c
, start
, fLength
- start
);
3613 UnicodeString::lastIndexOf(const UChar
*srcChars
,
3616 int32_t _length
) const
3617 { return lastIndexOf(srcChars
, 0, srcLength
, start
, _length
); }
3620 UnicodeString::lastIndexOf(const UChar
*srcChars
,
3622 int32_t start
) const {
3624 return lastIndexOf(srcChars
, 0, srcLength
, start
, fLength
- start
);
3628 UnicodeString::lastIndexOf(const UnicodeString
& srcText
,
3632 int32_t _length
) const
3634 if(!srcText
.isBogus()) {
3635 srcText
.pinIndices(srcStart
, srcLength
);
3637 return lastIndexOf(srcText
.getArrayStart(), srcStart
, srcLength
, start
, _length
);
3644 UnicodeString::lastIndexOf(const UnicodeString
& text
,
3646 int32_t _length
) const
3647 { return lastIndexOf(text
, 0, text
.fLength
, start
, _length
); }
3650 UnicodeString::lastIndexOf(const UnicodeString
& text
,
3651 int32_t start
) const {
3653 return lastIndexOf(text
, 0, text
.fLength
, start
, fLength
- start
);
3657 UnicodeString::lastIndexOf(const UnicodeString
& text
) const
3658 { return lastIndexOf(text
, 0, text
.fLength
, 0, fLength
); }
3661 UnicodeString::lastIndexOf(UChar c
,
3663 int32_t _length
) const
3664 { return doLastIndexOf(c
, start
, _length
); }
3667 UnicodeString::lastIndexOf(UChar32 c
,
3669 int32_t _length
) const {
3670 return doLastIndexOf(c
, start
, _length
);
3674 UnicodeString::lastIndexOf(UChar c
) const
3675 { return doLastIndexOf(c
, 0, fLength
); }
3678 UnicodeString::lastIndexOf(UChar32 c
) const {
3679 return lastIndexOf(c
, 0, fLength
);
3683 UnicodeString::lastIndexOf(UChar c
,
3684 int32_t start
) const {
3686 return doLastIndexOf(c
, start
, fLength
- start
);
3690 UnicodeString::lastIndexOf(UChar32 c
,
3691 int32_t start
) const {
3693 return lastIndexOf(c
, start
, fLength
- start
);
3697 UnicodeString::startsWith(const UnicodeString
& text
) const
3698 { return compare(0, text
.fLength
, text
, 0, text
.fLength
) == 0; }
3701 UnicodeString::startsWith(const UnicodeString
& srcText
,
3703 int32_t srcLength
) const
3704 { return doCompare(0, srcLength
, srcText
, srcStart
, srcLength
) == 0; }
3707 UnicodeString::startsWith(const UChar
*srcChars
,
3708 int32_t srcLength
) const
3709 { return doCompare(0, srcLength
, srcChars
, 0, srcLength
) == 0; }
3712 UnicodeString::startsWith(const UChar
*srcChars
,
3714 int32_t srcLength
) const
3715 { return doCompare(0, srcLength
, srcChars
, srcStart
, srcLength
) == 0;}
3718 UnicodeString::endsWith(const UnicodeString
& text
) const
3719 { return doCompare(fLength
- text
.fLength
, text
.fLength
,
3720 text
, 0, text
.fLength
) == 0; }
3723 UnicodeString::endsWith(const UnicodeString
& srcText
,
3725 int32_t srcLength
) const {
3726 srcText
.pinIndices(srcStart
, srcLength
);
3727 return doCompare(fLength
- srcLength
, srcLength
,
3728 srcText
, srcStart
, srcLength
) == 0;
3732 UnicodeString::endsWith(const UChar
*srcChars
,
3733 int32_t srcLength
) const {
3735 srcLength
= u_strlen(srcChars
);
3737 return doCompare(fLength
- srcLength
, srcLength
,
3738 srcChars
, 0, srcLength
) == 0;
3742 UnicodeString::endsWith(const UChar
*srcChars
,
3744 int32_t srcLength
) const {
3746 srcLength
= u_strlen(srcChars
+ srcStart
);
3748 return doCompare(fLength
- srcLength
, srcLength
,
3749 srcChars
, srcStart
, srcLength
) == 0;
3752 //========================================
3754 //========================================
3755 inline UnicodeString
&
3756 UnicodeString::replace(int32_t start
,
3758 const UnicodeString
& srcText
)
3759 { return doReplace(start
, _length
, srcText
, 0, srcText
.fLength
); }
3761 inline UnicodeString
&
3762 UnicodeString::replace(int32_t start
,
3764 const UnicodeString
& srcText
,
3767 { return doReplace(start
, _length
, srcText
, srcStart
, srcLength
); }
3769 inline UnicodeString
&
3770 UnicodeString::replace(int32_t start
,
3772 const UChar
*srcChars
,
3774 { return doReplace(start
, _length
, srcChars
, 0, srcLength
); }
3776 inline UnicodeString
&
3777 UnicodeString::replace(int32_t start
,
3779 const UChar
*srcChars
,
3782 { return doReplace(start
, _length
, srcChars
, srcStart
, srcLength
); }
3784 inline UnicodeString
&
3785 UnicodeString::replace(int32_t start
,
3788 { return doReplace(start
, _length
, &srcChar
, 0, 1); }
3790 inline UnicodeString
&
3791 UnicodeString::replace(int32_t start
,
3794 UChar buffer
[U16_MAX_LENGTH
];
3796 UBool isError
= FALSE
;
3797 U16_APPEND(buffer
, count
, U16_MAX_LENGTH
, srcChar
, isError
);
3798 return doReplace(start
, _length
, buffer
, 0, count
);
3801 inline UnicodeString
&
3802 UnicodeString::replaceBetween(int32_t start
,
3804 const UnicodeString
& srcText
)
3805 { return doReplace(start
, limit
- start
, srcText
, 0, srcText
.fLength
); }
3807 inline UnicodeString
&
3808 UnicodeString::replaceBetween(int32_t start
,
3810 const UnicodeString
& srcText
,
3813 { return doReplace(start
, limit
- start
, srcText
, srcStart
, srcLimit
- srcStart
); }
3815 inline UnicodeString
&
3816 UnicodeString::findAndReplace(const UnicodeString
& oldText
,
3817 const UnicodeString
& newText
)
3818 { return findAndReplace(0, fLength
, oldText
, 0, oldText
.fLength
,
3819 newText
, 0, newText
.fLength
); }
3821 inline UnicodeString
&
3822 UnicodeString::findAndReplace(int32_t start
,
3824 const UnicodeString
& oldText
,
3825 const UnicodeString
& newText
)
3826 { return findAndReplace(start
, _length
, oldText
, 0, oldText
.fLength
,
3827 newText
, 0, newText
.fLength
); }
3829 // ============================
3831 // ============================
3833 UnicodeString::doExtract(int32_t start
,
3835 UnicodeString
& target
) const
3836 { target
.replace(0, target
.fLength
, *this, start
, _length
); }
3839 UnicodeString::extract(int32_t start
,
3842 int32_t targetStart
) const
3843 { doExtract(start
, _length
, target
, targetStart
); }
3846 UnicodeString::extract(int32_t start
,
3848 UnicodeString
& target
) const
3849 { doExtract(start
, _length
, target
); }
// The definition below is only compiled when UCONFIG_NO_CONVERSION is 0.
// NOTE(review): the matching #endif is not visible in this listing.
3851 #if !UCONFIG_NO_CONVERSION
// extract() overload taking a codepage name and no explicit destination
// size (const member). Forwards to an extract() overload with one more
// argument, inserting a size value after dst: 0xffffffff when dst is
// non-null, 0 when dst is null. start, _length, dst and codepage are passed
// through unchanged, and the callee's result is returned.
// NOTE(review): the return-type line and the lines declaring _length and dst
// are absent from this listing.
3854 UnicodeString::extract(int32_t start
,
3857 const char *codepage
) const
3860 // This dstSize value will be checked explicitly
3861 return extract(start
, _length
, dst
, dst
!=0 ? 0xffffffff : 0, codepage
);
// extractBetween() (const member): (start, limit) flavour of extraction.
// The visible call converts the index pair to (start, length) form by passing
// limit - start as the length to doExtract(), together with dst and dstStart.
// NOTE(review): some lines of this definition (return type, the declarations
// of limit and dst, and any statements preceding the doExtract() call) are
// absent from this listing; whether start/limit are adjusted before the call
// cannot be seen here.
3867 UnicodeString::extractBetween(int32_t start
,
3870 int32_t dstStart
) const {
3873 doExtract(start
, limit
- start
, dst
, dstStart
);
// doCharAt() (const member): bounds-checked read of one code unit.
// Returns fArray[offset] when offset is in range, and kInvalidUChar otherwise.
// NOTE(review): the return-type line and the else/brace lines are absent
// from this listing.
3877 UnicodeString::doCharAt(int32_t offset
) const
// Both operands are cast to uint32_t, so a negative offset becomes a large
// unsigned value and fails the same single comparison that rejects
// offset >= fLength.
3879 if((uint32_t)offset
< (uint32_t)fLength
) {
3880 return fArray
[offset
];
3882 return kInvalidUChar
;
// charAt() (const member): returns doCharAt(offset) unchanged.
// NOTE(review): the return-type line is absent from this listing.
3887 UnicodeString::charAt(int32_t offset
) const
3888 { return doCharAt(offset
); }
// operator[] (const member): returns doCharAt(offset) unchanged, so it
// shares doCharAt()'s handling of out-of-range offsets.
// NOTE(review): the return-type line is absent from this listing.
3891 UnicodeString::operator[] (int32_t offset
) const
3892 { return doCharAt(offset
); }
// char32At() (const member): code-point read at offset.
// In range: invokes U16_GET(fArray, 0, offset, fLength, c), i.e. over fArray
// with 0 and fLength as the start and limit arguments, with the result
// delivered in c. Out of range: returns kInvalidUChar.
// NOTE(review): the return-type line, the declaration of c, the statement
// returning the in-range result and the else/brace lines are absent from
// this listing; presumably c is returned -- confirm.
3895 UnicodeString::char32At(int32_t offset
) const
// Unsigned comparison: a negative offset is rejected together with
// offset >= fLength.
3897 if((uint32_t)offset
< (uint32_t)fLength
) {
3899 U16_GET(fArray
, 0, offset
, fLength
, c
);
3902 return kInvalidUChar
;
// getChar32Start() (const member).
// When offset is in range (unsigned comparison against fLength, which also
// rejects negative offsets), U16_SET_CP_START(fArray, 0, offset) is applied
// to the local offset, with 0 as the start argument.
// NOTE(review): the return-type line, all return statements and the
// out-of-range branch are absent from this listing; what is returned in
// either case cannot be confirmed from here.
3907 UnicodeString::getChar32Start(int32_t offset
) const {
3908 if((uint32_t)offset
< (uint32_t)fLength
) {
3909 U16_SET_CP_START(fArray
, 0, offset
);
// getChar32Limit() (const member).
// When offset is in range (unsigned comparison against fLength, which also
// rejects negative offsets), U16_SET_CP_LIMIT(fArray, 0, offset, fLength) is
// applied to the local offset, with 0 and fLength as start and limit
// arguments.
// NOTE(review): the return-type line, all return statements and the
// out-of-range branch are absent from this listing; what is returned in
// either case cannot be confirmed from here.
3917 UnicodeString::getChar32Limit(int32_t offset
) const {
3918 if((uint32_t)offset
< (uint32_t)fLength
) {
3919 U16_SET_CP_LIMIT(fArray
, 0, offset
, fLength
);
// isEmpty() (const member): true exactly when fLength is 0.
// NOTE(review): the return-type line and closing brace are absent from this
// listing.
3927 UnicodeString::isEmpty() const {
3928 return fLength
== 0;
3931 //========================================
3932 // Write implementation methods
3933 //========================================
// getTerminatedBuffer(): non-const member returning const UChar *.
// The visible code is a three-way test; the statements inside each branch
// are absent from this listing, so only the conditions are documented here.
3934 inline const UChar
*
3935 UnicodeString::getTerminatedBuffer() {
// Case 1: either the kIsBogus or the kOpenGetBuffer bit is set in fFlags.
3936 if(fFlags
&(kIsBogus
|kOpenGetBuffer
)) {
// Case 2: there is room beyond the text (fLength < fCapacity) and the unit
// at index fLength is already 0.
3938 } else if(fLength
<fCapacity
&& fArray
[fLength
]==0) {
// Case 3: cloneArrayIfNeeded(fLength+1) reports success (a request sized
// for one unit more than the current length).
3940 } else if(cloneArrayIfNeeded(fLength
+1)) {
// Assignment from a single code unit.
// Calls doReplace(0, fLength, &ch, 0, 1): the replace range starts at 0 and
// spans fLength units (the current contents), and the source is the by-value
// ch presented as a one-element array. Returns the result of doReplace().
3948 inline UnicodeString
&
3949 UnicodeString::operator= (UChar ch
)
3950 { return doReplace(0, fLength
, &ch
, 0, 1); }
// Assignment from a UChar32.
// Delegates to replace(0, fLength, ch), i.e. a replace over the current
// contents (start 0, length fLength) using the replace() overload that
// accepts ch. Returns the result of that call.
3952 inline UnicodeString
&
3953 UnicodeString::operator= (UChar32 ch
)
3954 { return replace(0, fLength
, ch
); }
// setTo() from a sub-range of another UnicodeString.
// The visible statement replaces the current contents (start 0, length
// fLength) via doReplace() with srcText, passing srcStart and srcLength
// through unchanged, and returns the result.
// NOTE(review): the lines declaring srcStart/srcLength and any statements
// preceding the return are absent from this listing.
3956 inline UnicodeString
&
3957 UnicodeString::setTo(const UnicodeString
& srcText
,
3962 return doReplace(0, fLength
, srcText
, srcStart
, srcLength
);
// setTo() from srcText starting at srcStart and running to its end.
// NOTE(review): the line declaring srcStart and any statements preceding the
// pinIndex() call are absent from this listing.
3965 inline UnicodeString
&
3966 UnicodeString::setTo(const UnicodeString
& srcText
,
// srcStart is handed to srcText.pinIndex() before being used below;
// presumably this adjusts srcStart into srcText's valid index range --
// confirm against pinIndex().
3970 srcText
.pinIndex(srcStart
);
// Replace the current contents (start 0, length fLength) with the tail of
// srcText: source length is srcText.fLength - srcStart.
3971 return doReplace(0, fLength
, srcText
, srcStart
, srcText
.fLength
- srcStart
);
// setTo() from a whole UnicodeString.
// The visible statement replaces the current contents (start 0, length
// fLength) via doReplace() with all of srcText (offset 0, length
// srcText.fLength) and returns the result.
// NOTE(review): any statements preceding the return are absent from this
// listing.
3974 inline UnicodeString
&
3975 UnicodeString::setTo(const UnicodeString
& srcText
)
3978 return doReplace(0, fLength
, srcText
, 0, srcText
.fLength
);
// setTo() from a UChar array.
// The visible statement replaces the current contents (start 0, length
// fLength) via doReplace() with srcChars read from offset 0 for srcLength
// units, and returns the result.
// NOTE(review): the line declaring srcLength and any statements preceding
// the return are absent from this listing.
3981 inline UnicodeString
&
3982 UnicodeString::setTo(const UChar
*srcChars
,
3986 return doReplace(0, fLength
, srcChars
, 0, srcLength
);
// setTo() from a single code unit.
// The visible statement replaces the current contents (start 0, length
// fLength) via doReplace() with the by-value srcChar presented as a
// one-element array (its address, offset 0, length 1), and returns the
// result.
// NOTE(review): any statements preceding the return are absent from this
// listing.
3989 inline UnicodeString
&
3990 UnicodeString::setTo(UChar srcChar
)
3993 return doReplace(0, fLength
, &srcChar
, 0, 1);
// setTo() from a UChar32.
// The visible statement delegates to replace(0, fLength, srcChar), a replace
// over the current contents (start 0, length fLength), and returns the
// result.
// NOTE(review): any statements preceding the return are absent from this
// listing.
3996 inline UnicodeString
&
3997 UnicodeString::setTo(UChar32 srcChar
)
4000 return replace(0, fLength
, srcChar
);
// Append a single code unit.
// Calls doReplace(fLength, 0, &ch, 0, 1): a zero-length replace positioned
// at index fLength (the current end), whose source is the by-value ch
// presented as a one-element array. Returns the result of doReplace().
4003 inline UnicodeString
&
4004 UnicodeString::operator+= (UChar ch
)
4005 { return doReplace(fLength
, 0, &ch
, 0, 1); }
// Append a UChar32.
// ch is first staged in a local UTF-16 buffer through U16_APPEND, then the
// buffer is appended with a zero-length replace at index fLength.
// NOTE(review): the closing brace is absent from this listing.
4007 inline UnicodeString
&
4008 UnicodeString::operator+= (UChar32 ch
) {
// Scratch space sized by U16_MAX_LENGTH for the encoded form of ch.
4009 UChar buffer
[U16_MAX_LENGTH
];
// Index handed to U16_APPEND; used afterwards as the source length.
4010 int32_t _length
= 0;
// Passed to U16_APPEND; not examined afterwards in the visible lines.
4011 UBool isError
= FALSE
;
4012 U16_APPEND(buffer
, _length
, U16_MAX_LENGTH
, ch
, isError
);
// Zero-length replace at the current end: source is buffer, offset 0,
// length _length.
4013 return doReplace(fLength
, 0, buffer
, 0, _length
);
// Append a whole UnicodeString.
// Calls doReplace(fLength, 0, srcText, 0, srcText.fLength): a zero-length
// replace positioned at index fLength (the current end), whose source is all
// of srcText. Returns the result of doReplace().
4016 inline UnicodeString
&
4017 UnicodeString::operator+= (const UnicodeString
& srcText
)
4018 { return doReplace(fLength
, 0, srcText
, 0, srcText
.fLength
); }
// append() of a sub-range of another UnicodeString.
// Calls doReplace(fLength, 0, srcText, srcStart, srcLength): a zero-length
// replace positioned at index fLength (the current end), with srcStart and
// srcLength passed through unchanged. Returns the result of doReplace().
// NOTE(review): the lines declaring srcStart and srcLength are absent from
// this listing.
4020 inline UnicodeString
&
4021 UnicodeString::append(const UnicodeString
& srcText
,
4024 { return doReplace(fLength
, 0, srcText
, srcStart
, srcLength
); }
// append() of a whole UnicodeString.
// Calls doReplace(fLength, 0, srcText, 0, srcText.fLength): a zero-length
// replace positioned at index fLength (the current end), whose source is all
// of srcText. The visible body matches operator+=(const UnicodeString&).
4026 inline UnicodeString
&
4027 UnicodeString::append(const UnicodeString
& srcText
)
4028 { return doReplace(fLength
, 0, srcText
, 0, srcText
.fLength
); }
// append() of a sub-range of a UChar array.
// Calls doReplace(fLength, 0, srcChars, srcStart, srcLength): a zero-length
// replace positioned at index fLength (the current end), with srcStart and
// srcLength passed through unchanged. Returns the result of doReplace().
// NOTE(review): the lines declaring srcStart and srcLength are absent from
// this listing.
4030 inline UnicodeString
&
4031 UnicodeString::append(const UChar
*srcChars
,
4034 { return doReplace(fLength
, 0, srcChars
, srcStart
, srcLength
); }
// append() of the first srcLength units of a UChar array.
// Calls doReplace(fLength, 0, srcChars, 0, srcLength): a zero-length replace
// positioned at index fLength (the current end), reading srcChars from
// offset 0. Returns the result of doReplace().
// NOTE(review): the line declaring srcLength is absent from this listing.
4036 inline UnicodeString
&
4037 UnicodeString::append(const UChar
*srcChars
,
4039 { return doReplace(fLength
, 0, srcChars
, 0, srcLength
); }
// append() of a single code unit.
// Calls doReplace(fLength, 0, &srcChar, 0, 1): a zero-length replace
// positioned at index fLength (the current end), whose source is the
// by-value srcChar presented as a one-element array.
4041 inline UnicodeString
&
4042 UnicodeString::append(UChar srcChar
)
4043 { return doReplace(fLength
, 0, &srcChar
, 0, 1); }
// append() of a UChar32.
// srcChar is first staged in a local UTF-16 buffer through U16_APPEND, then
// the buffer is appended with a zero-length replace at index fLength.
// NOTE(review): the closing brace is absent from this listing.
4045 inline UnicodeString
&
4046 UnicodeString::append(UChar32 srcChar
) {
// Scratch space sized by U16_MAX_LENGTH for the encoded form of srcChar.
4047 UChar buffer
[U16_MAX_LENGTH
];
// Index handed to U16_APPEND; used afterwards as the source length.
4048 int32_t _length
= 0;
// Passed to U16_APPEND; not examined afterwards in the visible lines.
4049 UBool isError
= FALSE
;
4050 U16_APPEND(buffer
, _length
, U16_MAX_LENGTH
, srcChar
, isError
);
// Zero-length replace at the current end: source is buffer, offset 0,
// length _length.
4051 return doReplace(fLength
, 0, buffer
, 0, _length
);
// insert() of a sub-range of another UnicodeString at index start.
// Calls doReplace(start, 0, srcText, srcStart, srcLength): the replaced
// length is 0, so nothing is removed at start; srcStart and srcLength are
// passed through unchanged. Returns the result of doReplace().
// NOTE(review): the lines declaring srcStart and srcLength are absent from
// this listing.
4054 inline UnicodeString
&
4055 UnicodeString::insert(int32_t start
,
4056 const UnicodeString
& srcText
,
4059 { return doReplace(start
, 0, srcText
, srcStart
, srcLength
); }
// insert() of a whole UnicodeString at index start.
// Calls doReplace(start, 0, srcText, 0, srcText.fLength): the replaced
// length is 0 and the source is all of srcText. Returns the result of
// doReplace().
4061 inline UnicodeString
&
4062 UnicodeString::insert(int32_t start
,
4063 const UnicodeString
& srcText
)
4064 { return doReplace(start
, 0, srcText
, 0, srcText
.fLength
); }
// insert() of a sub-range of a UChar array at index start.
// Calls doReplace(start, 0, srcChars, srcStart, srcLength): the replaced
// length is 0; srcStart and srcLength are passed through unchanged. Returns
// the result of doReplace().
// NOTE(review): the lines declaring srcStart and srcLength are absent from
// this listing.
4066 inline UnicodeString
&
4067 UnicodeString::insert(int32_t start
,
4068 const UChar
*srcChars
,
4071 { return doReplace(start
, 0, srcChars
, srcStart
, srcLength
); }
// insert() of the first srcLength units of a UChar array at index start.
// Calls doReplace(start, 0, srcChars, 0, srcLength): the replaced length is
// 0 and srcChars is read from offset 0. Returns the result of doReplace().
// NOTE(review): the line declaring srcLength is absent from this listing.
4073 inline UnicodeString
&
4074 UnicodeString::insert(int32_t start
,
4075 const UChar
*srcChars
,
4077 { return doReplace(start
, 0, srcChars
, 0, srcLength
); }
// insert() of a single code unit at index start.
// Calls doReplace(start, 0, &srcChar, 0, 1): the replaced length is 0 and
// the source is srcChar presented as a one-element array (its address,
// offset 0, length 1). Returns the result of doReplace().
// NOTE(review): the line declaring srcChar is absent from this listing;
// its type is inferred only from the &srcChar usage -- confirm.
4079 inline UnicodeString
&
4080 UnicodeString::insert(int32_t start
,
4082 { return doReplace(start
, 0, &srcChar
, 0, 1); }
// insert() that delegates to replace() rather than doReplace().
// Calls replace(start, 0, srcChar): the replaced length is 0, and srcChar is
// passed by value to whichever replace() overload matches its type. Returns
// the result of that call.
// NOTE(review): the line declaring srcChar is absent from this listing;
// presumably this is the UChar32 overload -- confirm.
4084 inline UnicodeString
&
4085 UnicodeString::insert(int32_t start
,
4087 { return replace(start
, 0, srcChar
); }
// remove() with no arguments; declared return type is UnicodeString&.
// NOTE(review): every statement of the body is absent from this listing;
// only the original comment below survives, so the implementation cannot be
// described from here.
4090 inline UnicodeString
&
4091 UnicodeString::remove()
4093 // remove() of a bogus string makes the string empty and non-bogus
// remove(start, _length).
// NOTE(review): the line declaring _length and the statements inside the
// special-case branch are absent from this listing.
4102 inline UnicodeString
&
4103 UnicodeString::remove(int32_t start
,
// Special case: start is at or before index 0 and _length is exactly
// INT32_MAX.
4106 if(start
<= 0 && _length
== INT32_MAX
) {
4107 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
// Visible path: replace the given range with an empty source (NULL pointer,
// offset 0, length 0) via doReplace() and return its result.
4110 return doReplace(start
, _length
, NULL
, 0, 0);
// removeBetween(start, limit).
// Converts the index pair to (start, length) form by computing the length as
// limit - start, then calls doReplace() with an empty source (NULL pointer,
// offset 0, length 0). Returns the result of doReplace().
// NOTE(review): the line declaring limit is absent from this listing.
4114 inline UnicodeString
&
4115 UnicodeString::removeBetween(int32_t start
,
4117 { return doReplace(start
, limit
- start
, NULL
, 0, 0); }
// truncate(targetLength).
// NOTE(review): the return-type line, every return statement, the statements
// of the first branch and any final else branch are absent from this
// listing; the returned values cannot be confirmed from here.
4120 UnicodeString::truncate(int32_t targetLength
)
// Branch 1: the string reports isBogus() and the requested length is 0.
4122 if(isBogus() && targetLength
== 0) {
4123 // truncate(0) of a bogus string makes the string empty and non-bogus
// Branch 2: unsigned comparison, so a negative targetLength (a large
// unsigned value) never qualifies; only 0 <= targetLength < fLength does.
4126 } else if((uint32_t)targetLength
< (uint32_t)fLength
) {
// Shorten by overwriting fLength; no other member is touched in the
// visible lines.
4127 fLength
= targetLength
;
// reverse() with no arguments.
// Calls doReverse(0, fLength), i.e. start 0 and length fLength (the current
// contents), and returns its result.
4134 inline UnicodeString
&
4135 UnicodeString::reverse()
4136 { return doReverse(0, fLength
); }
// reverse(start, _length).
// Pure forwarder: calls doReverse(start, _length) with both arguments passed
// through unchanged and returns its result.
// NOTE(review): the line declaring _length is absent from this listing.
4138 inline UnicodeString
&
4139 UnicodeString::reverse(int32_t start
,
4141 { return doReverse(start
, _length
); }