]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/unistr.h
ICU-511.35.tar.gz
[apple/icu.git] / icuSources / common / unicode / unistr.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
51004dcb 3* Copyright (C) 1998-2013, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11* Date Name Description
12* 09/25/98 stephen Creation.
13* 11/11/98 stephen Changed per 11/9 code review.
14* 04/20/99 stephen Overhauled per 4/16 code review.
15* 11/18/99 aliu Made to inherit from Replaceable. Added method
16* handleReplaceBetween(); other methods unchanged.
17* 06/25/01 grhoten Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
73c04bcf
A
24/**
25 * \file
26 * \brief C++ API: Unicode String
27 */
28
729e4ab9 29#include "unicode/utypes.h"
b75a7d8f 30#include "unicode/rep.h"
729e4ab9
A
31#include "unicode/std_string.h"
32#include "unicode/stringpiece.h"
33#include "unicode/bytestream.h"
4388f060 34#include "unicode/ucasemap.h"
b75a7d8f
A
35
36struct UConverter; // unicode/ucnv.h
37class StringThreadTest;
38
39#ifndef U_COMPARE_CODE_POINT_ORDER
40/* see also ustring.h and unorm.h */
41/**
42 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
43 * Compare strings in code point order instead of code unit order.
374ca955 44 * @stable ICU 2.2
b75a7d8f
A
45 */
46#define U_COMPARE_CODE_POINT_ORDER 0x8000
47#endif
48
49#ifndef USTRING_H
73c04bcf
A
50/**
51 * \ingroup ustring_ustrlen
52 */
374ca955 53U_STABLE int32_t U_EXPORT2
b75a7d8f
A
54u_strlen(const UChar *s);
55#endif
56
51004dcb
A
57#ifndef U_HIDE_INTERNAL_API
58/**
59 * \def U_STRING_CASE_MAPPER_DEFINED
60 * @internal
61 */
62
4388f060
A
63#ifndef U_STRING_CASE_MAPPER_DEFINED
64#define U_STRING_CASE_MAPPER_DEFINED
65
66/**
67 * Internal string case mapping function type.
68 * @internal
69 */
// UStringCaseMapper names a function type (not a function-pointer type); code
// that needs to store or pass such a function would use UStringCaseMapper *.
// NOTE(review): the parameter roles noted below are inferred from the names
// and types alone -- confirm against the case-mapping implementations.
// NOTE(review): the int32_t result is presumably the length of the mapped
// output -- confirm.
70typedef int32_t U_CALLCONV
71UStringCaseMapper(const UCaseMap *csm, // presumably the case-map settings object (UCaseMap, see unicode/ucasemap.h)
72 UChar *dest, int32_t destCapacity, // presumably the output buffer and its capacity in UChars
73 const UChar *src, int32_t srcLength, // presumably the input text and its length in UChars
74 UErrorCode *pErrorCode); // presumably the usual ICU in/out error code -- TODO confirm
75
76#endif
51004dcb 77#endif /* U_HIDE_INTERNAL_API */
4388f060 78
b75a7d8f
A
79U_NAMESPACE_BEGIN
80
4388f060 81class BreakIterator; // unicode/brkiter.h
b75a7d8f 82class Locale; // unicode/locid.h
b75a7d8f 83class StringCharacterIterator;
4388f060 84class UnicodeStringAppendable; // unicode/appendable.h
b75a7d8f
A
85
86/* The <iostream> include has been moved to unicode/ustream.h */
87
374ca955
A
88/**
89 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
90 * which constructs a Unicode string from an invariant-character char * string.
91 * About invariant characters see utypes.h.
92 * This constructor has no runtime dependency on conversion code and is
93 * therefore recommended over ones taking a charset name string
94 * (where the empty string "" indicates invariant-character conversion).
95 *
73c04bcf 96 * @stable ICU 3.2
374ca955 97 */
4388f060 98#define US_INV icu::UnicodeString::kInvariant
374ca955 99
b75a7d8f
A
100/**
101 * Unicode String literals in C++.
102 * Dependent on the platform properties, different UnicodeString
103 * constructors should be used to create a UnicodeString object from
104 * a string literal.
105 * The macros are defined for maximum performance.
106 * They work only for strings that contain "invariant characters", i.e.,
107 * only latin letters, digits, and some punctuation.
108 * See utypes.h for details.
109 *
110 * The string parameter must be a C string literal.
111 * The length of the string, not including the terminating
112 * <code>NUL</code>, must be specified as a constant.
113 * The U_STRING_DECL macro should be invoked exactly once for one
114 * such string variable before it is used.
115 * @stable ICU 2.0
116 */
// Pick the UNICODE_STRING(cs, _length) expansion that fits the platform.
// The branches are tested in order; the first one that matches wins.
// NOTE(review): the (TRUE, const UChar *, _length) argument form used by the
// first three branches presumably selects a constructor that aliases the
// literal instead of copying it -- confirm against the constructor
// declarations, which are not visible in this part of the file.
//
// Branch 1: U_DECLARE_UTF16 is defined; wrap the literal with it and pass
// the result as const UChar *.
46f4442e 117#if defined(U_DECLARE_UTF16)
4388f060 118# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
// Branch 2: wchar_t has the same size as UChar, and either the charset family
// is ASCII or UChar is 2 bytes and U_WCHAR_IS_UTF16 is defined; paste an L
// prefix onto the literal and cast the wide literal to const UChar *.
46f4442e 119#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
4388f060 120# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
// Branch 3: UChar is one byte wide on an ASCII-family platform; the narrow
// literal itself is cast to const UChar *.
b75a7d8f 121#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
4388f060 122# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
// Fallback: no literal form can be reinterpreted as UChar data, so use the
// (cs, _length, US_INV) invariant-character constructor form instead.
b75a7d8f 123#else
4388f060 124# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
b75a7d8f
A
125#endif
126
127/**
128 * Unicode String literals in C++.
129 * Dependent on the platform properties, different UnicodeString
130 * constructors should be used to create a UnicodeString object from
131 * a string literal.
132 * The macros are defined for improved performance.
133 * They work only for strings that contain "invariant characters", i.e.,
134 * only latin letters, digits, and some punctuation.
135 * See utypes.h for details.
136 *
137 * The string parameter must be a C string literal.
138 * @stable ICU 2.0
139 */
46f4442e 140#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
b75a7d8f 141
4388f060
A
142/**
143 * \def UNISTR_FROM_CHAR_EXPLICIT
144 * This can be defined to be empty or "explicit".
145 * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32)
146 * constructors are marked as explicit, preventing their inadvertent use.
51004dcb 147 * @stable ICU 49
4388f060
A
148 */
// Only choose a default when the including code has not already defined
// UNISTR_FROM_CHAR_EXPLICIT itself (to empty or to "explicit").
149#ifndef UNISTR_FROM_CHAR_EXPLICIT
150# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
151 // Auto-"explicit" in ICU library code: any of the *_IMPLEMENTATION macros
 // tested above selects this branch.
152# define UNISTR_FROM_CHAR_EXPLICIT explicit
153# else
154 // Empty by default for source code compatibility: code outside the ICU
 // libraries keeps the implicit conversions unless it opts in.
155# define UNISTR_FROM_CHAR_EXPLICIT
156# endif
157#endif
158
159/**
160 * \def UNISTR_FROM_STRING_EXPLICIT
161 * This can be defined to be empty or "explicit".
162 * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *)
163 * constructors are marked as explicit, preventing their inadvertent use.
164 *
165 * In particular, this helps prevent accidentally depending on ICU conversion code
166 * by passing a string literal into an API with a const UnicodeString & parameter.
51004dcb 167 * @stable ICU 49
4388f060
A
168 */
// Only choose a default when the including code has not already defined
// UNISTR_FROM_STRING_EXPLICIT itself (to empty or to "explicit").
169#ifndef UNISTR_FROM_STRING_EXPLICIT
170# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
171 // Auto-"explicit" in ICU library code: any of the *_IMPLEMENTATION macros
 // tested above selects this branch.
172# define UNISTR_FROM_STRING_EXPLICIT explicit
173# else
174 // Empty by default for source code compatibility: code outside the ICU
 // libraries keeps the implicit conversions unless it opts in.
175# define UNISTR_FROM_STRING_EXPLICIT
176# endif
177#endif
178
b75a7d8f
A
179/**
180 * UnicodeString is a string class that stores Unicode characters directly and provides
181 * functionality similar to that of the Java String and StringBuffer classes.
182 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
183 *
184 * The UnicodeString class is not suitable for subclassing.
185 *
186 * <p>For an overview of Unicode strings in C and C++ see the
46f4442e 187 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
b75a7d8f
A
188 *
189 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
73c04bcf
A
190 * A Unicode character may be stored with either one code unit
191 * (the most common case) or with a matched pair of special code units
192 * ("surrogates"). The data type for code units is UChar.
b75a7d8f
A
193 * For single-character handling, a Unicode character code <em>point</em> is a value
194 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
195 *
196 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
197 * This is the same as with multi-byte char* strings in traditional string handling.
198 * Operations on partial strings typically do not test for code point boundaries.
199 * If necessary, the user needs to take care of such boundaries by testing for the code unit
200 * values or by using functions like
201 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
202 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
203 *
204 * UnicodeString methods are more lenient with regard to input parameter values
205 * than other ICU APIs. In particular:
206 * - If indexes are out of bounds for a UnicodeString object
207 * (<0 or >length()) then they are "pinned" to the nearest boundary.
208 * - If primitive string pointer values (e.g., const UChar * or char *)
209 * for input strings are NULL, then those input string parameters are treated
210 * as if they pointed to an empty string.
211 * However, this is <em>not</em> the case for char * parameters for charset names
212 * or other IDs.
213 * - Most UnicodeString methods do not take a UErrorCode parameter because
214 * there are usually very few opportunities for failure other than a shortage
215 * of memory, error codes in low-level C++ string methods would be inconvenient,
216 * and the error code as the last parameter (ICU convention) would prevent
217 * the use of default parameter values.
218 * Instead, such methods set the UnicodeString into a "bogus" state
219 * (see isBogus()) if an error occurs.
220 *
221 * In string comparisons, two UnicodeString objects that are both "bogus"
222 * compare equal (to be transitive and prevent endless loops in sorting),
223 * and a "bogus" string compares less than any non-"bogus" one.
224 *
225 * Const UnicodeString methods are thread-safe. Multiple threads can use
226 * const methods on the same UnicodeString object simultaneously,
227 * but non-const methods must not be called concurrently (in multiple threads)
228 * with any other (const or non-const) methods.
229 *
230 * Similarly, const UnicodeString & parameters are thread-safe.
231 * One object may be passed in as such a parameter concurrently in multiple threads.
232 * This includes the const UnicodeString & parameters for
233 * copy construction, assignment, and cloning.
234 *
235 * <p>UnicodeString uses several storage methods.
236 * String contents can be stored inside the UnicodeString object itself,
237 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
238 * Most of this is done transparently, but careful aliasing in particular provides
239 * significant performance improvements.
240 * Also, the internal buffer is accessible via special functions.
241 * For details see the
46f4442e 242 * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
b75a7d8f
A
243 *
244 * @see utf.h
245 * @see CharacterIterator
246 * @stable ICU 2.0
247 */
248class U_COMMON_API UnicodeString : public Replaceable
249{
250public:
251
374ca955
A
252 /**
253 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
254 * which constructs a Unicode string from an invariant-character char * string.
255 * Use the macro US_INV instead of the full qualification for this value.
256 *
257 * @see US_INV
73c04bcf 258 * @stable ICU 3.2
374ca955
A
259 */
260 enum EInvariant {
261 /**
262 * @see EInvariant
73c04bcf 263 * @stable ICU 3.2
374ca955
A
264 */
265 kInvariant
266 };
267
b75a7d8f
A
268 //========================================
269 // Read-only operations
270 //========================================
271
272 /* Comparison - bitwise only - for international comparison use collation */
273
274 /**
275 * Equality operator. Performs only bitwise comparison.
276 * @param text The UnicodeString to compare to this one.
277 * @return TRUE if <TT>text</TT> contains the same characters as this one,
278 * FALSE otherwise.
279 * @stable ICU 2.0
280 */
281 inline UBool operator== (const UnicodeString& text) const;
282
283 /**
284 * Inequality operator. Performs only bitwise comparison.
285 * @param text The UnicodeString to compare to this one.
286 * @return FALSE if <TT>text</TT> contains the same characters as this one,
287 * TRUE otherwise.
288 * @stable ICU 2.0
289 */
290 inline UBool operator!= (const UnicodeString& text) const;
291
292 /**
293 * Greater than operator. Performs only bitwise comparison.
294 * @param text The UnicodeString to compare to this one.
374ca955
A
295 * @return TRUE if the characters in this are bitwise
296 * greater than the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
297 * @stable ICU 2.0
298 */
299 inline UBool operator> (const UnicodeString& text) const;
300
301 /**
302 * Less than operator. Performs only bitwise comparison.
303 * @param text The UnicodeString to compare to this one.
374ca955
A
304 * @return TRUE if the characters in this are bitwise
305 * less than the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
306 * @stable ICU 2.0
307 */
308 inline UBool operator< (const UnicodeString& text) const;
309
310 /**
311 * Greater than or equal operator. Performs only bitwise comparison.
312 * @param text The UnicodeString to compare to this one.
374ca955
A
313 * @return TRUE if the characters in this are bitwise
314 * greater than or equal to the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
315 * @stable ICU 2.0
316 */
317 inline UBool operator>= (const UnicodeString& text) const;
318
319 /**
320 * Less than or equal operator. Performs only bitwise comparison.
321 * @param text The UnicodeString to compare to this one.
374ca955
A
322 * @return TRUE if the characters in this are bitwise
323 * less than or equal to the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
324 * @stable ICU 2.0
325 */
326 inline UBool operator<= (const UnicodeString& text) const;
327
328 /**
329 * Compare the characters bitwise in this UnicodeString to
374ca955 330 * the characters in <code>text</code>.
b75a7d8f 331 * @param text The UnicodeString to compare to this one.
374ca955
A
332 * @return The result of bitwise character comparison: 0 if this
333 * contains the same characters as <code>text</code>, -1 if the characters in
334 * this are bitwise less than the characters in <code>text</code>, +1 if the
335 * characters in this are bitwise greater than the characters
336 * in <code>text</code>.
b75a7d8f
A
337 * @stable ICU 2.0
338 */
339 inline int8_t compare(const UnicodeString& text) const;
340
341 /**
374ca955
A
342 * Compare the characters bitwise in the range
343 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
344 * in <TT>text</TT>
b75a7d8f
A
345 * @param start the offset at which the compare operation begins
346 * @param length the number of characters in this to compare.
374ca955
A
347 * @param text the other text to be compared against this string.
348 * @return The result of bitwise character comparison: 0 if this
349 * contains the same characters as <code>text</code>, -1 if the characters in
350 * this are bitwise less than the characters in <code>text</code>, +1 if the
351 * characters in this are bitwise greater than the characters
352 * in <code>text</code>.
b75a7d8f
A
353 * @stable ICU 2.0
354 */
355 inline int8_t compare(int32_t start,
356 int32_t length,
374ca955 357 const UnicodeString& text) const;
b75a7d8f
A
358
359 /**
374ca955
A
360 * Compare the characters bitwise in the range
361 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
362 * in <TT>srcText</TT> in the range
363 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
364 * @param start the offset at which the compare operation begins
365 * @param length the number of characters in this to compare.
366 * @param srcText the text to be compared
367 * @param srcStart the offset into <TT>srcText</TT> to start comparison
368 * @param srcLength the number of characters in <TT>srcText</TT> to compare
374ca955
A
369 * @return The result of bitwise character comparison: 0 if this
370 * contains the same characters as <code>srcText</code>, -1 if the characters in
371 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
372 * characters in this are bitwise greater than the characters
373 * in <code>srcText</code>.
b75a7d8f
A
374 * @stable ICU 2.0
375 */
376 inline int8_t compare(int32_t start,
377 int32_t length,
378 const UnicodeString& srcText,
379 int32_t srcStart,
380 int32_t srcLength) const;
381
382 /**
374ca955 383 * Compare the characters bitwise in this UnicodeString with the first
b75a7d8f
A
384 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
385 * @param srcChars The characters to compare to this UnicodeString.
386 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
374ca955
A
387 * @return The result of bitwise character comparison: 0 if this
388 * contains the same characters as <code>srcChars</code>, -1 if the characters in
389 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
390 * characters in this are bitwise greater than the characters
391 * in <code>srcChars</code>.
b75a7d8f
A
392 * @stable ICU 2.0
393 */
394 inline int8_t compare(const UChar *srcChars,
395 int32_t srcLength) const;
396
397 /**
374ca955
A
398 * Compare the characters bitwise in the range
399 * [<TT>start</TT>, <TT>start + length</TT>) with the first
b75a7d8f
A
400 * <TT>length</TT> characters in <TT>srcChars</TT>
401 * @param start the offset at which the compare operation begins
402 * @param length the number of characters to compare.
403 * @param srcChars the characters to be compared
374ca955
A
404 * @return The result of bitwise character comparison: 0 if this
405 * contains the same characters as <code>srcChars</code>, -1 if the characters in
406 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
407 * characters in this are bitwise greater than the characters
408 * in <code>srcChars</code>.
b75a7d8f
A
409 * @stable ICU 2.0
410 */
411 inline int8_t compare(int32_t start,
412 int32_t length,
413 const UChar *srcChars) const;
414
415 /**
374ca955
A
416 * Compare the characters bitwise in the range
417 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
418 * in <TT>srcChars</TT> in the range
419 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
420 * @param start the offset at which the compare operation begins
421 * @param length the number of characters in this to compare
422 * @param srcChars the characters to be compared
423 * @param srcStart the offset into <TT>srcChars</TT> to start comparison
424 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
374ca955
A
425 * @return The result of bitwise character comparison: 0 if this
426 * contains the same characters as <code>srcChars</code>, -1 if the characters in
427 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
428 * characters in this are bitwise greater than the characters
429 * in <code>srcChars</code>.
b75a7d8f
A
430 * @stable ICU 2.0
431 */
432 inline int8_t compare(int32_t start,
433 int32_t length,
434 const UChar *srcChars,
435 int32_t srcStart,
436 int32_t srcLength) const;
437
438 /**
374ca955
A
439 * Compare the characters bitwise in the range
440 * [<TT>start</TT>, <TT>limit</TT>) with the characters
441 * in <TT>srcText</TT> in the range
442 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
b75a7d8f
A
443 * @param start the offset at which the compare operation begins
444 * @param limit the offset immediately following the compare operation
445 * @param srcText the text to be compared
446 * @param srcStart the offset into <TT>srcText</TT> to start comparison
447 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
374ca955
A
448 * @return The result of bitwise character comparison: 0 if this
449 * contains the same characters as <code>srcText</code>, -1 if the characters in
450 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
451 * characters in this are bitwise greater than the characters
452 * in <code>srcText</code>.
b75a7d8f
A
453 * @stable ICU 2.0
454 */
455 inline int8_t compareBetween(int32_t start,
456 int32_t limit,
457 const UnicodeString& srcText,
458 int32_t srcStart,
459 int32_t srcLimit) const;
460
461 /**
462 * Compare two Unicode strings in code point order.
46f4442e 463 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
464 * if supplementary characters are present:
465 *
466 * In UTF-16, supplementary characters (with code points U+10000 and above) are
467 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
468 * which means that they compare as less than some other BMP characters like U+feff.
469 * This function compares Unicode strings in code point order.
470 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
471 *
472 * @param text Another string to compare this one to.
473 * @return a negative/zero/positive integer corresponding to whether
474 * this string is less than/equal to/greater than the second one
475 * in code point order
476 * @stable ICU 2.0
477 */
478 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
479
480 /**
481 * Compare two Unicode strings in code point order.
46f4442e 482 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
483 * if supplementary characters are present:
484 *
485 * In UTF-16, supplementary characters (with code points U+10000 and above) are
486 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
487 * which means that they compare as less than some other BMP characters like U+feff.
488 * This function compares Unicode strings in code point order.
489 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
490 *
491 * @param start The start offset in this string at which the compare operation begins.
492 * @param length The number of code units from this string to compare.
493 * @param srcText Another string to compare this one to.
494 * @return a negative/zero/positive integer corresponding to whether
495 * this string is less than/equal to/greater than the second one
496 * in code point order
497 * @stable ICU 2.0
498 */
499 inline int8_t compareCodePointOrder(int32_t start,
500 int32_t length,
501 const UnicodeString& srcText) const;
502
503 /**
504 * Compare two Unicode strings in code point order.
46f4442e 505 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
506 * if supplementary characters are present:
507 *
508 * In UTF-16, supplementary characters (with code points U+10000 and above) are
509 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
510 * which means that they compare as less than some other BMP characters like U+feff.
511 * This function compares Unicode strings in code point order.
512 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
513 *
514 * @param start The start offset in this string at which the compare operation begins.
515 * @param length The number of code units from this string to compare.
516 * @param srcText Another string to compare this one to.
517 * @param srcStart The start offset in that string at which the compare operation begins.
518 * @param srcLength The number of code units from that string to compare.
519 * @return a negative/zero/positive integer corresponding to whether
520 * this string is less than/equal to/greater than the second one
521 * in code point order
522 * @stable ICU 2.0
523 */
524 inline int8_t compareCodePointOrder(int32_t start,
525 int32_t length,
526 const UnicodeString& srcText,
527 int32_t srcStart,
528 int32_t srcLength) const;
529
530 /**
531 * Compare two Unicode strings in code point order.
46f4442e 532 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
533 * if supplementary characters are present:
534 *
535 * In UTF-16, supplementary characters (with code points U+10000 and above) are
536 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
537 * which means that they compare as less than some other BMP characters like U+feff.
538 * This function compares Unicode strings in code point order.
539 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
540 *
541 * @param srcChars A pointer to another string to compare this one to.
542 * @param srcLength The number of code units from that string to compare.
543 * @return a negative/zero/positive integer corresponding to whether
544 * this string is less than/equal to/greater than the second one
545 * in code point order
546 * @stable ICU 2.0
547 */
548 inline int8_t compareCodePointOrder(const UChar *srcChars,
549 int32_t srcLength) const;
550
551 /**
552 * Compare two Unicode strings in code point order.
46f4442e 553 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
554 * if supplementary characters are present:
555 *
556 * In UTF-16, supplementary characters (with code points U+10000 and above) are
557 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
558 * which means that they compare as less than some other BMP characters like U+feff.
559 * This function compares Unicode strings in code point order.
560 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
561 *
562 * @param start The start offset in this string at which the compare operation begins.
563 * @param length The number of code units from this string to compare.
564 * @param srcChars A pointer to another string to compare this one to.
565 * @return a negative/zero/positive integer corresponding to whether
566 * this string is less than/equal to/greater than the second one
567 * in code point order
568 * @stable ICU 2.0
569 */
570 inline int8_t compareCodePointOrder(int32_t start,
571 int32_t length,
572 const UChar *srcChars) const;
573
574 /**
575 * Compare two Unicode strings in code point order.
46f4442e 576 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
577 * if supplementary characters are present:
578 *
579 * In UTF-16, supplementary characters (with code points U+10000 and above) are
580 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
581 * which means that they compare as less than some other BMP characters like U+feff.
582 * This function compares Unicode strings in code point order.
583 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
584 *
585 * @param start The start offset in this string at which the compare operation begins.
586 * @param length The number of code units from this string to compare.
587 * @param srcChars A pointer to another string to compare this one to.
588 * @param srcStart The start offset in that string at which the compare operation begins.
589 * @param srcLength The number of code units from that string to compare.
590 * @return a negative/zero/positive integer corresponding to whether
591 * this string is less than/equal to/greater than the second one
592 * in code point order
593 * @stable ICU 2.0
594 */
595 inline int8_t compareCodePointOrder(int32_t start,
596 int32_t length,
597 const UChar *srcChars,
598 int32_t srcStart,
599 int32_t srcLength) const;
600
601 /**
602 * Compare two Unicode strings in code point order.
46f4442e 603 * The result may be different from the results of compare(), operator<, etc.
b75a7d8f
A
604 * if supplementary characters are present:
605 *
606 * In UTF-16, supplementary characters (with code points U+10000 and above) are
607 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
608 * which means that they compare as less than some other BMP characters like U+feff.
609 * This function compares Unicode strings in code point order.
610 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
611 *
612 * @param start The start offset in this string at which the compare operation begins.
613 * @param limit The offset after the last code unit from this string to compare.
614 * @param srcText Another string to compare this one to.
615 * @param srcStart The start offset in that string at which the compare operation begins.
616 * @param srcLimit The offset after the last code unit from that string to compare.
617 * @return a negative/zero/positive integer corresponding to whether
618 * this string is less than/equal to/greater than the second one
619 * in code point order
620 * @stable ICU 2.0
621 */
622 inline int8_t compareCodePointOrderBetween(int32_t start,
623 int32_t limit,
624 const UnicodeString& srcText,
625 int32_t srcStart,
626 int32_t srcLimit) const;
627
628 /**
629 * Compare two strings case-insensitively using full case folding.
630 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
631 *
632 * @param text Another string to compare this one to.
633 * @param options A bit set of options:
634 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
635 * Comparison in code unit order with default case folding.
636 *
637 * - U_COMPARE_CODE_POINT_ORDER
638 * Set to choose code point order instead of code unit order
639 * (see u_strCompare for details).
640 *
641 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
642 *
643 * @return A negative, zero, or positive integer indicating the comparison result.
644 * @stable ICU 2.0
645 */
646 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
647
648 /**
649 * Compare two strings case-insensitively using full case folding.
650 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
651 *
652 * @param start The start offset in this string at which the compare operation begins.
653 * @param length The number of code units from this string to compare.
654 * @param srcText Another string to compare this one to.
655 * @param options A bit set of options:
656 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
657 * Comparison in code unit order with default case folding.
658 *
659 * - U_COMPARE_CODE_POINT_ORDER
660 * Set to choose code point order instead of code unit order
661 * (see u_strCompare for details).
662 *
663 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
664 *
665 * @return A negative, zero, or positive integer indicating the comparison result.
666 * @stable ICU 2.0
667 */
668 inline int8_t caseCompare(int32_t start,
669 int32_t length,
670 const UnicodeString& srcText,
671 uint32_t options) const;
672
673 /**
674 * Compare two strings case-insensitively using full case folding.
675 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
676 *
677 * @param start The start offset in this string at which the compare operation begins.
678 * @param length The number of code units from this string to compare.
679 * @param srcText Another string to compare this one to.
680 * @param srcStart The start offset in that string at which the compare operation begins.
681 * @param srcLength The number of code units from that string to compare.
682 * @param options A bit set of options:
683 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
684 * Comparison in code unit order with default case folding.
685 *
686 * - U_COMPARE_CODE_POINT_ORDER
687 * Set to choose code point order instead of code unit order
688 * (see u_strCompare for details).
689 *
690 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
691 *
692 * @return A negative, zero, or positive integer indicating the comparison result.
693 * @stable ICU 2.0
694 */
695 inline int8_t caseCompare(int32_t start,
696 int32_t length,
697 const UnicodeString& srcText,
698 int32_t srcStart,
699 int32_t srcLength,
700 uint32_t options) const;
701
702 /**
703 * Compare two strings case-insensitively using full case folding.
704 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
705 *
706 * @param srcChars A pointer to another string to compare this one to.
707 * @param srcLength The number of code units from that string to compare.
708 * @param options A bit set of options:
709 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
710 * Comparison in code unit order with default case folding.
711 *
712 * - U_COMPARE_CODE_POINT_ORDER
713 * Set to choose code point order instead of code unit order
714 * (see u_strCompare for details).
715 *
716 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
717 *
718 * @return A negative, zero, or positive integer indicating the comparison result.
719 * @stable ICU 2.0
720 */
721 inline int8_t caseCompare(const UChar *srcChars,
722 int32_t srcLength,
723 uint32_t options) const;
724
725 /**
726 * Compare two strings case-insensitively using full case folding.
727 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
728 *
729 * @param start The start offset in this string at which the compare operation begins.
730 * @param length The number of code units from this string to compare.
731 * @param srcChars A pointer to another string to compare this one to.
732 * @param options A bit set of options:
733 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
734 * Comparison in code unit order with default case folding.
735 *
736 * - U_COMPARE_CODE_POINT_ORDER
737 * Set to choose code point order instead of code unit order
738 * (see u_strCompare for details).
739 *
740 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
741 *
742 * @return A negative, zero, or positive integer indicating the comparison result.
743 * @stable ICU 2.0
744 */
745 inline int8_t caseCompare(int32_t start,
746 int32_t length,
747 const UChar *srcChars,
748 uint32_t options) const;
749
750 /**
751 * Compare two strings case-insensitively using full case folding.
752 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
753 *
754 * @param start The start offset in this string at which the compare operation begins.
755 * @param length The number of code units from this string to compare.
756 * @param srcChars A pointer to another string to compare this one to.
757 * @param srcStart The start offset in that string at which the compare operation begins.
758 * @param srcLength The number of code units from that string to compare.
759 * @param options A bit set of options:
760 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
761 * Comparison in code unit order with default case folding.
762 *
763 * - U_COMPARE_CODE_POINT_ORDER
764 * Set to choose code point order instead of code unit order
765 * (see u_strCompare for details).
766 *
767 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
768 *
769 * @return A negative, zero, or positive integer indicating the comparison result.
770 * @stable ICU 2.0
771 */
772 inline int8_t caseCompare(int32_t start,
773 int32_t length,
774 const UChar *srcChars,
775 int32_t srcStart,
776 int32_t srcLength,
777 uint32_t options) const;
778
779 /**
780 * Compare two strings case-insensitively using full case folding.
781 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
782 *
783 * @param start The start offset in this string at which the compare operation begins.
784 * @param limit The offset after the last code unit from this string to compare.
785 * @param srcText Another string to compare this one to.
786 * @param srcStart The start offset in that string at which the compare operation begins.
787 * @param srcLimit The offset after the last code unit from that string to compare.
788 * @param options A bit set of options:
789 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
790 * Comparison in code unit order with default case folding.
791 *
792 * - U_COMPARE_CODE_POINT_ORDER
793 * Set to choose code point order instead of code unit order
794 * (see u_strCompare for details).
795 *
796 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
797 *
798 * @return A negative, zero, or positive integer indicating the comparison result.
799 * @stable ICU 2.0
800 */
801 inline int8_t caseCompareBetween(int32_t start,
802 int32_t limit,
803 const UnicodeString& srcText,
804 int32_t srcStart,
805 int32_t srcLimit,
806 uint32_t options) const;
807
808 /**
809 * Determine if this starts with the characters in <TT>text</TT>
810 * @param text The text to match.
374ca955 811 * @return TRUE if this starts with the characters in <TT>text</TT>,
b75a7d8f
A
812 * FALSE otherwise
813 * @stable ICU 2.0
814 */
815 inline UBool startsWith(const UnicodeString& text) const;
816
817 /**
374ca955
A
818 * Determine if this starts with the characters in <TT>srcText</TT>
819 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
820 * @param srcText The text to match.
821 * @param srcStart the offset into <TT>srcText</TT> to start matching
822 * @param srcLength the number of characters in <TT>srcText</TT> to match
374ca955 823 * @return TRUE if this starts with the characters in <TT>srcText</TT>,
b75a7d8f
A
824 * FALSE otherwise
825 * @stable ICU 2.0
826 */
827 inline UBool startsWith(const UnicodeString& srcText,
828 int32_t srcStart,
829 int32_t srcLength) const;
830
831 /**
832 * Determine if this starts with the characters in <TT>srcChars</TT>
833 * @param srcChars The characters to match.
834 * @param srcLength the number of characters in <TT>srcChars</TT>
374ca955 835 * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
b75a7d8f
A
836 * FALSE otherwise
837 * @stable ICU 2.0
838 */
839 inline UBool startsWith(const UChar *srcChars,
840 int32_t srcLength) const;
841
842 /**
374ca955
A
843 * Determine if this starts with the characters in <TT>srcChars</TT>
844 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
845 * @param srcChars The characters to match.
846 * @param srcStart the offset into <TT>srcChars</TT> to start matching
847 * @param srcLength the number of characters in <TT>srcChars</TT> to match
848 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
849 * @stable ICU 2.0
850 */
851 inline UBool startsWith(const UChar *srcChars,
852 int32_t srcStart,
853 int32_t srcLength) const;
854
855 /**
856 * Determine if this ends with the characters in <TT>text</TT>
857 * @param text The text to match.
374ca955 858 * @return TRUE if this ends with the characters in <TT>text</TT>,
b75a7d8f
A
859 * FALSE otherwise
860 * @stable ICU 2.0
861 */
862 inline UBool endsWith(const UnicodeString& text) const;
863
864 /**
374ca955
A
865 * Determine if this ends with the characters in <TT>srcText</TT>
866 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
867 * @param srcText The text to match.
868 * @param srcStart the offset into <TT>srcText</TT> to start matching
869 * @param srcLength the number of characters in <TT>srcText</TT> to match
374ca955 870 * @return TRUE if this ends with the characters in <TT>srcText</TT>,
b75a7d8f
A
871 * FALSE otherwise
872 * @stable ICU 2.0
873 */
874 inline UBool endsWith(const UnicodeString& srcText,
875 int32_t srcStart,
876 int32_t srcLength) const;
877
878 /**
879 * Determine if this ends with the characters in <TT>srcChars</TT>
880 * @param srcChars The characters to match.
881 * @param srcLength the number of characters in <TT>srcChars</TT>
374ca955 882 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
b75a7d8f
A
883 * FALSE otherwise
884 * @stable ICU 2.0
885 */
886 inline UBool endsWith(const UChar *srcChars,
887 int32_t srcLength) const;
888
889 /**
374ca955
A
890 * Determine if this ends with the characters in <TT>srcChars</TT>
891 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
892 * @param srcChars The characters to match.
893 * @param srcStart the offset into <TT>srcChars</TT> to start matching
894 * @param srcLength the number of characters in <TT>srcChars</TT> to match
374ca955 895 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
b75a7d8f
A
896 * FALSE otherwise
897 * @stable ICU 2.0
898 */
899 inline UBool endsWith(const UChar *srcChars,
900 int32_t srcStart,
901 int32_t srcLength) const;
902
903
904 /* Searching - bitwise only */
905
906 /**
907 * Locate in this the first occurrence of the characters in <TT>text</TT>,
908 * using bitwise comparison.
909 * @param text The text to search for.
374ca955 910 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
911 * or -1 if not found.
912 * @stable ICU 2.0
913 */
914 inline int32_t indexOf(const UnicodeString& text) const;
915
916 /**
917 * Locate in this the first occurrence of the characters in <TT>text</TT>
918 * starting at offset <TT>start</TT>, using bitwise comparison.
919 * @param text The text to search for.
920 * @param start The offset at which searching will start.
374ca955 921 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
922 * or -1 if not found.
923 * @stable ICU 2.0
924 */
925 inline int32_t indexOf(const UnicodeString& text,
926 int32_t start) const;
927
928 /**
929 * Locate in this the first occurrence in the range
374ca955 930 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
b75a7d8f
A
931 * in <TT>text</TT>, using bitwise comparison.
932 * @param text The text to search for.
933 * @param start The offset at which searching will start.
934 * @param length The number of characters to search
374ca955 935 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
936 * or -1 if not found.
937 * @stable ICU 2.0
938 */
939 inline int32_t indexOf(const UnicodeString& text,
940 int32_t start,
941 int32_t length) const;
942
943 /**
944 * Locate in this the first occurrence in the range
945 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
374ca955
A
946 * in <TT>srcText</TT> in the range
947 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
948 * using bitwise comparison.
b75a7d8f
A
949 * @param srcText The text to search for.
950 * @param srcStart the offset into <TT>srcText</TT> at which
951 * to start matching
952 * @param srcLength the number of characters in <TT>srcText</TT> to match
953 * @param start the offset into this at which to start matching
954 * @param length the number of characters in this to search
374ca955 955 * @return The offset into this of the start of <TT>srcText</TT>,
b75a7d8f
A
956 * or -1 if not found.
957 * @stable ICU 2.0
958 */
959 inline int32_t indexOf(const UnicodeString& srcText,
960 int32_t srcStart,
961 int32_t srcLength,
962 int32_t start,
963 int32_t length) const;
964
965 /**
966 * Locate in this the first occurrence of the characters in
374ca955
A
967 * <TT>srcChars</TT>
968 * starting at offset <TT>start</TT>, using bitwise comparison.
b75a7d8f
A
969 * @param srcChars The text to search for.
970 * @param srcLength the number of characters in <TT>srcChars</TT> to match
971 * @param start the offset into this at which to start matching
374ca955 972 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
973 * or -1 if not found.
974 * @stable ICU 2.0
975 */
976 inline int32_t indexOf(const UChar *srcChars,
977 int32_t srcLength,
978 int32_t start) const;
979
980 /**
981 * Locate in this the first occurrence in the range
374ca955 982 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
b75a7d8f
A
983 * in <TT>srcChars</TT>, using bitwise comparison.
984 * @param srcChars The text to search for.
985 * @param srcLength the number of characters in <TT>srcChars</TT>
986 * @param start The offset at which searching will start.
987 * @param length The number of characters to search
374ca955 988 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
989 * or -1 if not found.
990 * @stable ICU 2.0
991 */
992 inline int32_t indexOf(const UChar *srcChars,
993 int32_t srcLength,
994 int32_t start,
995 int32_t length) const;
374ca955 996
b75a7d8f 997 /**
374ca955
A
998 * Locate in this the first occurrence in the range
999 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1000 * in <TT>srcChars</TT> in the range
1001 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1002 * using bitwise comparison.
b75a7d8f 1003 * @param srcChars The text to search for.
374ca955 1004 * @param srcStart the offset into <TT>srcChars</TT> at which
b75a7d8f
A
1005 * to start matching
1006 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1007 * @param start the offset into this at which to start matching
1008 * @param length the number of characters in this to search
374ca955 1009 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1010 * or -1 if not found.
1011 * @stable ICU 2.0
1012 */
1013 int32_t indexOf(const UChar *srcChars,
1014 int32_t srcStart,
1015 int32_t srcLength,
1016 int32_t start,
1017 int32_t length) const;
1018
1019 /**
1020 * Locate in this the first occurrence of the BMP code point <code>c</code>,
1021 * using bitwise comparison.
1022 * @param c The code unit to search for.
1023 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1024 * @stable ICU 2.0
1025 */
1026 inline int32_t indexOf(UChar c) const;
1027
1028 /**
374ca955 1029 * Locate in this the first occurrence of the code point <TT>c</TT>,
b75a7d8f
A
1030 * using bitwise comparison.
1031 *
1032 * @param c The code point to search for.
1033 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1034 * @stable ICU 2.0
1035 */
1036 inline int32_t indexOf(UChar32 c) const;
1037
1038 /**
1039 * Locate in this the first occurrence of the BMP code point <code>c</code>,
1040 * starting at offset <TT>start</TT>, using bitwise comparison.
1041 * @param c The code unit to search for.
1042 * @param start The offset at which searching will start.
1043 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1044 * @stable ICU 2.0
1045 */
1046 inline int32_t indexOf(UChar c,
1047 int32_t start) const;
1048
1049 /**
1050 * Locate in this the first occurrence of the code point <TT>c</TT>
1051 * starting at offset <TT>start</TT>, using bitwise comparison.
1052 *
1053 * @param c The code point to search for.
1054 * @param start The offset at which searching will start.
1055 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1056 * @stable ICU 2.0
1057 */
1058 inline int32_t indexOf(UChar32 c,
1059 int32_t start) const;
1060
1061 /**
1062 * Locate in this the first occurrence of the BMP code point <code>c</code>
374ca955
A
1063 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1064 * using bitwise comparison.
b75a7d8f
A
1065 * @param c The code unit to search for.
1066 * @param start the offset into this at which to start matching
1067 * @param length the number of characters in this to search
1068 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1069 * @stable ICU 2.0
1070 */
1071 inline int32_t indexOf(UChar c,
1072 int32_t start,
1073 int32_t length) const;
1074
1075 /**
374ca955
A
1076 * Locate in this the first occurrence of the code point <TT>c</TT>
1077 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1078 * using bitwise comparison.
b75a7d8f
A
1079 *
1080 * @param c The code point to search for.
1081 * @param start the offset into this at which to start matching
1082 * @param length the number of characters in this to search
1083 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1084 * @stable ICU 2.0
1085 */
1086 inline int32_t indexOf(UChar32 c,
1087 int32_t start,
1088 int32_t length) const;
1089
1090 /**
374ca955 1091 * Locate in this the last occurrence of the characters in <TT>text</TT>,
b75a7d8f
A
1092 * using bitwise comparison.
1093 * @param text The text to search for.
374ca955 1094 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
1095 * or -1 if not found.
1096 * @stable ICU 2.0
1097 */
1098 inline int32_t lastIndexOf(const UnicodeString& text) const;
1099
1100 /**
1101 * Locate in this the last occurrence of the characters in <TT>text</TT>
1102 * starting at offset <TT>start</TT>, using bitwise comparison.
1103 * @param text The text to search for.
1104 * @param start The offset at which searching will start.
374ca955 1105 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
1106 * or -1 if not found.
1107 * @stable ICU 2.0
1108 */
1109 inline int32_t lastIndexOf(const UnicodeString& text,
1110 int32_t start) const;
1111
1112 /**
374ca955 1113 * Locate in this the last occurrence in the range
b75a7d8f
A
1114 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1115 * in <TT>text</TT>, using bitwise comparison.
1116 * @param text The text to search for.
1117 * @param start The offset at which searching will start.
1118 * @param length The number of characters to search
374ca955 1119 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
1120 * or -1 if not found.
1121 * @stable ICU 2.0
1122 */
1123 inline int32_t lastIndexOf(const UnicodeString& text,
1124 int32_t start,
1125 int32_t length) const;
1126
1127 /**
374ca955
A
1128 * Locate in this the last occurrence in the range
1129 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1130 * in <TT>srcText</TT> in the range
1131 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1132 * using bitwise comparison.
b75a7d8f 1133 * @param srcText The text to search for.
374ca955 1134 * @param srcStart the offset into <TT>srcText</TT> at which
b75a7d8f
A
1135 * to start matching
1136 * @param srcLength the number of characters in <TT>srcText</TT> to match
1137 * @param start the offset into this at which to start matching
1138 * @param length the number of characters in this to search
374ca955 1139 * @return The offset into this of the start of <TT>srcText</TT>,
b75a7d8f
A
1140 * or -1 if not found.
1141 * @stable ICU 2.0
1142 */
1143 inline int32_t lastIndexOf(const UnicodeString& srcText,
1144 int32_t srcStart,
1145 int32_t srcLength,
1146 int32_t start,
1147 int32_t length) const;
1148
1149 /**
374ca955
A
1150 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1151 * starting at offset <TT>start</TT>, using bitwise comparison.
b75a7d8f
A
1152 * @param srcChars The text to search for.
1153 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1154 * @param start the offset into this at which to start matching
374ca955 1155 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1156 * or -1 if not found.
1157 * @stable ICU 2.0
1158 */
1159 inline int32_t lastIndexOf(const UChar *srcChars,
1160 int32_t srcLength,
1161 int32_t start) const;
1162
1163 /**
374ca955
A
1164 * Locate in this the last occurrence in the range
1165 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
b75a7d8f
A
1166 * in <TT>srcChars</TT>, using bitwise comparison.
1167 * @param srcChars The text to search for.
1168 * @param srcLength the number of characters in <TT>srcChars</TT>
1169 * @param start The offset at which searching will start.
1170 * @param length The number of characters to search
374ca955 1171 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1172 * or -1 if not found.
1173 * @stable ICU 2.0
1174 */
1175 inline int32_t lastIndexOf(const UChar *srcChars,
1176 int32_t srcLength,
1177 int32_t start,
1178 int32_t length) const;
374ca955 1179
b75a7d8f 1180 /**
374ca955
A
1181 * Locate in this the last occurrence in the range
1182 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1183 * in <TT>srcChars</TT> in the range
1184 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1185 * using bitwise comparison.
b75a7d8f
A
1186 * @param srcChars The text to search for.
1187 * @param srcStart the offset into <TT>srcChars</TT> at which
1188 * to start matching
1189 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1190 * @param start the offset into this at which to start matching
1191 * @param length the number of characters in this to search
374ca955 1192 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1193 * or -1 if not found.
1194 * @stable ICU 2.0
1195 */
1196 int32_t lastIndexOf(const UChar *srcChars,
1197 int32_t srcStart,
1198 int32_t srcLength,
1199 int32_t start,
1200 int32_t length) const;
1201
1202 /**
1203 * Locate in this the last occurrence of the BMP code point <code>c</code>,
1204 * using bitwise comparison.
1205 * @param c The code unit to search for.
1206 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1207 * @stable ICU 2.0
1208 */
1209 inline int32_t lastIndexOf(UChar c) const;
1210
1211 /**
374ca955 1212 * Locate in this the last occurrence of the code point <TT>c</TT>,
b75a7d8f
A
1213 * using bitwise comparison.
1214 *
1215 * @param c The code point to search for.
1216 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1217 * @stable ICU 2.0
1218 */
1219 inline int32_t lastIndexOf(UChar32 c) const;
1220
1221 /**
1222 * Locate in this the last occurrence of the BMP code point <code>c</code>
1223 * starting at offset <TT>start</TT>, using bitwise comparison.
1224 * @param c The code unit to search for.
1225 * @param start The offset at which searching will start.
1226 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1227 * @stable ICU 2.0
1228 */
1229 inline int32_t lastIndexOf(UChar c,
1230 int32_t start) const;
1231
1232 /**
1233 * Locate in this the last occurrence of the code point <TT>c</TT>
1234 * starting at offset <TT>start</TT>, using bitwise comparison.
1235 *
1236 * @param c The code point to search for.
1237 * @param start The offset at which searching will start.
1238 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1239 * @stable ICU 2.0
1240 */
1241 inline int32_t lastIndexOf(UChar32 c,
1242 int32_t start) const;
1243
1244 /**
1245 * Locate in this the last occurrence of the BMP code point <code>c</code>
374ca955
A
1246 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1247 * using bitwise comparison.
b75a7d8f
A
1248 * @param c The code unit to search for.
1249 * @param start the offset into this at which to start matching
1250 * @param length the number of characters in this to search
1251 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1252 * @stable ICU 2.0
1253 */
1254 inline int32_t lastIndexOf(UChar c,
1255 int32_t start,
1256 int32_t length) const;
1257
1258 /**
374ca955
A
1259 * Locate in this the last occurrence of the code point <TT>c</TT>
1260 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1261 * using bitwise comparison.
b75a7d8f
A
1262 *
1263 * @param c The code point to search for.
1264 * @param start the offset into this at which to start matching
1265 * @param length the number of characters in this to search
1266 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1267 * @stable ICU 2.0
1268 */
1269 inline int32_t lastIndexOf(UChar32 c,
1270 int32_t start,
1271 int32_t length) const;
1272
1273
1274 /* Character access */
1275
1276 /**
1277 * Return the code unit at offset <tt>offset</tt>.
1278 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1279 * @param offset a valid offset into the text
1280 * @return the code unit at offset <tt>offset</tt>
1281 * or 0xffff if the offset is not valid for this string
1282 * @stable ICU 2.0
1283 */
1284 inline UChar charAt(int32_t offset) const;
1285
1286 /**
1287 * Return the code unit at offset <tt>offset</tt>.
1288 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1289 * @param offset a valid offset into the text
1290 * @return the code unit at offset <tt>offset</tt>
1291 * @stable ICU 2.0
1292 */
1293 inline UChar operator[] (int32_t offset) const;
1294
1295 /**
1296 * Return the code point that contains the code unit
1297 * at offset <tt>offset</tt>.
1298 * If the offset is not valid (0..length()-1) then U+ffff is returned.
1299 * @param offset a valid offset into the text
1300 * that indicates the text offset of any of the code units
1301 * that will be assembled into a code point (21-bit value) and returned
1302 * @return the code point of text at <tt>offset</tt>
1303 * or 0xffff if the offset is not valid for this string
1304 * @stable ICU 2.0
1305 */
4388f060 1306 UChar32 char32At(int32_t offset) const;
b75a7d8f
A
1307
1308 /**
1309 * Adjust a random-access offset so that
1310 * it points to the beginning of a Unicode character.
1311 * The offset that is passed in points to
1312 * any code unit of a code point,
1313 * while the returned offset will point to the first code unit
1314 * of the same code point.
1315 * In UTF-16, if the input offset points to a second surrogate
1316 * of a surrogate pair, then the returned offset will point
1317 * to the first surrogate.
1318 * @param offset a valid offset into one code point of the text
1319 * @return offset of the first code unit of the same code point
1320 * @see U16_SET_CP_START
1321 * @stable ICU 2.0
1322 */
4388f060 1323 int32_t getChar32Start(int32_t offset) const;
b75a7d8f
A
1324
1325 /**
1326 * Adjust a random-access offset so that
1327 * it points behind a Unicode character.
1328 * The offset that is passed in points behind
1329 * any code unit of a code point,
1330 * while the returned offset will point behind the last code unit
1331 * of the same code point.
1332 * In UTF-16, if the input offset points behind the first surrogate
1333 * (i.e., to the second surrogate)
1334 * of a surrogate pair, then the returned offset will point
1335 * behind the second surrogate (i.e., past the whole surrogate pair).
1336 * @param offset a valid offset after any code unit of a code point of the text
1337 * @return offset of the first code unit after the same code point
1338 * @see U16_SET_CP_LIMIT
1339 * @stable ICU 2.0
1340 */
4388f060 1341 int32_t getChar32Limit(int32_t offset) const;
b75a7d8f
A
1342
1343 /**
1344 * Move the code unit index along the string by delta code points.
1345 * Interpret the input index as a code unit-based offset into the string,
1346 * move the index forward or backward by delta code points, and
1347 * return the resulting index.
1348 * The input index should point to the first code unit of a code point,
1349 * if there is more than one.
1350 *
1351 * Both input and output indexes are code unit-based as for all
1352 * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1353 * If delta<0 then the index is moved backward (toward the start of the string).
1354 * If delta>0 then the index is moved forward (toward the end of the string).
1355 *
1356 * This behaves like CharacterIterator::move32(delta, kCurrent).
1357 *
1358 * Behavior for out-of-bounds indexes:
1359 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1360 * if the input index<0 then it is pinned to 0;
1361 * if it is index>length() then it is pinned to length().
1362 * Afterwards, the index is moved by <code>delta</code> code points
1363 * forward or backward,
1364 * but no further backward than to 0 and no further forward than to length().
1365 * The resulting index return value will be in between 0 and length(), inclusively.
1366 *
1367 * Examples:
1368 * <pre>
1369 * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1370 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1371 *
1372 * // initial index: position of U+10000
1373 * int32_t index=1;
1374 *
1375 * // the following examples will all result in index==4, position of U+10ffff
1376 *
1377 * // skip 2 code points from some position in the string
1378 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1379 *
1380 * // go to the 3rd code point from the start of s (0-based)
1381 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1382 *
1383 * // go to the next-to-last code point of s
1384 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1385 * </pre>
1386 *
1387 * @param index input code unit index
1388 * @param delta (signed) code point count to move the index forward or backward
1389 * in the string
1390 * @return the resulting code unit index
1391 * @stable ICU 2.0
1392 */
1393 int32_t moveIndex32(int32_t index, int32_t delta) const;
1394
1395 /* Substring extraction */
1396
1397 /**
374ca955 1398 * Copy the characters in the range
b75a7d8f
A
1399 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1400 * beginning at <tt>dstStart</tt>.
1401 * If the string aliases to <code>dst</code> itself as an external buffer,
1402 * then extract() will not copy the contents.
1403 *
1404 * @param start offset of first character which will be copied into the array
1405 * @param length the number of characters to extract
1406 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1407 * must be at least (<tt>dstStart + length</tt>).
1408 * @param dstStart the offset in <TT>dst</TT> where the first character
1409 * will be extracted
1410 * @stable ICU 2.0
1411 */
374ca955
A
1412 inline void extract(int32_t start,
1413 int32_t length,
1414 UChar *dst,
b75a7d8f
A
1415 int32_t dstStart = 0) const;
1416
1417 /**
1418 * Copy the contents of the string into dest.
1419 * This is a convenience function that
1420 * checks if there is enough space in dest,
1421 * extracts the entire string if possible,
1422 * and NUL-terminates dest if possible.
1423 *
1424 * If the string fits into dest but cannot be NUL-terminated
1425 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1426 * If the string itself does not fit into dest
1427 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1428 *
1429 * If the string aliases to <code>dest</code> itself as an external buffer,
1430 * then extract() will not copy the contents.
1431 *
1432 * @param dest Destination string buffer.
1433 * @param destCapacity Number of UChars available at dest.
1434 * @param errorCode ICU error code.
1435 * @return length()
1436 * @stable ICU 2.0
1437 */
1438 int32_t
1439 extract(UChar *dest, int32_t destCapacity,
1440 UErrorCode &errorCode) const;
1441
1442 /**
374ca955 1443 * Copy the characters in the range
b75a7d8f
A
1444 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1445 * <tt>target</tt>.
1446 * @param start offset of first character which will be copied
1447 * @param length the number of characters to extract
1448 * @param target UnicodeString into which to copy characters
1449 * (the result is delivered here; the function itself returns void).
1450 * @stable ICU 2.0
1451 */
1452 inline void extract(int32_t start,
1453 int32_t length,
1454 UnicodeString& target) const;
1455
1456 /**
374ca955 1457 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
b75a7d8f
A
1458 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1459 * @param start offset of first character which will be copied into the array
1460 * @param limit offset immediately following the last character to be copied
374ca955 1461 * @param dst array in which to copy characters. The length of <tt>dst</tt>
b75a7d8f
A
1462 * must be at least (<tt>dstStart + (limit - start)</tt>).
1463 * @param dstStart the offset in <TT>dst</TT> where the first character
1464 * will be extracted
1465 * @stable ICU 2.0
1466 */
374ca955
A
1467 inline void extractBetween(int32_t start,
1468 int32_t limit,
1469 UChar *dst,
b75a7d8f
A
1470 int32_t dstStart = 0) const;
1471
1472 /**
374ca955 1473 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
b75a7d8f
A
1474 * into the UnicodeString <tt>target</tt>. Replaceable API.
1475 * @param start offset of first character which will be copied
1476 * @param limit offset immediately following the last character to be copied
1477 * @param target UnicodeString into which to copy characters.
1478 * The extracted text is stored in <TT>target</TT>; this function returns nothing.
1479 * @stable ICU 2.0
1480 */
1481 virtual void extractBetween(int32_t start,
1482 int32_t limit,
1483 UnicodeString& target) const;
1484
1485 /**
1486 * Copy the characters in the range
374ca955
A
1487 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1488 * All characters must be invariant (see utypes.h).
1489 * Use US_INV as the last, signature-distinguishing parameter.
1490 *
1491 * This function does not write any more than <code>targetCapacity</code>
1492 * characters but returns the length of the entire output string
1493 * so that one can allocate a larger buffer and call the function again
1494 * if necessary.
1495 * The output string is NUL-terminated if possible.
1496 *
1497 * @param start offset of first character which will be copied
1498 * @param startLength the number of characters to extract
1499 * @param target the target buffer for extraction, can be NULL
1500 * if targetLength is 0
1501 * @param targetCapacity the length of the target buffer
1502 * @param inv Signature-distinguishing parameter, use US_INV.
1503 * @return the output string length, not including the terminating NUL
73c04bcf 1504 * @stable ICU 3.2
374ca955
A
1505 */
1506 int32_t extract(int32_t start,
1507 int32_t startLength,
1508 char *target,
1509 int32_t targetCapacity,
1510 enum EInvariant inv) const;
1511
729e4ab9
A
1512#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1513
1514 /**
1515 * Copy the characters in the range
1516 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1517 * in the platform's default codepage.
1518 * This function does not write any more than <code>targetLength</code>
1519 * characters but returns the length of the entire output string
1520 * so that one can allocate a larger buffer and call the function again
1521 * if necessary.
1522 * The output string is NUL-terminated if possible.
1523 *
1524 * @param start offset of first character which will be copied
1525 * @param startLength the number of characters to extract
1526 * @param target the target buffer for extraction
1527 * @param targetLength the length of the target buffer
1528 * If <TT>target</TT> is NULL, then the number of bytes required for
1529 * <TT>target</TT> is returned.
1530 * @return the output string length, not including the terminating NUL
1531 * @stable ICU 2.0
1532 */
1533 int32_t extract(int32_t start,
1534 int32_t startLength,
1535 char *target,
1536 uint32_t targetLength) const;
1537
1538#endif
1539
374ca955
A
1540#if !UCONFIG_NO_CONVERSION
1541
1542 /**
1543 * Copy the characters in the range
b75a7d8f
A
1544 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1545 * in a specified codepage.
1546 * The output string is NUL-terminated.
1547 *
374ca955
A
1548 * Recommendation: For invariant-character strings use
1549 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1550 * because it avoids object code dependencies of UnicodeString on
1551 * the conversion code.
1552 *
b75a7d8f
A
1553 * @param start offset of first character which will be copied
1554 * @param startLength the number of characters to extract
1555 * @param target the target buffer for extraction
374ca955 1556 * @param codepage the desired codepage for the characters. 0 has
b75a7d8f
A
1557 * the special meaning of the default codepage
1558 * If <code>codepage</code> is an empty string (<code>""</code>),
1559 * then a simple conversion is performed on the codepage-invariant
1560 * subset ("invariant characters") of the platform encoding. See utypes.h.
1561 * If <TT>target</TT> is NULL, then the number of bytes required for
1562 * <TT>target</TT> is returned. It is assumed that the target is big enough
1563 * to fit all of the characters.
1564 * @return the output string length, not including the terminating NUL
1565 * @stable ICU 2.0
1566 */
1567 inline int32_t extract(int32_t start,
1568 int32_t startLength,
1569 char *target,
1570 const char *codepage = 0) const;
1571
1572 /**
374ca955 1573 * Copy the characters in the range
b75a7d8f
A
1574 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1575 * in a specified codepage.
1576 * This function does not write any more than <code>targetLength</code>
1577 * characters but returns the length of the entire output string
1578 * so that one can allocate a larger buffer and call the function again
1579 * if necessary.
1580 * The output string is NUL-terminated if possible.
1581 *
374ca955
A
1582 * Recommendation: For invariant-character strings use
1583 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1584 * because it avoids object code dependencies of UnicodeString on
1585 * the conversion code.
1586 *
b75a7d8f
A
1587 * @param start offset of first character which will be copied
1588 * @param startLength the number of characters to extract
1589 * @param target the target buffer for extraction
1590 * @param targetLength the length of the target buffer
374ca955 1591 * @param codepage the desired codepage for the characters. 0 has
b75a7d8f
A
1592 * the special meaning of the default codepage
1593 * If <code>codepage</code> is an empty string (<code>""</code>),
1594 * then a simple conversion is performed on the codepage-invariant
1595 * subset ("invariant characters") of the platform encoding. See utypes.h.
1596 * If <TT>target</TT> is NULL, then the number of bytes required for
1597 * <TT>target</TT> is returned.
1598 * @return the output string length, not including the terminating NUL
1599 * @stable ICU 2.0
1600 */
1601 int32_t extract(int32_t start,
1602 int32_t startLength,
1603 char *target,
1604 uint32_t targetLength,
729e4ab9 1605 const char *codepage) const;
b75a7d8f
A
1606
1607 /**
1608 * Convert the UnicodeString into a codepage string using an existing UConverter.
1609 * The output string is NUL-terminated if possible.
1610 *
1611 * This function avoids the overhead of opening and closing a converter if
1612 * multiple strings are extracted.
1613 *
1614 * @param dest destination string buffer, can be NULL if destCapacity==0
1615 * @param destCapacity the number of chars available at dest
1616 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1617 * or NULL for the default converter
1618 * @param errorCode normal ICU error code
1619 * @return the length of the output string, not counting the terminating NUL;
1620 * if the length is greater than destCapacity, then the string will not fit
1621 * and a buffer of the indicated length would need to be passed in
1622 * @stable ICU 2.0
1623 */
1624 int32_t extract(char *dest, int32_t destCapacity,
1625 UConverter *cnv,
1626 UErrorCode &errorCode) const;
1627
374ca955
A
1628#endif
1629
729e4ab9
A
1630 /**
1631 * Create a temporary substring for the specified range.
1632 * Unlike the substring constructor and setTo() functions,
1633 * the object returned here will be a read-only alias (using getBuffer())
1634 * rather than copying the text.
1635 * As a result, this substring operation is much faster but requires
1636 * that the original string not be modified or deleted during the lifetime
1637 * of the returned substring object.
1638 * @param start offset of the first character visible in the substring
1639 * @param length length of the substring
1640 * @return a read-only alias UnicodeString object for the substring
1641 * @stable ICU 4.4
1642 */
1643 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1644
1645 /**
1646 * Create a temporary substring for the specified range.
1647 * Same as tempSubString(start, length) except that the substring range
1648 * is specified as a (start, limit) pair (with an exclusive limit index)
1649 * rather than a (start, length) pair.
1650 * @param start offset of the first character visible in the substring
1651 * @param limit offset immediately following the last character visible in the substring
1652 * @return a read-only alias UnicodeString object for the substring
1653 * @stable ICU 4.4
1654 */
1655 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1656
1657 /**
1658 * Convert the UnicodeString to UTF-8 and write the result
1659 * to a ByteSink. This is called by toUTF8String().
1660 * Unpaired surrogates are replaced with U+FFFD.
1661 * Calls u_strToUTF8WithSub().
1662 *
1663 * @param sink A ByteSink to which the UTF-8 version of the string is written.
1664 * sink.Flush() is called at the end.
1665 * @stable ICU 4.2
1666 * @see toUTF8String
1667 */
1668 void toUTF8(ByteSink &sink) const;
1669
1670#if U_HAVE_STD_STRING
1671
1672 /**
1673 * Convert the UnicodeString to UTF-8 and append the result
1674 * to a standard string.
1675 * Unpaired surrogates are replaced with U+FFFD.
1676 * Calls toUTF8().
1677 *
1678 * @param result A standard string (or a compatible object)
1679 * to which the UTF-8 version of the string is appended.
1680 * @return The string object.
1681 * @stable ICU 4.2
1682 * @see toUTF8
1683 */
1684 template<typename StringClass>
1685 StringClass &toUTF8String(StringClass &result) const {
1686 StringByteSink<StringClass> sbs(&result); // wrap result in a ByteSink adapter; presumably it appends to result (see unicode/bytestream.h)
1687 toUTF8(sbs); // convert this string to UTF-8 and write the bytes to the sink
1688 return result; // return the caller's string object, as documented
1689 }
1690
1691#endif
1692
1693 /**
1694 * Convert the UnicodeString to UTF-32.
1695 * Unpaired surrogates are replaced with U+FFFD.
1696 * Calls u_strToUTF32WithSub().
1697 *
1698 * @param utf32 destination string buffer, can be NULL if capacity==0
1699 * @param capacity the number of UChar32s available at utf32
1700 * @param errorCode Standard ICU error code. Its input value must
1701 * pass the U_SUCCESS() test, or else the function returns
1702 * immediately. Check for U_FAILURE() on output or use with
1703 * function chaining. (See User Guide for details.)
1704 * @return The length of the UTF-32 string.
1705 * @see fromUTF32
1706 * @stable ICU 4.2
1707 */
1708 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1709
b75a7d8f
A
1710 /* Length operations */
1711
1712 /**
374ca955
A
1713 * Return the length of the UnicodeString object.
1714 * The length is the number of UChar code units in the UnicodeString.
1715 * If you want the number of code points, please use countChar32().
b75a7d8f 1716 * @return the length of the UnicodeString object
374ca955 1717 * @see countChar32
b75a7d8f
A
1718 * @stable ICU 2.0
1719 */
1720 inline int32_t length(void) const;
1721
1722 /**
1723 * Count Unicode code points in the length UChar code units of the string.
1724 * A code point may occupy either one or two UChar code units.
1725 * Counting code points involves reading all code units.
1726 *
1727 * This function is basically the inverse of moveIndex32().
1728 *
1729 * @param start the index of the first code unit to check
1730 * @param length the number of UChar code units to check
1731 * @return the number of code points in the specified code units
374ca955 1732 * @see length
b75a7d8f
A
1733 * @stable ICU 2.0
1734 */
1735 int32_t
1736 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1737
1738 /**
1739 * Check if the length UChar code units of the string
1740 * contain more Unicode code points than a certain number.
1741 * This is more efficient than counting all code points in this part of the string
1742 * and comparing that number with a threshold.
1743 * This function may not need to scan the string at all if the length
1744 * falls within a certain range, and
1745 * never needs to count more than 'number+1' code points.
1746 * Logically equivalent to (countChar32(start, length)>number).
1747 * A Unicode code point may occupy either one or two UChar code units.
1748 *
1749 * @param start the index of the first code unit to check (0 for the entire string)
1750 * @param length the number of UChar code units to check
1751 * (use INT32_MAX for the entire string; remember that start/length
1752 * values are pinned)
1753 * @param number The number of code points in the (sub)string is compared against
1754 * the 'number' parameter.
1755 * @return Boolean value for whether the string contains more Unicode code points
1756 * than 'number'. Same as (u_countChar32(s, length)>number).
1757 * @see countChar32
1758 * @see u_strHasMoreChar32Than
374ca955 1759 * @stable ICU 2.4
b75a7d8f
A
1760 */
1761 UBool
1762 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1763
1764 /**
1765 * Determine if this string is empty.
1766 * @return TRUE if this string contains 0 characters, FALSE otherwise.
1767 * @stable ICU 2.0
1768 */
1769 inline UBool isEmpty(void) const;
1770
1771 /**
1772 * Return the capacity of the internal buffer of the UnicodeString object.
1773 * This is useful together with the getBuffer functions.
1774 * See there for details.
1775 *
1776 * @return the number of UChars available in the internal buffer
1777 * @see getBuffer
1778 * @stable ICU 2.0
1779 */
1780 inline int32_t getCapacity(void) const;
1781
1782 /* Other operations */
1783
1784 /**
1785 * Generate a hash code for this object.
1786 * @return The hash code of this UnicodeString.
1787 * @stable ICU 2.0
1788 */
1789 inline int32_t hashCode(void) const;
1790
1791 /**
1792 * Determine whether this string is bogus, i.e. does not contain a valid string.
729e4ab9
A
1793 * A bogus string has no value. It is different from an empty string,
1794 * although in both cases isEmpty() returns TRUE and length() returns 0.
1795 * setToBogus() and isBogus() can be used to indicate that no string value is available.
1796 * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
b75a7d8f
A
1797 * length() returns 0.
1798 *
1799 * @return TRUE if the string is bogus/invalid, FALSE otherwise
1800 * @see setToBogus()
1801 * @stable ICU 2.0
1802 */
1803 inline UBool isBogus(void) const;
1804
1805
1806 //========================================
1807 // Write operations
1808 //========================================
1809
1810 /* Assignment operations */
1811
1812 /**
1813 * Assignment operator. Replace the characters in this UnicodeString
1814 * with the characters from <TT>srcText</TT>.
1815 * @param srcText The text containing the characters to replace
1816 * @return a reference to this
1817 * @stable ICU 2.0
1818 */
1819 UnicodeString &operator=(const UnicodeString &srcText);
1820
1821 /**
1822 * Almost the same as the assignment operator.
1823 * Replace the characters in this UnicodeString
1824 * with the characters from <code>srcText</code>.
1825 *
51004dcb
A
1826 * This function works the same as the assignment operator
1827 * for all strings except for ones that are readonly aliases.
1828 *
b75a7d8f
A
1829 * Starting with ICU 2.4, the assignment operator and the copy constructor
1830 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1831 * This function implements the old, more efficient but less safe behavior
1832 * of making this string also a readonly alias to the same buffer.
51004dcb 1833 *
b75a7d8f 1834 * The fastCopyFrom function must be used only if it is known that the lifetime of
51004dcb 1835 * this UnicodeString does not exceed the lifetime of the aliased buffer
b75a7d8f 1836 * including its contents, for example for strings from resource bundles
51004dcb 1837 * or aliases to string constants.
b75a7d8f
A
1838 *
1839 * @param src The text containing the characters to replace.
1840 * @return a reference to this
374ca955 1841 * @stable ICU 2.4
b75a7d8f
A
1842 */
1843 UnicodeString &fastCopyFrom(const UnicodeString &src);
1844
1845 /**
1846 * Assignment operator. Replace the characters in this UnicodeString
1847 * with the code unit <TT>ch</TT>.
1848 * @param ch the code unit to replace
1849 * @return a reference to this
1850 * @stable ICU 2.0
1851 */
1852 inline UnicodeString& operator= (UChar ch);
1853
1854 /**
1855 * Assignment operator. Replace the characters in this UnicodeString
1856 * with the code point <TT>ch</TT>.
1857 * @param ch the code point to replace
1858 * @return a reference to this
1859 * @stable ICU 2.0
1860 */
1861 inline UnicodeString& operator= (UChar32 ch);
1862
1863 /**
1864 * Set the text in the UnicodeString object to the characters
374ca955 1865 * in <TT>srcText</TT> in the range
b75a7d8f
A
1866 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1867 * <TT>srcText</TT> is not modified.
1868 * @param srcText the source for the new characters
1869 * @param srcStart the offset into <TT>srcText</TT> where new characters
1870 * will be obtained
1871 * @return a reference to this
374ca955 1872 * @stable ICU 2.2
b75a7d8f 1873 */
374ca955 1874 inline UnicodeString& setTo(const UnicodeString& srcText,
b75a7d8f
A
1875 int32_t srcStart);
1876
1877 /**
1878 * Set the text in the UnicodeString object to the characters
374ca955 1879 * in <TT>srcText</TT> in the range
b75a7d8f
A
1880 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1881 * <TT>srcText</TT> is not modified.
1882 * @param srcText the source for the new characters
1883 * @param srcStart the offset into <TT>srcText</TT> where new characters
1884 * will be obtained
1885 * @param srcLength the number of characters in <TT>srcText</TT> in the
1886 * replace string.
1887 * @return a reference to this
1888 * @stable ICU 2.0
1889 */
374ca955
A
1890 inline UnicodeString& setTo(const UnicodeString& srcText,
1891 int32_t srcStart,
b75a7d8f
A
1892 int32_t srcLength);
1893
1894 /**
374ca955
A
1895 * Set the text in the UnicodeString object to the characters in
1896 * <TT>srcText</TT>.
b75a7d8f
A
1897 * <TT>srcText</TT> is not modified.
1898 * @param srcText the source for the new characters
1899 * @return a reference to this
1900 * @stable ICU 2.0
1901 */
1902 inline UnicodeString& setTo(const UnicodeString& srcText);
1903
1904 /**
1905 * Set the characters in the UnicodeString object to the characters
1906 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1907 * @param srcChars the source for the new characters
1908 * @param srcLength the number of Unicode characters in srcChars.
1909 * @return a reference to this
1910 * @stable ICU 2.0
1911 */
1912 inline UnicodeString& setTo(const UChar *srcChars,
1913 int32_t srcLength);
1914
1915 /**
1916 * Set the characters in the UnicodeString object to the code unit
1917 * <TT>srcChar</TT>.
374ca955 1918 * @param srcChar the code unit which becomes the UnicodeString's character
b75a7d8f
A
1919 * content
1920 * @return a reference to this
1921 * @stable ICU 2.0
1922 */
1923 UnicodeString& setTo(UChar srcChar);
1924
1925 /**
1926 * Set the characters in the UnicodeString object to the code point
1927 * <TT>srcChar</TT>.
374ca955 1928 * @param srcChar the code point which becomes the UnicodeString's character
b75a7d8f
A
1929 * content
1930 * @return a reference to this
1931 * @stable ICU 2.0
1932 */
1933 UnicodeString& setTo(UChar32 srcChar);
1934
1935 /**
1936 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1937 * The text will be used for the UnicodeString object, but
1938 * it will not be released when the UnicodeString is destroyed.
1939 * This has copy-on-write semantics:
1940 * When the string is modified, then the buffer is first copied into
1941 * newly allocated memory.
1942 * The aliased buffer is never modified.
51004dcb
A
1943 *
1944 * In an assignment to another UnicodeString, when using the copy constructor
1945 * or the assignment operator, the text will be copied.
1946 * When using fastCopyFrom(), the text will be aliased again,
b75a7d8f
A
1947 * so that both strings then alias the same readonly-text.
1948 *
1949 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1950 * This must be true if <code>textLength==-1</code>.
1951 * @param text The characters to alias for the UnicodeString.
1952 * @param textLength The number of Unicode characters in <code>text</code> to alias.
1953 * If -1, then this function will determine the length
1954 * by calling <code>u_strlen()</code>.
1955 * @return a reference to this
1956 * @stable ICU 2.0
1957 */
1958 UnicodeString &setTo(UBool isTerminated,
1959 const UChar *text,
1960 int32_t textLength);
1961
1962 /**
1963 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1964 * The text will be used for the UnicodeString object, but
1965 * it will not be released when the UnicodeString is destroyed.
1966 * This has write-through semantics:
1967 * For as long as the capacity of the buffer is sufficient, write operations
1968 * will directly affect the buffer. When more capacity is necessary, then
1969 * a new buffer will be allocated and the contents copied as with regularly
1970 * constructed strings.
1971 * In an assignment to another UnicodeString, the buffer will be copied.
1972 * The extract(UChar *dst) function detects whether the dst pointer is the same
1973 * as the string buffer itself and will in this case not copy the contents.
1974 *
1975 * @param buffer The characters to alias for the UnicodeString.
1976 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1977 * @param buffCapacity The size of <code>buffer</code> in UChars.
1978 * @return a reference to this
1979 * @stable ICU 2.0
1980 */
1981 UnicodeString &setTo(UChar *buffer,
1982 int32_t buffLength,
1983 int32_t buffCapacity);
1984
1985 /**
1986 * Make this UnicodeString object invalid.
1987 * The string will test TRUE with isBogus().
1988 *
1989 * A bogus string has no value. It is different from an empty string.
1990 * It can be used to indicate that no string value is available.
1991 * getBuffer() and getTerminatedBuffer() return NULL, and
1992 * length() returns 0.
1993 *
1994 * This utility function is used throughout the UnicodeString
1995 * implementation to indicate that a UnicodeString operation failed,
1996 * and may be used in other functions,
1997 * especially but not exclusively when such functions do not
1998 * take a UErrorCode for simplicity.
1999 *
2000 * The following methods, and no others, will clear a string object's bogus flag:
2001 * - remove()
2002 * - remove(0, INT32_MAX)
2003 * - truncate(0)
2004 * - operator=() (assignment operator)
2005 * - setTo(...)
2006 *
2007 * The simplest way to turn a bogus string into an empty one
2008 * is to use the remove() function.
2009 * Examples for other functions that are equivalent to "set to empty string":
2010 * \code
2011 * if(s.isBogus()) {
2012 * s.remove(); // set to an empty string (remove all), or
2013 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
2014 * s.truncate(0); // set to an empty string (complete truncation), or
2015 * s=UnicodeString(); // assign an empty string, or
2016 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
2017 * static const UChar nul=0;
2018 * s.setTo(&nul, 0); // set to an empty C Unicode string
2019 * }
2020 * \endcode
2021 *
2022 * @see isBogus()
2023 * @stable ICU 2.0
2024 */
2025 void setToBogus();
2026
2027 /**
2028 * Set the character at the specified offset to the specified character.
2029 * @param offset A valid offset into the text of the character to set
2030 * @param ch The new character
2031 * @return A reference to this
2032 * @stable ICU 2.0
2033 */
374ca955 2034 UnicodeString& setCharAt(int32_t offset,
b75a7d8f
A
2035 UChar ch);
2036
2037
2038 /* Append operations */
2039
2040 /**
2041 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
2042 * object.
2043 * @param ch the code unit to be appended
2044 * @return a reference to this
2045 * @stable ICU 2.0
2046 */
2047 inline UnicodeString& operator+= (UChar ch);
2048
2049 /**
2050 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
2051 * object.
2052 * @param ch the code point to be appended
2053 * @return a reference to this
2054 * @stable ICU 2.0
2055 */
2056 inline UnicodeString& operator+= (UChar32 ch);
2057
2058 /**
2059 * Append operator. Append the characters in <TT>srcText</TT> to the
4388f060 2060 * UnicodeString object. <TT>srcText</TT> is not modified.
b75a7d8f
A
2061 * @param srcText the source for the new characters
2062 * @return a reference to this
2063 * @stable ICU 2.0
2064 */
2065 inline UnicodeString& operator+= (const UnicodeString& srcText);
2066
2067 /**
2068 * Append the characters
374ca955
A
2069 * in <TT>srcText</TT> in the range
2070 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
2071 * UnicodeString object. <TT>srcText</TT>
b75a7d8f
A
2072 * is not modified.
2073 * @param srcText the source for the new characters
374ca955 2074 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 2075 * will be obtained
374ca955 2076 * @param srcLength the number of characters in <TT>srcText</TT> in
b75a7d8f
A
2077 * the append string
2078 * @return a reference to this
2079 * @stable ICU 2.0
2080 */
374ca955
A
2081 inline UnicodeString& append(const UnicodeString& srcText,
2082 int32_t srcStart,
b75a7d8f
A
2083 int32_t srcLength);
2084
2085 /**
4388f060
A
2086 * Append the characters in <TT>srcText</TT> to the UnicodeString object.
2087 * <TT>srcText</TT> is not modified.
b75a7d8f
A
2088 * @param srcText the source for the new characters
2089 * @return a reference to this
2090 * @stable ICU 2.0
2091 */
2092 inline UnicodeString& append(const UnicodeString& srcText);
2093
2094 /**
374ca955
A
2095 * Append the characters in <TT>srcChars</TT> in the range
2096 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
2097 * object.
b75a7d8f
A
2098 * <TT>srcChars</TT> is not modified.
2099 * @param srcChars the source for the new characters
374ca955 2100 * @param srcStart the offset into <TT>srcChars</TT> where new characters
b75a7d8f 2101 * will be obtained
374ca955 2102 * @param srcLength the number of characters in <TT>srcChars</TT> in
4388f060 2103 * the append string; can be -1 if <TT>srcChars</TT> is NUL-terminated
b75a7d8f
A
2104 * @return a reference to this
2105 * @stable ICU 2.0
2106 */
374ca955
A
2107 inline UnicodeString& append(const UChar *srcChars,
2108 int32_t srcStart,
b75a7d8f
A
2109 int32_t srcLength);
2110
2111 /**
374ca955 2112 * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
b75a7d8f
A
2113 * <TT>srcChars</TT> is not modified.
2114 * @param srcChars the source for the new characters
4388f060
A
2115 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>;
2116 * can be -1 if <TT>srcChars</TT> is NUL-terminated
b75a7d8f
A
2117 * @return a reference to this
2118 * @stable ICU 2.0
2119 */
2120 inline UnicodeString& append(const UChar *srcChars,
2121 int32_t srcLength);
2122
2123 /**
2124 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
2125 * @param srcChar the code unit to append
2126 * @return a reference to this
2127 * @stable ICU 2.0
2128 */
2129 inline UnicodeString& append(UChar srcChar);
2130
2131 /**
2132 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
2133 * @param srcChar the code point to append
2134 * @return a reference to this
2135 * @stable ICU 2.0
2136 */
4388f060 2137 UnicodeString& append(UChar32 srcChar);
b75a7d8f
A
2138
2139
2140 /* Insert operations */
2141
2142 /**
374ca955
A
2143 * Insert the characters in <TT>srcText</TT> in the range
2144 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
b75a7d8f
A
2145 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2146 * @param start the offset where the insertion begins
374ca955
A
2147 * @param srcText the source for the new characters
2148 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 2149 * will be obtained
374ca955 2150 * @param srcLength the number of characters in <TT>srcText</TT> in
b75a7d8f
A
2151 * the insert string
2152 * @return a reference to this
2153 * @stable ICU 2.0
2154 */
374ca955
A
2155 inline UnicodeString& insert(int32_t start,
2156 const UnicodeString& srcText,
2157 int32_t srcStart,
b75a7d8f
A
2158 int32_t srcLength);
2159
2160 /**
2161 * Insert the characters in <TT>srcText</TT> into the UnicodeString object
2162 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
2163 * @param start the offset where the insertion begins
374ca955 2164 * @param srcText the source for the new characters
b75a7d8f
A
2165 * @return a reference to this
2166 * @stable ICU 2.0
2167 */
374ca955 2168 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2169 const UnicodeString& srcText);
2170
2171 /**
374ca955 2172 * Insert the characters in <TT>srcChars</TT> in the range
b75a7d8f
A
2173 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
2174 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2175 * @param start the offset at which the insertion begins
2176 * @param srcChars the source for the new characters
374ca955 2177 * @param srcStart the offset into <TT>srcChars</TT> where new characters
b75a7d8f 2178 * will be obtained
374ca955 2179 * @param srcLength the number of characters in <TT>srcChars</TT>
b75a7d8f
A
2180 * in the insert string
2181 * @return a reference to this
2182 * @stable ICU 2.0
2183 */
374ca955
A
2184 inline UnicodeString& insert(int32_t start,
2185 const UChar *srcChars,
2186 int32_t srcStart,
b75a7d8f
A
2187 int32_t srcLength);
2188
2189 /**
374ca955 2190 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
b75a7d8f
A
2191 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2192 * @param start the offset where the insertion begins
2193 * @param srcChars the source for the new characters
2194 * @param srcLength the number of Unicode characters in srcChars.
2195 * @return a reference to this
2196 * @stable ICU 2.0
2197 */
374ca955 2198 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2199 const UChar *srcChars,
2200 int32_t srcLength);
2201
2202 /**
374ca955 2203 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
b75a7d8f
A
2204 * offset <TT>start</TT>.
2205 * @param start the offset at which the insertion occurs
2206 * @param srcChar the code unit to insert
2207 * @return a reference to this
2208 * @stable ICU 2.0
2209 */
374ca955 2210 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2211 UChar srcChar);
2212
2213 /**
374ca955 2214 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
b75a7d8f
A
2215 * offset <TT>start</TT>.
2216 * @param start the offset at which the insertion occurs
2217 * @param srcChar the code point to insert
2218 * @return a reference to this
2219 * @stable ICU 2.0
2220 */
374ca955 2221 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2222 UChar32 srcChar);
2223
2224
2225 /* Replace operations */
2226
2227 /**
374ca955
A
2228 * Replace the characters in the range
2229 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2230 * <TT>srcText</TT> in the range
2231 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
2232 * <TT>srcText</TT> is not modified.
2233 * @param start the offset at which the replace operation begins
374ca955 2234 * @param length the number of characters to replace. The character at
b75a7d8f
A
2235 * <TT>start + length</TT> is not modified.
2236 * @param srcText the source for the new characters
374ca955 2237 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 2238 * will be obtained
374ca955 2239 * @param srcLength the number of characters in <TT>srcText</TT> in
b75a7d8f
A
2240 * the replace string
2241 * @return a reference to this
2242 * @stable ICU 2.0
2243 */
374ca955
A
2244 UnicodeString& replace(int32_t start,
2245 int32_t length,
2246 const UnicodeString& srcText,
2247 int32_t srcStart,
b75a7d8f
A
2248 int32_t srcLength);
2249
2250 /**
374ca955
A
2251 * Replace the characters in the range
2252 * [<TT>start</TT>, <TT>start + length</TT>)
b75a7d8f
A
2253 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2254 * not modified.
2255 * @param start the offset at which the replace operation begins
2256 * @param length the number of characters to replace. The character at
2257 * <TT>start + length</TT> is not modified.
2258 * @param srcText the source for the new characters
2259 * @return a reference to this
2260 * @stable ICU 2.0
2261 */
374ca955
A
2262 UnicodeString& replace(int32_t start,
2263 int32_t length,
b75a7d8f
A
2264 const UnicodeString& srcText);
2265
2266 /**
374ca955
A
2267 * Replace the characters in the range
2268 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2269 * <TT>srcChars</TT> in the range
2270 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
b75a7d8f
A
2271 * is not modified.
2272 * @param start the offset at which the replace operation begins
2273 * @param length the number of characters to replace. The character at
2274 * <TT>start + length</TT> is not modified.
2275 * @param srcChars the source for the new characters
2276 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2277 * will be obtained
374ca955 2278 * @param srcLength the number of characters in <TT>srcChars</TT>
b75a7d8f
A
2279 * in the replace string
2280 * @return a reference to this
2281 * @stable ICU 2.0
2282 */
374ca955
A
2283 UnicodeString& replace(int32_t start,
2284 int32_t length,
2285 const UChar *srcChars,
2286 int32_t srcStart,
b75a7d8f
A
2287 int32_t srcLength);
2288
2289 /**
374ca955 2290 * Replace the characters in the range
b75a7d8f
A
2291 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2292 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2293 * @param start the offset at which the replace operation begins
2294 * @param length number of characters to replace. The character at
2295 * <TT>start + length</TT> is not modified.
2296 * @param srcChars the source for the new characters
2297 * @param srcLength the number of Unicode characters in srcChars
2298 * @return a reference to this
2299 * @stable ICU 2.0
2300 */
374ca955
A
2301 inline UnicodeString& replace(int32_t start,
2302 int32_t length,
b75a7d8f
A
2303 const UChar *srcChars,
2304 int32_t srcLength);
2305
2306 /**
374ca955 2307 * Replace the characters in the range
b75a7d8f
A
2308 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2309 * <TT>srcChar</TT>.
2310 * @param start the offset at which the replace operation begins
2311 * @param length the number of characters to replace. The character at
2312 * <TT>start + length</TT> is not modified.
2313 * @param srcChar the new code unit
2314 * @return a reference to this
2315 * @stable ICU 2.0
2316 */
374ca955
A
2317 inline UnicodeString& replace(int32_t start,
2318 int32_t length,
b75a7d8f
A
2319 UChar srcChar);
2320
2321 /**
374ca955 2322 * Replace the characters in the range
b75a7d8f
A
2323 * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2324 * <TT>srcChar</TT>.
2325 * @param start the offset at which the replace operation begins
2326 * @param length the number of characters to replace. The character at
2327 * <TT>start + length</TT> is not modified.
2328 * @param srcChar the new code point
2329 * @return a reference to this
2330 * @stable ICU 2.0
2331 */
4388f060 2332 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
b75a7d8f
A
2333
2334 /**
374ca955 2335 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
b75a7d8f
A
2336 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2337 * @param start the offset at which the replace operation begins
2338 * @param limit the offset immediately following the replace range
2339 * @param srcText the source for the new characters
2340 * @return a reference to this
2341 * @stable ICU 2.0
2342 */
374ca955
A
2343 inline UnicodeString& replaceBetween(int32_t start,
2344 int32_t limit,
b75a7d8f
A
2345 const UnicodeString& srcText);
2346
2347 /**
374ca955
A
2348 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2349 * with the characters in <TT>srcText</TT> in the range
b75a7d8f
A
2350 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2351 * @param start the offset at which the replace operation begins
2352 * @param limit the offset immediately following the replace range
2353 * @param srcText the source for the new characters
374ca955 2354 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 2355 * will be obtained
374ca955 2356 * @param srcLimit the offset immediately following the range to copy
b75a7d8f
A
2357 * in <TT>srcText</TT>
2358 * @return a reference to this
2359 * @stable ICU 2.0
2360 */
374ca955
A
2361 inline UnicodeString& replaceBetween(int32_t start,
2362 int32_t limit,
2363 const UnicodeString& srcText,
2364 int32_t srcStart,
b75a7d8f
A
2365 int32_t srcLimit);
2366
2367 /**
2368 * Replace a substring of this object with the given text.
2369 * @param start the beginning index, inclusive; <code>0 <= start
2370 * <= limit</code>.
2371 * @param limit the ending index, exclusive; <code>start <= limit
2372 * <= length()</code>.
2373 * @param text the text to replace characters <code>start</code>
2374 * to <code>limit - 1</code>
2375 * @stable ICU 2.0
2376 */
2377 virtual void handleReplaceBetween(int32_t start,
2378 int32_t limit,
2379 const UnicodeString& text);
2380
2381 /**
2382 * Replaceable API
2383 * @return TRUE if it has MetaData
374ca955 2384 * @stable ICU 2.4
b75a7d8f
A
2385 */
2386 virtual UBool hasMetaData() const;
374ca955 2387
b75a7d8f
A
2388 /**
2389 * Copy a substring of this object, retaining attribute (out-of-band)
2390 * information. This method is used to duplicate or reorder substrings.
2391 * The destination index must not overlap the source range.
374ca955 2392 *
b75a7d8f
A
2393 * @param start the beginning index, inclusive; <code>0 <= start <=
2394 * limit</code>.
2395 * @param limit the ending index, exclusive; <code>start <= limit <=
2396 * length()</code>.
2397 * @param dest the destination index. The characters from
2398 * <code>start..limit-1</code> will be copied to <code>dest</code>.
2399 * Implementations of this method may assume that <code>dest <= start ||
2400 * dest >= limit</code>.
2401 * @stable ICU 2.0
2402 */
2403 virtual void copy(int32_t start, int32_t limit, int32_t dest);
2404
2405 /* Search and replace operations */
2406
2407 /**
374ca955 2408 * Replace all occurrences of characters in oldText with the characters
b75a7d8f
A
2409 * in newText
2410 * @param oldText the text containing the search text
2411 * @param newText the text containing the replacement text
2412 * @return a reference to this
2413 * @stable ICU 2.0
2414 */
2415 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2416 const UnicodeString& newText);
2417
2418 /**
374ca955 2419 * Replace all occurrences of characters in oldText with characters
b75a7d8f
A
2420 * in newText
2421 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2422 * @param start the start of the range in which replace will be performed
2423 * @param length the length of the range in which replace will be performed
2424 * @param oldText the text containing the search text
2425 * @param newText the text containing the replacement text
2426 * @return a reference to this
2427 * @stable ICU 2.0
2428 */
2429 inline UnicodeString& findAndReplace(int32_t start,
2430 int32_t length,
2431 const UnicodeString& oldText,
2432 const UnicodeString& newText);
2433
2434 /**
374ca955
A
2435 * Replace all occurrences of characters in oldText in the range
2436 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2437 * in newText in the range
2438 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
b75a7d8f
A
2439 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2440 * @param start the start of the range in which replace will be performed
2441 * @param length the length of the range in which replace will be performed
2442 * @param oldText the text containing the search text
2443 * @param oldStart the start of the search range in <TT>oldText</TT>
2444 * @param oldLength the length of the search range in <TT>oldText</TT>
2445 * @param newText the text containing the replacement text
2446 * @param newStart the start of the replacement range in <TT>newText</TT>
2447 * @param newLength the length of the replacement range in <TT>newText</TT>
2448 * @return a reference to this
2449 * @stable ICU 2.0
2450 */
2451 UnicodeString& findAndReplace(int32_t start,
2452 int32_t length,
2453 const UnicodeString& oldText,
2454 int32_t oldStart,
2455 int32_t oldLength,
2456 const UnicodeString& newText,
2457 int32_t newStart,
2458 int32_t newLength);
2459
2460
2461 /* Remove operations */
2462
2463 /**
2464 * Remove all characters from the UnicodeString object.
2465 * @return a reference to this
2466 * @stable ICU 2.0
2467 */
2468 inline UnicodeString& remove(void);
2469
2470 /**
374ca955 2471 * Remove the characters in the range
b75a7d8f
A
2472 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2473 * @param start the offset of the first character to remove
2474 * @param length the number of characters to remove
2475 * @return a reference to this
2476 * @stable ICU 2.0
2477 */
374ca955 2478 inline UnicodeString& remove(int32_t start,
b75a7d8f
A
2479 int32_t length = (int32_t)INT32_MAX);
2480
2481 /**
374ca955 2482 * Remove the characters in the range
b75a7d8f
A
2483 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2484 * @param start the offset of the first character to remove
2485 * @param limit the offset immediately following the range to remove
2486 * @return a reference to this
2487 * @stable ICU 2.0
2488 */
2489 inline UnicodeString& removeBetween(int32_t start,
2490 int32_t limit = (int32_t)INT32_MAX);
2491
729e4ab9
A
2492 /**
2493 * Retain only the characters in the range
2494 * [<code>start</code>, <code>limit</code>) from the UnicodeString object.
2495 * Removes characters before <code>start</code> and at and after <code>limit</code>.
2496 * @param start the offset of the first character to retain
2497 * @param limit the offset immediately following the range to retain
2498 * @return a reference to this
2499 * @stable ICU 4.4
2500 */
2501 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
b75a7d8f
A
2502
2503 /* Length operations */
2504
2505 /**
374ca955
A
2506 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2507 * If the length of this UnicodeString is less than targetLength,
b75a7d8f
A
2508 * targetLength - length() copies of padChar will be added to the
2509 * beginning of this UnicodeString.
2510 * @param targetLength the desired length of the string
374ca955 2511 * @param padChar the character to use for padding. Defaults to
b75a7d8f
A
2512 * space (U+0020)
2513 * @return TRUE if the text was padded, FALSE otherwise.
2514 * @stable ICU 2.0
2515 */
2516 UBool padLeading(int32_t targetLength,
2517 UChar padChar = 0x0020);
2518
2519 /**
374ca955
A
2520 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2521 * If the length of this UnicodeString is less than targetLength,
b75a7d8f
A
2522 * targetLength - length() copies of padChar will be added to the
2523 * end of this UnicodeString.
2524 * @param targetLength the desired length of the string
374ca955 2525 * @param padChar the character to use for padding. Defaults to
b75a7d8f
A
2526 * space (U+0020)
2527 * @return TRUE if the text was padded, FALSE otherwise.
2528 * @stable ICU 2.0
2529 */
2530 UBool padTrailing(int32_t targetLength,
2531 UChar padChar = 0x0020);
2532
2533 /**
2534 * Truncate this UnicodeString to the <TT>targetLength</TT>.
2535 * @param targetLength the desired length of this UnicodeString.
2536 * @return TRUE if the text was truncated, FALSE otherwise
2537 * @stable ICU 2.0
2538 */
2539 inline UBool truncate(int32_t targetLength);
2540
2541 /**
2542 * Trims leading and trailing whitespace from this UnicodeString.
2543 * @return a reference to this
2544 * @stable ICU 2.0
2545 */
2546 UnicodeString& trim(void);
2547
2548
2549 /* Miscellaneous operations */
2550
2551 /**
2552 * Reverse this UnicodeString in place.
2553 * @return a reference to this
2554 * @stable ICU 2.0
2555 */
2556 inline UnicodeString& reverse(void);
2557
2558 /**
2559 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2560 * this UnicodeString.
2561 * @param start the start of the range to reverse
2562 * @param length the number of characters to reverse
2563 * @return a reference to this
2564 * @stable ICU 2.0
2565 */
2566 inline UnicodeString& reverse(int32_t start,
2567 int32_t length);
2568
2569 /**
2570 * Convert the characters in this to UPPER CASE following the conventions of
2571 * the default locale.
2572 * @return A reference to this.
2573 * @stable ICU 2.0
2574 */
2575 UnicodeString& toUpper(void);
2576
2577 /**
2578 * Convert the characters in this to UPPER CASE following the conventions of
2579 * a specific locale.
2580 * @param locale The locale containing the conventions to use.
2581 * @return A reference to this.
2582 * @stable ICU 2.0
2583 */
2584 UnicodeString& toUpper(const Locale& locale);
2585
2586 /**
2587 * Convert the characters in this to lower case following the conventions of
2588 * the default locale.
2589 * @return A reference to this.
2590 * @stable ICU 2.0
2591 */
2592 UnicodeString& toLower(void);
2593
2594 /**
2595 * Convert the characters in this to lower case following the conventions of
2596 * a specific locale.
2597 * @param locale The locale containing the conventions to use.
2598 * @return A reference to this.
2599 * @stable ICU 2.0
2600 */
2601 UnicodeString& toLower(const Locale& locale);
2602
2603#if !UCONFIG_NO_BREAK_ITERATION
2604
2605 /**
2606 * Titlecase this string, convenience function using the default locale.
2607 *
2608 * Casing is locale-dependent and context-sensitive.
2609 * Titlecasing uses a break iterator to find the first characters of words
2610 * that are to be titlecased. It titlecases those characters and lowercases
2611 * all others.
2612 *
2613 * The titlecase break iterator can be provided to customize for arbitrary
2614 * styles, using rules and dictionaries beyond the standard iterators.
2615 * It may be more efficient to always provide an iterator to avoid
2616 * opening and closing one for each string.
2617 * The standard titlecase iterator for the root locale implements the
2618 * algorithm of Unicode TR 21.
2619 *
46f4442e 2620 * This function uses only the setText(), first() and next() methods of the
b75a7d8f
A
2621 * provided break iterator.
2622 *
2623 * @param titleIter A break iterator to find the first characters of words
2624 * that are to be titlecased.
2625 * If none is provided (0), then a standard titlecase
2626 * break iterator is opened.
374ca955 2627 * Otherwise the provided iterator is set to the string's text.
b75a7d8f
A
2628 * @return A reference to this.
2629 * @stable ICU 2.1
2630 */
2631 UnicodeString &toTitle(BreakIterator *titleIter);
2632
2633 /**
2634 * Titlecase this string.
2635 *
2636 * Casing is locale-dependent and context-sensitive.
2637 * Titlecasing uses a break iterator to find the first characters of words
2638 * that are to be titlecased. It titlecases those characters and lowercases
2639 * all others.
2640 *
2641 * The titlecase break iterator can be provided to customize for arbitrary
2642 * styles, using rules and dictionaries beyond the standard iterators.
2643 * It may be more efficient to always provide an iterator to avoid
2644 * opening and closing one for each string.
2645 * The standard titlecase iterator for the root locale implements the
2646 * algorithm of Unicode TR 21.
2647 *
46f4442e 2648 * This function uses only the setText(), first() and next() methods of the
b75a7d8f
A
2649 * provided break iterator.
2650 *
2651 * @param titleIter A break iterator to find the first characters of words
2652 * that are to be titlecased.
2653 * If none is provided (0), then a standard titlecase
2654 * break iterator is opened.
374ca955 2655 * Otherwise the provided iterator is set to the string's text.
b75a7d8f
A
2656 * @param locale The locale to consider.
2657 * @return A reference to this.
2658 * @stable ICU 2.1
2659 */
2660 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2661
46f4442e
A
2662 /**
2663 * Titlecase this string, with options.
2664 *
2665 * Casing is locale-dependent and context-sensitive.
2666 * Titlecasing uses a break iterator to find the first characters of words
2667 * that are to be titlecased. It titlecases those characters and lowercases
2668 * all others. (This can be modified with options.)
2669 *
2670 * The titlecase break iterator can be provided to customize for arbitrary
2671 * styles, using rules and dictionaries beyond the standard iterators.
2672 * It may be more efficient to always provide an iterator to avoid
2673 * opening and closing one for each string.
2674 * The standard titlecase iterator for the root locale implements the
2675 * algorithm of Unicode TR 21.
2676 *
2677 * This function uses only the setText(), first() and next() methods of the
2678 * provided break iterator.
2679 *
2680 * @param titleIter A break iterator to find the first characters of words
2681 * that are to be titlecased.
2682 * If none is provided (0), then a standard titlecase
2683 * break iterator is opened.
2684 * Otherwise the provided iterator is set to the string's text.
2685 * @param locale The locale to consider.
2686 * @param options Options bit set, see ucasemap_open().
2687 * @return A reference to this.
2688 * @see U_TITLECASE_NO_LOWERCASE
2689 * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
2690 * @see ucasemap_open
729e4ab9 2691 * @stable ICU 3.8
46f4442e
A
2692 */
2693 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2694
b75a7d8f
A
2695#endif
2696
2697 /**
51004dcb
A
2698 * Case-folds the characters in this string.
2699 *
b75a7d8f
A
2700 * Case-folding is locale-independent and not context-sensitive,
2701 * but there is an option for whether to include or exclude mappings for dotted I
51004dcb
A
2702 * and dotless i that are marked with 'T' in CaseFolding.txt.
2703 *
b75a7d8f
A
2704 * The result may be longer or shorter than the original.
2705 *
2706 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2707 * @return A reference to this.
2708 * @stable ICU 2.0
2709 */
2710 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2711
2712 //========================================
2713 // Access to the internal buffer
2714 //========================================
2715
2716 /**
2717 * Get a read/write pointer to the internal buffer.
2718 * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2719 * writable, and is still owned by the UnicodeString object.
2720 * Calls to getBuffer(minCapacity) must not be nested, and
2721 * must be matched with calls to releaseBuffer(newLength).
2722 * If the string buffer was read-only or shared,
2723 * then it will be reallocated and copied.
2724 *
2725 * An attempted nested call will return 0, and will not further modify the
2726 * state of the UnicodeString object.
2727 * It also returns 0 if the string is bogus.
2728 *
2729 * The actual capacity of the string buffer may be larger than minCapacity.
2730 * getCapacity() returns the actual capacity.
2731 * For many operations, the full capacity should be used to avoid reallocations.
2732 *
2733 * While the buffer is "open" between getBuffer(minCapacity)
2734 * and releaseBuffer(newLength), the following applies:
2735 * - The string length is set to 0.
2736 * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2737 * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2738 * - You can read from and write to the returned buffer.
2739 * - The previous string contents will still be in the buffer;
2740 * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2741 * If the length() was greater than minCapacity, then any contents after minCapacity
2742 * may be lost.
2743 * The buffer contents is not NUL-terminated by getBuffer().
2744 * If length()<getCapacity() then you can terminate it by writing a NUL
2745 * at index length().
2746 * - You must call releaseBuffer(newLength) before you can
2747 * return to normal UnicodeString operation.
2748 *
2749 * @param minCapacity the minimum number of UChars that are to be available
2750 * in the buffer, starting at the returned pointer;
2751 * default to the current string capacity if minCapacity==-1
2752 * @return a writable pointer to the internal string buffer,
2753 * or 0 if an error occurs (nested calls, out of memory)
2754 *
2755 * @see releaseBuffer
2756 * @see getTerminatedBuffer()
2757 * @stable ICU 2.0
2758 */
2759 UChar *getBuffer(int32_t minCapacity);
2760
2761 /**
2762 * Release a read/write buffer on a UnicodeString object with an
2763 * "open" getBuffer(minCapacity).
2764 * This function must be called in a matched pair with getBuffer(minCapacity).
2765 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2766 *
2767 * It will set the string length to newLength, at most to the current capacity.
2768 * If newLength==-1 then it will set the length according to the
2769 * first NUL in the buffer, or to the capacity if there is no NUL.
2770 *
2771 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2772 *
2773 * @param newLength the new length of the UnicodeString object;
2774 * defaults to the current capacity if newLength is greater than that;
2775 * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2776 * the current capacity of the string
2777 *
2778 * @see getBuffer(int32_t minCapacity)
2779 * @stable ICU 2.0
2780 */
2781 void releaseBuffer(int32_t newLength=-1);
2782
2783 /**
2784 * Get a read-only pointer to the internal buffer.
2785 * This can be called at any time on a valid UnicodeString.
2786 *
2787 * It returns 0 if the string is bogus, or
2788 * during an "open" getBuffer(minCapacity).
2789 *
2790 * It can be called as many times as desired.
2791 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2792 * at which time the pointer is semantically invalidated and must not be used any more.
2793 *
2794 * The capacity of the buffer can be determined with getCapacity().
2795 * The part after length() may or may not be initialized and valid,
2796 * depending on the history of the UnicodeString object.
2797 *
2798 * The buffer contents is (probably) not NUL-terminated.
2799 * You can check if it is with
2800 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2801 * (See getTerminatedBuffer().)
2802 *
2803 * The buffer may reside in read-only memory. Its contents must not
2804 * be modified.
2805 *
2806 * @return a read-only pointer to the internal string buffer,
2807 * or 0 if the string is empty or bogus
2808 *
2809 * @see getBuffer(int32_t minCapacity)
2810 * @see getTerminatedBuffer()
2811 * @stable ICU 2.0
2812 */
2813 inline const UChar *getBuffer() const;
2814
2815 /**
2816 * Get a read-only pointer to the internal buffer,
2817 * making sure that it is NUL-terminated.
2818 * This can be called at any time on a valid UnicodeString.
2819 *
2820 * It returns 0 if the string is bogus, or
2821 * during an "open" getBuffer(minCapacity), or if the buffer cannot
2822 * be NUL-terminated (because memory allocation failed).
2823 *
2824 * It can be called as many times as desired.
2825 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2826 * at which time the pointer is semantically invalidated and must not be used any more.
2827 *
2828 * The capacity of the buffer can be determined with getCapacity().
2829 * The part after length()+1 may or may not be initialized and valid,
2830 * depending on the history of the UnicodeString object.
2831 *
2832 * The buffer contents is guaranteed to be NUL-terminated.
2833 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2834 * is written.
2835 * For this reason, this function is not const, unlike getBuffer().
2836 * Note that a UnicodeString may also contain NUL characters as part of its contents.
2837 *
2838 * The buffer may reside in read-only memory. Its contents must not
2839 * be modified.
2840 *
2841 * @return a read-only pointer to the internal string buffer,
2842 * or 0 if the string is empty or bogus
2843 *
2844 * @see getBuffer(int32_t minCapacity)
2845 * @see getBuffer()
374ca955 2846 * @stable ICU 2.2
b75a7d8f
A
2847 */
2848 inline const UChar *getTerminatedBuffer();
2849
2850 //========================================
2851 // Constructors
2852 //========================================
2853
374ca955 2854 /** Construct an empty UnicodeString.
b75a7d8f
A
2855 * @stable ICU 2.0
2856 */
51004dcb 2857 inline UnicodeString();
b75a7d8f
A
2858
2859 /**
2860 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2861 * @param capacity the number of UChars this UnicodeString should hold
2862 * before a resize is necessary; if count is greater than 0 and count
2863 * code points c take up more space than capacity, then capacity is adjusted
2864 * accordingly.
2865 * @param c is used to initially fill the string
2866 * @param count specifies how many code points c are to be written in the
2867 * string
2868 * @stable ICU 2.0
2869 */
2870 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2871
2872 /**
2873 * Single UChar (code unit) constructor.
4388f060
A
2874 *
2875 * It is recommended to mark this constructor "explicit" by
2876 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2877 * on the compiler command line or similar.
b75a7d8f
A
2878 * @param ch the character to place in the UnicodeString
2879 * @stable ICU 2.0
2880 */
4388f060 2881 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch);
b75a7d8f
A
2882
2883 /**
2884 * Single UChar32 (code point) constructor.
4388f060
A
2885 *
2886 * It is recommended to mark this constructor "explicit" by
2887 * <code>-DUNISTR_FROM_CHAR_EXPLICIT=explicit</code>
2888 * on the compiler command line or similar.
b75a7d8f
A
2889 * @param ch the character to place in the UnicodeString
2890 * @stable ICU 2.0
2891 */
4388f060 2892 UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
b75a7d8f
A
2893
2894 /**
2895 * UChar* constructor.
4388f060
A
2896 *
2897 * It is recommended to mark this constructor "explicit" by
2898 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2899 * on the compiler command line or similar.
b75a7d8f
A
2900 * @param text The characters to place in the UnicodeString. <TT>text</TT>
2901 * must be NUL (U+0000) terminated.
2902 * @stable ICU 2.0
2903 */
4388f060 2904 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text);
b75a7d8f
A
2905
2906 /**
2907 * UChar* constructor.
2908 * @param text The characters to place in the UnicodeString.
2909 * @param textLength The number of Unicode characters in <TT>text</TT>
2910 * to copy.
2911 * @stable ICU 2.0
2912 */
2913 UnicodeString(const UChar *text,
2914 int32_t textLength);
2915
2916 /**
2917 * Readonly-aliasing UChar* constructor.
2918 * The text will be used for the UnicodeString object, but
2919 * it will not be released when the UnicodeString is destroyed.
2920 * This has copy-on-write semantics:
2921 * When the string is modified, then the buffer is first copied into
2922 * newly allocated memory.
2923 * The aliased buffer is never modified.
51004dcb
A
2924 *
2925 * In an assignment to another UnicodeString, when using the copy constructor
2926 * or the assignment operator, the text will be copied.
2927 * When using fastCopyFrom(), the text will be aliased again,
b75a7d8f
A
2928 * so that both strings then alias the same readonly-text.
2929 *
2930 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2931 * This must be true if <code>textLength==-1</code>.
2932 * @param text The characters to alias for the UnicodeString.
2933 * @param textLength The number of Unicode characters in <code>text</code> to alias.
2934 * If -1, then this constructor will determine the length
2935 * by calling <code>u_strlen()</code>.
2936 * @stable ICU 2.0
2937 */
2938 UnicodeString(UBool isTerminated,
2939 const UChar *text,
2940 int32_t textLength);
2941
2942 /**
2943 * Writable-aliasing UChar* constructor.
2944 * The text will be used for the UnicodeString object, but
2945 * it will not be released when the UnicodeString is destroyed.
2946 * This has write-through semantics:
2947 * For as long as the capacity of the buffer is sufficient, write operations
2948 * will directly affect the buffer. When more capacity is necessary, then
2949 * a new buffer will be allocated and the contents copied as with regularly
2950 * constructed strings.
2951 * In an assignment to another UnicodeString, the buffer will be copied.
2952 * The extract(UChar *dst) function detects whether the dst pointer is the same
2953 * as the string buffer itself and will in this case not copy the contents.
2954 *
2955 * @param buffer The characters to alias for the UnicodeString.
2956 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2957 * @param buffCapacity The size of <code>buffer</code> in UChars.
2958 * @stable ICU 2.0
2959 */
2960 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2961
729e4ab9
A
2962#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2963
2964 /**
2965 * char* constructor.
4388f060
A
2966 * Uses the default converter (and thus depends on the ICU conversion code)
2967 * unless U_CHARSET_IS_UTF8 is set to 1.
2968 *
2969 * For ASCII (really "invariant character") strings it is more efficient to use
2970 * the constructor that takes a US_INV (for its enum EInvariant).
2971 * For ASCII (invariant-character) string literals, see UNICODE_STRING and
2972 * UNICODE_STRING_SIMPLE.
2973 *
2974 * It is recommended to mark this constructor "explicit" by
2975 * <code>-DUNISTR_FROM_STRING_EXPLICIT=explicit</code>
2976 * on the compiler command line or similar.
729e4ab9
A
2977 * @param codepageData an array of bytes, null-terminated,
2978 * in the platform's default codepage.
2979 * @stable ICU 2.0
4388f060
A
2980 * @see UNICODE_STRING
2981 * @see UNICODE_STRING_SIMPLE
729e4ab9 2982 */
4388f060 2983 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
729e4ab9
A
2984
2985 /**
2986 * char* constructor.
4388f060
A
2987 * Uses the default converter (and thus depends on the ICU conversion code)
2988 * unless U_CHARSET_IS_UTF8 is set to 1.
729e4ab9
A
2989 * @param codepageData an array of bytes in the platform's default codepage.
2990 * @param dataLength The number of bytes in <TT>codepageData</TT>.
2991 * @stable ICU 2.0
2992 */
2993 UnicodeString(const char *codepageData, int32_t dataLength);
2994
2995#endif
2996
374ca955
A
2997#if !UCONFIG_NO_CONVERSION
2998
b75a7d8f
A
2999 /**
3000 * char* constructor.
3001 * @param codepageData an array of bytes, null-terminated
3002 * @param codepage the encoding of <TT>codepageData</TT>. The special
374ca955 3003 * value 0 for <TT>codepage</TT> indicates that the text is in the
b75a7d8f 3004 * platform's default codepage.
374ca955 3005 *
b75a7d8f
A
3006 * If <code>codepage</code> is an empty string (<code>""</code>),
3007 * then a simple conversion is performed on the codepage-invariant
3008 * subset ("invariant characters") of the platform encoding. See utypes.h.
374ca955
A
3009 * Recommendation: For invariant-character strings use the constructor
3010 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3011 * because it avoids object code dependencies of UnicodeString on
3012 * the conversion code.
3013 *
b75a7d8f
A
3014 * @stable ICU 2.0
3015 */
729e4ab9 3016 UnicodeString(const char *codepageData, const char *codepage);
b75a7d8f
A
3017
3018 /**
3019 * char* constructor.
3020 * @param codepageData an array of bytes.
3021 * @param dataLength The number of bytes in <TT>codepageData</TT>.
3022 * @param codepage the encoding of <TT>codepageData</TT>. The special
374ca955 3023 * value 0 for <TT>codepage</TT> indicates that the text is in the
b75a7d8f
A
3024 * platform's default codepage.
3025 * If <code>codepage</code> is an empty string (<code>""</code>),
3026 * then a simple conversion is performed on the codepage-invariant
3027 * subset ("invariant characters") of the platform encoding. See utypes.h.
374ca955
A
3028 * Recommendation: For invariant-character strings use the constructor
3029 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
3030 * because it avoids object code dependencies of UnicodeString on
3031 * the conversion code.
3032 *
b75a7d8f
A
3033 * @stable ICU 2.0
3034 */
729e4ab9 3035 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
b75a7d8f
A
3036
3037 /**
3038 * char * / UConverter constructor.
3039 * This constructor uses an existing UConverter object to
3040 * convert the codepage string to Unicode and construct a UnicodeString
3041 * from that.
3042 *
3043 * The converter is reset at first.
3044 * If the error code indicates a failure before this constructor is called,
3045 * or if an error occurs during conversion or construction,
3046 * then the string will be bogus.
3047 *
3048 * This function avoids the overhead of opening and closing a converter if
3049 * multiple strings are constructed.
3050 *
3051 * @param src input codepage string
3052 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
3053 * @param cnv converter object (ucnv_resetToUnicode() will be called),
3054 * can be NULL for the default converter
3055 * @param errorCode normal ICU error code
3056 * @stable ICU 2.0
3057 */
3058 UnicodeString(
3059 const char *src, int32_t srcLength,
3060 UConverter *cnv,
3061 UErrorCode &errorCode);
3062
374ca955
A
3063#endif
3064
3065 /**
3066 * Constructs a Unicode string from an invariant-character char * string.
3067 * About invariant characters see utypes.h.
3068 * This constructor has no runtime dependency on conversion code and is
3069 * therefore recommended over ones taking a charset name string
3070 * (where the empty string "" indicates invariant-character conversion).
3071 *
3072 * Use the macro US_INV as the third, signature-distinguishing parameter.
3073 *
3074 * For example:
3075 * \code
3076 * void fn(const char *s) {
3077 * UnicodeString ustr(s, -1, US_INV);
3078 * // use ustr ...
3079 * }
3080 * \endcode
3081 *
3082 * @param src String using only invariant characters.
3083 * @param length Length of src, or -1 if NUL-terminated.
3084 * @param inv Signature-distinguishing parameter, use US_INV.
3085 *
3086 * @see US_INV
73c04bcf 3087 * @stable ICU 3.2
374ca955
A
3088 */
3089 UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3090
b75a7d8f
A
3091
3092 /**
3093 * Copy constructor.
3094 * @param that The UnicodeString object to copy.
3095 * @stable ICU 2.0
3096 */
3097 UnicodeString(const UnicodeString& that);
3098
3099 /**
3100 * 'Substring' constructor from tail of source string.
3101 * @param src The UnicodeString object to copy.
3102 * @param srcStart The offset into <tt>src</tt> at which to start copying.
374ca955 3103 * @stable ICU 2.2
b75a7d8f
A
3104 */
3105 UnicodeString(const UnicodeString& src, int32_t srcStart);
3106
3107 /**
3108 * 'Substring' constructor from subrange of source string.
3109 * @param src The UnicodeString object to copy.
3110 * @param srcStart The offset into <tt>src</tt> at which to start copying.
3111 * @param srcLength The number of characters from <tt>src</tt> to copy.
374ca955 3112 * @stable ICU 2.2
b75a7d8f
A
3113 */
3114 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3115
3116 /**
3117 * Clone this object, an instance of a subclass of Replaceable.
3118 * Clones can be used concurrently in multiple threads.
3119 * If a subclass does not implement clone(), or if an error occurs,
3120 * then NULL is returned.
3121 * The clone functions in all subclasses return a pointer to a Replaceable
3122 * because some compilers do not support covariant (same-as-this)
3123 * return types; cast to the appropriate subclass if necessary.
3124 * The caller must delete the clone.
3125 *
3126 * @return a clone of this object
3127 *
3128 * @see Replaceable::clone
3129 * @see getDynamicClassID
374ca955 3130 * @stable ICU 2.6
b75a7d8f
A
3131 */
3132 virtual Replaceable *clone() const;
3133
374ca955 3134 /** Destructor.
b75a7d8f
A
3135 * @stable ICU 2.0
3136 */
374ca955 3137 virtual ~UnicodeString();
b75a7d8f 3138
729e4ab9
A
3139 /**
3140 * Create a UnicodeString from a UTF-8 string.
3141 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3142 * Calls u_strFromUTF8WithSub().
3143 *
3144 * @param utf8 UTF-8 input string.
3145 * Note that a StringPiece can be implicitly constructed
3146 * from a std::string or a NUL-terminated const char * string.
3147 * @return A UnicodeString with equivalent UTF-16 contents.
3148 * @see toUTF8
3149 * @see toUTF8String
3150 * @stable ICU 4.2
3151 */
3152 static UnicodeString fromUTF8(const StringPiece &utf8);
3153
3154 /**
3155 * Create a UnicodeString from a UTF-32 string.
3156 * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
3157 * Calls u_strFromUTF32WithSub().
3158 *
3159 * @param utf32 UTF-32 input string. Must not be NULL.
3160 * @param length Length of the input string, or -1 if NUL-terminated.
3161 * @return A UnicodeString with equivalent UTF-16 contents.
3162 * @see toUTF32
3163 * @stable ICU 4.2
3164 */
3165 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
b75a7d8f
A
3166
3167 /* Miscellaneous operations */
3168
3169 /**
3170 * Unescape a string of characters and return a string containing
3171 * the result. The following escape sequences are recognized:
3172 *
374ca955
A
3173 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
3174 * \\Uhhhhhhhh 8 hex digits
3175 * \\xhh 1-2 hex digits
3176 * \\ooo 1-3 octal digits; o in [0-7]
3177 * \\cX control-X; X is masked with 0x1F
b75a7d8f
A
3178 *
3179 * as well as the standard ANSI C escapes:
3180 *
374ca955
A
3181 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
3182 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
729e4ab9 3183 * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
b75a7d8f
A
3184 *
3185 * Anything else following a backslash is generically escaped. For
374ca955 3186 * example, "[a\\-z]" returns "[a-z]".
b75a7d8f
A
3187 *
3188 * If an escape sequence is ill-formed, this method returns an empty
374ca955 3189 * string. An example of an ill-formed sequence is "\\u" followed by
b75a7d8f
A
3190 * fewer than 4 hex digits.
3191 *
3192 * This function is similar to u_unescape() but not identical to it.
3193 * The latter takes a source char*, so it does escape recognition
3194 * and also invariant conversion.
3195 *
3196 * @return a string with backslash escapes interpreted, or an
3197 * empty string on error.
3198 * @see UnicodeString#unescapeAt()
3199 * @see u_unescape()
3200 * @see u_unescapeAt()
3201 * @stable ICU 2.0
3202 */
3203 UnicodeString unescape() const;
3204
3205 /**
3206 * Unescape a single escape sequence and return the represented
3207 * character. See unescape() for a listing of the recognized escape
3208 * sequences. The character at offset-1 is assumed (without
3209 * checking) to be a backslash. If the escape sequence is
51004dcb 3210 * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
b75a7d8f
A
3211 * returned.
3212 *
3213 * @param offset an input output parameter. On input, it is the
3214 * offset into this string where the escape sequence is located,
3215 * after the initial backslash. On output, it is advanced after the
3216 * last character parsed. On error, it is not advanced at all.
3217 * @return the character represented by the escape sequence at
51004dcb 3218 * offset, or U_SENTINEL=-1 on error.
b75a7d8f
A
3219 * @see UnicodeString#unescape()
3220 * @see u_unescape()
3221 * @see u_unescapeAt()
3222 * @stable ICU 2.0
3223 */
3224 UChar32 unescapeAt(int32_t &offset) const;
3225
3226 /**
374ca955 3227 * ICU "poor man's RTTI", returns a UClassID for this class.
b75a7d8f 3228 *
374ca955 3229 * @stable ICU 2.2
b75a7d8f 3230 */
374ca955 3231 static UClassID U_EXPORT2 getStaticClassID();
b75a7d8f
A
3232
3233 /**
374ca955 3234 * ICU "poor man's RTTI", returns a UClassID for the actual class.
b75a7d8f 3235 *
374ca955 3236 * @stable ICU 2.2
b75a7d8f 3237 */
374ca955 3238 virtual UClassID getDynamicClassID() const;
b75a7d8f
A
3239
3240 //========================================
3241 // Implementation methods
3242 //========================================
3243
3244protected:
3245 /**
3246 * Implement Replaceable::getLength() (see jitterbug 1027).
374ca955 3247 * @stable ICU 2.4
b75a7d8f
A
3248 */
3249 virtual int32_t getLength() const;
3250
3251 /**
3252 * The change in Replaceable to use virtual getCharAt() allows
3253 * UnicodeString::charAt() to be inline again (see jitterbug 709).
374ca955 3254 * @stable ICU 2.4
b75a7d8f
A
3255 */
3256 virtual UChar getCharAt(int32_t offset) const;
3257
3258 /**
3259 * The change in Replaceable to use virtual getChar32At() allows
3260 * UnicodeString::char32At() to be inline again (see jitterbug 709).
374ca955 3261 * @stable ICU 2.4
b75a7d8f
A
3262 */
3263 virtual UChar32 getChar32At(int32_t offset) const;
3264
3265private:
729e4ab9
A
3266 // For char* constructors. Could be made public.
3267 UnicodeString &setToUTF8(const StringPiece &utf8);
3268 // For extract(char*).
3269 // We could make a toUTF8(target, capacity, errorCode) public but not
3270 // this version: New API will be cleaner if we make callers create substrings
3271 // rather than having start+length on every method,
3272 // and it should take a UErrorCode&.
3273 int32_t
3274 toUTF8(int32_t start, int32_t len,
3275 char *target, int32_t capacity) const;
3276
51004dcb
A
3277 /**
3278 * Internal string contents comparison, called by operator==.
3279 * Requires: this & text not bogus and have same lengths.
3280 */
3281 UBool doEquals(const UnicodeString &text, int32_t len) const;
b75a7d8f
A
3282
3283 inline int8_t
3284 doCompare(int32_t start,
3285 int32_t length,
3286 const UnicodeString& srcText,
3287 int32_t srcStart,
3288 int32_t srcLength) const;
3289
3290 int8_t doCompare(int32_t start,
3291 int32_t length,
3292 const UChar *srcChars,
3293 int32_t srcStart,
3294 int32_t srcLength) const;
3295
3296 inline int8_t
3297 doCompareCodePointOrder(int32_t start,
3298 int32_t length,
3299 const UnicodeString& srcText,
3300 int32_t srcStart,
3301 int32_t srcLength) const;
3302
3303 int8_t doCompareCodePointOrder(int32_t start,
3304 int32_t length,
3305 const UChar *srcChars,
3306 int32_t srcStart,
3307 int32_t srcLength) const;
3308
3309 inline int8_t
3310 doCaseCompare(int32_t start,
3311 int32_t length,
3312 const UnicodeString &srcText,
3313 int32_t srcStart,
3314 int32_t srcLength,
3315 uint32_t options) const;
3316
3317 int8_t
3318 doCaseCompare(int32_t start,
3319 int32_t length,
3320 const UChar *srcChars,
3321 int32_t srcStart,
3322 int32_t srcLength,
3323 uint32_t options) const;
3324
3325 int32_t doIndexOf(UChar c,
3326 int32_t start,
3327 int32_t length) const;
3328
3329 int32_t doIndexOf(UChar32 c,
3330 int32_t start,
3331 int32_t length) const;
3332
3333 int32_t doLastIndexOf(UChar c,
3334 int32_t start,
3335 int32_t length) const;
3336
3337 int32_t doLastIndexOf(UChar32 c,
3338 int32_t start,
3339 int32_t length) const;
3340
374ca955
A
3341 void doExtract(int32_t start,
3342 int32_t length,
3343 UChar *dst,
b75a7d8f
A
3344 int32_t dstStart) const;
3345
3346 inline void doExtract(int32_t start,
3347 int32_t length,
3348 UnicodeString& target) const;
3349
3350 inline UChar doCharAt(int32_t offset) const;
3351
374ca955
A
3352 UnicodeString& doReplace(int32_t start,
3353 int32_t length,
3354 const UnicodeString& srcText,
3355 int32_t srcStart,
b75a7d8f
A
3356 int32_t srcLength);
3357
374ca955
A
3358 UnicodeString& doReplace(int32_t start,
3359 int32_t length,
3360 const UChar *srcChars,
3361 int32_t srcStart,
b75a7d8f
A
3362 int32_t srcLength);
3363
3364 UnicodeString& doReverse(int32_t start,
3365 int32_t length);
3366
3367 // calculate hash code
3368 int32_t doHashCode(void) const;
3369
3370 // get pointer to start of array
46f4442e 3371 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
b75a7d8f
A
3372 inline UChar* getArrayStart(void);
3373 inline const UChar* getArrayStart(void) const;
3374
46f4442e
A
3375 // A UnicodeString object (not necessarily its current buffer)
3376 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3377 inline UBool isWritable() const;
3378
3379 // Is the current buffer writable?
3380 inline UBool isBufferWritable() const;
3381
3382 // None of the following does releaseArray().
3383 inline void setLength(int32_t len); // sets only fShortLength and fLength
3384 inline void setToEmpty(); // sets fFlags=kShortString
46f4442e
A
3385 inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3386
b75a7d8f
A
3387 // allocate the array; result may be fStackBuffer
3388 // sets refCount to 1 if appropriate
3389 // sets fArray, fCapacity, and fFlags
3390 // returns boolean for success or failure
3391 UBool allocate(int32_t capacity);
3392
3393 // release the array if owned
3394 void releaseArray(void);
3395
3396 // turn a bogus string into an empty one
3397 void unBogus();
3398
3399 // implements assignment operator, copy constructor, and fastCopyFrom()
3400 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3401
3402 // Pin start and limit to acceptable values.
3403 inline void pinIndex(int32_t& start) const;
3404 inline void pinIndices(int32_t& start,
3405 int32_t& length) const;
3406
374ca955
A
3407#if !UCONFIG_NO_CONVERSION
3408
b75a7d8f
A
3409 /* Internal extract() using UConverter. */
3410 int32_t doExtract(int32_t start, int32_t length,
3411 char *dest, int32_t destCapacity,
3412 UConverter *cnv,
3413 UErrorCode &errorCode) const;
3414
3415 /*
3416 * Real constructor for converting from codepage data.
3417 * It assumes that it is called with !fRefCounted.
3418 *
3419 * If <code>codepage==0</code>, then the default converter
3420 * is used for the platform encoding.
3421 * If <code>codepage</code> is an empty string (<code>""</code>),
3422 * then a simple conversion is performed on the codepage-invariant
3423 * subset ("invariant characters") of the platform encoding. See utypes.h.
3424 */
3425 void doCodepageCreate(const char *codepageData,
3426 int32_t dataLength,
3427 const char *codepage);
3428
3429 /*
3430 * Worker function for creating a UnicodeString from
3431 * a codepage string using a UConverter.
3432 */
3433 void
3434 doCodepageCreate(const char *codepageData,
3435 int32_t dataLength,
3436 UConverter *converter,
3437 UErrorCode &status);
374ca955
A
3438
3439#endif
3440
b75a7d8f
A
3441 /*
3442 * This function is called when write access to the array
3443 * is necessary.
3444 *
3445 * We need to make a copy of the array if
3446 * the buffer is read-only, or
3447 * the buffer is refCounted (shared), and refCount>1, or
3448 * the buffer is too small.
3449 *
3450 * Return FALSE if memory could not be allocated.
3451 */
3452 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3453 int32_t growCapacity = -1,
3454 UBool doCopyArray = TRUE,
3455 int32_t **pBufferToDelete = 0,
3456 UBool forceClone = FALSE);
3457
4388f060
A
3458 /**
3459 * Common function for UnicodeString case mappings.
3460 * The stringCaseMapper has the same type UStringCaseMapper
3461 * as in ustr_imp.h for ustrcase_map().
3462 */
b75a7d8f 3463 UnicodeString &
4388f060 3464 caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
b75a7d8f
A
3465
3466 // ref counting
3467 void addRef(void);
3468 int32_t removeRef(void);
3469 int32_t refCount(void) const;
3470
3471 // constants
3472 enum {
4388f060
A
3473 // Set the stack buffer size so that sizeof(UnicodeString) is,
3474 // naturally (without padding), a multiple of sizeof(pointer).
3475 US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
b75a7d8f
A
3476 kInvalidUChar=0xffff, // invalid UChar index
3477 kGrowSize=128, // grow size for this buffer
3478 kInvalidHashCode=0, // invalid hash code
3479 kEmptyHashCode=1, // hash code for empty string
3480
3481 // bit flag values for fFlags
3482 kIsBogus=1, // this string is bogus, i.e., not valid or NULL
4388f060 3483 kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
b75a7d8f
A
3484 kRefCounted=4, // there is a refCount field before the characters in fArray
3485 kBufferIsReadonly=8,// do not write to this buffer
3486 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3487 // and releaseBuffer(newLength) must be called
3488
3489 // combined values for convenience
3490 kShortString=kUsingStackBuffer,
3491 kLongString=kRefCounted,
3492 kReadonlyAlias=kBufferIsReadonly,
3493 kWritableAlias=0
3494 };
3495
b75a7d8f 3496 friend class StringThreadTest;
4388f060 3497 friend class UnicodeStringAppendable;
b75a7d8f 3498
46f4442e
A
3499 union StackBufferOrFields; // forward declaration necessary before friend declaration
3500 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3501
b75a7d8f
A
3502 /*
3503 * The following are all the class fields that are stored
3504 * in each UnicodeString object.
3505 * Note that UnicodeString has virtual functions,
3506 * therefore there is an implicit vtable pointer
3507 * as the first real field.
4388f060 3508 * The fields should be aligned such that no padding is necessary.
b75a7d8f
A
3509 * On 32-bit machines, the size should be 32 bytes,
3510 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4388f060
A
3511 *
3512 * We use a hack to achieve this.
3513 *
3514 * With at least some compilers, each of the following is forced to
3515 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3516 * rounded up with additional padding if the fields do not already fit that requirement:
3517 * - sizeof(class UnicodeString)
3518 * - offsetof(UnicodeString, fUnion)
3519 * - sizeof(fUnion)
3520 * - sizeof(fFields)
3521 *
3522 * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3523 * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3524 * (Padding at the end of fFields is ok:
3525 * As long as there is no padding after fStackBuffer, it is not wasted space.)
3526 *
3527 * We further assume that the compiler does not reorder the fields,
3528 * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3529 * with at most some padding (but no other field) in between.
3530 * (Padding there would be wasted space, but functionally harmless.)
3531 *
3532 * We use a few more sizeof(pointer)'s chunks of space with
3533 * fRestOfStackBuffer, fShortLength and fFlags,
3534 * to get up exactly to the intended sizeof(UnicodeString).
b75a7d8f
A
3535 */
3536 // (implicit) *vtable;
46f4442e
A
3537 union StackBufferOrFields {
3538 // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3539 // else fFields is used
4388f060 3540 UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
46f4442e 3541 struct {
4388f060
A
3542 UChar *fArray; // the Unicode data
3543 int32_t fCapacity; // capacity of fArray (in UChars)
3544 int32_t fLength; // number of characters in fArray if >127; else undefined
46f4442e
A
3545 } fFields;
3546 } fUnion;
4388f060
A
3547 UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
3548 int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
3549 uint8_t fFlags; // bit flags: see constants above
b75a7d8f
A
3550};
3551
374ca955
A
3552/**
3553 * Create a new UnicodeString with the concatenation of two others.
3554 *
3555 * @param s1 The first string to be copied to the new one.
3556 * @param s2 The second string to be copied to the new one, after s1.
3557 * @return UnicodeString(s1).append(s2)
73c04bcf 3558 * @stable ICU 2.8
374ca955
A
3559 */
3560U_COMMON_API UnicodeString U_EXPORT2
3561operator+ (const UnicodeString &s1, const UnicodeString &s2);
3562
b75a7d8f
A
3563//========================================
3564// Inline members
3565//========================================
3566
3567//========================================
3568// Privates
3569//========================================
3570
3571inline void
3572UnicodeString::pinIndex(int32_t& start) const
3573{
3574 // pin index
3575 if(start < 0) {
3576 start = 0;
46f4442e
A
3577 } else if(start > length()) {
3578 start = length();
b75a7d8f
A
3579 }
3580}
3581
3582inline void
3583UnicodeString::pinIndices(int32_t& start,
3584 int32_t& _length) const
3585{
3586 // pin indices
46f4442e 3587 int32_t len = length();
b75a7d8f
A
3588 if(start < 0) {
3589 start = 0;
46f4442e
A
3590 } else if(start > len) {
3591 start = len;
b75a7d8f
A
3592 }
3593 if(_length < 0) {
3594 _length = 0;
46f4442e
A
3595 } else if(_length > (len - start)) {
3596 _length = (len - start);
b75a7d8f
A
3597 }
3598}
3599
374ca955 3600inline UChar*
b75a7d8f 3601UnicodeString::getArrayStart()
46f4442e 3602{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
b75a7d8f 3603
374ca955 3604inline const UChar*
b75a7d8f 3605UnicodeString::getArrayStart() const
46f4442e 3606{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
b75a7d8f 3607
51004dcb
A
3608//========================================
3609// Default constructor
3610//========================================
3611
3612inline
3613UnicodeString::UnicodeString()
3614 : fShortLength(0),
3615 fFlags(kShortString)
3616{}
3617
b75a7d8f
A
3618//========================================
3619// Read-only implementation methods
3620//========================================
374ca955 3621inline int32_t
b75a7d8f 3622UnicodeString::length() const
46f4442e 3623{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
b75a7d8f 3624
374ca955 3625inline int32_t
b75a7d8f 3626UnicodeString::getCapacity() const
46f4442e 3627{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
b75a7d8f 3628
374ca955 3629inline int32_t
b75a7d8f
A
3630UnicodeString::hashCode() const
3631{ return doHashCode(); }
3632
374ca955 3633inline UBool
b75a7d8f
A
3634UnicodeString::isBogus() const
3635{ return (UBool)(fFlags & kIsBogus); }
3636
46f4442e
A
3637inline UBool
3638UnicodeString::isWritable() const
3639{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3640
3641inline UBool
3642UnicodeString::isBufferWritable() const
3643{
3644 return (UBool)(
3645 !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3646 (!(fFlags&kRefCounted) || refCount()==1));
3647}
3648
b75a7d8f
A
3649inline const UChar *
3650UnicodeString::getBuffer() const {
46f4442e 3651 if(fFlags&(kIsBogus|kOpenGetBuffer)) {
b75a7d8f 3652 return 0;
46f4442e
A
3653 } else if(fFlags&kUsingStackBuffer) {
3654 return fUnion.fStackBuffer;
3655 } else {
3656 return fUnion.fFields.fArray;
b75a7d8f
A
3657 }
3658}
3659
3660//========================================
3661// Read-only alias methods
3662//========================================
3663inline int8_t
3664UnicodeString::doCompare(int32_t start,
46f4442e 3665 int32_t thisLength,
b75a7d8f
A
3666 const UnicodeString& srcText,
3667 int32_t srcStart,
3668 int32_t srcLength) const
3669{
3670 if(srcText.isBogus()) {
3671 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3672 } else {
3673 srcText.pinIndices(srcStart, srcLength);
46f4442e 3674 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
b75a7d8f
A
3675 }
3676}
3677
3678inline UBool
3679UnicodeString::operator== (const UnicodeString& text) const
3680{
3681 if(isBogus()) {
3682 return text.isBogus();
3683 } else {
46f4442e 3684 int32_t len = length(), textLength = text.length();
51004dcb 3685 return !text.isBogus() && len == textLength && doEquals(text, len);
b75a7d8f
A
3686 }
3687}
3688
3689inline UBool
3690UnicodeString::operator!= (const UnicodeString& text) const
3691{ return (! operator==(text)); }
3692
3693inline UBool
3694UnicodeString::operator> (const UnicodeString& text) const
46f4442e 3695{ return doCompare(0, length(), text, 0, text.length()) == 1; }
b75a7d8f
A
3696
3697inline UBool
3698UnicodeString::operator< (const UnicodeString& text) const
46f4442e 3699{ return doCompare(0, length(), text, 0, text.length()) == -1; }
b75a7d8f
A
3700
3701inline UBool
3702UnicodeString::operator>= (const UnicodeString& text) const
46f4442e 3703{ return doCompare(0, length(), text, 0, text.length()) != -1; }
b75a7d8f
A
3704
3705inline UBool
3706UnicodeString::operator<= (const UnicodeString& text) const
46f4442e 3707{ return doCompare(0, length(), text, 0, text.length()) != 1; }
b75a7d8f 3708
374ca955 3709inline int8_t
b75a7d8f 3710UnicodeString::compare(const UnicodeString& text) const
46f4442e 3711{ return doCompare(0, length(), text, 0, text.length()); }
b75a7d8f 3712
374ca955 3713inline int8_t
b75a7d8f
A
3714UnicodeString::compare(int32_t start,
3715 int32_t _length,
3716 const UnicodeString& srcText) const
46f4442e 3717{ return doCompare(start, _length, srcText, 0, srcText.length()); }
b75a7d8f 3718
374ca955 3719inline int8_t
b75a7d8f
A
3720UnicodeString::compare(const UChar *srcChars,
3721 int32_t srcLength) const
46f4442e 3722{ return doCompare(0, length(), srcChars, 0, srcLength); }
b75a7d8f 3723
374ca955 3724inline int8_t
b75a7d8f
A
3725UnicodeString::compare(int32_t start,
3726 int32_t _length,
3727 const UnicodeString& srcText,
3728 int32_t srcStart,
3729 int32_t srcLength) const
3730{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3731
3732inline int8_t
3733UnicodeString::compare(int32_t start,
3734 int32_t _length,
3735 const UChar *srcChars) const
3736{ return doCompare(start, _length, srcChars, 0, _length); }
3737
374ca955 3738inline int8_t
b75a7d8f
A
3739UnicodeString::compare(int32_t start,
3740 int32_t _length,
3741 const UChar *srcChars,
3742 int32_t srcStart,
3743 int32_t srcLength) const
3744{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3745
3746inline int8_t
3747UnicodeString::compareBetween(int32_t start,
3748 int32_t limit,
3749 const UnicodeString& srcText,
3750 int32_t srcStart,
3751 int32_t srcLimit) const
374ca955 3752{ return doCompare(start, limit - start,
b75a7d8f
A
3753 srcText, srcStart, srcLimit - srcStart); }
3754
3755inline int8_t
3756UnicodeString::doCompareCodePointOrder(int32_t start,
46f4442e 3757 int32_t thisLength,
b75a7d8f
A
3758 const UnicodeString& srcText,
3759 int32_t srcStart,
3760 int32_t srcLength) const
3761{
3762 if(srcText.isBogus()) {
3763 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3764 } else {
3765 srcText.pinIndices(srcStart, srcLength);
46f4442e 3766 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
b75a7d8f
A
3767 }
3768}
3769
374ca955 3770inline int8_t
b75a7d8f 3771UnicodeString::compareCodePointOrder(const UnicodeString& text) const
46f4442e 3772{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
b75a7d8f 3773
374ca955 3774inline int8_t
b75a7d8f
A
3775UnicodeString::compareCodePointOrder(int32_t start,
3776 int32_t _length,
3777 const UnicodeString& srcText) const
46f4442e 3778{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
b75a7d8f 3779
374ca955 3780inline int8_t
b75a7d8f
A
3781UnicodeString::compareCodePointOrder(const UChar *srcChars,
3782 int32_t srcLength) const
46f4442e 3783{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
b75a7d8f 3784
374ca955 3785inline int8_t
b75a7d8f
A
3786UnicodeString::compareCodePointOrder(int32_t start,
3787 int32_t _length,
3788 const UnicodeString& srcText,
3789 int32_t srcStart,
3790 int32_t srcLength) const
3791{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3792
3793inline int8_t
3794UnicodeString::compareCodePointOrder(int32_t start,
3795 int32_t _length,
3796 const UChar *srcChars) const
3797{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3798
374ca955 3799inline int8_t
b75a7d8f
A
3800UnicodeString::compareCodePointOrder(int32_t start,
3801 int32_t _length,
3802 const UChar *srcChars,
3803 int32_t srcStart,
3804 int32_t srcLength) const
3805{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3806
3807inline int8_t
3808UnicodeString::compareCodePointOrderBetween(int32_t start,
3809 int32_t limit,
3810 const UnicodeString& srcText,
3811 int32_t srcStart,
3812 int32_t srcLimit) const
374ca955 3813{ return doCompareCodePointOrder(start, limit - start,
b75a7d8f
A
3814 srcText, srcStart, srcLimit - srcStart); }
3815
3816inline int8_t
3817UnicodeString::doCaseCompare(int32_t start,
46f4442e 3818 int32_t thisLength,
b75a7d8f
A
3819 const UnicodeString &srcText,
3820 int32_t srcStart,
3821 int32_t srcLength,
3822 uint32_t options) const
3823{
3824 if(srcText.isBogus()) {
3825 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3826 } else {
3827 srcText.pinIndices(srcStart, srcLength);
46f4442e 3828 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
b75a7d8f
A
3829 }
3830}
3831
374ca955 3832inline int8_t
b75a7d8f 3833UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
46f4442e 3834 return doCaseCompare(0, length(), text, 0, text.length(), options);
b75a7d8f
A
3835}
3836
374ca955 3837inline int8_t
b75a7d8f
A
3838UnicodeString::caseCompare(int32_t start,
3839 int32_t _length,
3840 const UnicodeString &srcText,
3841 uint32_t options) const {
46f4442e 3842 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
b75a7d8f
A
3843}
3844
374ca955 3845inline int8_t
b75a7d8f
A
3846UnicodeString::caseCompare(const UChar *srcChars,
3847 int32_t srcLength,
3848 uint32_t options) const {
46f4442e 3849 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
b75a7d8f
A
3850}
3851
374ca955 3852inline int8_t
b75a7d8f
A
3853UnicodeString::caseCompare(int32_t start,
3854 int32_t _length,
3855 const UnicodeString &srcText,
3856 int32_t srcStart,
3857 int32_t srcLength,
3858 uint32_t options) const {
3859 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3860}
3861
3862inline int8_t
3863UnicodeString::caseCompare(int32_t start,
3864 int32_t _length,
3865 const UChar *srcChars,
3866 uint32_t options) const {
3867 return doCaseCompare(start, _length, srcChars, 0, _length, options);
3868}
3869
374ca955 3870inline int8_t
b75a7d8f
A
3871UnicodeString::caseCompare(int32_t start,
3872 int32_t _length,
3873 const UChar *srcChars,
3874 int32_t srcStart,
3875 int32_t srcLength,
3876 uint32_t options) const {
3877 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3878}
3879
3880inline int8_t
3881UnicodeString::caseCompareBetween(int32_t start,
3882 int32_t limit,
3883 const UnicodeString &srcText,
3884 int32_t srcStart,
3885 int32_t srcLimit,
3886 uint32_t options) const {
3887 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3888}
3889
374ca955 3890inline int32_t
b75a7d8f
A
3891UnicodeString::indexOf(const UnicodeString& srcText,
3892 int32_t srcStart,
3893 int32_t srcLength,
3894 int32_t start,
3895 int32_t _length) const
3896{
3897 if(!srcText.isBogus()) {
3898 srcText.pinIndices(srcStart, srcLength);
3899 if(srcLength > 0) {
3900 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3901 }
3902 }
3903 return -1;
3904}
3905
374ca955 3906inline int32_t
b75a7d8f 3907UnicodeString::indexOf(const UnicodeString& text) const
46f4442e 3908{ return indexOf(text, 0, text.length(), 0, length()); }
b75a7d8f 3909
374ca955 3910inline int32_t
b75a7d8f
A
3911UnicodeString::indexOf(const UnicodeString& text,
3912 int32_t start) const {
3913 pinIndex(start);
46f4442e 3914 return indexOf(text, 0, text.length(), start, length() - start);
b75a7d8f
A
3915}
3916
374ca955 3917inline int32_t
b75a7d8f
A
3918UnicodeString::indexOf(const UnicodeString& text,
3919 int32_t start,
3920 int32_t _length) const
46f4442e 3921{ return indexOf(text, 0, text.length(), start, _length); }
b75a7d8f 3922
374ca955 3923inline int32_t
b75a7d8f
A
3924UnicodeString::indexOf(const UChar *srcChars,
3925 int32_t srcLength,
3926 int32_t start) const {
3927 pinIndex(start);
46f4442e 3928 return indexOf(srcChars, 0, srcLength, start, length() - start);
b75a7d8f
A
3929}
3930
374ca955 3931inline int32_t
b75a7d8f
A
3932UnicodeString::indexOf(const UChar *srcChars,
3933 int32_t srcLength,
3934 int32_t start,
3935 int32_t _length) const
3936{ return indexOf(srcChars, 0, srcLength, start, _length); }
3937
374ca955 3938inline int32_t
b75a7d8f
A
3939UnicodeString::indexOf(UChar c,
3940 int32_t start,
3941 int32_t _length) const
3942{ return doIndexOf(c, start, _length); }
3943
374ca955 3944inline int32_t
b75a7d8f
A
3945UnicodeString::indexOf(UChar32 c,
3946 int32_t start,
3947 int32_t _length) const
3948{ return doIndexOf(c, start, _length); }
3949
374ca955 3950inline int32_t
b75a7d8f 3951UnicodeString::indexOf(UChar c) const
46f4442e 3952{ return doIndexOf(c, 0, length()); }
b75a7d8f 3953
374ca955 3954inline int32_t
b75a7d8f 3955UnicodeString::indexOf(UChar32 c) const
46f4442e 3956{ return indexOf(c, 0, length()); }
b75a7d8f 3957
374ca955 3958inline int32_t
b75a7d8f
A
3959UnicodeString::indexOf(UChar c,
3960 int32_t start) const {
3961 pinIndex(start);
46f4442e 3962 return doIndexOf(c, start, length() - start);
b75a7d8f
A
3963}
3964
374ca955 3965inline int32_t
b75a7d8f
A
3966UnicodeString::indexOf(UChar32 c,
3967 int32_t start) const {
3968 pinIndex(start);
46f4442e 3969 return indexOf(c, start, length() - start);
b75a7d8f
A
3970}
3971
374ca955 3972inline int32_t
b75a7d8f
A
3973UnicodeString::lastIndexOf(const UChar *srcChars,
3974 int32_t srcLength,
3975 int32_t start,
3976 int32_t _length) const
3977{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3978
374ca955 3979inline int32_t
b75a7d8f
A
3980UnicodeString::lastIndexOf(const UChar *srcChars,
3981 int32_t srcLength,
3982 int32_t start) const {
3983 pinIndex(start);
46f4442e 3984 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
b75a7d8f
A
3985}
3986
374ca955 3987inline int32_t
b75a7d8f
A
3988UnicodeString::lastIndexOf(const UnicodeString& srcText,
3989 int32_t srcStart,
3990 int32_t srcLength,
3991 int32_t start,
3992 int32_t _length) const
3993{
3994 if(!srcText.isBogus()) {
3995 srcText.pinIndices(srcStart, srcLength);
3996 if(srcLength > 0) {
3997 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3998 }
3999 }
4000 return -1;
4001}
4002
374ca955 4003inline int32_t
b75a7d8f
A
4004UnicodeString::lastIndexOf(const UnicodeString& text,
4005 int32_t start,
4006 int32_t _length) const
46f4442e 4007{ return lastIndexOf(text, 0, text.length(), start, _length); }
b75a7d8f 4008
374ca955 4009inline int32_t
b75a7d8f
A
4010UnicodeString::lastIndexOf(const UnicodeString& text,
4011 int32_t start) const {
4012 pinIndex(start);
46f4442e 4013 return lastIndexOf(text, 0, text.length(), start, length() - start);
b75a7d8f
A
4014}
4015
374ca955 4016inline int32_t
b75a7d8f 4017UnicodeString::lastIndexOf(const UnicodeString& text) const
46f4442e 4018{ return lastIndexOf(text, 0, text.length(), 0, length()); }
b75a7d8f 4019
374ca955 4020inline int32_t
b75a7d8f
A
4021UnicodeString::lastIndexOf(UChar c,
4022 int32_t start,
4023 int32_t _length) const
4024{ return doLastIndexOf(c, start, _length); }
4025
374ca955 4026inline int32_t
b75a7d8f
A
4027UnicodeString::lastIndexOf(UChar32 c,
4028 int32_t start,
4029 int32_t _length) const {
4030 return doLastIndexOf(c, start, _length);
4031}
4032
374ca955 4033inline int32_t
b75a7d8f 4034UnicodeString::lastIndexOf(UChar c) const
46f4442e 4035{ return doLastIndexOf(c, 0, length()); }
b75a7d8f 4036
374ca955 4037inline int32_t
b75a7d8f 4038UnicodeString::lastIndexOf(UChar32 c) const {
46f4442e 4039 return lastIndexOf(c, 0, length());
b75a7d8f
A
4040}
4041
374ca955 4042inline int32_t
b75a7d8f
A
4043UnicodeString::lastIndexOf(UChar c,
4044 int32_t start) const {
4045 pinIndex(start);
46f4442e 4046 return doLastIndexOf(c, start, length() - start);
b75a7d8f
A
4047}
4048
374ca955 4049inline int32_t
b75a7d8f
A
4050UnicodeString::lastIndexOf(UChar32 c,
4051 int32_t start) const {
4052 pinIndex(start);
46f4442e 4053 return lastIndexOf(c, start, length() - start);
b75a7d8f
A
4054}
4055
374ca955 4056inline UBool
b75a7d8f 4057UnicodeString::startsWith(const UnicodeString& text) const
46f4442e 4058{ return compare(0, text.length(), text, 0, text.length()) == 0; }
b75a7d8f 4059
374ca955 4060inline UBool
b75a7d8f
A
4061UnicodeString::startsWith(const UnicodeString& srcText,
4062 int32_t srcStart,
4063 int32_t srcLength) const
4064{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4065
374ca955 4066inline UBool
4388f060
A
4067UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4068 if(srcLength < 0) {
4069 srcLength = u_strlen(srcChars);
4070 }
4071 return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4072}
b75a7d8f 4073
374ca955 4074inline UBool
4388f060
A
4075UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4076 if(srcLength < 0) {
4077 srcLength = u_strlen(srcChars);
4078 }
4079 return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4080}
b75a7d8f 4081
374ca955 4082inline UBool
b75a7d8f 4083UnicodeString::endsWith(const UnicodeString& text) const
46f4442e
A
4084{ return doCompare(length() - text.length(), text.length(),
4085 text, 0, text.length()) == 0; }
b75a7d8f 4086
374ca955 4087inline UBool
b75a7d8f
A
4088UnicodeString::endsWith(const UnicodeString& srcText,
4089 int32_t srcStart,
4090 int32_t srcLength) const {
4091 srcText.pinIndices(srcStart, srcLength);
46f4442e 4092 return doCompare(length() - srcLength, srcLength,
b75a7d8f
A
4093 srcText, srcStart, srcLength) == 0;
4094}
4095
374ca955 4096inline UBool
b75a7d8f
A
4097UnicodeString::endsWith(const UChar *srcChars,
4098 int32_t srcLength) const {
4099 if(srcLength < 0) {
4100 srcLength = u_strlen(srcChars);
4101 }
46f4442e 4102 return doCompare(length() - srcLength, srcLength,
b75a7d8f
A
4103 srcChars, 0, srcLength) == 0;
4104}
4105
374ca955 4106inline UBool
b75a7d8f
A
4107UnicodeString::endsWith(const UChar *srcChars,
4108 int32_t srcStart,
4109 int32_t srcLength) const {
4110 if(srcLength < 0) {
4111 srcLength = u_strlen(srcChars + srcStart);
4112 }
46f4442e 4113 return doCompare(length() - srcLength, srcLength,
b75a7d8f
A
4114 srcChars, srcStart, srcLength) == 0;
4115}
4116
4117//========================================
4118// replace
4119//========================================
374ca955
A
4120inline UnicodeString&
4121UnicodeString::replace(int32_t start,
4122 int32_t _length,
4123 const UnicodeString& srcText)
46f4442e 4124{ return doReplace(start, _length, srcText, 0, srcText.length()); }
b75a7d8f 4125
374ca955
A
4126inline UnicodeString&
4127UnicodeString::replace(int32_t start,
4128 int32_t _length,
4129 const UnicodeString& srcText,
4130 int32_t srcStart,
b75a7d8f
A
4131 int32_t srcLength)
4132{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4133
374ca955
A
4134inline UnicodeString&
4135UnicodeString::replace(int32_t start,
4136 int32_t _length,
b75a7d8f
A
4137 const UChar *srcChars,
4138 int32_t srcLength)
4139{ return doReplace(start, _length, srcChars, 0, srcLength); }
4140
374ca955
A
4141inline UnicodeString&
4142UnicodeString::replace(int32_t start,
4143 int32_t _length,
4144 const UChar *srcChars,
4145 int32_t srcStart,
b75a7d8f
A
4146 int32_t srcLength)
4147{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4148
374ca955
A
4149inline UnicodeString&
4150UnicodeString::replace(int32_t start,
4151 int32_t _length,
b75a7d8f
A
4152 UChar srcChar)
4153{ return doReplace(start, _length, &srcChar, 0, 1); }
4154
374ca955
A
4155inline UnicodeString&
4156UnicodeString::replaceBetween(int32_t start,
4157 int32_t limit,
b75a7d8f 4158 const UnicodeString& srcText)
46f4442e 4159{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
b75a7d8f
A
4160
4161inline UnicodeString&
374ca955
A
4162UnicodeString::replaceBetween(int32_t start,
4163 int32_t limit,
4164 const UnicodeString& srcText,
4165 int32_t srcStart,
b75a7d8f
A
4166 int32_t srcLimit)
4167{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4168
374ca955 4169inline UnicodeString&
b75a7d8f
A
4170UnicodeString::findAndReplace(const UnicodeString& oldText,
4171 const UnicodeString& newText)
46f4442e
A
4172{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4173 newText, 0, newText.length()); }
b75a7d8f 4174
374ca955 4175inline UnicodeString&
b75a7d8f
A
4176UnicodeString::findAndReplace(int32_t start,
4177 int32_t _length,
4178 const UnicodeString& oldText,
4179 const UnicodeString& newText)
46f4442e
A
4180{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4181 newText, 0, newText.length()); }
b75a7d8f
A
4182
4183// ============================
4184// extract
4185// ============================
4186inline void
4187UnicodeString::doExtract(int32_t start,
4188 int32_t _length,
4189 UnicodeString& target) const
46f4442e 4190{ target.replace(0, target.length(), *this, start, _length); }
b75a7d8f 4191
374ca955
A
4192inline void
4193UnicodeString::extract(int32_t start,
4194 int32_t _length,
4195 UChar *target,
b75a7d8f
A
4196 int32_t targetStart) const
4197{ doExtract(start, _length, target, targetStart); }
4198
374ca955 4199inline void
b75a7d8f
A
4200UnicodeString::extract(int32_t start,
4201 int32_t _length,
4202 UnicodeString& target) const
4203{ doExtract(start, _length, target); }
4204
374ca955
A
4205#if !UCONFIG_NO_CONVERSION
4206
b75a7d8f
A
4207inline int32_t
4208UnicodeString::extract(int32_t start,
4209 int32_t _length,
4210 char *dst,
4211 const char *codepage) const
4212
4213{
4214 // This dstSize value will be checked explicitly
4215 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4216}
4217
374ca955
A
4218#endif
4219
4220inline void
4221UnicodeString::extractBetween(int32_t start,
4222 int32_t limit,
4223 UChar *dst,
4224 int32_t dstStart) const {
4225 pinIndex(start);
4226 pinIndex(limit);
4227 doExtract(start, limit - start, dst, dstStart);
4228}
b75a7d8f 4229
729e4ab9
A
4230inline UnicodeString
4231UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4232 return tempSubString(start, limit - start);
4233}
4234
b75a7d8f
A
4235inline UChar
4236UnicodeString::doCharAt(int32_t offset) const
4237{
46f4442e
A
4238 if((uint32_t)offset < (uint32_t)length()) {
4239 return getArrayStart()[offset];
b75a7d8f
A
4240 } else {
4241 return kInvalidUChar;
4242 }
4243}
4244
4245inline UChar
4246UnicodeString::charAt(int32_t offset) const
4247{ return doCharAt(offset); }
4248
4249inline UChar
4250UnicodeString::operator[] (int32_t offset) const
4251{ return doCharAt(offset); }
4252
b75a7d8f
A
4253inline UBool
4254UnicodeString::isEmpty() const {
46f4442e 4255 return fShortLength == 0;
b75a7d8f
A
4256}
4257
4258//========================================
4259// Write implementation methods
4260//========================================
46f4442e
A
4261inline void
4262UnicodeString::setLength(int32_t len) {
4263 if(len <= 127) {
4264 fShortLength = (int8_t)len;
4265 } else {
4266 fShortLength = (int8_t)-1;
4267 fUnion.fFields.fLength = len;
4268 }
4269}
4270
4271inline void
4272UnicodeString::setToEmpty() {
4273 fShortLength = 0;
4274 fFlags = kShortString;
4275}
4276
46f4442e
A
4277inline void
4278UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4279 setLength(len);
4280 fUnion.fFields.fArray = array;
4281 fUnion.fFields.fCapacity = capacity;
4282}
4283
b75a7d8f
A
4284inline const UChar *
4285UnicodeString::getTerminatedBuffer() {
46f4442e 4286 if(!isWritable()) {
b75a7d8f 4287 return 0;
b75a7d8f 4288 } else {
46f4442e
A
4289 UChar *array = getArrayStart();
4290 int32_t len = length();
729e4ab9
A
4291 if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4292 /*
4293 * kRefCounted: Do not write the NUL if the buffer is shared.
4294 * That is mostly safe, except when the length of one copy was modified
4295 * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4296 * Then the NUL would be written into the middle of another copy's string.
4297 */
4298 if(!(fFlags&kBufferIsReadonly)) {
4299 /*
4300 * We must not write to a readonly buffer, but it is known to be
4301 * NUL-terminated if len<capacity.
4302 * A shared, allocated buffer (refCount()>1) must not have its contents
4303 * modified, but the NUL at [len] is beyond the string contents,
4304 * and multiple string objects and threads writing the same NUL into the
4305 * same location is harmless.
4306 * In all other cases, the buffer is fully writable and it is anyway safe
4307 * to write the NUL.
4308 *
4309 * Note: An earlier version of this code tested whether there is a NUL
4310 * at [len] already, but, while safe, it generated lots of warnings from
4311 * tools like valgrind and Purify.
4312 */
4313 array[len] = 0;
4314 }
46f4442e
A
4315 return array;
4316 } else if(cloneArrayIfNeeded(len+1)) {
4317 array = getArrayStart();
4318 array[len] = 0;
4319 return array;
4320 } else {
4321 return 0;
4322 }
b75a7d8f
A
4323 }
4324}
4325
374ca955
A
4326inline UnicodeString&
4327UnicodeString::operator= (UChar ch)
46f4442e 4328{ return doReplace(0, length(), &ch, 0, 1); }
b75a7d8f 4329
374ca955
A
4330inline UnicodeString&
4331UnicodeString::operator= (UChar32 ch)
46f4442e 4332{ return replace(0, length(), ch); }
b75a7d8f 4333
374ca955
A
4334inline UnicodeString&
4335UnicodeString::setTo(const UnicodeString& srcText,
4336 int32_t srcStart,
b75a7d8f
A
4337 int32_t srcLength)
4338{
4339 unBogus();
46f4442e 4340 return doReplace(0, length(), srcText, srcStart, srcLength);
b75a7d8f
A
4341}
4342
374ca955
A
4343inline UnicodeString&
4344UnicodeString::setTo(const UnicodeString& srcText,
b75a7d8f
A
4345 int32_t srcStart)
4346{
4347 unBogus();
4348 srcText.pinIndex(srcStart);
46f4442e 4349 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
b75a7d8f
A
4350}
4351
374ca955 4352inline UnicodeString&
b75a7d8f
A
4353UnicodeString::setTo(const UnicodeString& srcText)
4354{
4388f060 4355 return copyFrom(srcText);
b75a7d8f
A
4356}
4357
374ca955 4358inline UnicodeString&
b75a7d8f
A
4359UnicodeString::setTo(const UChar *srcChars,
4360 int32_t srcLength)
4361{
4362 unBogus();
46f4442e 4363 return doReplace(0, length(), srcChars, 0, srcLength);
b75a7d8f
A
4364}
4365
374ca955 4366inline UnicodeString&
b75a7d8f
A
4367UnicodeString::setTo(UChar srcChar)
4368{
4369 unBogus();
46f4442e 4370 return doReplace(0, length(), &srcChar, 0, 1);
b75a7d8f
A
4371}
4372
374ca955 4373inline UnicodeString&
b75a7d8f
A
4374UnicodeString::setTo(UChar32 srcChar)
4375{
4376 unBogus();
46f4442e 4377 return replace(0, length(), srcChar);
b75a7d8f
A
4378}
4379
374ca955
A
4380inline UnicodeString&
4381UnicodeString::append(const UnicodeString& srcText,
4382 int32_t srcStart,
b75a7d8f 4383 int32_t srcLength)
46f4442e 4384{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
b75a7d8f 4385
374ca955 4386inline UnicodeString&
b75a7d8f 4387UnicodeString::append(const UnicodeString& srcText)
46f4442e 4388{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
b75a7d8f 4389
374ca955
A
4390inline UnicodeString&
4391UnicodeString::append(const UChar *srcChars,
4392 int32_t srcStart,
b75a7d8f 4393 int32_t srcLength)
46f4442e 4394{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
b75a7d8f 4395
374ca955 4396inline UnicodeString&
b75a7d8f
A
4397UnicodeString::append(const UChar *srcChars,
4398 int32_t srcLength)
46f4442e 4399{ return doReplace(length(), 0, srcChars, 0, srcLength); }
b75a7d8f 4400
374ca955 4401inline UnicodeString&
b75a7d8f 4402UnicodeString::append(UChar srcChar)
46f4442e 4403{ return doReplace(length(), 0, &srcChar, 0, 1); }
b75a7d8f 4404
374ca955 4405inline UnicodeString&
73c04bcf 4406UnicodeString::operator+= (UChar ch)
46f4442e 4407{ return doReplace(length(), 0, &ch, 0, 1); }
73c04bcf
A
4408
4409inline UnicodeString&
4410UnicodeString::operator+= (UChar32 ch) {
4411 return append(ch);
4412}
4413
4414inline UnicodeString&
4415UnicodeString::operator+= (const UnicodeString& srcText)
46f4442e 4416{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
73c04bcf 4417
374ca955
A
4418inline UnicodeString&
4419UnicodeString::insert(int32_t start,
4420 const UnicodeString& srcText,
4421 int32_t srcStart,
b75a7d8f
A
4422 int32_t srcLength)
4423{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4424
374ca955
A
4425inline UnicodeString&
4426UnicodeString::insert(int32_t start,
b75a7d8f 4427 const UnicodeString& srcText)
46f4442e 4428{ return doReplace(start, 0, srcText, 0, srcText.length()); }
b75a7d8f 4429
374ca955
A
4430inline UnicodeString&
4431UnicodeString::insert(int32_t start,
4432 const UChar *srcChars,
4433 int32_t srcStart,
b75a7d8f
A
4434 int32_t srcLength)
4435{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4436
374ca955
A
4437inline UnicodeString&
4438UnicodeString::insert(int32_t start,
b75a7d8f
A
4439 const UChar *srcChars,
4440 int32_t srcLength)
4441{ return doReplace(start, 0, srcChars, 0, srcLength); }
4442
374ca955
A
4443inline UnicodeString&
4444UnicodeString::insert(int32_t start,
b75a7d8f
A
4445 UChar srcChar)
4446{ return doReplace(start, 0, &srcChar, 0, 1); }
4447
374ca955
A
4448inline UnicodeString&
4449UnicodeString::insert(int32_t start,
b75a7d8f
A
4450 UChar32 srcChar)
4451{ return replace(start, 0, srcChar); }
4452
4453
374ca955 4454inline UnicodeString&
b75a7d8f
A
4455UnicodeString::remove()
4456{
4457 // remove() of a bogus string makes the string empty and non-bogus
729e4ab9
A
4458 // we also un-alias a read-only alias to deal with NUL-termination
4459 // issues with getTerminatedBuffer()
4460 if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4461 setToEmpty();
b75a7d8f 4462 } else {
729e4ab9 4463 fShortLength = 0;
b75a7d8f
A
4464 }
4465 return *this;
4466}
4467
374ca955
A
4468inline UnicodeString&
4469UnicodeString::remove(int32_t start,
b75a7d8f
A
4470 int32_t _length)
4471{
73c04bcf
A
4472 if(start <= 0 && _length == INT32_MAX) {
4473 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4474 return remove();
4475 }
b75a7d8f 4476 return doReplace(start, _length, NULL, 0, 0);
b75a7d8f
A
4477}
4478
374ca955 4479inline UnicodeString&
b75a7d8f
A
4480UnicodeString::removeBetween(int32_t start,
4481 int32_t limit)
4482{ return doReplace(start, limit - start, NULL, 0, 0); }
4483
729e4ab9
A
4484inline UnicodeString &
4485UnicodeString::retainBetween(int32_t start, int32_t limit) {
4486 truncate(limit);
4487 return doReplace(0, start, NULL, 0, 0);
4488}
4489
374ca955 4490inline UBool
b75a7d8f
A
4491UnicodeString::truncate(int32_t targetLength)
4492{
4493 if(isBogus() && targetLength == 0) {
4494 // truncate(0) of a bogus string makes the string empty and non-bogus
4495 unBogus();
4496 return FALSE;
46f4442e
A
4497 } else if((uint32_t)targetLength < (uint32_t)length()) {
4498 setLength(targetLength);
729e4ab9
A
4499 if(fFlags&kBufferIsReadonly) {
4500 fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
4501 }
b75a7d8f
A
4502 return TRUE;
4503 } else {
4504 return FALSE;
4505 }
4506}
4507
374ca955 4508inline UnicodeString&
b75a7d8f 4509UnicodeString::reverse()
46f4442e 4510{ return doReverse(0, length()); }
b75a7d8f 4511
374ca955 4512inline UnicodeString&
b75a7d8f
A
4513UnicodeString::reverse(int32_t start,
4514 int32_t _length)
4515{ return doReverse(start, _length); }
4516
b75a7d8f
A
4517U_NAMESPACE_END
4518
4519#endif