]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/unicode/unistr.h
ICU-6.2.22.tar.gz
[apple/icu.git] / icuSources / common / unicode / unistr.h
CommitLineData
b75a7d8f
A
1/*
2**********************************************************************
374ca955 3* Copyright (C) 1998-2004, International Business Machines
b75a7d8f
A
4* Corporation and others. All Rights Reserved.
5**********************************************************************
6*
7* File unistr.h
8*
9* Modification History:
10*
11* Date Name Description
12* 09/25/98 stephen Creation.
13* 11/11/98 stephen Changed per 11/9 code review.
14* 04/20/99 stephen Overhauled per 4/16 code review.
15* 11/18/99 aliu Made to inherit from Replaceable. Added method
16* handleReplaceBetween(); other methods unchanged.
17* 06/25/01 grhoten Remove dependency on iostream.
18******************************************************************************
19*/
20
21#ifndef UNISTR_H
22#define UNISTR_H
23
b75a7d8f
A
24#include "unicode/rep.h"
25
26struct UConverter; // unicode/ucnv.h
27class StringThreadTest;
28
29#ifndef U_COMPARE_CODE_POINT_ORDER
30/* see also ustring.h and unorm.h */
31/**
32 * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
33 * Compare strings in code point order instead of code unit order.
374ca955 34 * @stable ICU 2.2
b75a7d8f
A
35 */
36#define U_COMPARE_CODE_POINT_ORDER 0x8000
37#endif
38
39#ifndef USTRING_H
40/* see ustring.h */
374ca955 41U_STABLE int32_t U_EXPORT2
b75a7d8f
A
42u_strlen(const UChar *s);
43#endif
44
45U_NAMESPACE_BEGIN
46
47class Locale; // unicode/locid.h
b75a7d8f
A
48class StringCharacterIterator;
49class BreakIterator; // unicode/brkiter.h
50
51/* The <iostream> include has been moved to unicode/ustream.h */
52
374ca955
A
53/**
54 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
55 * which constructs a Unicode string from an invariant-character char * string.
56 * About invariant characters see utypes.h.
57 * This constructor has no runtime dependency on conversion code and is
58 * therefore recommended over ones taking a charset name string
59 * (where the empty string "" indicates invariant-character conversion).
60 *
61 * @draft ICU 3.2
62 */
63#define US_INV UnicodeString::kInvariant
64
b75a7d8f
A
65/**
66 * Unicode String literals in C++.
67 * Dependent on the platform properties, different UnicodeString
68 * constructors should be used to create a UnicodeString object from
69 * a string literal.
70 * The macros are defined for maximum performance.
71 * They work only for strings that contain "invariant characters", i.e.,
72 * only latin letters, digits, and some punctuation.
73 * See utypes.h for details.
74 *
75 * The string parameter must be a C string literal.
76 * The length of the string, not including the terminating
77 * <code>NUL</code>, must be specified as a constant.
78 * The U_STRING_DECL macro should be invoked exactly once for one
79 * such string variable before it is used.
80 * @stable ICU 2.0
81 */
82#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
83# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
84#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
85# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
86#else
374ca955 87# define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
b75a7d8f
A
88#endif
89
90/**
91 * Unicode String literals in C++.
92 * Dependent on the platform properties, different UnicodeString
93 * constructors should be used to create a UnicodeString object from
94 * a string literal.
95 * The macros are defined for improved performance.
96 * They work only for strings that contain "invariant characters", i.e.,
97 * only latin letters, digits, and some punctuation.
98 * See utypes.h for details.
99 *
100 * The string parameter must be a C string literal.
101 * @stable ICU 2.0
102 */
103#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
104# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
105#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
106# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
107#else
374ca955 108# define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
b75a7d8f
A
109#endif
110
111/**
112 * UnicodeString is a string class that stores Unicode characters directly and provides
113 * functionality similar to that of the Java String and StringBuffer classes.
114 * It is a concrete implementation of the abstract class Replaceable (for transliteration).
115 *
116 * The UnicodeString class is not suitable for subclassing.
117 *
118 * <p>For an overview of Unicode strings in C and C++ see the
119 * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
120 *
121 * <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
122 * A Unicode character may be stored with either
374ca955 123 * one code unit &#8212; which is the most common case &#8212; or with a matched pair of
b75a7d8f
A
124 * special code units ("surrogates").
125 * The data type for code units is UChar.<br>
126 * For single-character handling, a Unicode character code <em>point</em> is a value
127 * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
128 *
129 * <p>Indexes and offsets into and lengths of strings always count code units, not code points.
130 * This is the same as with multi-byte char* strings in traditional string handling.
131 * Operations on partial strings typically do not test for code point boundaries.
132 * If necessary, the user needs to take care of such boundaries by testing for the code unit
133 * values or by using functions like
134 * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
135 * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).</p>
136 *
137 * UnicodeString methods are more lenient with regard to input parameter values
138 * than other ICU APIs. In particular:
139 * - If indexes are out of bounds for a UnicodeString object
140 * (<0 or >length()) then they are "pinned" to the nearest boundary.
141 * - If primitive string pointer values (e.g., const UChar * or char *)
142 * for input strings are NULL, then those input string parameters are treated
143 * as if they pointed to an empty string.
144 * However, this is <em>not</em> the case for char * parameters for charset names
145 * or other IDs.
146 * - Most UnicodeString methods do not take a UErrorCode parameter because
147 * there are usually very few opportunities for failure other than a shortage
148 * of memory, error codes in low-level C++ string methods would be inconvenient,
149 * and the error code as the last parameter (ICU convention) would prevent
150 * the use of default parameter values.
151 * Instead, such methods set the UnicodeString into a "bogus" state
152 * (see isBogus()) if an error occurs.
153 *
154 * In string comparisons, two UnicodeString objects that are both "bogus"
155 * compare equal (to be transitive and prevent endless loops in sorting),
156 * and a "bogus" string compares less than any non-"bogus" one.
157 *
158 * Const UnicodeString methods are thread-safe. Multiple threads can use
159 * const methods on the same UnicodeString object simultaneously,
160 * but non-const methods must not be called concurrently (in multiple threads)
161 * with any other (const or non-const) methods.
162 *
163 * Similarly, const UnicodeString & parameters are thread-safe.
164 * One object may be passed in as such a parameter concurrently in multiple threads.
165 * This includes the const UnicodeString & parameters for
166 * copy construction, assignment, and cloning.
167 *
168 * <p>UnicodeString uses several storage methods.
169 * String contents can be stored inside the UnicodeString object itself,
170 * in an allocated and shared buffer, or in an outside buffer that is "aliased".
171 * Most of this is done transparently, but careful aliasing in particular provides
172 * significant performance improvements.
173 * Also, the internal buffer is accessible via special functions.
174 * For details see the
175 * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
176 *
177 * @see utf.h
178 * @see CharacterIterator
179 * @stable ICU 2.0
180 */
181class U_COMMON_API UnicodeString : public Replaceable
182{
183public:
184
374ca955
A
185 /**
186 * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
187 * which constructs a Unicode string from an invariant-character char * string.
188 * Use the macro US_INV instead of the full qualification for this value.
189 *
190 * @see US_INV
191 * @draft ICU 3.2
192 */
193 enum EInvariant {
194 /**
195 * @see EInvariant
196 * @draft ICU 3.2
197 */
198 kInvariant
199 };
200
b75a7d8f
A
201 //========================================
202 // Read-only operations
203 //========================================
204
205 /* Comparison - bitwise only - for international comparison use collation */
206
207 /**
208 * Equality operator. Performs only bitwise comparison.
209 * @param text The UnicodeString to compare to this one.
210 * @return TRUE if <TT>text</TT> contains the same characters as this one,
211 * FALSE otherwise.
212 * @stable ICU 2.0
213 */
214 inline UBool operator== (const UnicodeString& text) const;
215
216 /**
217 * Inequality operator. Performs only bitwise comparison.
218 * @param text The UnicodeString to compare to this one.
219 * @return FALSE if <TT>text</TT> contains the same characters as this one,
220 * TRUE otherwise.
221 * @stable ICU 2.0
222 */
223 inline UBool operator!= (const UnicodeString& text) const;
224
225 /**
226 * Greater than operator. Performs only bitwise comparison.
227 * @param text The UnicodeString to compare to this one.
374ca955
A
228 * @return TRUE if the characters in this are bitwise
229 * greater than the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
230 * @stable ICU 2.0
231 */
232 inline UBool operator> (const UnicodeString& text) const;
233
234 /**
235 * Less than operator. Performs only bitwise comparison.
236 * @param text The UnicodeString to compare to this one.
374ca955
A
237 * @return TRUE if the characters in this are bitwise
238 * less than the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
239 * @stable ICU 2.0
240 */
241 inline UBool operator< (const UnicodeString& text) const;
242
243 /**
244 * Greater than or equal operator. Performs only bitwise comparison.
245 * @param text The UnicodeString to compare to this one.
374ca955
A
246 * @return TRUE if the characters in this are bitwise
247 * greater than or equal to the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
248 * @stable ICU 2.0
249 */
250 inline UBool operator>= (const UnicodeString& text) const;
251
252 /**
253 * Less than or equal operator. Performs only bitwise comparison.
254 * @param text The UnicodeString to compare to this one.
374ca955
A
255 * @return TRUE if the characters in this are bitwise
256 * less than or equal to the characters in <code>text</code>, FALSE otherwise
b75a7d8f
A
257 * @stable ICU 2.0
258 */
259 inline UBool operator<= (const UnicodeString& text) const;
260
261 /**
262 * Compare the characters bitwise in this UnicodeString to
374ca955 263 * the characters in <code>text</code>.
b75a7d8f 264 * @param text The UnicodeString to compare to this one.
374ca955
A
265 * @return The result of bitwise character comparison: 0 if this
266 * contains the same characters as <code>text</code>, -1 if the characters in
267 * this are bitwise less than the characters in <code>text</code>, +1 if the
268 * characters in this are bitwise greater than the characters
269 * in <code>text</code>.
b75a7d8f
A
270 * @stable ICU 2.0
271 */
272 inline int8_t compare(const UnicodeString& text) const;
273
274 /**
374ca955
A
275 * Compare the characters bitwise in the range
276 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
277 * in <TT>text</TT>
b75a7d8f
A
278 * @param start the offset at which the compare operation begins
279 * @param length the number of characters of text to compare.
374ca955
A
280 * @param text the other text to be compared against this string.
281 * @return The result of bitwise character comparison: 0 if this
282 * contains the same characters as <code>text</code>, -1 if the characters in
283 * this are bitwise less than the characters in <code>text</code>, +1 if the
284 * characters in this are bitwise greater than the characters
285 * in <code>text</code>.
b75a7d8f
A
286 * @stable ICU 2.0
287 */
288 inline int8_t compare(int32_t start,
289 int32_t length,
374ca955 290 const UnicodeString& text) const;
b75a7d8f
A
291
292 /**
374ca955
A
293 * Compare the characters bitwise in the range
294 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
295 * in <TT>srcText</TT> in the range
296 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
297 * @param start the offset at which the compare operation begins
298 * @param length the number of characters in this to compare.
299 * @param srcText the text to be compared
300 * @param srcStart the offset into <TT>srcText</TT> to start comparison
301 * @param srcLength the number of characters in <TT>srcText</TT> to compare
374ca955
A
302 * @return The result of bitwise character comparison: 0 if this
303 * contains the same characters as <code>srcText</code>, -1 if the characters in
304 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
305 * characters in this are bitwise greater than the characters
306 * in <code>srcText</code>.
b75a7d8f
A
307 * @stable ICU 2.0
308 */
309 inline int8_t compare(int32_t start,
310 int32_t length,
311 const UnicodeString& srcText,
312 int32_t srcStart,
313 int32_t srcLength) const;
314
315 /**
374ca955 316 * Compare the characters bitwise in this UnicodeString with the first
b75a7d8f
A
317 * <TT>srcLength</TT> characters in <TT>srcChars</TT>.
318 * @param srcChars The characters to compare to this UnicodeString.
319 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
374ca955
A
320 * @return The result of bitwise character comparison: 0 if this
321 * contains the same characters as <code>srcChars</code>, -1 if the characters in
322 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
323 * characters in this are bitwise greater than the characters
324 * in <code>srcChars</code>.
b75a7d8f
A
325 * @stable ICU 2.0
326 */
327 inline int8_t compare(const UChar *srcChars,
328 int32_t srcLength) const;
329
330 /**
374ca955
A
331 * Compare the characters bitwise in the range
332 * [<TT>start</TT>, <TT>start + length</TT>) with the first
b75a7d8f
A
333 * <TT>length</TT> characters in <TT>srcChars</TT>
334 * @param start the offset at which the compare operation begins
335 * @param length the number of characters to compare.
336 * @param srcChars the characters to be compared
374ca955
A
337 * @return The result of bitwise character comparison: 0 if this
338 * contains the same characters as <code>srcChars</code>, -1 if the characters in
339 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
340 * characters in this are bitwise greater than the characters
341 * in <code>srcChars</code>.
b75a7d8f
A
342 * @stable ICU 2.0
343 */
344 inline int8_t compare(int32_t start,
345 int32_t length,
346 const UChar *srcChars) const;
347
348 /**
374ca955
A
349 * Compare the characters bitwise in the range
350 * [<TT>start</TT>, <TT>start + length</TT>) with the characters
351 * in <TT>srcChars</TT> in the range
352 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
353 * @param start the offset at which the compare operation begins
354 * @param length the number of characters in this to compare
355 * @param srcChars the characters to be compared
356 * @param srcStart the offset into <TT>srcChars</TT> to start comparison
357 * @param srcLength the number of characters in <TT>srcChars</TT> to compare
374ca955
A
358 * @return The result of bitwise character comparison: 0 if this
359 * contains the same characters as <code>srcChars</code>, -1 if the characters in
360 * this are bitwise less than the characters in <code>srcChars</code>, +1 if the
361 * characters in this are bitwise greater than the characters
362 * in <code>srcChars</code>.
b75a7d8f
A
363 * @stable ICU 2.0
364 */
365 inline int8_t compare(int32_t start,
366 int32_t length,
367 const UChar *srcChars,
368 int32_t srcStart,
369 int32_t srcLength) const;
370
371 /**
374ca955
A
372 * Compare the characters bitwise in the range
373 * [<TT>start</TT>, <TT>limit</TT>) with the characters
374 * in <TT>srcText</TT> in the range
375 * [<TT>srcStart</TT>, <TT>srcLimit</TT>).
b75a7d8f
A
376 * @param start the offset at which the compare operation begins
377 * @param limit the offset immediately following the compare operation
378 * @param srcText the text to be compared
379 * @param srcStart the offset into <TT>srcText</TT> to start comparison
380 * @param srcLimit the offset into <TT>srcText</TT> to limit comparison
374ca955
A
381 * @return The result of bitwise character comparison: 0 if this
382 * contains the same characters as <code>srcText</code>, -1 if the characters in
383 * this are bitwise less than the characters in <code>srcText</code>, +1 if the
384 * characters in this are bitwise greater than the characters
385 * in <code>srcText</code>.
b75a7d8f
A
386 * @stable ICU 2.0
387 */
388 inline int8_t compareBetween(int32_t start,
389 int32_t limit,
390 const UnicodeString& srcText,
391 int32_t srcStart,
392 int32_t srcLimit) const;
393
394 /**
395 * Compare two Unicode strings in code point order.
396 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
397 * if supplementary characters are present:
398 *
399 * In UTF-16, supplementary characters (with code points U+10000 and above) are
400 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
401 * which means that they compare as less than some other BMP characters like U+feff.
402 * This function compares Unicode strings in code point order.
403 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
404 *
405 * @param text Another string to compare this one to.
406 * @return a negative/zero/positive integer corresponding to whether
407 * this string is less than/equal to/greater than the second one
408 * in code point order
409 * @stable ICU 2.0
410 */
411 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
412
413 /**
414 * Compare two Unicode strings in code point order.
415 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
416 * if supplementary characters are present:
417 *
418 * In UTF-16, supplementary characters (with code points U+10000 and above) are
419 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
420 * which means that they compare as less than some other BMP characters like U+feff.
421 * This function compares Unicode strings in code point order.
422 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
423 *
424 * @param start The start offset in this string at which the compare operation begins.
425 * @param length The number of code units from this string to compare.
426 * @param srcText Another string to compare this one to.
427 * @return a negative/zero/positive integer corresponding to whether
428 * this string is less than/equal to/greater than the second one
429 * in code point order
430 * @stable ICU 2.0
431 */
432 inline int8_t compareCodePointOrder(int32_t start,
433 int32_t length,
434 const UnicodeString& srcText) const;
435
436 /**
437 * Compare two Unicode strings in code point order.
438 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
439 * if supplementary characters are present:
440 *
441 * In UTF-16, supplementary characters (with code points U+10000 and above) are
442 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
443 * which means that they compare as less than some other BMP characters like U+feff.
444 * This function compares Unicode strings in code point order.
445 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
446 *
447 * @param start The start offset in this string at which the compare operation begins.
448 * @param length The number of code units from this string to compare.
449 * @param srcText Another string to compare this one to.
450 * @param srcStart The start offset in that string at which the compare operation begins.
451 * @param srcLength The number of code units from that string to compare.
452 * @return a negative/zero/positive integer corresponding to whether
453 * this string is less than/equal to/greater than the second one
454 * in code point order
455 * @stable ICU 2.0
456 */
457 inline int8_t compareCodePointOrder(int32_t start,
458 int32_t length,
459 const UnicodeString& srcText,
460 int32_t srcStart,
461 int32_t srcLength) const;
462
463 /**
464 * Compare two Unicode strings in code point order.
465 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
466 * if supplementary characters are present:
467 *
468 * In UTF-16, supplementary characters (with code points U+10000 and above) are
469 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
470 * which means that they compare as less than some other BMP characters like U+feff.
471 * This function compares Unicode strings in code point order.
472 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
473 *
474 * @param srcChars A pointer to another string to compare this one to.
475 * @param srcLength The number of code units from that string to compare.
476 * @return a negative/zero/positive integer corresponding to whether
477 * this string is less than/equal to/greater than the second one
478 * in code point order
479 * @stable ICU 2.0
480 */
481 inline int8_t compareCodePointOrder(const UChar *srcChars,
482 int32_t srcLength) const;
483
484 /**
485 * Compare two Unicode strings in code point order.
486 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
487 * if supplementary characters are present:
488 *
489 * In UTF-16, supplementary characters (with code points U+10000 and above) are
490 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
491 * which means that they compare as less than some other BMP characters like U+feff.
492 * This function compares Unicode strings in code point order.
493 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
494 *
495 * @param start The start offset in this string at which the compare operation begins.
496 * @param length The number of code units from this string to compare.
497 * @param srcChars A pointer to another string to compare this one to.
498 * @return a negative/zero/positive integer corresponding to whether
499 * this string is less than/equal to/greater than the second one
500 * in code point order
501 * @stable ICU 2.0
502 */
503 inline int8_t compareCodePointOrder(int32_t start,
504 int32_t length,
505 const UChar *srcChars) const;
506
507 /**
508 * Compare two Unicode strings in code point order.
509 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
510 * if supplementary characters are present:
511 *
512 * In UTF-16, supplementary characters (with code points U+10000 and above) are
513 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
514 * which means that they compare as less than some other BMP characters like U+feff.
515 * This function compares Unicode strings in code point order.
516 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
517 *
518 * @param start The start offset in this string at which the compare operation begins.
519 * @param length The number of code units from this string to compare.
520 * @param srcChars A pointer to another string to compare this one to.
521 * @param srcStart The start offset in that string at which the compare operation begins.
522 * @param srcLength The number of code units from that string to compare.
523 * @return a negative/zero/positive integer corresponding to whether
524 * this string is less than/equal to/greater than the second one
525 * in code point order
526 * @stable ICU 2.0
527 */
528 inline int8_t compareCodePointOrder(int32_t start,
529 int32_t length,
530 const UChar *srcChars,
531 int32_t srcStart,
532 int32_t srcLength) const;
533
534 /**
535 * Compare two Unicode strings in code point order.
536 * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
537 * if supplementary characters are present:
538 *
539 * In UTF-16, supplementary characters (with code points U+10000 and above) are
540 * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
541 * which means that they compare as less than some other BMP characters like U+feff.
542 * This function compares Unicode strings in code point order.
543 * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
544 *
545 * @param start The start offset in this string at which the compare operation begins.
546 * @param limit The offset after the last code unit from this string to compare.
547 * @param srcText Another string to compare this one to.
548 * @param srcStart The start offset in that string at which the compare operation begins.
549 * @param srcLimit The offset after the last code unit from that string to compare.
550 * @return a negative/zero/positive integer corresponding to whether
551 * this string is less than/equal to/greater than the second one
552 * in code point order
553 * @stable ICU 2.0
554 */
555 inline int8_t compareCodePointOrderBetween(int32_t start,
556 int32_t limit,
557 const UnicodeString& srcText,
558 int32_t srcStart,
559 int32_t srcLimit) const;
560
561 /**
562 * Compare two strings case-insensitively using full case folding.
563 * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
564 *
565 * @param text Another string to compare this one to.
566 * @param options A bit set of options:
567 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
568 * Comparison in code unit order with default case folding.
569 *
570 * - U_COMPARE_CODE_POINT_ORDER
571 * Set to choose code point order instead of code unit order
572 * (see u_strCompare for details).
573 *
574 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
575 *
576 * @return A negative, zero, or positive integer indicating the comparison result.
577 * @stable ICU 2.0
578 */
579 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
580
581 /**
582 * Compare two strings case-insensitively using full case folding.
583 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
584 *
585 * @param start The start offset in this string at which the compare operation begins.
586 * @param length The number of code units from this string to compare.
587 * @param srcText Another string to compare this one to.
588 * @param options A bit set of options:
589 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
590 * Comparison in code unit order with default case folding.
591 *
592 * - U_COMPARE_CODE_POINT_ORDER
593 * Set to choose code point order instead of code unit order
594 * (see u_strCompare for details).
595 *
596 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
597 *
598 * @return A negative, zero, or positive integer indicating the comparison result.
599 * @stable ICU 2.0
600 */
601 inline int8_t caseCompare(int32_t start,
602 int32_t length,
603 const UnicodeString& srcText,
604 uint32_t options) const;
605
606 /**
607 * Compare two strings case-insensitively using full case folding.
608 * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
609 *
610 * @param start The start offset in this string at which the compare operation begins.
611 * @param length The number of code units from this string to compare.
612 * @param srcText Another string to compare this one to.
613 * @param srcStart The start offset in that string at which the compare operation begins.
614 * @param srcLength The number of code units from that string to compare.
615 * @param options A bit set of options:
616 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
617 * Comparison in code unit order with default case folding.
618 *
619 * - U_COMPARE_CODE_POINT_ORDER
620 * Set to choose code point order instead of code unit order
621 * (see u_strCompare for details).
622 *
623 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
624 *
625 * @return A negative, zero, or positive integer indicating the comparison result.
626 * @stable ICU 2.0
627 */
628 inline int8_t caseCompare(int32_t start,
629 int32_t length,
630 const UnicodeString& srcText,
631 int32_t srcStart,
632 int32_t srcLength,
633 uint32_t options) const;
634
635 /**
636 * Compare two strings case-insensitively using full case folding.
637 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
638 *
639 * @param srcChars A pointer to another string to compare this one to.
640 * @param srcLength The number of code units from that string to compare.
641 * @param options A bit set of options:
642 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
643 * Comparison in code unit order with default case folding.
644 *
645 * - U_COMPARE_CODE_POINT_ORDER
646 * Set to choose code point order instead of code unit order
647 * (see u_strCompare for details).
648 *
649 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
650 *
651 * @return A negative, zero, or positive integer indicating the comparison result.
652 * @stable ICU 2.0
653 */
654 inline int8_t caseCompare(const UChar *srcChars,
655 int32_t srcLength,
656 uint32_t options) const;
657
658 /**
659 * Compare two strings case-insensitively using full case folding.
660 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
661 *
662 * @param start The start offset in this string at which the compare operation begins.
663 * @param length The number of code units from this string to compare.
664 * @param srcChars A pointer to another string to compare this one to.
665 * @param options A bit set of options:
666 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
667 * Comparison in code unit order with default case folding.
668 *
669 * - U_COMPARE_CODE_POINT_ORDER
670 * Set to choose code point order instead of code unit order
671 * (see u_strCompare for details).
672 *
673 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
674 *
675 * @return A negative, zero, or positive integer indicating the comparison result.
676 * @stable ICU 2.0
677 */
678 inline int8_t caseCompare(int32_t start,
679 int32_t length,
680 const UChar *srcChars,
681 uint32_t options) const;
682
683 /**
684 * Compare two strings case-insensitively using full case folding.
685 * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
686 *
687 * @param start The start offset in this string at which the compare operation begins.
688 * @param length The number of code units from this string to compare.
689 * @param srcChars A pointer to another string to compare this one to.
690 * @param srcStart The start offset in that string at which the compare operation begins.
691 * @param srcLength The number of code units from that string to compare.
692 * @param options A bit set of options:
693 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
694 * Comparison in code unit order with default case folding.
695 *
696 * - U_COMPARE_CODE_POINT_ORDER
697 * Set to choose code point order instead of code unit order
698 * (see u_strCompare for details).
699 *
700 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
701 *
702 * @return A negative, zero, or positive integer indicating the comparison result.
703 * @stable ICU 2.0
704 */
705 inline int8_t caseCompare(int32_t start,
706 int32_t length,
707 const UChar *srcChars,
708 int32_t srcStart,
709 int32_t srcLength,
710 uint32_t options) const;
711
712 /**
713 * Compare two strings case-insensitively using full case folding.
714 * This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).
715 *
716 * @param start The start offset in this string at which the compare operation begins.
717 * @param limit The offset after the last code unit from this string to compare.
718 * @param srcText Another string to compare this one to.
719 * @param srcStart The start offset in that string at which the compare operation begins.
720 * @param srcLimit The offset after the last code unit from that string to compare.
721 * @param options A bit set of options:
722 * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
723 * Comparison in code unit order with default case folding.
724 *
725 * - U_COMPARE_CODE_POINT_ORDER
726 * Set to choose code point order instead of code unit order
727 * (see u_strCompare for details).
728 *
729 * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
730 *
731 * @return A negative, zero, or positive integer indicating the comparison result.
732 * @stable ICU 2.0
733 */
734 inline int8_t caseCompareBetween(int32_t start,
735 int32_t limit,
736 const UnicodeString& srcText,
737 int32_t srcStart,
738 int32_t srcLimit,
739 uint32_t options) const;
740
741 /**
742 * Determine if this starts with the characters in <TT>text</TT>
743 * @param text The text to match.
374ca955 744 * @return TRUE if this starts with the characters in <TT>text</TT>,
b75a7d8f
A
745 * FALSE otherwise
746 * @stable ICU 2.0
747 */
748 inline UBool startsWith(const UnicodeString& text) const;
749
750 /**
374ca955
A
751 * Determine if this starts with the characters in <TT>srcText</TT>
752 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
753 * @param srcText The text to match.
754 * @param srcStart the offset into <TT>srcText</TT> to start matching
755 * @param srcLength the number of characters in <TT>srcText</TT> to match
374ca955 756 * @return TRUE if this starts with the characters in <TT>srcText</TT>,
b75a7d8f
A
757 * FALSE otherwise
758 * @stable ICU 2.0
759 */
760 inline UBool startsWith(const UnicodeString& srcText,
761 int32_t srcStart,
762 int32_t srcLength) const;
763
764 /**
765 * Determine if this starts with the characters in <TT>srcChars</TT>
766 * @param srcChars The characters to match.
767 * @param srcLength the number of characters in <TT>srcChars</TT>
374ca955 768 * @return TRUE if this starts with the characters in <TT>srcChars</TT>,
b75a7d8f
A
769 * FALSE otherwise
770 * @stable ICU 2.0
771 */
772 inline UBool startsWith(const UChar *srcChars,
773 int32_t srcLength) const;
774
775 /**
374ca955
A
776 * Determine if this starts with the characters in <TT>srcChars</TT>
777 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
778 * @param srcChars The characters to match.
779 * @param srcStart the offset into <TT>srcChars</TT> to start matching
780 * @param srcLength the number of characters in <TT>srcChars</TT> to match
781 * @return TRUE if this starts with the characters in <TT>srcChars</TT>, FALSE otherwise
782 * @stable ICU 2.0
783 */
784 inline UBool startsWith(const UChar *srcChars,
785 int32_t srcStart,
786 int32_t srcLength) const;
787
788 /**
789 * Determine if this ends with the characters in <TT>text</TT>
790 * @param text The text to match.
374ca955 791 * @return TRUE if this ends with the characters in <TT>text</TT>,
b75a7d8f
A
792 * FALSE otherwise
793 * @stable ICU 2.0
794 */
795 inline UBool endsWith(const UnicodeString& text) const;
796
797 /**
374ca955
A
798 * Determine if this ends with the characters in <TT>srcText</TT>
799 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
800 * @param srcText The text to match.
801 * @param srcStart the offset into <TT>srcText</TT> to start matching
802 * @param srcLength the number of characters in <TT>srcText</TT> to match
374ca955 803 * @return TRUE if this ends with the characters in <TT>srcText</TT>,
b75a7d8f
A
804 * FALSE otherwise
805 * @stable ICU 2.0
806 */
807 inline UBool endsWith(const UnicodeString& srcText,
808 int32_t srcStart,
809 int32_t srcLength) const;
810
811 /**
812 * Determine if this ends with the characters in <TT>srcChars</TT>
813 * @param srcChars The characters to match.
814 * @param srcLength the number of characters in <TT>srcChars</TT>
374ca955 815 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
b75a7d8f
A
816 * FALSE otherwise
817 * @stable ICU 2.0
818 */
819 inline UBool endsWith(const UChar *srcChars,
820 int32_t srcLength) const;
821
822 /**
374ca955
A
823 * Determine if this ends with the characters in <TT>srcChars</TT>
824 * in the range [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
825 * @param srcChars The characters to match.
826 * @param srcStart the offset into <TT>srcChars</TT> to start matching
827 * @param srcLength the number of characters in <TT>srcChars</TT> to match
374ca955 828 * @return TRUE if this ends with the characters in <TT>srcChars</TT>,
b75a7d8f
A
829 * FALSE otherwise
830 * @stable ICU 2.0
831 */
832 inline UBool endsWith(const UChar *srcChars,
833 int32_t srcStart,
834 int32_t srcLength) const;
835
836
837 /* Searching - bitwise only */
838
839 /**
840 * Locate in this the first occurrence of the characters in <TT>text</TT>,
841 * using bitwise comparison.
842 * @param text The text to search for.
374ca955 843 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
844 * or -1 if not found.
845 * @stable ICU 2.0
846 */
847 inline int32_t indexOf(const UnicodeString& text) const;
848
849 /**
850 * Locate in this the first occurrence of the characters in <TT>text</TT>
851 * starting at offset <TT>start</TT>, using bitwise comparison.
852 * @param text The text to search for.
853 * @param start The offset at which searching will start.
374ca955 854 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
855 * or -1 if not found.
856 * @stable ICU 2.0
857 */
858 inline int32_t indexOf(const UnicodeString& text,
859 int32_t start) const;
860
861 /**
862 * Locate in this the first occurrence in the range
374ca955 863 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
b75a7d8f
A
864 * in <TT>text</TT>, using bitwise comparison.
865 * @param text The text to search for.
866 * @param start The offset at which searching will start.
867 * @param length The number of characters to search
374ca955 868 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
869 * or -1 if not found.
870 * @stable ICU 2.0
871 */
872 inline int32_t indexOf(const UnicodeString& text,
873 int32_t start,
874 int32_t length) const;
875
876 /**
877 * Locate in this the first occurrence in the range
878 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
374ca955
A
879 * in <TT>srcText</TT> in the range
880 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
881 * using bitwise comparison.
b75a7d8f
A
882 * @param srcText The text to search for.
883 * @param srcStart the offset into <TT>srcText</TT> at which
884 * to start matching
885 * @param srcLength the number of characters in <TT>srcText</TT> to match
886 * @param start the offset into this at which to start matching
887 * @param length the number of characters in this to search
374ca955 888 * @return The offset into this of the start of <TT>srcText</TT>,
b75a7d8f
A
889 * or -1 if not found.
890 * @stable ICU 2.0
891 */
892 inline int32_t indexOf(const UnicodeString& srcText,
893 int32_t srcStart,
894 int32_t srcLength,
895 int32_t start,
896 int32_t length) const;
897
898 /**
899 * Locate in this the first occurrence of the characters in
374ca955
A
900 * <TT>srcChars</TT>
901 * starting at offset <TT>start</TT>, using bitwise comparison.
b75a7d8f
A
902 * @param srcChars The text to search for.
903 * @param srcLength the number of characters in <TT>srcChars</TT> to match
904 * @param start the offset into this at which to start matching
374ca955 905 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
906 * or -1 if not found.
907 * @stable ICU 2.0
908 */
909 inline int32_t indexOf(const UChar *srcChars,
910 int32_t srcLength,
911 int32_t start) const;
912
913 /**
914 * Locate in this the first occurrence in the range
374ca955 915 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
b75a7d8f
A
916 * in <TT>srcChars</TT>, using bitwise comparison.
917 * @param srcChars The text to search for.
918 * @param srcLength the number of characters in <TT>srcChars</TT>
919 * @param start The offset at which searching will start.
920 * @param length The number of characters to search
374ca955 921 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
922 * or -1 if not found.
923 * @stable ICU 2.0
924 */
925 inline int32_t indexOf(const UChar *srcChars,
926 int32_t srcLength,
927 int32_t start,
928 int32_t length) const;
374ca955 929
b75a7d8f 930 /**
374ca955
A
931 * Locate in this the first occurrence in the range
932 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
933 * in <TT>srcChars</TT> in the range
934 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
935 * using bitwise comparison.
b75a7d8f 936 * @param srcChars The text to search for.
374ca955 937 * @param srcStart the offset into <TT>srcChars</TT> at which
b75a7d8f
A
938 * to start matching
939 * @param srcLength the number of characters in <TT>srcChars</TT> to match
940 * @param start the offset into this at which to start matching
941 * @param length the number of characters in this to search
374ca955 942 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
943 * or -1 if not found.
944 * @stable ICU 2.0
945 */
946 int32_t indexOf(const UChar *srcChars,
947 int32_t srcStart,
948 int32_t srcLength,
949 int32_t start,
950 int32_t length) const;
951
952 /**
953 * Locate in this the first occurrence of the BMP code point <code>c</code>,
954 * using bitwise comparison.
955 * @param c The code unit to search for.
956 * @return The offset into this of <TT>c</TT>, or -1 if not found.
957 * @stable ICU 2.0
958 */
959 inline int32_t indexOf(UChar c) const;
960
961 /**
374ca955 962 * Locate in this the first occurrence of the code point <TT>c</TT>,
b75a7d8f
A
963 * using bitwise comparison.
964 *
965 * @param c The code point to search for.
966 * @return The offset into this of <TT>c</TT>, or -1 if not found.
967 * @stable ICU 2.0
968 */
969 inline int32_t indexOf(UChar32 c) const;
970
971 /**
972 * Locate in this the first occurrence of the BMP code point <code>c</code>,
973 * starting at offset <TT>start</TT>, using bitwise comparison.
974 * @param c The code unit to search for.
975 * @param start The offset at which searching will start.
976 * @return The offset into this of <TT>c</TT>, or -1 if not found.
977 * @stable ICU 2.0
978 */
979 inline int32_t indexOf(UChar c,
980 int32_t start) const;
981
982 /**
983 * Locate in this the first occurrence of the code point <TT>c</TT>
984 * starting at offset <TT>start</TT>, using bitwise comparison.
985 *
986 * @param c The code point to search for.
987 * @param start The offset at which searching will start.
988 * @return The offset into this of <TT>c</TT>, or -1 if not found.
989 * @stable ICU 2.0
990 */
991 inline int32_t indexOf(UChar32 c,
992 int32_t start) const;
993
994 /**
995 * Locate in this the first occurrence of the BMP code point <code>c</code>
374ca955
A
996 * in the range [<TT>start</TT>, <TT>start + length</TT>),
997 * using bitwise comparison.
b75a7d8f
A
998 * @param c The code unit to search for.
999 * @param start the offset into this at which to start matching
1000 * @param length the number of characters in this to search
1001 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1002 * @stable ICU 2.0
1003 */
1004 inline int32_t indexOf(UChar c,
1005 int32_t start,
1006 int32_t length) const;
1007
1008 /**
374ca955
A
1009 * Locate in this the first occurrence of the code point <TT>c</TT>
1010 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1011 * using bitwise comparison.
b75a7d8f
A
1012 *
1013 * @param c The code point to search for.
1014 * @param start the offset into this at which to start matching
1015 * @param length the number of characters in this to search
1016 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1017 * @stable ICU 2.0
1018 */
1019 inline int32_t indexOf(UChar32 c,
1020 int32_t start,
1021 int32_t length) const;
1022
1023 /**
374ca955 1024 * Locate in this the last occurrence of the characters in <TT>text</TT>,
b75a7d8f
A
1025 * using bitwise comparison.
1026 * @param text The text to search for.
374ca955 1027 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
1028 * or -1 if not found.
1029 * @stable ICU 2.0
1030 */
1031 inline int32_t lastIndexOf(const UnicodeString& text) const;
1032
1033 /**
1034 * Locate in this the last occurrence of the characters in <TT>text</TT>
1035 * starting at offset <TT>start</TT>, using bitwise comparison.
1036 * @param text The text to search for.
1037 * @param start The offset at which searching will start.
374ca955 1038 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
1039 * or -1 if not found.
1040 * @stable ICU 2.0
1041 */
1042 inline int32_t lastIndexOf(const UnicodeString& text,
1043 int32_t start) const;
1044
1045 /**
374ca955 1046 * Locate in this the last occurrence in the range
b75a7d8f
A
1047 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1048 * in <TT>text</TT>, using bitwise comparison.
1049 * @param text The text to search for.
1050 * @param start The offset at which searching will start.
1051 * @param length The number of characters to search
374ca955 1052 * @return The offset into this of the start of <TT>text</TT>,
b75a7d8f
A
1053 * or -1 if not found.
1054 * @stable ICU 2.0
1055 */
1056 inline int32_t lastIndexOf(const UnicodeString& text,
1057 int32_t start,
1058 int32_t length) const;
1059
1060 /**
374ca955
A
1061 * Locate in this the last occurrence in the range
1062 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1063 * in <TT>srcText</TT> in the range
1064 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1065 * using bitwise comparison.
b75a7d8f 1066 * @param srcText The text to search for.
374ca955 1067 * @param srcStart the offset into <TT>srcText</TT> at which
b75a7d8f
A
1068 * to start matching
1069 * @param srcLength the number of characters in <TT>srcText</TT> to match
1070 * @param start the offset into this at which to start matching
1071 * @param length the number of characters in this to search
374ca955 1072 * @return The offset into this of the start of <TT>srcText</TT>,
b75a7d8f
A
1073 * or -1 if not found.
1074 * @stable ICU 2.0
1075 */
1076 inline int32_t lastIndexOf(const UnicodeString& srcText,
1077 int32_t srcStart,
1078 int32_t srcLength,
1079 int32_t start,
1080 int32_t length) const;
1081
1082 /**
374ca955
A
1083 * Locate in this the last occurrence of the characters in <TT>srcChars</TT>
1084 * starting at offset <TT>start</TT>, using bitwise comparison.
b75a7d8f
A
1085 * @param srcChars The text to search for.
1086 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1087 * @param start the offset into this at which to start matching
374ca955 1088 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1089 * or -1 if not found.
1090 * @stable ICU 2.0
1091 */
1092 inline int32_t lastIndexOf(const UChar *srcChars,
1093 int32_t srcLength,
1094 int32_t start) const;
1095
1096 /**
374ca955
A
1097 * Locate in this the last occurrence in the range
1098 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
b75a7d8f
A
1099 * in <TT>srcChars</TT>, using bitwise comparison.
1100 * @param srcChars The text to search for.
1101 * @param srcLength the number of characters in <TT>srcChars</TT>
1102 * @param start The offset at which searching will start.
1103 * @param length The number of characters to search
374ca955 1104 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1105 * or -1 if not found.
1106 * @stable ICU 2.0
1107 */
1108 inline int32_t lastIndexOf(const UChar *srcChars,
1109 int32_t srcLength,
1110 int32_t start,
1111 int32_t length) const;
374ca955 1112
b75a7d8f 1113 /**
374ca955
A
1114 * Locate in this the last occurrence in the range
1115 * [<TT>start</TT>, <TT>start + length</TT>) of the characters
1116 * in <TT>srcChars</TT> in the range
1117 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>),
1118 * using bitwise comparison.
b75a7d8f
A
1119 * @param srcChars The text to search for.
1120 * @param srcStart the offset into <TT>srcChars</TT> at which
1121 * to start matching
1122 * @param srcLength the number of characters in <TT>srcChars</TT> to match
1123 * @param start the offset into this at which to start matching
1124 * @param length the number of characters in this to search
374ca955 1125 * @return The offset into this of the start of <TT>srcChars</TT>,
b75a7d8f
A
1126 * or -1 if not found.
1127 * @stable ICU 2.0
1128 */
1129 int32_t lastIndexOf(const UChar *srcChars,
1130 int32_t srcStart,
1131 int32_t srcLength,
1132 int32_t start,
1133 int32_t length) const;
1134
1135 /**
1136 * Locate in this the last occurrence of the BMP code point <code>c</code>,
1137 * using bitwise comparison.
1138 * @param c The code unit to search for.
1139 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1140 * @stable ICU 2.0
1141 */
1142 inline int32_t lastIndexOf(UChar c) const;
1143
1144 /**
374ca955 1145 * Locate in this the last occurrence of the code point <TT>c</TT>,
b75a7d8f
A
1146 * using bitwise comparison.
1147 *
1148 * @param c The code point to search for.
1149 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1150 * @stable ICU 2.0
1151 */
1152 inline int32_t lastIndexOf(UChar32 c) const;
1153
1154 /**
1155 * Locate in this the last occurrence of the BMP code point <code>c</code>
1156 * starting at offset <TT>start</TT>, using bitwise comparison.
1157 * @param c The code unit to search for.
1158 * @param start The offset at which searching will start.
1159 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1160 * @stable ICU 2.0
1161 */
1162 inline int32_t lastIndexOf(UChar c,
1163 int32_t start) const;
1164
1165 /**
1166 * Locate in this the last occurrence of the code point <TT>c</TT>
1167 * starting at offset <TT>start</TT>, using bitwise comparison.
1168 *
1169 * @param c The code point to search for.
1170 * @param start The offset at which searching will start.
1171 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1172 * @stable ICU 2.0
1173 */
1174 inline int32_t lastIndexOf(UChar32 c,
1175 int32_t start) const;
1176
1177 /**
1178 * Locate in this the last occurrence of the BMP code point <code>c</code>
374ca955
A
1179 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1180 * using bitwise comparison.
b75a7d8f
A
1181 * @param c The code unit to search for.
1182 * @param start the offset into this at which to start matching
1183 * @param length the number of characters in this to search
1184 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1185 * @stable ICU 2.0
1186 */
1187 inline int32_t lastIndexOf(UChar c,
1188 int32_t start,
1189 int32_t length) const;
1190
1191 /**
374ca955
A
1192 * Locate in this the last occurrence of the code point <TT>c</TT>
1193 * in the range [<TT>start</TT>, <TT>start + length</TT>),
1194 * using bitwise comparison.
b75a7d8f
A
1195 *
1196 * @param c The code point to search for.
1197 * @param start the offset into this at which to start matching
1198 * @param length the number of characters in this to search
1199 * @return The offset into this of <TT>c</TT>, or -1 if not found.
1200 * @stable ICU 2.0
1201 */
1202 inline int32_t lastIndexOf(UChar32 c,
1203 int32_t start,
1204 int32_t length) const;
1205
1206
1207 /* Character access */
1208
1209 /**
1210 * Return the code unit at offset <tt>offset</tt>.
1211 * If the offset is not valid (i.e., not in the range 0..length()-1) then U+ffff is returned.
1212 * @param offset a valid offset into the text
1213 * @return the code unit at offset <tt>offset</tt>
1214 * or 0xffff if the offset is not valid for this string
1215 * @stable ICU 2.0
1216 */
1217 inline UChar charAt(int32_t offset) const;
1218
1219 /**
1220 * Return the code unit at offset <tt>offset</tt>.
1221 * If the offset is not valid (i.e., not in the range 0..length()-1) then U+ffff is returned.
1222 * @param offset a valid offset into the text
1223 * @return the code unit at offset <tt>offset</tt>
1224 * @stable ICU 2.0
1225 */
1226 inline UChar operator[] (int32_t offset) const;
1227
1228 /**
1229 * Return the code point that contains the code unit
1230 * at offset <tt>offset</tt>.
1231 * If the offset is not valid (i.e., not in the range 0..length()-1) then U+ffff is returned.
1232 * @param offset a valid offset into the text
1233 * that indicates the text offset of any of the code units
1234 * that will be assembled into a code point (21-bit value) and returned
1235 * @return the code point of text at <tt>offset</tt>
1236 * or 0xffff if the offset is not valid for this string
1237 * @stable ICU 2.0
1238 */
1239 inline UChar32 char32At(int32_t offset) const;
1240
1241 /**
1242 * Adjust a random-access offset so that
1243 * it points to the beginning of a Unicode character.
1244 * The offset that is passed in points to
1245 * any code unit of a code point,
1246 * while the returned offset will point to the first code unit
1247 * of the same code point.
1248 * In UTF-16, if the input offset points to a second surrogate
1249 * of a surrogate pair, then the returned offset will point
1250 * to the first surrogate.
1251 * @param offset a valid offset into one code point of the text
1252 * @return offset of the first code unit of the same code point
1253 * @see U16_SET_CP_START
1254 * @stable ICU 2.0
1255 */
1256 inline int32_t getChar32Start(int32_t offset) const;
1257
1258 /**
1259 * Adjust a random-access offset so that
1260 * it points behind a Unicode character.
1261 * The offset that is passed in points behind
1262 * any code unit of a code point,
1263 * while the returned offset will point behind the last code unit
1264 * of the same code point.
1265 * In UTF-16, if the input offset points behind the first surrogate
1266 * (i.e., to the second surrogate)
1267 * of a surrogate pair, then the returned offset will point
1268 * behind the second surrogate (i.e., to the code unit following the pair).
1269 * @param offset a valid offset after any code unit of a code point of the text
1270 * @return offset of the first code unit after the same code point
1271 * @see U16_SET_CP_LIMIT
1272 * @stable ICU 2.0
1273 */
1274 inline int32_t getChar32Limit(int32_t offset) const;
1275
1276 /**
1277 * Move the code unit index along the string by delta code points.
1278 * Interpret the input index as a code unit-based offset into the string,
1279 * move the index forward or backward by delta code points, and
1280 * return the resulting index.
1281 * The input index should point to the first code unit of a code point,
1282 * if there is more than one.
1283 *
1284 * Both input and output indexes are code unit-based as for all
1285 * string indexes/offsets in ICU (and other libraries, like MBCS char*).
1286 * If delta<0 then the index is moved backward (toward the start of the string).
1287 * If delta>0 then the index is moved forward (toward the end of the string).
1288 *
1289 * This behaves like CharacterIterator::move32(delta, kCurrent).
1290 *
1291 * Behavior for out-of-bounds indexes:
1292 * <code>moveIndex32</code> pins the input index to 0..length(), i.e.,
1293 * if the input index<0 then it is pinned to 0;
1294 * if it is index>length() then it is pinned to length().
1295 * Afterwards, the index is moved by <code>delta</code> code points
1296 * forward or backward,
1297 * but no further backward than to 0 and no further forward than to length().
1298 * The resulting index return value will be in between 0 and length(), inclusively.
1299 *
1300 * Examples:
1301 * <pre>
1302 * // s has code points 'a' U+10000 'b' U+10ffff U+2029
1303 * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
1304 *
1305 * // initial index: position of U+10000
1306 * int32_t index=1;
1307 *
1308 * // the following examples will all result in index==4, position of U+10ffff
1309 *
1310 * // skip 2 code points from some position in the string
1311 * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
1312 *
1313 * // go to the 3rd code point from the start of s (0-based)
1314 * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
1315 *
1316 * // go to the next-to-last code point of s
1317 * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
1318 * </pre>
1319 *
1320 * @param index input code unit index
1321 * @param delta (signed) code point count to move the index forward or backward
1322 * in the string
1323 * @return the resulting code unit index
1324 * @stable ICU 2.0
1325 */
1326 int32_t moveIndex32(int32_t index, int32_t delta) const;
1327
1328 /* Substring extraction */
1329
1330 /**
374ca955 1331 * Copy the characters in the range
b75a7d8f
A
1332 * [<tt>start</tt>, <tt>start + length</tt>) into the array <tt>dst</tt>,
1333 * beginning at <tt>dstStart</tt>.
1334 * If the string aliases to <code>dst</code> itself as an external buffer,
1335 * then extract() will not copy the contents.
1336 *
1337 * @param start offset of first character which will be copied into the array
1338 * @param length the number of characters to extract
1339 * @param dst array in which to copy characters. The length of <tt>dst</tt>
1340 * must be at least (<tt>dstStart + length</tt>).
1341 * @param dstStart the offset in <TT>dst</TT> where the first character
1342 * will be extracted
1343 * @stable ICU 2.0
1344 */
374ca955
A
1345 inline void extract(int32_t start,
1346 int32_t length,
1347 UChar *dst,
b75a7d8f
A
1348 int32_t dstStart = 0) const;
1349
1350 /**
1351 * Copy the contents of the string into dest.
1352 * This is a convenience function that
1353 * checks if there is enough space in dest,
1354 * extracts the entire string if possible,
1355 * and NUL-terminates dest if possible.
1356 *
1357 * If the string fits into dest but cannot be NUL-terminated
1358 * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
1359 * If the string itself does not fit into dest
1360 * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
1361 *
1362 * If the string aliases to <code>dest</code> itself as an external buffer,
1363 * then extract() will not copy the contents.
1364 *
1365 * @param dest Destination string buffer.
1366 * @param destCapacity Number of UChars available at dest.
1367 * @param errorCode ICU error code.
1368 * @return length()
1369 * @stable ICU 2.0
1370 */
1371 int32_t
1372 extract(UChar *dest, int32_t destCapacity,
1373 UErrorCode &errorCode) const;
1374
1375 /**
374ca955 1376 * Copy the characters in the range
b75a7d8f
A
1377 * [<tt>start</tt>, <tt>start + length</tt>) into the UnicodeString
1378 * <tt>target</tt>.
1379 * @param start offset of first character which will be copied
1380 * @param length the number of characters to extract
1381 * @param target UnicodeString into which to copy characters.
1382 * (No value is returned; the characters are copied into <TT>target</TT>.)
1383 * @stable ICU 2.0
1384 */
1385 inline void extract(int32_t start,
1386 int32_t length,
1387 UnicodeString& target) const;
1388
1389 /**
374ca955 1390 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
b75a7d8f
A
1391 * into the array <tt>dst</tt>, beginning at <tt>dstStart</tt>.
1392 * @param start offset of first character which will be copied into the array
1393 * @param limit offset immediately following the last character to be copied
374ca955 1394 * @param dst array in which to copy characters. The length of <tt>dst</tt>
b75a7d8f
A
1395 * must be at least (<tt>dstStart + (limit - start)</tt>).
1396 * @param dstStart the offset in <TT>dst</TT> where the first character
1397 * will be extracted
1398 * @stable ICU 2.0
1399 */
374ca955
A
1400 inline void extractBetween(int32_t start,
1401 int32_t limit,
1402 UChar *dst,
b75a7d8f
A
1403 int32_t dstStart = 0) const;
1404
1405 /**
374ca955 1406 * Copy the characters in the range [<tt>start</tt>, <tt>limit</tt>)
b75a7d8f
A
1407 * into the UnicodeString <tt>target</tt>. Replaceable API.
1408 * @param start offset of first character which will be copied
1409 * @param limit offset immediately following the last character to be copied
1410 * @param target UnicodeString into which to copy characters.
1411 * (No value is returned; the characters are copied into <TT>target</TT>.)
1412 * @stable ICU 2.0
1413 */
1414 virtual void extractBetween(int32_t start,
1415 int32_t limit,
1416 UnicodeString& target) const;
1417
1418 /**
1419 * Copy the characters in the range
374ca955
A
1420 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters.
1421 * All characters must be invariant (see utypes.h).
1422 * Use US_INV as the last, signature-distinguishing parameter.
1423 *
1424 * This function does not write any more than <code>targetCapacity</code>
1425 * characters but returns the length of the entire output string
1426 * so that one can allocate a larger buffer and call the function again
1427 * if necessary.
1428 * The output string is NUL-terminated if possible.
1429 *
1430 * @param start offset of first character which will be copied
1431 * @param startLength the number of characters to extract
1432 * @param target the target buffer for extraction, can be NULL
1433 * if targetCapacity is 0
1434 * @param targetCapacity the length of the target buffer
1435 * @param inv Signature-distinguishing parameter, use US_INV.
1436 * @return the output string length, not including the terminating NUL
1437 * @draft ICU 3.2
1438 */
1439 int32_t extract(int32_t start,
1440 int32_t startLength,
1441 char *target,
1442 int32_t targetCapacity,
1443 enum EInvariant inv) const;
1444
1445#if !UCONFIG_NO_CONVERSION
1446
1447 /**
1448 * Copy the characters in the range
b75a7d8f
A
1449 * [<tt>start</tt>, <tt>start + startLength</tt>) into an array of characters
1450 * in a specified codepage.
1451 * The output string is NUL-terminated.
1452 *
374ca955
A
1453 * Recommendation: For invariant-character strings use
1454 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1455 * because it avoids object code dependencies of UnicodeString on
1456 * the conversion code.
1457 *
b75a7d8f
A
1458 * @param start offset of first character which will be copied
1459 * @param startLength the number of characters to extract
1460 * @param target the target buffer for extraction
374ca955 1461 * @param codepage the desired codepage for the characters. 0 has
b75a7d8f
A
1462 * the special meaning of the default codepage
1463 * If <code>codepage</code> is an empty string (<code>""</code>),
1464 * then a simple conversion is performed on the codepage-invariant
1465 * subset ("invariant characters") of the platform encoding. See utypes.h.
1466 * If <TT>target</TT> is NULL, then the number of bytes required for
1467 * <TT>target</TT> is returned. It is assumed that the target is big enough
1468 * to fit all of the characters.
1469 * @return the output string length, not including the terminating NUL
1470 * @stable ICU 2.0
1471 */
1472 inline int32_t extract(int32_t start,
1473 int32_t startLength,
1474 char *target,
1475 const char *codepage = 0) const;
1476
1477 /**
374ca955 1478 * Copy the characters in the range
b75a7d8f
A
1479 * [<tt>start</TT>, <tt>start + length</TT>) into an array of characters
1480 * in a specified codepage.
1481 * This function does not write any more than <code>targetLength</code>
1482 * characters but returns the length of the entire output string
1483 * so that one can allocate a larger buffer and call the function again
1484 * if necessary.
1485 * The output string is NUL-terminated if possible.
1486 *
374ca955
A
1487 * Recommendation: For invariant-character strings use
1488 * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
1489 * because it avoids object code dependencies of UnicodeString on
1490 * the conversion code.
1491 *
b75a7d8f
A
1492 * @param start offset of first character which will be copied
1493 * @param startLength the number of characters to extract
1494 * @param target the target buffer for extraction
1495 * @param targetLength the length of the target buffer
374ca955 1496 * @param codepage the desired codepage for the characters. 0 has
b75a7d8f
A
1497 * the special meaning of the default codepage
1498 * If <code>codepage</code> is an empty string (<code>""</code>),
1499 * then a simple conversion is performed on the codepage-invariant
1500 * subset ("invariant characters") of the platform encoding. See utypes.h.
1501 * If <TT>target</TT> is NULL, then the number of bytes required for
1502 * <TT>target</TT> is returned.
1503 * @return the output string length, not including the terminating NUL
1504 * @stable ICU 2.0
1505 */
1506 int32_t extract(int32_t start,
1507 int32_t startLength,
1508 char *target,
1509 uint32_t targetLength,
1510 const char *codepage = 0) const;
1511
1512 /**
1513 * Convert the UnicodeString into a codepage string using an existing UConverter.
1514 * The output string is NUL-terminated if possible.
1515 *
1516 * This function avoids the overhead of opening and closing a converter if
1517 * multiple strings are extracted.
1518 *
1519 * @param dest destination string buffer, can be NULL if destCapacity==0
1520 * @param destCapacity the number of chars available at dest
1521 * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
1522 * or NULL for the default converter
1523 * @param errorCode normal ICU error code
1524 * @return the length of the output string, not counting the terminating NUL;
1525 * if the length is greater than destCapacity, then the string will not fit
1526 * and a buffer of the indicated length would need to be passed in
1527 * @stable ICU 2.0
1528 */
1529 int32_t extract(char *dest, int32_t destCapacity,
1530 UConverter *cnv,
1531 UErrorCode &errorCode) const;
1532
374ca955
A
1533#endif
1534
b75a7d8f
A
1535 /* Length operations */
1536
1537 /**
374ca955
A
1538 * Return the length of the UnicodeString object.
1539 * The length is the number of UChar code units that are in the UnicodeString.
1540 * If you want the number of code points, please use countChar32().
b75a7d8f 1541 * @return the length of the UnicodeString object
374ca955 1542 * @see countChar32
b75a7d8f
A
1543 * @stable ICU 2.0
1544 */
1545 inline int32_t length(void) const;
1546
1547 /**
1548 * Count Unicode code points in the length UChar code units of the string.
1549 * A code point may occupy either one or two UChar code units.
1550 * Counting code points involves reading all code units.
1551 *
1552 * This function is basically the inverse of moveIndex32().
1553 *
1554 * @param start the index of the first code unit to check
1555 * @param length the number of UChar code units to check
1556 * @return the number of code points in the specified code units
374ca955 1557 * @see length
b75a7d8f
A
1558 * @stable ICU 2.0
1559 */
1560 int32_t
1561 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1562
1563 /**
1564 * Check if the length UChar code units of the string
1565 * contain more Unicode code points than a certain number.
1566 * This is more efficient than counting all code points in this part of the string
1567 * and comparing that number with a threshold.
1568 * This function may not need to scan the string at all if the length
1569 * falls within a certain range, and
1570 * never needs to count more than 'number+1' code points.
1571 * Logically equivalent to (countChar32(start, length)>number).
1572 * A Unicode code point may occupy either one or two UChar code units.
1573 *
1574 * @param start the index of the first code unit to check (0 for the entire string)
1575 * @param length the number of UChar code units to check
1576 * (use INT32_MAX for the entire string; remember that start/length
1577 * values are pinned)
1578 * @param number The number of code points in the (sub)string is compared against
1579 * the 'number' parameter.
1580 * @return Boolean value for whether the string contains more Unicode code points
1581 * than 'number'. Same as (u_countChar32(s, length)>number).
1582 * @see countChar32
1583 * @see u_strHasMoreChar32Than
374ca955 1584 * @stable ICU 2.4
b75a7d8f
A
1585 */
1586 UBool
1587 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1588
1589 /**
1590 * Determine if this string is empty.
1591 * @return TRUE if this string contains 0 characters, FALSE otherwise.
1592 * @stable ICU 2.0
1593 */
1594 inline UBool isEmpty(void) const;
1595
1596 /**
1597 * Return the capacity of the internal buffer of the UnicodeString object.
1598 * This is useful together with the getBuffer functions.
1599 * See there for details.
1600 *
1601 * @return the number of UChars available in the internal buffer
1602 * @see getBuffer
1603 * @stable ICU 2.0
1604 */
1605 inline int32_t getCapacity(void) const;
1606
1607 /* Other operations */
1608
1609 /**
1610 * Generate a hash code for this object.
1611 * @return The hash code of this UnicodeString.
1612 * @stable ICU 2.0
1613 */
1614 inline int32_t hashCode(void) const;
1615
1616 /**
1617 * Determine if this object contains a valid string.
1618 * A bogus string has no value. It is different from an empty string.
1619 * It can be used to indicate that no string value is available.
1620 * getBuffer() and getTerminatedBuffer() return NULL, and
1621 * length() returns 0.
1622 *
1623 * @return TRUE if the string is valid, FALSE otherwise
1624 * @see setToBogus()
1625 * @stable ICU 2.0
1626 */
1627 inline UBool isBogus(void) const;
1628
1629
1630 //========================================
1631 // Write operations
1632 //========================================
1633
1634 /* Assignment operations */
1635
1636 /**
1637 * Assignment operator. Replace the characters in this UnicodeString
1638 * with the characters from <TT>srcText</TT>.
1639 * @param srcText The text containing the characters to replace
1640 * @return a reference to this
1641 * @stable ICU 2.0
1642 */
1643 UnicodeString &operator=(const UnicodeString &srcText);
1644
1645 /**
1646 * Almost the same as the assignment operator.
1647 * Replace the characters in this UnicodeString
1648 * with the characters from <code>srcText</code>.
1649 *
1650 * This function works the same for all strings except for ones that
1651 * are readonly aliases.
1652 * Starting with ICU 2.4, the assignment operator and the copy constructor
1653 * allocate a new buffer and copy the buffer contents even for readonly aliases.
1654 * This function implements the old, more efficient but less safe behavior
1655 * of making this string also a readonly alias to the same buffer.
1656 * The fastCopyFrom function must be used only if it is known that the lifetime of
1657 * this UnicodeString is at least as long as the lifetime of the aliased buffer
1658 * including its contents, for example for strings from resource bundles
1659 * or aliases to string contents.
1660 *
1661 * @param src The text containing the characters to replace.
1662 * @return a reference to this
374ca955 1663 * @stable ICU 2.4
b75a7d8f
A
1664 */
1665 UnicodeString &fastCopyFrom(const UnicodeString &src);
1666
1667 /**
1668 * Assignment operator. Replace the characters in this UnicodeString
1669 * with the code unit <TT>ch</TT>.
1670 * @param ch the code unit to replace
1671 * @return a reference to this
1672 * @stable ICU 2.0
1673 */
1674 inline UnicodeString& operator= (UChar ch);
1675
1676 /**
1677 * Assignment operator. Replace the characters in this UnicodeString
1678 * with the code point <TT>ch</TT>.
1679 * @param ch the code point to replace
1680 * @return a reference to this
1681 * @stable ICU 2.0
1682 */
1683 inline UnicodeString& operator= (UChar32 ch);
1684
1685 /**
1686 * Set the text in the UnicodeString object to the characters
374ca955 1687 * in <TT>srcText</TT> in the range
b75a7d8f
A
1688 * [<TT>srcStart</TT>, <TT>srcText.length()</TT>).
1689 * <TT>srcText</TT> is not modified.
1690 * @param srcText the source for the new characters
1691 * @param srcStart the offset into <TT>srcText</TT> where new characters
1692 * will be obtained
1693 * @return a reference to this
374ca955 1694 * @stable ICU 2.2
b75a7d8f 1695 */
374ca955 1696 inline UnicodeString& setTo(const UnicodeString& srcText,
b75a7d8f
A
1697 int32_t srcStart);
1698
1699 /**
1700 * Set the text in the UnicodeString object to the characters
374ca955 1701 * in <TT>srcText</TT> in the range
b75a7d8f
A
1702 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
1703 * <TT>srcText</TT> is not modified.
1704 * @param srcText the source for the new characters
1705 * @param srcStart the offset into <TT>srcText</TT> where new characters
1706 * will be obtained
1707 * @param srcLength the number of characters in <TT>srcText</TT> in the
1708 * replace string.
1709 * @return a reference to this
1710 * @stable ICU 2.0
1711 */
374ca955
A
1712 inline UnicodeString& setTo(const UnicodeString& srcText,
1713 int32_t srcStart,
b75a7d8f
A
1714 int32_t srcLength);
1715
1716 /**
374ca955
A
1717 * Set the text in the UnicodeString object to the characters in
1718 * <TT>srcText</TT>.
b75a7d8f
A
1719 * <TT>srcText</TT> is not modified.
1720 * @param srcText the source for the new characters
1721 * @return a reference to this
1722 * @stable ICU 2.0
1723 */
1724 inline UnicodeString& setTo(const UnicodeString& srcText);
1725
1726 /**
1727 * Set the characters in the UnicodeString object to the characters
1728 * in <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
1729 * @param srcChars the source for the new characters
1730 * @param srcLength the number of Unicode characters in srcChars.
1731 * @return a reference to this
1732 * @stable ICU 2.0
1733 */
1734 inline UnicodeString& setTo(const UChar *srcChars,
1735 int32_t srcLength);
1736
1737 /**
1738 * Set the characters in the UnicodeString object to the code unit
1739 * <TT>srcChar</TT>.
374ca955 1740 * @param srcChar the code unit which becomes the UnicodeString's character
b75a7d8f
A
1741 * content
1742 * @return a reference to this
1743 * @stable ICU 2.0
1744 */
1745 UnicodeString& setTo(UChar srcChar);
1746
1747 /**
1748 * Set the characters in the UnicodeString object to the code point
1749 * <TT>srcChar</TT>.
374ca955 1750 * @param srcChar the code point which becomes the UnicodeString's character
b75a7d8f
A
1751 * content
1752 * @return a reference to this
1753 * @stable ICU 2.0
1754 */
1755 UnicodeString& setTo(UChar32 srcChar);
1756
1757 /**
1758 * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor.
1759 * The text will be used for the UnicodeString object, but
1760 * it will not be released when the UnicodeString is destroyed.
1761 * This has copy-on-write semantics:
1762 * When the string is modified, then the buffer is first copied into
1763 * newly allocated memory.
1764 * The aliased buffer is never modified.
1765 * In an assignment to another UnicodeString, the text will be aliased again,
1766 * so that both strings then alias the same readonly-text.
1767 *
1768 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
1769 * This must be true if <code>textLength==-1</code>.
1770 * @param text The characters to alias for the UnicodeString.
1771 * @param textLength The number of Unicode characters in <code>text</code> to alias.
1772 * If -1, then this function will determine the length
1773 * by calling <code>u_strlen()</code>.
1774 * @return a reference to this
1775 * @stable ICU 2.0
1776 */
1777 UnicodeString &setTo(UBool isTerminated,
1778 const UChar *text,
1779 int32_t textLength);
1780
1781 /**
1782 * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor.
1783 * The text will be used for the UnicodeString object, but
1784 * it will not be released when the UnicodeString is destroyed.
1785 * This has write-through semantics:
1786 * For as long as the capacity of the buffer is sufficient, write operations
1787 * will directly affect the buffer. When more capacity is necessary, then
1788 * a new buffer will be allocated and the contents copied as with regularly
1789 * constructed strings.
1790 * In an assignment to another UnicodeString, the buffer will be copied.
1791 * The extract(UChar *dst) function detects whether the dst pointer is the same
1792 * as the string buffer itself and will in this case not copy the contents.
1793 *
1794 * @param buffer The characters to alias for the UnicodeString.
1795 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
1796 * @param buffCapacity The size of <code>buffer</code> in UChars.
1797 * @return a reference to this
1798 * @stable ICU 2.0
1799 */
1800 UnicodeString &setTo(UChar *buffer,
1801 int32_t buffLength,
1802 int32_t buffCapacity);
1803
1804 /**
1805 * Make this UnicodeString object invalid.
1806 * The string will test TRUE with isBogus().
1807 *
1808 * A bogus string has no value. It is different from an empty string.
1809 * It can be used to indicate that no string value is available.
1810 * getBuffer() and getTerminatedBuffer() return NULL, and
1811 * length() returns 0.
1812 *
1813 * This utility function is used throughout the UnicodeString
1814 * implementation to indicate that a UnicodeString operation failed,
1815 * and may be used in other functions,
1816 * especially but not exclusively when such functions do not
1817 * take a UErrorCode for simplicity.
1818 *
1819 * The following methods, and no others, will clear a string object's bogus flag:
1820 * - remove()
1821 * - remove(0, INT32_MAX)
1822 * - truncate(0)
1823 * - operator=() (assignment operator)
1824 * - setTo(...)
1825 *
1826 * The simplest way to turn a bogus string into an empty one
1827 * is to use the remove() function.
1828 * Examples for other functions that are equivalent to "set to empty string":
1829 * \code
1830 * if(s.isBogus()) {
1831 * s.remove(); // set to an empty string (remove all), or
1832 * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
1833 * s.truncate(0); // set to an empty string (complete truncation), or
1834 * s=UnicodeString(); // assign an empty string, or
1835 * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
1836 * static const UChar nul=0;
1837 * s.setTo(&nul, 0); // set to an empty C Unicode string
1838 * }
1839 * \endcode
1840 *
1841 * @see isBogus()
1842 * @stable ICU 2.0
1843 */
1844 void setToBogus();
1845
1846 /**
1847 * Set the character at the specified offset to the specified character.
1848 * @param offset A valid offset into the text of the character to set
1849 * @param ch The new character
1850 * @return A reference to this
1851 * @stable ICU 2.0
1852 */
374ca955 1853 UnicodeString& setCharAt(int32_t offset,
b75a7d8f
A
1854 UChar ch);
1855
1856
1857 /* Append operations */
1858
1859 /**
1860 * Append operator. Append the code unit <TT>ch</TT> to the UnicodeString
1861 * object.
1862 * @param ch the code unit to be appended
1863 * @return a reference to this
1864 * @stable ICU 2.0
1865 */
1866 inline UnicodeString& operator+= (UChar ch);
1867
1868 /**
1869 * Append operator. Append the code point <TT>ch</TT> to the UnicodeString
1870 * object.
1871 * @param ch the code point to be appended
1872 * @return a reference to this
1873 * @stable ICU 2.0
1874 */
1875 inline UnicodeString& operator+= (UChar32 ch);
1876
1877 /**
1878 * Append operator. Append the characters in <TT>srcText</TT> to the
1879 * UnicodeString object. <TT>srcText</TT> is
1880 * not modified.
1881 * @param srcText the source for the new characters
1882 * @return a reference to this
1883 * @stable ICU 2.0
1884 */
1885 inline UnicodeString& operator+= (const UnicodeString& srcText);
1886
1887 /**
1888 * Append the characters
374ca955
A
1889 * in <TT>srcText</TT> in the range
1890 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the
1891 * UnicodeString object. <TT>srcText</TT>
b75a7d8f
A
1892 * is not modified.
1893 * @param srcText the source for the new characters
374ca955 1894 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 1895 * will be obtained
374ca955 1896 * @param srcLength the number of characters in <TT>srcText</TT> in
b75a7d8f
A
1897 * the append string
1898 * @return a reference to this
1899 * @stable ICU 2.0
1900 */
374ca955
A
1901 inline UnicodeString& append(const UnicodeString& srcText,
1902 int32_t srcStart,
b75a7d8f
A
1903 int32_t srcLength);
1904
1905 /**
374ca955 1906 * Append the characters in <TT>srcText</TT> to the UnicodeString object.
b75a7d8f
A
1907 * <TT>srcText</TT> is not modified.
1908 * @param srcText the source for the new characters
1909 * @return a reference to this
1910 * @stable ICU 2.0
1911 */
1912 inline UnicodeString& append(const UnicodeString& srcText);
1913
1914 /**
374ca955
A
1915 * Append the characters in <TT>srcChars</TT> in the range
1916 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) to the UnicodeString
1917 * object.
b75a7d8f
A
1918 * <TT>srcChars</TT> is not modified.
1919 * @param srcChars the source for the new characters
374ca955 1920 * @param srcStart the offset into <TT>srcChars</TT> where new characters
b75a7d8f 1921 * will be obtained
374ca955 1922 * @param srcLength the number of characters in <TT>srcChars</TT> in
b75a7d8f
A
1923 * the append string
1924 * @return a reference to this
1925 * @stable ICU 2.0
1926 */
374ca955
A
1927 inline UnicodeString& append(const UChar *srcChars,
1928 int32_t srcStart,
b75a7d8f
A
1929 int32_t srcLength);
1930
1931 /**
374ca955 1932 * Append the characters in <TT>srcChars</TT> to the UnicodeString object.
b75a7d8f
A
1933 * <TT>srcChars</TT> is not modified.
1934 * @param srcChars the source for the new characters
1935 * @param srcLength the number of Unicode characters in <TT>srcChars</TT>
1936 * @return a reference to this
1937 * @stable ICU 2.0
1938 */
1939 inline UnicodeString& append(const UChar *srcChars,
1940 int32_t srcLength);
1941
1942 /**
1943 * Append the code unit <TT>srcChar</TT> to the UnicodeString object.
1944 * @param srcChar the code unit to append
1945 * @return a reference to this
1946 * @stable ICU 2.0
1947 */
1948 inline UnicodeString& append(UChar srcChar);
1949
1950 /**
1951 * Append the code point <TT>srcChar</TT> to the UnicodeString object.
1952 * @param srcChar the code point to append
1953 * @return a reference to this
1954 * @stable ICU 2.0
1955 */
1956 inline UnicodeString& append(UChar32 srcChar);
1957
1958
1959 /* Insert operations */
1960
1961 /**
374ca955
A
1962 * Insert the characters in <TT>srcText</TT> in the range
1963 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
b75a7d8f
A
1964 * object at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1965 * @param start the offset where the insertion begins
374ca955
A
1966 * @param srcText the source for the new characters
1967 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 1968 * will be obtained
374ca955 1969 * @param srcLength the number of characters in <TT>srcText</TT> in
b75a7d8f
A
1970 * the insert string
1971 * @return a reference to this
1972 * @stable ICU 2.0
1973 */
374ca955
A
1974 inline UnicodeString& insert(int32_t start,
1975 const UnicodeString& srcText,
1976 int32_t srcStart,
b75a7d8f
A
1977 int32_t srcLength);
1978
1979 /**
1980 * Insert the characters in <TT>srcText</TT> into the UnicodeString object
1981 * at offset <TT>start</TT>. <TT>srcText</TT> is not modified.
1982 * @param start the offset where the insertion begins
374ca955 1983 * @param srcText the source for the new characters
b75a7d8f
A
1984 * @return a reference to this
1985 * @stable ICU 2.0
1986 */
374ca955 1987 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
1988 const UnicodeString& srcText);
1989
1990 /**
374ca955 1991 * Insert the characters in <TT>srcChars</TT> in the range
b75a7d8f
A
1992 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>) into the UnicodeString
1993 * object at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
1994 * @param start the offset at which the insertion begins
1995 * @param srcChars the source for the new characters
374ca955 1996 * @param srcStart the offset into <TT>srcChars</TT> where new characters
b75a7d8f 1997 * will be obtained
374ca955 1998 * @param srcLength the number of characters in <TT>srcChars</TT>
b75a7d8f
A
1999 * in the insert string
2000 * @return a reference to this
2001 * @stable ICU 2.0
2002 */
374ca955
A
2003 inline UnicodeString& insert(int32_t start,
2004 const UChar *srcChars,
2005 int32_t srcStart,
b75a7d8f
A
2006 int32_t srcLength);
2007
2008 /**
374ca955 2009 * Insert the characters in <TT>srcChars</TT> into the UnicodeString object
b75a7d8f
A
2010 * at offset <TT>start</TT>. <TT>srcChars</TT> is not modified.
2011 * @param start the offset where the insertion begins
2012 * @param srcChars the source for the new characters
2013 * @param srcLength the number of Unicode characters in srcChars.
2014 * @return a reference to this
2015 * @stable ICU 2.0
2016 */
374ca955 2017 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2018 const UChar *srcChars,
2019 int32_t srcLength);
2020
2021 /**
374ca955 2022 * Insert the code unit <TT>srcChar</TT> into the UnicodeString object at
b75a7d8f
A
2023 * offset <TT>start</TT>.
2024 * @param start the offset at which the insertion occurs
2025 * @param srcChar the code unit to insert
2026 * @return a reference to this
2027 * @stable ICU 2.0
2028 */
374ca955 2029 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2030 UChar srcChar);
2031
2032 /**
374ca955 2033 * Insert the code point <TT>srcChar</TT> into the UnicodeString object at
b75a7d8f
A
2034 * offset <TT>start</TT>.
2035 * @param start the offset at which the insertion occurs
2036 * @param srcChar the code point to insert
2037 * @return a reference to this
2038 * @stable ICU 2.0
2039 */
374ca955 2040 inline UnicodeString& insert(int32_t start,
b75a7d8f
A
2041 UChar32 srcChar);
2042
2043
2044 /* Replace operations */
2045
2046 /**
374ca955
A
2047 * Replace the characters in the range
2048 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2049 * <TT>srcText</TT> in the range
2050 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>).
b75a7d8f
A
2051 * <TT>srcText</TT> is not modified.
2052 * @param start the offset at which the replace operation begins
374ca955 2053 * @param length the number of characters to replace. The character at
b75a7d8f
A
2054 * <TT>start + length</TT> is not modified.
2055 * @param srcText the source for the new characters
374ca955 2056 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 2057 * will be obtained
374ca955 2058 * @param srcLength the number of characters in <TT>srcText</TT> in
b75a7d8f
A
2059 * the replace string
2060 * @return a reference to this
2061 * @stable ICU 2.0
2062 */
374ca955
A
2063 UnicodeString& replace(int32_t start,
2064 int32_t length,
2065 const UnicodeString& srcText,
2066 int32_t srcStart,
b75a7d8f
A
2067 int32_t srcLength);
2068
2069 /**
374ca955
A
2070 * Replace the characters in the range
2071 * [<TT>start</TT>, <TT>start + length</TT>)
b75a7d8f
A
2072 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is
2073 * not modified.
2074 * @param start the offset at which the replace operation begins
2075 * @param length the number of characters to replace. The character at
2076 * <TT>start + length</TT> is not modified.
2077 * @param srcText the source for the new characters
2078 * @return a reference to this
2079 * @stable ICU 2.0
2080 */
374ca955
A
2081 UnicodeString& replace(int32_t start,
2082 int32_t length,
b75a7d8f
A
2083 const UnicodeString& srcText);
2084
2085 /**
374ca955
A
2086 * Replace the characters in the range
2087 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2088 * <TT>srcChars</TT> in the range
2089 * [<TT>srcStart</TT>, <TT>srcStart + srcLength</TT>). <TT>srcChars</TT>
b75a7d8f
A
2090 * is not modified.
2091 * @param start the offset at which the replace operation begins
2092 * @param length the number of characters to replace. The character at
2093 * <TT>start + length</TT> is not modified.
2094 * @param srcChars the source for the new characters
2095 * @param srcStart the offset into <TT>srcChars</TT> where new characters
2096 * will be obtained
374ca955 2097 * @param srcLength the number of characters in <TT>srcChars</TT>
b75a7d8f
A
2098 * in the replace string
2099 * @return a reference to this
2100 * @stable ICU 2.0
2101 */
374ca955
A
2102 UnicodeString& replace(int32_t start,
2103 int32_t length,
2104 const UChar *srcChars,
2105 int32_t srcStart,
b75a7d8f
A
2106 int32_t srcLength);
2107
2108 /**
374ca955 2109 * Replace the characters in the range
b75a7d8f
A
2110 * [<TT>start</TT>, <TT>start + length</TT>) with the characters in
2111 * <TT>srcChars</TT>. <TT>srcChars</TT> is not modified.
2112 * @param start the offset at which the replace operation begins
2113 * @param length number of characters to replace. The character at
2114 * <TT>start + length</TT> is not modified.
2115 * @param srcChars the source for the new characters
2116 * @param srcLength the number of Unicode characters in srcChars
2117 * @return a reference to this
2118 * @stable ICU 2.0
2119 */
374ca955
A
2120 inline UnicodeString& replace(int32_t start,
2121 int32_t length,
b75a7d8f
A
2122 const UChar *srcChars,
2123 int32_t srcLength);
2124
2125 /**
374ca955 2126 * Replace the characters in the range
b75a7d8f
A
2127 * [<TT>start</TT>, <TT>start + length</TT>) with the code unit
2128 * <TT>srcChar</TT>.
2129 * @param start the offset at which the replace operation begins
2130 * @param length the number of characters to replace. The character at
2131 * <TT>start + length</TT> is not modified.
2132 * @param srcChar the new code unit
2133 * @return a reference to this
2134 * @stable ICU 2.0
2135 */
374ca955
A
2136 inline UnicodeString& replace(int32_t start,
2137 int32_t length,
b75a7d8f
A
2138 UChar srcChar);
2139
2140 /**
374ca955 2141 * Replace the characters in the range
b75a7d8f
A
2142 * [<TT>start</TT>, <TT>start + length</TT>) with the code point
2143 * <TT>srcChar</TT>.
2144 * @param start the offset at which the replace operation begins
2145 * @param length the number of characters to replace. The character at
2146 * <TT>start + length</TT> is not modified.
2147 * @param srcChar the new code point
2148 * @return a reference to this
2149 * @stable ICU 2.0
2150 */
374ca955
A
2151 inline UnicodeString& replace(int32_t start,
2152 int32_t length,
b75a7d8f
A
2153 UChar32 srcChar);
2154
2155 /**
374ca955 2156 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
b75a7d8f
A
2157 * with the characters in <TT>srcText</TT>. <TT>srcText</TT> is not modified.
2158 * @param start the offset at which the replace operation begins
2159 * @param limit the offset immediately following the replace range
2160 * @param srcText the source for the new characters
2161 * @return a reference to this
2162 * @stable ICU 2.0
2163 */
374ca955
A
2164 inline UnicodeString& replaceBetween(int32_t start,
2165 int32_t limit,
b75a7d8f
A
2166 const UnicodeString& srcText);
2167
2168 /**
374ca955
A
2169 * Replace the characters in the range [<TT>start</TT>, <TT>limit</TT>)
2170 * with the characters in <TT>srcText</TT> in the range
b75a7d8f
A
2171 * [<TT>srcStart</TT>, <TT>srcLimit</TT>). <TT>srcText</TT> is not modified.
2172 * @param start the offset at which the replace operation begins
2173 * @param limit the offset immediately following the replace range
2174 * @param srcText the source for the new characters
374ca955 2175 * @param srcStart the offset into <TT>srcText</TT> where new characters
b75a7d8f 2176 * will be obtained
374ca955 2177 * @param srcLimit the offset immediately following the range to copy
b75a7d8f
A
2178 * in <TT>srcText</TT>
2179 * @return a reference to this
2180 * @stable ICU 2.0
2181 */
374ca955
A
2182 inline UnicodeString& replaceBetween(int32_t start,
2183 int32_t limit,
2184 const UnicodeString& srcText,
2185 int32_t srcStart,
b75a7d8f
A
2186 int32_t srcLimit);
2187
2188 /**
2189 * Replace a substring of this object with the given text.
2190 * @param start the beginning index, inclusive; <code>0 <= start
2191 * <= limit</code>.
2192 * @param limit the ending index, exclusive; <code>start <= limit
2193 * <= length()</code>.
2194 * @param text the text to replace characters <code>start</code>
2195 * to <code>limit - 1</code>
2196 * @stable ICU 2.0
2197 */
2198 virtual void handleReplaceBetween(int32_t start,
2199 int32_t limit,
2200 const UnicodeString& text);
2201
2202 /**
2203 * Replaceable API
2204 * @return TRUE if it has MetaData
374ca955 2205 * @stable ICU 2.4
b75a7d8f
A
2206 */
2207 virtual UBool hasMetaData() const;
374ca955 2208
b75a7d8f
A
2209 /**
2210 * Copy a substring of this object, retaining attribute (out-of-band)
2211 * information. This method is used to duplicate or reorder substrings.
2212 * The destination index must not overlap the source range.
374ca955 2213 *
b75a7d8f
A
2214 * @param start the beginning index, inclusive; <code>0 <= start <=
2215 * limit</code>.
2216 * @param limit the ending index, exclusive; <code>start <= limit <=
2217 * length()</code>.
2218 * @param dest the destination index. The characters from
2219 * <code>start..limit-1</code> will be copied to <code>dest</code>.
2220 * Implementations of this method may assume that <code>dest <= start ||
2221 * dest >= limit</code>.
2222 * @stable ICU 2.0
2223 */
2224 virtual void copy(int32_t start, int32_t limit, int32_t dest);
2225
2226 /* Search and replace operations */
2227
2228 /**
374ca955 2229 * Replace all occurrences of characters in oldText with the characters
b75a7d8f
A
2230 * in newText
2231 * @param oldText the text containing the search text
2232 * @param newText the text containing the replacement text
2233 * @return a reference to this
2234 * @stable ICU 2.0
2235 */
2236 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2237 const UnicodeString& newText);
2238
2239 /**
374ca955 2240 * Replace all occurrences of characters in oldText with characters
b75a7d8f
A
2241 * in newText
2242 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2243 * @param start the start of the range in which replace will be performed
2244 * @param length the length of the range in which replace will be performed
2245 * @param oldText the text containing the search text
2246 * @param newText the text containing the replacement text
2247 * @return a reference to this
2248 * @stable ICU 2.0
2249 */
2250 inline UnicodeString& findAndReplace(int32_t start,
2251 int32_t length,
2252 const UnicodeString& oldText,
2253 const UnicodeString& newText);
2254
2255 /**
374ca955
A
2256 * Replace all occurrences of characters in oldText in the range
2257 * [<TT>oldStart</TT>, <TT>oldStart + oldLength</TT>) with the characters
2258 * in newText in the range
2259 * [<TT>newStart</TT>, <TT>newStart + newLength</TT>)
b75a7d8f
A
2260 * in the range [<TT>start</TT>, <TT>start + length</TT>).
2261 * @param start the start of the range in which replace will be performed
2262 * @param length the length of the range in which replace will be performed
2263 * @param oldText the text containing the search text
2264 * @param oldStart the start of the search range in <TT>oldText</TT>
2265 * @param oldLength the length of the search range in <TT>oldText</TT>
2266 * @param newText the text containing the replacement text
2267 * @param newStart the start of the replacement range in <TT>newText</TT>
2268 * @param newLength the length of the replacement range in <TT>newText</TT>
2269 * @return a reference to this
2270 * @stable ICU 2.0
2271 */
2272 UnicodeString& findAndReplace(int32_t start,
2273 int32_t length,
2274 const UnicodeString& oldText,
2275 int32_t oldStart,
2276 int32_t oldLength,
2277 const UnicodeString& newText,
2278 int32_t newStart,
2279 int32_t newLength);
2280
2281
2282 /* Remove operations */
2283
2284 /**
2285 * Remove all characters from the UnicodeString object.
2286 * @return a reference to this
2287 * @stable ICU 2.0
2288 */
2289 inline UnicodeString& remove(void);
2290
2291 /**
374ca955 2292 * Remove the characters in the range
b75a7d8f
A
2293 * [<TT>start</TT>, <TT>start + length</TT>) from the UnicodeString object.
2294 * @param start the offset of the first character to remove
2295 * @param length the number of characters to remove
2296 * @return a reference to this
2297 * @stable ICU 2.0
2298 */
374ca955 2299 inline UnicodeString& remove(int32_t start,
b75a7d8f
A
2300 int32_t length = (int32_t)INT32_MAX);
2301
2302 /**
374ca955 2303 * Remove the characters in the range
b75a7d8f
A
2304 * [<TT>start</TT>, <TT>limit</TT>) from the UnicodeString object.
2305 * @param start the offset of the first character to remove
2306 * @param limit the offset immediately following the range to remove
2307 * @return a reference to this
2308 * @stable ICU 2.0
2309 */
2310 inline UnicodeString& removeBetween(int32_t start,
2311 int32_t limit = (int32_t)INT32_MAX);
2312
2313
2314 /* Length operations */
2315
2316 /**
374ca955
A
2317 * Pad the start of this UnicodeString with the character <TT>padChar</TT>.
2318 * If the length of this UnicodeString is less than targetLength,
b75a7d8f
A
2319 * targetLength - length() copies of padChar will be added to the
2320 * beginning of this UnicodeString.
2321 * @param targetLength the desired length of the string
374ca955 2322 * @param padChar the character to use for padding. Defaults to
b75a7d8f
A
2323 * space (U+0020)
2324 * @return TRUE if the text was padded, FALSE otherwise.
2325 * @stable ICU 2.0
2326 */
2327 UBool padLeading(int32_t targetLength,
2328 UChar padChar = 0x0020);
2329
2330 /**
374ca955
A
2331 * Pad the end of this UnicodeString with the character <TT>padChar</TT>.
2332 * If the length of this UnicodeString is less than targetLength,
b75a7d8f
A
2333 * targetLength - length() copies of padChar will be added to the
2334 * end of this UnicodeString.
2335 * @param targetLength the desired length of the string
374ca955 2336 * @param padChar the character to use for padding. Defaults to
b75a7d8f
A
2337 * space (U+0020)
2338 * @return TRUE if the text was padded, FALSE otherwise.
2339 * @stable ICU 2.0
2340 */
2341 UBool padTrailing(int32_t targetLength,
2342 UChar padChar = 0x0020);
2343
2344 /**
2345 * Truncate this UnicodeString to the <TT>targetLength</TT>.
2346 * @param targetLength the desired length of this UnicodeString.
2347 * @return TRUE if the text was truncated, FALSE otherwise
2348 * @stable ICU 2.0
2349 */
2350 inline UBool truncate(int32_t targetLength);
2351
2352 /**
2353 * Trims leading and trailing whitespace from this UnicodeString.
2354 * @return a reference to this
2355 * @stable ICU 2.0
2356 */
2357 UnicodeString& trim(void);
2358
2359
2360 /* Miscellaneous operations */
2361
2362 /**
2363 * Reverse this UnicodeString in place.
2364 * @return a reference to this
2365 * @stable ICU 2.0
2366 */
2367 inline UnicodeString& reverse(void);
2368
2369 /**
2370 * Reverse the range [<TT>start</TT>, <TT>start + length</TT>) in
2371 * this UnicodeString.
2372 * @param start the start of the range to reverse
2373 * @param length the number of characters to reverse
2374 * @return a reference to this
2375 * @stable ICU 2.0
2376 */
2377 inline UnicodeString& reverse(int32_t start,
2378 int32_t length);
2379
2380 /**
2381 * Convert the characters in this to UPPER CASE following the conventions of
2382 * the default locale.
2383 * @return A reference to this.
2384 * @stable ICU 2.0
2385 */
2386 UnicodeString& toUpper(void);
2387
2388 /**
2389 * Convert the characters in this to UPPER CASE following the conventions of
2390 * a specific locale.
2391 * @param locale The locale containing the conventions to use.
2392 * @return A reference to this.
2393 * @stable ICU 2.0
2394 */
2395 UnicodeString& toUpper(const Locale& locale);
2396
2397 /**
2398 * Convert the characters in this to lower case following the conventions of
2399 * the default locale.
2400 * @return A reference to this.
2401 * @stable ICU 2.0
2402 */
2403 UnicodeString& toLower(void);
2404
2405 /**
2406 * Convert the characters in this to lower case following the conventions of
2407 * a specific locale.
2408 * @param locale The locale containing the conventions to use.
2409 * @return A reference to this.
2410 * @stable ICU 2.0
2411 */
2412 UnicodeString& toLower(const Locale& locale);
2413
2414#if !UCONFIG_NO_BREAK_ITERATION
2415
2416 /**
2417 * Titlecase this string, convenience function using the default locale.
2418 *
2419 * Casing is locale-dependent and context-sensitive.
2420 * Titlecasing uses a break iterator to find the first characters of words
2421 * that are to be titlecased. It titlecases those characters and lowercases
2422 * all others.
2423 *
2424 * The titlecase break iterator can be provided to customize for arbitrary
2425 * styles, using rules and dictionaries beyond the standard iterators.
2426 * It may be more efficient to always provide an iterator to avoid
2427 * opening and closing one for each string.
2428 * The standard titlecase iterator for the root locale implements the
2429 * algorithm of Unicode TR 21.
2430 *
2431 * This function uses only the first() and next() methods of the
2432 * provided break iterator.
2433 *
2434 * @param titleIter A break iterator to find the first characters of words
2435 * that are to be titlecased.
2436 * If none is provided (0), then a standard titlecase
2437 * break iterator is opened.
374ca955 2438 * Otherwise the provided iterator is set to the string's text.
b75a7d8f
A
2439 * @return A reference to this.
2440 * @stable ICU 2.1
2441 */
2442 UnicodeString &toTitle(BreakIterator *titleIter);
2443
2444 /**
2445 * Titlecase this string.
2446 *
2447 * Casing is locale-dependent and context-sensitive.
2448 * Titlecasing uses a break iterator to find the first characters of words
2449 * that are to be titlecased. It titlecases those characters and lowercases
2450 * all others.
2451 *
2452 * The titlecase break iterator can be provided to customize for arbitrary
2453 * styles, using rules and dictionaries beyond the standard iterators.
2454 * It may be more efficient to always provide an iterator to avoid
2455 * opening and closing one for each string.
2456 * The standard titlecase iterator for the root locale implements the
2457 * algorithm of Unicode TR 21.
2458 *
2459 * This function uses only the first() and next() methods of the
2460 * provided break iterator.
2461 *
2462 * @param titleIter A break iterator to find the first characters of words
2463 * that are to be titlecased.
2464 * If none is provided (0), then a standard titlecase
2465 * break iterator is opened.
374ca955 2466 * Otherwise the provided iterator is set to the string's text.
b75a7d8f
A
2467 * @param locale The locale to consider.
2468 * @return A reference to this.
2469 * @stable ICU 2.1
2470 */
2471 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2472
2473#endif
2474
2475 /**
2476 * Case-fold the characters in this string.
2477 * Case-folding is locale-independent and not context-sensitive,
2478 * but there is an option for whether to include or exclude mappings for dotted I
2479 * and dotless i that are marked with 'I' in CaseFolding.txt.
2480 * The result may be longer or shorter than the original.
2481 *
2482 * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
2483 * @return A reference to this.
2484 * @stable ICU 2.0
2485 */
2486 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2487
2488 //========================================
2489 // Access to the internal buffer
2490 //========================================
2491
2492 /**
2493 * Get a read/write pointer to the internal buffer.
2494 * The buffer is guaranteed to be large enough for at least minCapacity UChars,
2495 * writable, and is still owned by the UnicodeString object.
2496 * Calls to getBuffer(minCapacity) must not be nested, and
2497 * must be matched with calls to releaseBuffer(newLength).
2498 * If the string buffer was read-only or shared,
2499 * then it will be reallocated and copied.
2500 *
2501 * An attempted nested call will return 0, and will not further modify the
2502 * state of the UnicodeString object.
2503 * It also returns 0 if the string is bogus.
2504 *
2505 * The actual capacity of the string buffer may be larger than minCapacity.
2506 * getCapacity() returns the actual capacity.
2507 * For many operations, the full capacity should be used to avoid reallocations.
2508 *
2509 * While the buffer is "open" between getBuffer(minCapacity)
2510 * and releaseBuffer(newLength), the following applies:
2511 * - The string length is set to 0.
2512 * - Any read API call on the UnicodeString object will behave like on a 0-length string.
2513 * - Any write API call on the UnicodeString object is disallowed and will have no effect.
2514 * - You can read from and write to the returned buffer.
2515 * - The previous string contents will still be in the buffer;
2516 * if you want to use it, then you need to call length() before getBuffer(minCapacity).
2517 * If the length() was greater than minCapacity, then any contents after minCapacity
2518 * may be lost.
2519 * The buffer contents is not NUL-terminated by getBuffer().
2520 * If length()<getCapacity() then you can terminate it by writing a NUL
2521 * at index length().
2522 * - You must call releaseBuffer(newLength) in order to
2523 * return to normal UnicodeString operation.
2524 *
2525 * @param minCapacity the minimum number of UChars that are to be available
2526 * in the buffer, starting at the returned pointer;
2527 * defaults to the current string capacity if minCapacity==-1
2528 * @return a writable pointer to the internal string buffer,
2529 * or 0 if an error occurs (nested calls, out of memory)
2530 *
2531 * @see releaseBuffer
2532 * @see getTerminatedBuffer()
2533 * @stable ICU 2.0
2534 */
2535 UChar *getBuffer(int32_t minCapacity);
2536
2537 /**
2538 * Release a read/write buffer on a UnicodeString object with an
2539 * "open" getBuffer(minCapacity).
2540 * This function must be called in a matched pair with getBuffer(minCapacity).
2541 * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
2542 *
2543 * It will set the string length to newLength, at most to the current capacity.
2544 * If newLength==-1 then it will set the length according to the
2545 * first NUL in the buffer, or to the capacity if there is no NUL.
2546 *
2547 * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
2548 *
2549 * @param newLength the new length of the UnicodeString object;
2550 * defaults to the current capacity if newLength is greater than that;
2551 * if newLength==-1, it defaults to u_strlen(buffer) but not more than
2552 * the current capacity of the string
2553 *
2554 * @see getBuffer(int32_t minCapacity)
2555 * @stable ICU 2.0
2556 */
2557 void releaseBuffer(int32_t newLength=-1);
2558
2559 /**
2560 * Get a read-only pointer to the internal buffer.
2561 * This can be called at any time on a valid UnicodeString.
2562 *
2563 * It returns 0 if the string is bogus, or
2564 * during an "open" getBuffer(minCapacity).
2565 *
2566 * It can be called as many times as desired.
2567 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2568 * at which time the pointer is semantically invalidated and must not be used any more.
2569 *
2570 * The capacity of the buffer can be determined with getCapacity().
2571 * The part after length() may or may not be initialized and valid,
2572 * depending on the history of the UnicodeString object.
2573 *
2574 * The buffer contents is (probably) not NUL-terminated.
2575 * You can check if it is with
2576 * <code>(s.length()<s.getCapacity() && buffer[s.length()]==0)</code>.
2577 * (See getTerminatedBuffer().)
2578 *
2579 * The buffer may reside in read-only memory. Its contents must not
2580 * be modified.
2581 *
2582 * @return a read-only pointer to the internal string buffer,
2583 * or 0 if the string is empty or bogus
2584 *
2585 * @see getBuffer(int32_t minCapacity)
2586 * @see getTerminatedBuffer()
2587 * @stable ICU 2.0
2588 */
2589 inline const UChar *getBuffer() const;
2590
2591 /**
2592 * Get a read-only pointer to the internal buffer,
2593 * making sure that it is NUL-terminated.
2594 * This can be called at any time on a valid UnicodeString.
2595 *
2596 * It returns 0 if the string is bogus, or
2597 * during an "open" getBuffer(minCapacity), or if the buffer cannot
2598 * be NUL-terminated (because memory allocation failed).
2599 *
2600 * It can be called as many times as desired.
2601 * The pointer that it returns will remain valid until the UnicodeString object is modified,
2602 * at which time the pointer is semantically invalidated and must not be used any more.
2603 *
2604 * The capacity of the buffer can be determined with getCapacity().
2605 * The part after length()+1 may or may not be initialized and valid,
2606 * depending on the history of the UnicodeString object.
2607 *
2608 * The buffer contents is guaranteed to be NUL-terminated.
2609 * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
2610 * is written.
2611 * For this reason, this function is not const, unlike getBuffer().
2612 * Note that a UnicodeString may also contain NUL characters as part of its contents.
2613 *
2614 * The buffer may reside in read-only memory. Its contents must not
2615 * be modified.
2616 *
2617 * @return a read-only pointer to the internal string buffer,
2618 * or 0 if the string is empty or bogus
2619 *
2620 * @see getBuffer(int32_t minCapacity)
2621 * @see getBuffer()
374ca955 2622 * @stable ICU 2.2
b75a7d8f
A
2623 */
2624 inline const UChar *getTerminatedBuffer();
2625
2626 //========================================
2627 // Constructors
2628 //========================================
2629
374ca955 2630 /** Construct an empty UnicodeString.
b75a7d8f
A
2631 * @stable ICU 2.0
2632 */
2633 UnicodeString();
2634
2635 /**
2636 * Construct a UnicodeString with capacity to hold <TT>capacity</TT> UChars
2637 * @param capacity the number of UChars this UnicodeString should hold
2638 * before a resize is necessary; if count is greater than 0 and count
2639 * code points c take up more space than capacity, then capacity is adjusted
2640 * accordingly.
2641 * @param c is used to initially fill the string
2642 * @param count specifies how many code points c are to be written in the
2643 * string
2644 * @stable ICU 2.0
2645 */
2646 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2647
2648 /**
2649 * Single UChar (code unit) constructor.
2650 * @param ch the character to place in the UnicodeString
2651 * @stable ICU 2.0
2652 */
2653 UnicodeString(UChar ch);
2654
2655 /**
2656 * Single UChar32 (code point) constructor.
2657 * @param ch the character to place in the UnicodeString
2658 * @stable ICU 2.0
2659 */
2660 UnicodeString(UChar32 ch);
2661
2662 /**
2663 * UChar* constructor.
2664 * @param text The characters to place in the UnicodeString. <TT>text</TT>
2665 * must be NUL (U+0000) terminated.
2666 * @stable ICU 2.0
2667 */
2668 UnicodeString(const UChar *text);
2669
2670 /**
2671 * UChar* constructor.
2672 * @param text The characters to place in the UnicodeString.
2673 * @param textLength The number of Unicode characters in <TT>text</TT>
2674 * to copy.
2675 * @stable ICU 2.0
2676 */
2677 UnicodeString(const UChar *text,
2678 int32_t textLength);
2679
2680 /**
2681 * Readonly-aliasing UChar* constructor.
2682 * The text will be used for the UnicodeString object, but
2683 * it will not be released when the UnicodeString is destroyed.
2684 * This has copy-on-write semantics:
2685 * When the string is modified, then the buffer is first copied into
2686 * newly allocated memory.
2687 * The aliased buffer is never modified.
2688 * In an assignment to another UnicodeString, the text will be aliased again,
2689 * so that both strings then alias the same readonly-text.
2690 *
2691 * @param isTerminated specifies if <code>text</code> is <code>NUL</code>-terminated.
2692 * This must be true if <code>textLength==-1</code>.
2693 * @param text The characters to alias for the UnicodeString.
2694 * @param textLength The number of Unicode characters in <code>text</code> to alias.
2695 * If -1, then this constructor will determine the length
2696 * by calling <code>u_strlen()</code>.
2697 * @stable ICU 2.0
2698 */
2699 UnicodeString(UBool isTerminated,
2700 const UChar *text,
2701 int32_t textLength);
2702
2703 /**
2704 * Writable-aliasing UChar* constructor.
2705 * The text will be used for the UnicodeString object, but
2706 * it will not be released when the UnicodeString is destroyed.
2707 * This has write-through semantics:
2708 * For as long as the capacity of the buffer is sufficient, write operations
2709 * will directly affect the buffer. When more capacity is necessary, then
2710 * a new buffer will be allocated and the contents copied as with regularly
2711 * constructed strings.
2712 * In an assignment to another UnicodeString, the buffer will be copied.
2713 * The extract(UChar *dst) function detects whether the dst pointer is the same
2714 * as the string buffer itself and will in this case not copy the contents.
2715 *
2716 * @param buffer The characters to alias for the UnicodeString.
2717 * @param buffLength The number of Unicode characters in <code>buffer</code> to alias.
2718 * @param buffCapacity The size of <code>buffer</code> in UChars.
2719 * @stable ICU 2.0
2720 */
2721 UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2722
374ca955
A
2723#if !UCONFIG_NO_CONVERSION
2724
b75a7d8f
A
2725 /**
2726 * char* constructor.
2727 * @param codepageData an array of bytes, null-terminated
2728 * @param codepage the encoding of <TT>codepageData</TT>. The special
374ca955 2729 * value 0 for <TT>codepage</TT> indicates that the text is in the
b75a7d8f 2730 * platform's default codepage.
374ca955 2731 *
b75a7d8f
A
2732 * If <code>codepage</code> is an empty string (<code>""</code>),
2733 * then a simple conversion is performed on the codepage-invariant
2734 * subset ("invariant characters") of the platform encoding. See utypes.h.
374ca955
A
2735 * Recommendation: For invariant-character strings use the constructor
2736 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2737 * because it avoids object code dependencies of UnicodeString on
2738 * the conversion code.
2739 *
b75a7d8f
A
2740 * @stable ICU 2.0
2741 */
2742 UnicodeString(const char *codepageData,
2743 const char *codepage = 0);
2744
2745 /**
2746 * char* constructor.
2747 * @param codepageData an array of bytes.
2748 * @param dataLength The number of bytes in <TT>codepageData</TT>.
2749 * @param codepage the encoding of <TT>codepageData</TT>. The special
374ca955 2750 * value 0 for <TT>codepage</TT> indicates that the text is in the
b75a7d8f
A
2751 * platform's default codepage.
2752 * If <code>codepage</code> is an empty string (<code>""</code>),
2753 * then a simple conversion is performed on the codepage-invariant
2754 * subset ("invariant characters") of the platform encoding. See utypes.h.
374ca955
A
2755 * Recommendation: For invariant-character strings use the constructor
2756 * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
2757 * because it avoids object code dependencies of UnicodeString on
2758 * the conversion code.
2759 *
b75a7d8f
A
2760 * @stable ICU 2.0
2761 */
2762 UnicodeString(const char *codepageData,
2763 int32_t dataLength,
2764 const char *codepage = 0);
2765
2766 /**
2767 * char * / UConverter constructor.
2768 * This constructor uses an existing UConverter object to
2769 * convert the codepage string to Unicode and construct a UnicodeString
2770 * from that.
2771 *
2772 * The converter is reset at first.
2773 * If the error code indicates a failure before this constructor is called,
2774 * or if an error occurs during conversion or construction,
2775 * then the string will be bogus.
2776 *
2777 * This function avoids the overhead of opening and closing a converter if
2778 * multiple strings are constructed.
2779 *
2780 * @param src input codepage string
2781 * @param srcLength length of the input string, can be -1 for NUL-terminated strings
2782 * @param cnv converter object (ucnv_resetToUnicode() will be called),
2783 * can be NULL for the default converter
2784 * @param errorCode normal ICU error code
2785 * @stable ICU 2.0
2786 */
2787 UnicodeString(
2788 const char *src, int32_t srcLength,
2789 UConverter *cnv,
2790 UErrorCode &errorCode);
2791
374ca955
A
2792#endif
2793
2794 /**
2795 * Constructs a Unicode string from an invariant-character char * string.
2796 * About invariant characters see utypes.h.
2797 * This constructor has no runtime dependency on conversion code and is
2798 * therefore recommended over ones taking a charset name string
2799 * (where the empty string "" indicates invariant-character conversion).
2800 *
2801 * Use the macro US_INV as the third, signature-distinguishing parameter.
2802 *
2803 * For example:
2804 * \code
2805 * void fn(const char *s) {
2806 * UnicodeString ustr(s, -1, US_INV);
2807 * // use ustr ...
2808 * }
2809 * \endcode
2810 *
2811 * @param src String using only invariant characters.
2812 * @param length Length of src, or -1 if NUL-terminated.
2813 * @param inv Signature-distinguishing parameter, use US_INV.
2814 *
2815 * @see US_INV
2816 * @draft ICU 3.2
2817 */
2818 UnicodeString(const char *src, int32_t length, enum EInvariant inv);
2819
b75a7d8f
A
2820
2821 /**
2822 * Copy constructor.
2823 * @param that The UnicodeString object to copy.
2824 * @stable ICU 2.0
2825 */
2826 UnicodeString(const UnicodeString& that);
2827
2828 /**
2829 * 'Substring' constructor from tail of source string.
2830 * @param src The UnicodeString object to copy.
2831 * @param srcStart The offset into <tt>src</tt> at which to start copying.
374ca955 2832 * @stable ICU 2.2
b75a7d8f
A
2833 */
2834 UnicodeString(const UnicodeString& src, int32_t srcStart);
2835
2836 /**
2837 * 'Substring' constructor from subrange of source string.
2838 * @param src The UnicodeString object to copy.
2839 * @param srcStart The offset into <tt>src</tt> at which to start copying.
2840 * @param srcLength The number of characters from <tt>src</tt> to copy.
374ca955 2841 * @stable ICU 2.2
b75a7d8f
A
2842 */
2843 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
2844
2845 /**
2846 * Clone this object, an instance of a subclass of Replaceable.
2847 * Clones can be used concurrently in multiple threads.
2848 * If a subclass does not implement clone(), or if an error occurs,
2849 * then NULL is returned.
2850 * The clone functions in all subclasses return a pointer to a Replaceable
2851 * because some compilers do not support covariant (same-as-this)
2852 * return types; cast to the appropriate subclass if necessary.
2853 * The caller must delete the clone.
2854 *
2855 * @return a clone of this object
2856 *
2857 * @see Replaceable::clone
2858 * @see getDynamicClassID
374ca955 2859 * @stable ICU 2.6
b75a7d8f
A
2860 */
2861 virtual Replaceable *clone() const;
2862
374ca955 2863 /** Destructor.
b75a7d8f
A
2864 * @stable ICU 2.0
2865 */
374ca955 2866 virtual ~UnicodeString();
b75a7d8f
A
2867
2868
2869 /* Miscellaneous operations */
2870
2871 /**
2872 * Unescape a string of characters and return a string containing
2873 * the result. The following escape sequences are recognized:
2874 *
374ca955
A
2875 * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
2876 * \\Uhhhhhhhh 8 hex digits
2877 * \\xhh 1-2 hex digits
2878 * \\ooo 1-3 octal digits; o in [0-7]
2879 * \\cX control-X; X is masked with 0x1F
b75a7d8f
A
2880 *
2881 * as well as the standard ANSI C escapes:
2882 *
374ca955
A
2883 * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
2884 * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
2885 * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
b75a7d8f
A
2886 *
2887 * Anything else following a backslash is generically escaped. For
374ca955 2888 * example, "[a\\-z]" returns "[a-z]".
b75a7d8f
A
2889 *
2890 * If an escape sequence is ill-formed, this method returns an empty
374ca955 2891 * string. An example of an ill-formed sequence is "\\u" followed by
b75a7d8f
A
2892 * fewer than 4 hex digits.
2893 *
2894 * This function is similar to u_unescape() but not identical to it.
2895 * The latter takes a source char*, so it does escape recognition
2896 * and also invariant conversion.
2897 *
2898 * @return a string with backslash escapes interpreted, or an
2899 * empty string on error.
2900 * @see UnicodeString#unescapeAt()
2901 * @see u_unescape()
2902 * @see u_unescapeAt()
2903 * @stable ICU 2.0
2904 */
2905 UnicodeString unescape() const;
2906
2907 /**
2908 * Unescape a single escape sequence and return the represented
2909 * character. See unescape() for a listing of the recognized escape
2910 * sequences. The character at offset-1 is assumed (without
2911 * checking) to be a backslash. If the escape sequence is
2912 * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is
2913 * returned.
2914 *
2915 * @param offset an input output parameter. On input, it is the
2916 * offset into this string where the escape sequence is located,
2917 * after the initial backslash. On output, it is advanced after the
2918 * last character parsed. On error, it is not advanced at all.
2919 * @return the character represented by the escape sequence at
2920 * offset, or (UChar32)0xFFFFFFFF on error.
2921 * @see UnicodeString#unescape()
2922 * @see u_unescape()
2923 * @see u_unescapeAt()
2924 * @stable ICU 2.0
2925 */
2926 UChar32 unescapeAt(int32_t &offset) const;
2927
2928 /**
374ca955 2929 * ICU "poor man's RTTI", returns a UClassID for this class.
b75a7d8f 2930 *
374ca955 2931 * @stable ICU 2.2
b75a7d8f 2932 */
374ca955 2933 static UClassID U_EXPORT2 getStaticClassID();
b75a7d8f
A
2934
2935 /**
374ca955 2936 * ICU "poor man's RTTI", returns a UClassID for the actual class.
b75a7d8f 2937 *
374ca955 2938 * @stable ICU 2.2
b75a7d8f 2939 */
374ca955 2940 virtual UClassID getDynamicClassID() const;
b75a7d8f
A
2941
2942 //========================================
2943 // Implementation methods
2944 //========================================
2945
2946protected:
2947 /**
2948 * Implement Replaceable::getLength() (see jitterbug 1027).
374ca955 2949 * @stable ICU 2.4
b75a7d8f
A
2950 */
2951 virtual int32_t getLength() const;
2952
2953 /**
2954 * The change in Replaceable to use virtual getCharAt() allows
2955 * UnicodeString::charAt() to be inline again (see jitterbug 709).
374ca955 2956 * @stable ICU 2.4
b75a7d8f
A
2957 */
2958 virtual UChar getCharAt(int32_t offset) const;
2959
2960 /**
2961 * The change in Replaceable to use virtual getChar32At() allows
2962 * UnicodeString::char32At() to be inline again (see jitterbug 709).
374ca955 2963 * @stable ICU 2.4
b75a7d8f
A
2964 */
2965 virtual UChar32 getChar32At(int32_t offset) const;
2966
2967private:
2968
2969 inline int8_t
2970 doCompare(int32_t start,
2971 int32_t length,
2972 const UnicodeString& srcText,
2973 int32_t srcStart,
2974 int32_t srcLength) const;
2975
2976 int8_t doCompare(int32_t start,
2977 int32_t length,
2978 const UChar *srcChars,
2979 int32_t srcStart,
2980 int32_t srcLength) const;
2981
2982 inline int8_t
2983 doCompareCodePointOrder(int32_t start,
2984 int32_t length,
2985 const UnicodeString& srcText,
2986 int32_t srcStart,
2987 int32_t srcLength) const;
2988
2989 int8_t doCompareCodePointOrder(int32_t start,
2990 int32_t length,
2991 const UChar *srcChars,
2992 int32_t srcStart,
2993 int32_t srcLength) const;
2994
2995 inline int8_t
2996 doCaseCompare(int32_t start,
2997 int32_t length,
2998 const UnicodeString &srcText,
2999 int32_t srcStart,
3000 int32_t srcLength,
3001 uint32_t options) const;
3002
3003 int8_t
3004 doCaseCompare(int32_t start,
3005 int32_t length,
3006 const UChar *srcChars,
3007 int32_t srcStart,
3008 int32_t srcLength,
3009 uint32_t options) const;
3010
3011 int32_t doIndexOf(UChar c,
3012 int32_t start,
3013 int32_t length) const;
3014
3015 int32_t doIndexOf(UChar32 c,
3016 int32_t start,
3017 int32_t length) const;
3018
3019 int32_t doLastIndexOf(UChar c,
3020 int32_t start,
3021 int32_t length) const;
3022
3023 int32_t doLastIndexOf(UChar32 c,
3024 int32_t start,
3025 int32_t length) const;
3026
374ca955
A
3027 void doExtract(int32_t start,
3028 int32_t length,
3029 UChar *dst,
b75a7d8f
A
3030 int32_t dstStart) const;
3031
3032 inline void doExtract(int32_t start,
3033 int32_t length,
3034 UnicodeString& target) const;
3035
3036 inline UChar doCharAt(int32_t offset) const;
3037
374ca955
A
3038 UnicodeString& doReplace(int32_t start,
3039 int32_t length,
3040 const UnicodeString& srcText,
3041 int32_t srcStart,
b75a7d8f
A
3042 int32_t srcLength);
3043
374ca955
A
3044 UnicodeString& doReplace(int32_t start,
3045 int32_t length,
3046 const UChar *srcChars,
3047 int32_t srcStart,
b75a7d8f
A
3048 int32_t srcLength);
3049
3050 UnicodeString& doReverse(int32_t start,
3051 int32_t length);
3052
3053 // calculate hash code
3054 int32_t doHashCode(void) const;
3055
3056 // get pointer to start of array
3057 inline UChar* getArrayStart(void);
3058 inline const UChar* getArrayStart(void) const;
3059
3060 // allocate the array; result may be fStackBuffer
3061 // sets refCount to 1 if appropriate
3062 // sets fArray, fCapacity, and fFlags
3063 // returns boolean for success or failure
3064 UBool allocate(int32_t capacity);
3065
3066 // release the array if owned
3067 void releaseArray(void);
3068
3069 // turn a bogus string into an empty one
3070 void unBogus();
3071
3072 // implements assignment operator, copy constructor, and fastCopyFrom()
3073 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3074
3075 // Pin start and limit to acceptable values.
3076 inline void pinIndex(int32_t& start) const;
3077 inline void pinIndices(int32_t& start,
3078 int32_t& length) const;
3079
374ca955
A
3080#if !UCONFIG_NO_CONVERSION
3081
b75a7d8f
A
3082 /* Internal extract() using UConverter. */
3083 int32_t doExtract(int32_t start, int32_t length,
3084 char *dest, int32_t destCapacity,
3085 UConverter *cnv,
3086 UErrorCode &errorCode) const;
3087
3088 /*
3089 * Real constructor for converting from codepage data.
3090 * It assumes that it is called with !fRefCounted.
3091 *
3092 * If <code>codepage==0</code>, then the default converter
3093 * is used for the platform encoding.
3094 * If <code>codepage</code> is an empty string (<code>""</code>),
3095 * then a simple conversion is performed on the codepage-invariant
3096 * subset ("invariant characters") of the platform encoding. See utypes.h.
3097 */
3098 void doCodepageCreate(const char *codepageData,
3099 int32_t dataLength,
3100 const char *codepage);
3101
3102 /*
3103 * Worker function for creating a UnicodeString from
3104 * a codepage string using a UConverter.
3105 */
3106 void
3107 doCodepageCreate(const char *codepageData,
3108 int32_t dataLength,
3109 UConverter *converter,
3110 UErrorCode &status);
374ca955
A
3111
3112#endif
3113
b75a7d8f
A
3114 /*
3115 * This function is called when write access to the array
3116 * is necessary.
3117 *
3118 * We need to make a copy of the array if
3119 * the buffer is read-only, or
3120 * the buffer is refCounted (shared), and refCount>1, or
3121 * the buffer is too small.
3122 *
3123 * Return FALSE if memory could not be allocated.
3124 */
3125 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3126 int32_t growCapacity = -1,
3127 UBool doCopyArray = TRUE,
3128 int32_t **pBufferToDelete = 0,
3129 UBool forceClone = FALSE);
3130
3131 // common function for case mappings
3132 UnicodeString &
3133 caseMap(BreakIterator *titleIter,
374ca955 3134 const char *locale,
b75a7d8f
A
3135 uint32_t options,
3136 int32_t toWhichCase);
3137
3138 // ref counting
3139 void addRef(void);
3140 int32_t removeRef(void);
3141 int32_t refCount(void) const;
3142
3143 // constants
3144 enum {
3145 US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
3146 kInvalidUChar=0xffff, // invalid UChar index
3147 kGrowSize=128, // grow size for this buffer
3148 kInvalidHashCode=0, // invalid hash code
3149 kEmptyHashCode=1, // hash code for empty string
3150
3151 // bit flag values for fFlags
3152 kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3153 kUsingStackBuffer=2,// fArray==fStackBuffer
3154 kRefCounted=4, // there is a refCount field before the characters in fArray
3155 kBufferIsReadonly=8,// do not write to this buffer
3156 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3157 // and releaseBuffer(newLength) must be called
3158
3159 // combined values for convenience
3160 kShortString=kUsingStackBuffer,
3161 kLongString=kRefCounted,
3162 kReadonlyAlias=kBufferIsReadonly,
3163 kWritableAlias=0
3164 };
3165
3166 friend class StringCharacterIterator;
3167 friend class StringThreadTest;
3168
3169 /*
3170 * The following are all the class fields that are stored
3171 * in each UnicodeString object.
3172 * Note that UnicodeString has virtual functions,
3173 * therefore there is an implicit vtable pointer
3174 * as the first real field.
3175 * The fields should be aligned such that no padding is
3176 * necessary, mostly by having larger types first.
3177 * On 32-bit machines, the size should be 32 bytes,
3178 * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3179 */
3180 // (implicit) *vtable;
3181 int32_t fLength; // number of characters in fArray
3182 int32_t fCapacity; // sizeof fArray
3183 UChar *fArray; // the Unicode data
3184 uint16_t fFlags; // bit flags: see constants above
3185 UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
3186
b75a7d8f
A
3187};
3188
374ca955
A
3189/**
3190 * Create a new UnicodeString with the concatenation of two others.
3191 *
3192 * @param s1 The first string to be copied to the new one.
3193 * @param s2 The second string to be copied to the new one, after s1.
3194 * @return UnicodeString(s1).append(s2)
3195 * @draft ICU 2.8
3196 */
3197U_COMMON_API UnicodeString U_EXPORT2
3198operator+ (const UnicodeString &s1, const UnicodeString &s2);
3199
b75a7d8f
A
3200U_NAMESPACE_END
3201
374ca955
A
3202// inline implementations -------------------------------------------------- ***
3203
b75a7d8f
A
3204//========================================
3205// Array copying
3206//========================================
3207/**
3208 * Copy an array of UnicodeString OBJECTS (not pointers).
3209 * @internal
3210 */
374ca955 3211inline void
b75a7d8f
A
3212uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
3213{ while(count-- > 0) *dst++ = *src++; }
3214
3215/**
3216 * Copy an array of UnicodeString OBJECTS (not pointers).
3217 * @internal
3218 */
374ca955
A
3219inline void
3220uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
b75a7d8f
A
3221 U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
3222{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
3223
3224U_NAMESPACE_BEGIN
3225
3226//========================================
3227// Inline members
3228//========================================
3229
3230//========================================
3231// Privates
3232//========================================
3233
3234inline void
3235UnicodeString::pinIndex(int32_t& start) const
3236{
3237 // pin index
3238 if(start < 0) {
3239 start = 0;
3240 } else if(start > fLength) {
3241 start = fLength;
3242 }
3243}
3244
3245inline void
3246UnicodeString::pinIndices(int32_t& start,
3247 int32_t& _length) const
3248{
3249 // pin indices
3250 if(start < 0) {
3251 start = 0;
3252 } else if(start > fLength) {
3253 start = fLength;
3254 }
3255 if(_length < 0) {
3256 _length = 0;
3257 } else if(_length > (fLength - start)) {
3258 _length = (fLength - start);
3259 }
3260}
3261
374ca955 3262inline UChar*
b75a7d8f
A
3263UnicodeString::getArrayStart()
3264{ return fArray; }
3265
374ca955 3266inline const UChar*
b75a7d8f
A
3267UnicodeString::getArrayStart() const
3268{ return fArray; }
3269
3270//========================================
3271// Read-only implementation methods
3272//========================================
374ca955 3273inline int32_t
b75a7d8f
A
3274UnicodeString::length() const
3275{ return fLength; }
3276
374ca955 3277inline int32_t
b75a7d8f
A
3278UnicodeString::getCapacity() const
3279{ return fCapacity; }
3280
374ca955 3281inline int32_t
b75a7d8f
A
3282UnicodeString::hashCode() const
3283{ return doHashCode(); }
3284
374ca955 3285inline UBool
b75a7d8f
A
3286UnicodeString::isBogus() const
3287{ return (UBool)(fFlags & kIsBogus); }
3288
3289inline const UChar *
3290UnicodeString::getBuffer() const {
3291 if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
3292 return fArray;
3293 } else {
3294 return 0;
3295 }
3296}
3297
3298//========================================
3299// Read-only alias methods
3300//========================================
3301inline int8_t
3302UnicodeString::doCompare(int32_t start,
3303 int32_t length,
3304 const UnicodeString& srcText,
3305 int32_t srcStart,
3306 int32_t srcLength) const
3307{
3308 if(srcText.isBogus()) {
3309 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3310 } else {
3311 srcText.pinIndices(srcStart, srcLength);
3312 return doCompare(start, length, srcText.fArray, srcStart, srcLength);
3313 }
3314}
3315
3316inline UBool
3317UnicodeString::operator== (const UnicodeString& text) const
3318{
3319 if(isBogus()) {
3320 return text.isBogus();
3321 } else {
3322 return
3323 !text.isBogus() &&
3324 fLength == text.fLength &&
3325 doCompare(0, fLength, text, 0, text.fLength) == 0;
3326 }
3327}
3328
3329inline UBool
3330UnicodeString::operator!= (const UnicodeString& text) const
3331{ return (! operator==(text)); }
3332
3333inline UBool
3334UnicodeString::operator> (const UnicodeString& text) const
3335{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
3336
3337inline UBool
3338UnicodeString::operator< (const UnicodeString& text) const
3339{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
3340
3341inline UBool
3342UnicodeString::operator>= (const UnicodeString& text) const
3343{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
3344
3345inline UBool
3346UnicodeString::operator<= (const UnicodeString& text) const
3347{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
3348
374ca955 3349inline int8_t
b75a7d8f
A
3350UnicodeString::compare(const UnicodeString& text) const
3351{ return doCompare(0, fLength, text, 0, text.fLength); }
3352
374ca955 3353inline int8_t
b75a7d8f
A
3354UnicodeString::compare(int32_t start,
3355 int32_t _length,
3356 const UnicodeString& srcText) const
3357{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
3358
374ca955 3359inline int8_t
b75a7d8f
A
3360UnicodeString::compare(const UChar *srcChars,
3361 int32_t srcLength) const
3362{ return doCompare(0, fLength, srcChars, 0, srcLength); }
3363
374ca955 3364inline int8_t
b75a7d8f
A
3365UnicodeString::compare(int32_t start,
3366 int32_t _length,
3367 const UnicodeString& srcText,
3368 int32_t srcStart,
3369 int32_t srcLength) const
3370{ return doCompare(start, _length, srcText, srcStart, srcLength); }
3371
3372inline int8_t
3373UnicodeString::compare(int32_t start,
3374 int32_t _length,
3375 const UChar *srcChars) const
3376{ return doCompare(start, _length, srcChars, 0, _length); }
3377
374ca955 3378inline int8_t
b75a7d8f
A
3379UnicodeString::compare(int32_t start,
3380 int32_t _length,
3381 const UChar *srcChars,
3382 int32_t srcStart,
3383 int32_t srcLength) const
3384{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
3385
3386inline int8_t
3387UnicodeString::compareBetween(int32_t start,
3388 int32_t limit,
3389 const UnicodeString& srcText,
3390 int32_t srcStart,
3391 int32_t srcLimit) const
374ca955 3392{ return doCompare(start, limit - start,
b75a7d8f
A
3393 srcText, srcStart, srcLimit - srcStart); }
3394
3395inline int8_t
3396UnicodeString::doCompareCodePointOrder(int32_t start,
3397 int32_t length,
3398 const UnicodeString& srcText,
3399 int32_t srcStart,
3400 int32_t srcLength) const
3401{
3402 if(srcText.isBogus()) {
3403 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3404 } else {
3405 srcText.pinIndices(srcStart, srcLength);
3406 return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
3407 }
3408}
3409
374ca955 3410inline int8_t
b75a7d8f
A
3411UnicodeString::compareCodePointOrder(const UnicodeString& text) const
3412{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
3413
374ca955 3414inline int8_t
b75a7d8f
A
3415UnicodeString::compareCodePointOrder(int32_t start,
3416 int32_t _length,
3417 const UnicodeString& srcText) const
3418{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
3419
374ca955 3420inline int8_t
b75a7d8f
A
3421UnicodeString::compareCodePointOrder(const UChar *srcChars,
3422 int32_t srcLength) const
3423{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
3424
374ca955 3425inline int8_t
b75a7d8f
A
3426UnicodeString::compareCodePointOrder(int32_t start,
3427 int32_t _length,
3428 const UnicodeString& srcText,
3429 int32_t srcStart,
3430 int32_t srcLength) const
3431{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3432
3433inline int8_t
3434UnicodeString::compareCodePointOrder(int32_t start,
3435 int32_t _length,
3436 const UChar *srcChars) const
3437{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3438
374ca955 3439inline int8_t
b75a7d8f
A
3440UnicodeString::compareCodePointOrder(int32_t start,
3441 int32_t _length,
3442 const UChar *srcChars,
3443 int32_t srcStart,
3444 int32_t srcLength) const
3445{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3446
3447inline int8_t
3448UnicodeString::compareCodePointOrderBetween(int32_t start,
3449 int32_t limit,
3450 const UnicodeString& srcText,
3451 int32_t srcStart,
3452 int32_t srcLimit) const
374ca955 3453{ return doCompareCodePointOrder(start, limit - start,
b75a7d8f
A
3454 srcText, srcStart, srcLimit - srcStart); }
3455
3456inline int8_t
3457UnicodeString::doCaseCompare(int32_t start,
3458 int32_t length,
3459 const UnicodeString &srcText,
3460 int32_t srcStart,
3461 int32_t srcLength,
3462 uint32_t options) const
3463{
3464 if(srcText.isBogus()) {
3465 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3466 } else {
3467 srcText.pinIndices(srcStart, srcLength);
3468 return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
3469 }
3470}
3471
374ca955 3472inline int8_t
b75a7d8f
A
3473UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3474 return doCaseCompare(0, fLength, text, 0, text.fLength, options);
3475}
3476
374ca955 3477inline int8_t
b75a7d8f
A
3478UnicodeString::caseCompare(int32_t start,
3479 int32_t _length,
3480 const UnicodeString &srcText,
3481 uint32_t options) const {
3482 return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
3483}
3484
374ca955 3485inline int8_t
b75a7d8f
A
3486UnicodeString::caseCompare(const UChar *srcChars,
3487 int32_t srcLength,
3488 uint32_t options) const {
3489 return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
3490}
3491
374ca955 3492inline int8_t
b75a7d8f
A
3493UnicodeString::caseCompare(int32_t start,
3494 int32_t _length,
3495 const UnicodeString &srcText,
3496 int32_t srcStart,
3497 int32_t srcLength,
3498 uint32_t options) const {
3499 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3500}
3501
3502inline int8_t
3503UnicodeString::caseCompare(int32_t start,
3504 int32_t _length,
3505 const UChar *srcChars,
3506 uint32_t options) const {
3507 return doCaseCompare(start, _length, srcChars, 0, _length, options);
3508}
3509
374ca955 3510inline int8_t
b75a7d8f
A
3511UnicodeString::caseCompare(int32_t start,
3512 int32_t _length,
3513 const UChar *srcChars,
3514 int32_t srcStart,
3515 int32_t srcLength,
3516 uint32_t options) const {
3517 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3518}
3519
3520inline int8_t
3521UnicodeString::caseCompareBetween(int32_t start,
3522 int32_t limit,
3523 const UnicodeString &srcText,
3524 int32_t srcStart,
3525 int32_t srcLimit,
3526 uint32_t options) const {
3527 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3528}
3529
374ca955 3530inline int32_t
b75a7d8f
A
3531UnicodeString::indexOf(const UnicodeString& srcText,
3532 int32_t srcStart,
3533 int32_t srcLength,
3534 int32_t start,
3535 int32_t _length) const
3536{
3537 if(!srcText.isBogus()) {
3538 srcText.pinIndices(srcStart, srcLength);
3539 if(srcLength > 0) {
3540 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3541 }
3542 }
3543 return -1;
3544}
3545
374ca955 3546inline int32_t
b75a7d8f
A
3547UnicodeString::indexOf(const UnicodeString& text) const
3548{ return indexOf(text, 0, text.fLength, 0, fLength); }
3549
374ca955 3550inline int32_t
b75a7d8f
A
3551UnicodeString::indexOf(const UnicodeString& text,
3552 int32_t start) const {
3553 pinIndex(start);
3554 return indexOf(text, 0, text.fLength, start, fLength - start);
3555}
3556
374ca955 3557inline int32_t
b75a7d8f
A
3558UnicodeString::indexOf(const UnicodeString& text,
3559 int32_t start,
3560 int32_t _length) const
3561{ return indexOf(text, 0, text.fLength, start, _length); }
3562
374ca955 3563inline int32_t
b75a7d8f
A
3564UnicodeString::indexOf(const UChar *srcChars,
3565 int32_t srcLength,
3566 int32_t start) const {
3567 pinIndex(start);
3568 return indexOf(srcChars, 0, srcLength, start, fLength - start);
3569}
3570
374ca955 3571inline int32_t
b75a7d8f
A
3572UnicodeString::indexOf(const UChar *srcChars,
3573 int32_t srcLength,
3574 int32_t start,
3575 int32_t _length) const
3576{ return indexOf(srcChars, 0, srcLength, start, _length); }
3577
374ca955 3578inline int32_t
b75a7d8f
A
3579UnicodeString::indexOf(UChar c,
3580 int32_t start,
3581 int32_t _length) const
3582{ return doIndexOf(c, start, _length); }
3583
374ca955 3584inline int32_t
b75a7d8f
A
3585UnicodeString::indexOf(UChar32 c,
3586 int32_t start,
3587 int32_t _length) const
3588{ return doIndexOf(c, start, _length); }
3589
374ca955 3590inline int32_t
b75a7d8f
A
3591UnicodeString::indexOf(UChar c) const
3592{ return doIndexOf(c, 0, fLength); }
3593
374ca955 3594inline int32_t
b75a7d8f
A
3595UnicodeString::indexOf(UChar32 c) const
3596{ return indexOf(c, 0, fLength); }
3597
374ca955 3598inline int32_t
b75a7d8f
A
3599UnicodeString::indexOf(UChar c,
3600 int32_t start) const {
3601 pinIndex(start);
3602 return doIndexOf(c, start, fLength - start);
3603}
3604
374ca955 3605inline int32_t
b75a7d8f
A
3606UnicodeString::indexOf(UChar32 c,
3607 int32_t start) const {
3608 pinIndex(start);
3609 return indexOf(c, start, fLength - start);
3610}
3611
374ca955 3612inline int32_t
b75a7d8f
A
3613UnicodeString::lastIndexOf(const UChar *srcChars,
3614 int32_t srcLength,
3615 int32_t start,
3616 int32_t _length) const
3617{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3618
374ca955 3619inline int32_t
b75a7d8f
A
3620UnicodeString::lastIndexOf(const UChar *srcChars,
3621 int32_t srcLength,
3622 int32_t start) const {
3623 pinIndex(start);
3624 return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
3625}
3626
374ca955 3627inline int32_t
b75a7d8f
A
3628UnicodeString::lastIndexOf(const UnicodeString& srcText,
3629 int32_t srcStart,
3630 int32_t srcLength,
3631 int32_t start,
3632 int32_t _length) const
3633{
3634 if(!srcText.isBogus()) {
3635 srcText.pinIndices(srcStart, srcLength);
3636 if(srcLength > 0) {
3637 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3638 }
3639 }
3640 return -1;
3641}
3642
374ca955 3643inline int32_t
b75a7d8f
A
3644UnicodeString::lastIndexOf(const UnicodeString& text,
3645 int32_t start,
3646 int32_t _length) const
3647{ return lastIndexOf(text, 0, text.fLength, start, _length); }
3648
374ca955 3649inline int32_t
b75a7d8f
A
3650UnicodeString::lastIndexOf(const UnicodeString& text,
3651 int32_t start) const {
3652 pinIndex(start);
3653 return lastIndexOf(text, 0, text.fLength, start, fLength - start);
3654}
3655
374ca955 3656inline int32_t
b75a7d8f
A
3657UnicodeString::lastIndexOf(const UnicodeString& text) const
3658{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
3659
374ca955 3660inline int32_t
b75a7d8f
A
3661UnicodeString::lastIndexOf(UChar c,
3662 int32_t start,
3663 int32_t _length) const
3664{ return doLastIndexOf(c, start, _length); }
3665
374ca955 3666inline int32_t
b75a7d8f
A
3667UnicodeString::lastIndexOf(UChar32 c,
3668 int32_t start,
3669 int32_t _length) const {
3670 return doLastIndexOf(c, start, _length);
3671}
3672
374ca955 3673inline int32_t
b75a7d8f
A
3674UnicodeString::lastIndexOf(UChar c) const
3675{ return doLastIndexOf(c, 0, fLength); }
3676
374ca955 3677inline int32_t
b75a7d8f
A
3678UnicodeString::lastIndexOf(UChar32 c) const {
3679 return lastIndexOf(c, 0, fLength);
3680}
3681
374ca955 3682inline int32_t
b75a7d8f
A
3683UnicodeString::lastIndexOf(UChar c,
3684 int32_t start) const {
3685 pinIndex(start);
3686 return doLastIndexOf(c, start, fLength - start);
3687}
3688
374ca955 3689inline int32_t
b75a7d8f
A
3690UnicodeString::lastIndexOf(UChar32 c,
3691 int32_t start) const {
3692 pinIndex(start);
3693 return lastIndexOf(c, start, fLength - start);
3694}
3695
374ca955 3696inline UBool
b75a7d8f
A
3697UnicodeString::startsWith(const UnicodeString& text) const
3698{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
3699
374ca955 3700inline UBool
b75a7d8f
A
3701UnicodeString::startsWith(const UnicodeString& srcText,
3702 int32_t srcStart,
3703 int32_t srcLength) const
3704{ return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
3705
374ca955 3706inline UBool
b75a7d8f
A
3707UnicodeString::startsWith(const UChar *srcChars,
3708 int32_t srcLength) const
3709{ return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; }
3710
374ca955 3711inline UBool
b75a7d8f
A
3712UnicodeString::startsWith(const UChar *srcChars,
3713 int32_t srcStart,
3714 int32_t srcLength) const
3715{ return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;}
3716
374ca955 3717inline UBool
b75a7d8f 3718UnicodeString::endsWith(const UnicodeString& text) const
374ca955 3719{ return doCompare(fLength - text.fLength, text.fLength,
b75a7d8f
A
3720 text, 0, text.fLength) == 0; }
3721
374ca955 3722inline UBool
b75a7d8f
A
3723UnicodeString::endsWith(const UnicodeString& srcText,
3724 int32_t srcStart,
3725 int32_t srcLength) const {
3726 srcText.pinIndices(srcStart, srcLength);
374ca955 3727 return doCompare(fLength - srcLength, srcLength,
b75a7d8f
A
3728 srcText, srcStart, srcLength) == 0;
3729}
3730
374ca955 3731inline UBool
b75a7d8f
A
3732UnicodeString::endsWith(const UChar *srcChars,
3733 int32_t srcLength) const {
3734 if(srcLength < 0) {
3735 srcLength = u_strlen(srcChars);
3736 }
374ca955 3737 return doCompare(fLength - srcLength, srcLength,
b75a7d8f
A
3738 srcChars, 0, srcLength) == 0;
3739}
3740
374ca955 3741inline UBool
b75a7d8f
A
3742UnicodeString::endsWith(const UChar *srcChars,
3743 int32_t srcStart,
3744 int32_t srcLength) const {
3745 if(srcLength < 0) {
3746 srcLength = u_strlen(srcChars + srcStart);
3747 }
374ca955 3748 return doCompare(fLength - srcLength, srcLength,
b75a7d8f
A
3749 srcChars, srcStart, srcLength) == 0;
3750}
3751
3752//========================================
3753// replace
3754//========================================
374ca955
A
3755inline UnicodeString&
3756UnicodeString::replace(int32_t start,
3757 int32_t _length,
3758 const UnicodeString& srcText)
b75a7d8f
A
3759{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
3760
374ca955
A
3761inline UnicodeString&
3762UnicodeString::replace(int32_t start,
3763 int32_t _length,
3764 const UnicodeString& srcText,
3765 int32_t srcStart,
b75a7d8f
A
3766 int32_t srcLength)
3767{ return doReplace(start, _length, srcText, srcStart, srcLength); }
3768
374ca955
A
3769inline UnicodeString&
3770UnicodeString::replace(int32_t start,
3771 int32_t _length,
b75a7d8f
A
3772 const UChar *srcChars,
3773 int32_t srcLength)
3774{ return doReplace(start, _length, srcChars, 0, srcLength); }
3775
374ca955
A
3776inline UnicodeString&
3777UnicodeString::replace(int32_t start,
3778 int32_t _length,
3779 const UChar *srcChars,
3780 int32_t srcStart,
b75a7d8f
A
3781 int32_t srcLength)
3782{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
3783
374ca955
A
3784inline UnicodeString&
3785UnicodeString::replace(int32_t start,
3786 int32_t _length,
b75a7d8f
A
3787 UChar srcChar)
3788{ return doReplace(start, _length, &srcChar, 0, 1); }
3789
3790inline UnicodeString&
374ca955
A
3791UnicodeString::replace(int32_t start,
3792 int32_t _length,
b75a7d8f
A
3793 UChar32 srcChar) {
3794 UChar buffer[U16_MAX_LENGTH];
3795 int32_t count = 0;
3796 UBool isError = FALSE;
3797 U16_APPEND(buffer, count, U16_MAX_LENGTH, srcChar, isError);
3798 return doReplace(start, _length, buffer, 0, count);
3799}
3800
374ca955
A
3801inline UnicodeString&
3802UnicodeString::replaceBetween(int32_t start,
3803 int32_t limit,
b75a7d8f
A
3804 const UnicodeString& srcText)
3805{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
3806
3807inline UnicodeString&
374ca955
A
3808UnicodeString::replaceBetween(int32_t start,
3809 int32_t limit,
3810 const UnicodeString& srcText,
3811 int32_t srcStart,
b75a7d8f
A
3812 int32_t srcLimit)
3813{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
3814
374ca955 3815inline UnicodeString&
b75a7d8f
A
3816UnicodeString::findAndReplace(const UnicodeString& oldText,
3817 const UnicodeString& newText)
374ca955 3818{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
b75a7d8f
A
3819 newText, 0, newText.fLength); }
3820
374ca955 3821inline UnicodeString&
b75a7d8f
A
3822UnicodeString::findAndReplace(int32_t start,
3823 int32_t _length,
3824 const UnicodeString& oldText,
3825 const UnicodeString& newText)
374ca955 3826{ return findAndReplace(start, _length, oldText, 0, oldText.fLength,
b75a7d8f
A
3827 newText, 0, newText.fLength); }
3828
3829// ============================
3830// extract
3831// ============================
3832inline void
3833UnicodeString::doExtract(int32_t start,
3834 int32_t _length,
3835 UnicodeString& target) const
3836{ target.replace(0, target.fLength, *this, start, _length); }
3837
374ca955
A
3838inline void
3839UnicodeString::extract(int32_t start,
3840 int32_t _length,
3841 UChar *target,
b75a7d8f
A
3842 int32_t targetStart) const
3843{ doExtract(start, _length, target, targetStart); }
3844
374ca955 3845inline void
b75a7d8f
A
3846UnicodeString::extract(int32_t start,
3847 int32_t _length,
3848 UnicodeString& target) const
3849{ doExtract(start, _length, target); }
3850
374ca955
A
3851#if !UCONFIG_NO_CONVERSION
3852
b75a7d8f
A
3853inline int32_t
3854UnicodeString::extract(int32_t start,
3855 int32_t _length,
3856 char *dst,
3857 const char *codepage) const
3858
3859{
3860 // This dstSize value will be checked explicitly
3861 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
3862}
3863
374ca955
A
3864#endif
3865
3866inline void
3867UnicodeString::extractBetween(int32_t start,
3868 int32_t limit,
3869 UChar *dst,
3870 int32_t dstStart) const {
3871 pinIndex(start);
3872 pinIndex(limit);
3873 doExtract(start, limit - start, dst, dstStart);
3874}
b75a7d8f
A
3875
3876inline UChar
3877UnicodeString::doCharAt(int32_t offset) const
3878{
3879 if((uint32_t)offset < (uint32_t)fLength) {
3880 return fArray[offset];
3881 } else {
3882 return kInvalidUChar;
3883 }
3884}
3885
3886inline UChar
3887UnicodeString::charAt(int32_t offset) const
3888{ return doCharAt(offset); }
3889
3890inline UChar
3891UnicodeString::operator[] (int32_t offset) const
3892{ return doCharAt(offset); }
3893
3894inline UChar32
3895UnicodeString::char32At(int32_t offset) const
3896{
3897 if((uint32_t)offset < (uint32_t)fLength) {
3898 UChar32 c;
3899 U16_GET(fArray, 0, offset, fLength, c);
3900 return c;
3901 } else {
3902 return kInvalidUChar;
3903 }
3904}
3905
3906inline int32_t
3907UnicodeString::getChar32Start(int32_t offset) const {
3908 if((uint32_t)offset < (uint32_t)fLength) {
3909 U16_SET_CP_START(fArray, 0, offset);
3910 return offset;
3911 } else {
3912 return 0;
3913 }
3914}
3915
3916inline int32_t
3917UnicodeString::getChar32Limit(int32_t offset) const {
3918 if((uint32_t)offset < (uint32_t)fLength) {
3919 U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
3920 return offset;
3921 } else {
3922 return fLength;
3923 }
3924}
3925
3926inline UBool
3927UnicodeString::isEmpty() const {
3928 return fLength == 0;
3929}
3930
3931//========================================
3932// Write implementation methods
3933//========================================
3934inline const UChar *
3935UnicodeString::getTerminatedBuffer() {
3936 if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3937 return 0;
3938 } else if(fLength<fCapacity && fArray[fLength]==0) {
3939 return fArray;
3940 } else if(cloneArrayIfNeeded(fLength+1)) {
3941 fArray[fLength]=0;
3942 return fArray;
3943 } else {
3944 return 0;
3945 }
3946}
3947
374ca955
A
3948inline UnicodeString&
3949UnicodeString::operator= (UChar ch)
b75a7d8f
A
3950{ return doReplace(0, fLength, &ch, 0, 1); }
3951
374ca955
A
3952inline UnicodeString&
3953UnicodeString::operator= (UChar32 ch)
b75a7d8f
A
3954{ return replace(0, fLength, ch); }
3955
374ca955
A
3956inline UnicodeString&
3957UnicodeString::setTo(const UnicodeString& srcText,
3958 int32_t srcStart,
b75a7d8f
A
3959 int32_t srcLength)
3960{
3961 unBogus();
3962 return doReplace(0, fLength, srcText, srcStart, srcLength);
3963}
3964
374ca955
A
3965inline UnicodeString&
3966UnicodeString::setTo(const UnicodeString& srcText,
b75a7d8f
A
3967 int32_t srcStart)
3968{
3969 unBogus();
3970 srcText.pinIndex(srcStart);
3971 return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
3972}
3973
374ca955 3974inline UnicodeString&
b75a7d8f
A
3975UnicodeString::setTo(const UnicodeString& srcText)
3976{
3977 unBogus();
3978 return doReplace(0, fLength, srcText, 0, srcText.fLength);
3979}
3980
374ca955 3981inline UnicodeString&
b75a7d8f
A
3982UnicodeString::setTo(const UChar *srcChars,
3983 int32_t srcLength)
3984{
3985 unBogus();
3986 return doReplace(0, fLength, srcChars, 0, srcLength);
3987}
3988
374ca955 3989inline UnicodeString&
b75a7d8f
A
3990UnicodeString::setTo(UChar srcChar)
3991{
3992 unBogus();
3993 return doReplace(0, fLength, &srcChar, 0, 1);
3994}
3995
374ca955 3996inline UnicodeString&
b75a7d8f
A
3997UnicodeString::setTo(UChar32 srcChar)
3998{
3999 unBogus();
4000 return replace(0, fLength, srcChar);
4001}
4002
374ca955 4003inline UnicodeString&
b75a7d8f
A
4004UnicodeString::operator+= (UChar ch)
4005{ return doReplace(fLength, 0, &ch, 0, 1); }
4006
374ca955 4007inline UnicodeString&
b75a7d8f
A
4008UnicodeString::operator+= (UChar32 ch) {
4009 UChar buffer[U16_MAX_LENGTH];
4010 int32_t _length = 0;
4011 UBool isError = FALSE;
4012 U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
4013 return doReplace(fLength, 0, buffer, 0, _length);
4014}
4015
374ca955 4016inline UnicodeString&
b75a7d8f
A
4017UnicodeString::operator+= (const UnicodeString& srcText)
4018{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
4019
374ca955
A
4020inline UnicodeString&
4021UnicodeString::append(const UnicodeString& srcText,
4022 int32_t srcStart,
b75a7d8f
A
4023 int32_t srcLength)
4024{ return doReplace(fLength, 0, srcText, srcStart, srcLength); }
4025
374ca955 4026inline UnicodeString&
b75a7d8f
A
4027UnicodeString::append(const UnicodeString& srcText)
4028{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
4029
374ca955
A
4030inline UnicodeString&
4031UnicodeString::append(const UChar *srcChars,
4032 int32_t srcStart,
b75a7d8f
A
4033 int32_t srcLength)
4034{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
4035
374ca955 4036inline UnicodeString&
b75a7d8f
A
4037UnicodeString::append(const UChar *srcChars,
4038 int32_t srcLength)
4039{ return doReplace(fLength, 0, srcChars, 0, srcLength); }
4040
374ca955 4041inline UnicodeString&
b75a7d8f
A
4042UnicodeString::append(UChar srcChar)
4043{ return doReplace(fLength, 0, &srcChar, 0, 1); }
4044
374ca955 4045inline UnicodeString&
b75a7d8f
A
4046UnicodeString::append(UChar32 srcChar) {
4047 UChar buffer[U16_MAX_LENGTH];
4048 int32_t _length = 0;
4049 UBool isError = FALSE;
4050 U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
4051 return doReplace(fLength, 0, buffer, 0, _length);
4052}
4053
374ca955
A
4054inline UnicodeString&
4055UnicodeString::insert(int32_t start,
4056 const UnicodeString& srcText,
4057 int32_t srcStart,
b75a7d8f
A
4058 int32_t srcLength)
4059{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4060
374ca955
A
4061inline UnicodeString&
4062UnicodeString::insert(int32_t start,
b75a7d8f
A
4063 const UnicodeString& srcText)
4064{ return doReplace(start, 0, srcText, 0, srcText.fLength); }
4065
374ca955
A
4066inline UnicodeString&
4067UnicodeString::insert(int32_t start,
4068 const UChar *srcChars,
4069 int32_t srcStart,
b75a7d8f
A
4070 int32_t srcLength)
4071{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4072
374ca955
A
4073inline UnicodeString&
4074UnicodeString::insert(int32_t start,
b75a7d8f
A
4075 const UChar *srcChars,
4076 int32_t srcLength)
4077{ return doReplace(start, 0, srcChars, 0, srcLength); }
4078
374ca955
A
4079inline UnicodeString&
4080UnicodeString::insert(int32_t start,
b75a7d8f
A
4081 UChar srcChar)
4082{ return doReplace(start, 0, &srcChar, 0, 1); }
4083
374ca955
A
4084inline UnicodeString&
4085UnicodeString::insert(int32_t start,
b75a7d8f
A
4086 UChar32 srcChar)
4087{ return replace(start, 0, srcChar); }
4088
4089
374ca955 4090inline UnicodeString&
b75a7d8f
A
4091UnicodeString::remove()
4092{
4093 // remove() of a bogus string makes the string empty and non-bogus
4094 if(isBogus()) {
4095 unBogus();
4096 } else {
4097 fLength = 0;
4098 }
4099 return *this;
4100}
4101
374ca955
A
4102inline UnicodeString&
4103UnicodeString::remove(int32_t start,
b75a7d8f
A
4104 int32_t _length)
4105{
4106 if(start <= 0 && _length == INT32_MAX) {
4107 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4108 return remove();
4109 } else {
4110 return doReplace(start, _length, NULL, 0, 0);
4111 }
4112}
4113
374ca955 4114inline UnicodeString&
b75a7d8f
A
4115UnicodeString::removeBetween(int32_t start,
4116 int32_t limit)
4117{ return doReplace(start, limit - start, NULL, 0, 0); }
4118
374ca955 4119inline UBool
b75a7d8f
A
4120UnicodeString::truncate(int32_t targetLength)
4121{
4122 if(isBogus() && targetLength == 0) {
4123 // truncate(0) of a bogus string makes the string empty and non-bogus
4124 unBogus();
4125 return FALSE;
4126 } else if((uint32_t)targetLength < (uint32_t)fLength) {
4127 fLength = targetLength;
4128 return TRUE;
4129 } else {
4130 return FALSE;
4131 }
4132}
4133
374ca955 4134inline UnicodeString&
b75a7d8f
A
4135UnicodeString::reverse()
4136{ return doReverse(0, fLength); }
4137
374ca955 4138inline UnicodeString&
b75a7d8f
A
4139UnicodeString::reverse(int32_t start,
4140 int32_t _length)
4141{ return doReverse(start, _length); }
4142
b75a7d8f
A
4143U_NAMESPACE_END
4144
4145#endif