X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/4388f060552cc537e71e957d32f35e9d75a61233..3d1f044b704633e2e541231cd17ae9ecf9ad5c7a:/icuSources/common/unicode/unistr.h diff --git a/icuSources/common/unicode/unistr.h b/icuSources/common/unicode/unistr.h index a95a6dd0..55739ac2 100644 --- a/icuSources/common/unicode/unistr.h +++ b/icuSources/common/unicode/unistr.h @@ -1,6 +1,8 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** -* Copyright (C) 1998-2011, International Business Machines +* Copyright (C) 1998-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -22,29 +24,19 @@ #define UNISTR_H /** - * \file - * \brief C++ API: Unicode String + * \file + * \brief C++ API: Unicode String */ +#include #include "unicode/utypes.h" +#include "unicode/char16ptr.h" #include "unicode/rep.h" #include "unicode/std_string.h" #include "unicode/stringpiece.h" #include "unicode/bytestream.h" -#include "unicode/ucasemap.h" struct UConverter; // unicode/ucnv.h -class StringThreadTest; - -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also ustring.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. 
- * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif #ifndef USTRING_H /** @@ -54,24 +46,39 @@ U_STABLE int32_t U_EXPORT2 u_strlen(const UChar *s); #endif -#ifndef U_STRING_CASE_MAPPER_DEFINED -#define U_STRING_CASE_MAPPER_DEFINED +#if U_SHOW_CPLUSPLUS_API +U_NAMESPACE_BEGIN + +#if !UCONFIG_NO_BREAK_ITERATION +class BreakIterator; // unicode/brkiter.h +#endif +class Edits; + +U_NAMESPACE_END +#endif // U_SHOW_CPLUSPLUS_API +#if U_SHOW_CPLUSPLUS_API +// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper. /** * Internal string case mapping function type. + * All error checking must be done. + * src and dest must not overlap. * @internal */ typedef int32_t U_CALLCONV -UStringCaseMapper(const UCaseMap *csm, - UChar *dest, int32_t destCapacity, - const UChar *src, int32_t srcLength, - UErrorCode *pErrorCode); - +UStringCaseMapper(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + icu::BreakIterator *iter, #endif + char16_t *dest, int32_t destCapacity, + const char16_t *src, int32_t srcLength, + icu::Edits *edits, + UErrorCode &errorCode); +#endif // U_SHOW_CPLUSPLUS_API +#if U_SHOW_CPLUSPLUS_API U_NAMESPACE_BEGIN -class BreakIterator; // unicode/brkiter.h class Locale; // unicode/locid.h class StringCharacterIterator; class UnicodeStringAppendable; // unicode/appendable.h @@ -92,29 +99,25 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * Unicode String literals in C++. - * Dependent on the platform properties, different UnicodeString - * constructors should be used to create a UnicodeString object from - * a string literal. - * The macros are defined for maximum performance. + * + * Note: these macros are not recommended for new code. + * Prior to the availability of C++11 and u"unicode string literals", + * these macros were provided for portability and efficiency when + * initializing UnicodeStrings from literals. 
+ * * They work only for strings that contain "invariant characters", i.e., * only latin letters, digits, and some punctuation. * See utypes.h for details. * * The string parameter must be a C string literal. * The length of the string, not including the terminating - * NUL, must be specified as a constant. - * The U_STRING_DECL macro should be invoked exactly once for one - * such string variable before it is used. + * `NUL`, must be specified as a constant. * @stable ICU 2.0 */ -#if defined(U_DECLARE_UTF16) -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length) -#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16))) -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length) -#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY -# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length) +#if !U_CHAR16_IS_TYPEDEF +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, u ## cs, _length) #else -# define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV) +# define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const char16_t*)u ## cs, _length) #endif /** @@ -135,9 +138,9 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_CHAR_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(UChar) and UnicodeString(UChar32) + * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32) * constructors are marked as explicit, preventing their inadvertent use. 
- * @draft ICU 49 + * @stable ICU 49 */ #ifndef UNISTR_FROM_CHAR_EXPLICIT # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) @@ -152,12 +155,12 @@ class UnicodeStringAppendable; // unicode/appendable.h /** * \def UNISTR_FROM_STRING_EXPLICIT * This can be defined to be empty or "explicit". - * If explicit, then the UnicodeString(const char *) and UnicodeString(const UChar *) + * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *) * constructors are marked as explicit, preventing their inadvertent use. * * In particular, this helps prevent accidentally depending on ICU conversion code * by passing a string literal into an API with a const UnicodeString & parameter. - * @draft ICU 49 + * @stable ICU 49 */ #ifndef UNISTR_FROM_STRING_EXPLICIT # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) @@ -169,39 +172,89 @@ class UnicodeStringAppendable; // unicode/appendable.h # endif #endif +/** + * \def UNISTR_OBJECT_SIZE + * Desired sizeof(UnicodeString) in bytes. + * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. + * The object size may want to be a multiple of 16 bytes, + * which is a common granularity for heap allocation. + * + * Any space inside the object beyond sizeof(vtable pointer) + 2 + * is available for storing short strings inside the object. + * The bigger the object, the longer a string that can be stored inside the object, + * without additional heap allocation. + * + * Depending on a platform's pointer size, pointer alignment requirements, + * and struct padding, the compiler will usually round up sizeof(UnicodeString) + * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), + * to hold the fields for heap-allocated strings. 
+ * Such a minimum size also ensures that the object is easily large enough + * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH). + * + * sizeof(UnicodeString) >= 48 should work for all known platforms. + * + * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, + * sizeof(UnicodeString) = 64 would leave space for + * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 + * char16_ts stored inside the object. + * + * The minimum object size on a 64-bit machine would be + * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, + * and the internal buffer would hold up to 11 char16_ts in that case. + * + * @see U16_MAX_LENGTH + * @stable ICU 56 + */ +#ifndef UNISTR_OBJECT_SIZE +# define UNISTR_OBJECT_SIZE 64 +#endif + /** * UnicodeString is a string class that stores Unicode characters directly and provides - * similar functionality as the Java String and StringBuffer classes. + * similar functionality as the Java String and StringBuffer/StringBuilder classes. * It is a concrete implementation of the abstract class Replaceable (for transliteration). * + * A UnicodeString may also "alias" an external array of characters + * (that is, point to it, rather than own the array) + * whose lifetime must then at least match the lifetime of the aliasing object. + * This aliasing may be preserved when returning a UnicodeString by value, + * depending on the compiler and the function implementation, + * via Return Value Optimization (RVO) or the move assignment operator. + * (However, the copy assignment operator does not preserve aliasing.) + * For details see the description of storage models at the end of the class API docs + * and in the User Guide chapter linked from there. + * * The UnicodeString class is not suitable for subclassing. * - *

For an overview of Unicode strings in C and C++ see the - * User Guide Strings chapter.

+ * For an overview of Unicode strings in C and C++ see the + * [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Strings-in-C-C-). * - *

In ICU, a Unicode string consists of 16-bit Unicode code units. + * In ICU, a Unicode string consists of 16-bit Unicode *code units*. * A Unicode character may be stored with either one code unit * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is UChar. - * For single-character handling, a Unicode character code point is a value - * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.

+ * ("surrogates"). The data type for code units is char16_t. + * For single-character handling, a Unicode character code *point* is a value + * in the range 0..0x10ffff. ICU uses the UChar32 type for code points. * - *

Indexes and offsets into and lengths of strings always count code units, not code points. + * Indexes and offsets into and lengths of strings always count code units, not code points. * This is the same as with multi-byte char* strings in traditional string handling. * Operations on partial strings typically do not test for code point boundaries. * If necessary, the user needs to take care of such boundaries by testing for the code unit * values or by using functions like * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() - * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).

+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h). * * UnicodeString methods are more lenient with regard to input parameter values * than other ICU APIs. In particular: * - If indexes are out of bounds for a UnicodeString object - * (<0 or >length()) then they are "pinned" to the nearest boundary. - * - If primitive string pointer values (e.g., const UChar * or char *) + * (< 0 or > length()) then they are "pinned" to the nearest boundary. + * - If the buffer passed to an insert/append/replace operation is owned by the + * target object, e.g., calling str.append(str), an extra copy may take place + * to ensure safety. + * - If primitive string pointer values (e.g., const char16_t * or char *) * for input strings are NULL, then those input string parameters are treated * as if they pointed to an empty string. - * However, this is not the case for char * parameters for charset names + * However, this is *not* the case for char * parameters for charset names * or other IDs. * - Most UnicodeString methods do not take a UErrorCode parameter because * there are usually very few opportunities for failure other than a shortage @@ -225,14 +278,14 @@ class UnicodeStringAppendable; // unicode/appendable.h * This includes the const UnicodeString & parameters for * copy construction, assignment, and cloning. * - *

UnicodeString uses several storage methods. + * UnicodeString uses several storage methods. * String contents can be stored inside the UnicodeString object itself, * in an allocated and shared buffer, or in an outside buffer that is "aliased". * Most of this is done transparently, but careful aliasing in particular provides * significant performance improvements. * Also, the internal buffer is accessible via special functions. * For details see the - * User Guide Strings chapter.

+ * [User Guide Strings chapter](http://userguide.icu-project.org/strings#TOC-Maximizing-Performance-with-the-UnicodeString-Storage-Model). * * @see utf.h * @see CharacterIterator @@ -267,7 +320,7 @@ public: /** * Equality operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. - * @return TRUE if text contains the same characters as this one, + * @return TRUE if `text` contains the same characters as this one, * FALSE otherwise. * @stable ICU 2.0 */ @@ -276,7 +329,7 @@ public: /** * Inequality operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. - * @return FALSE if text contains the same characters as this one, + * @return FALSE if `text` contains the same characters as this one, * TRUE otherwise. * @stable ICU 2.0 */ @@ -286,7 +339,7 @@ public: * Greater than operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. * @return TRUE if the characters in this are bitwise - * greater than the characters in text, FALSE otherwise + * greater than the characters in `text`, FALSE otherwise * @stable ICU 2.0 */ inline UBool operator> (const UnicodeString& text) const; @@ -295,7 +348,7 @@ public: * Less than operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. * @return TRUE if the characters in this are bitwise - * less than the characters in text, FALSE otherwise + * less than the characters in `text`, FALSE otherwise * @stable ICU 2.0 */ inline UBool operator< (const UnicodeString& text) const; @@ -304,7 +357,7 @@ public: * Greater than or equal operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. 
* @return TRUE if the characters in this are bitwise - * greater than or equal to the characters in text, FALSE otherwise + * greater than or equal to the characters in `text`, FALSE otherwise * @stable ICU 2.0 */ inline UBool operator>= (const UnicodeString& text) const; @@ -313,36 +366,37 @@ public: * Less than or equal operator. Performs only bitwise comparison. * @param text The UnicodeString to compare to this one. * @return TRUE if the characters in this are bitwise - * less than or equal to the characters in text, FALSE otherwise + * less than or equal to the characters in `text`, FALSE otherwise * @stable ICU 2.0 */ inline UBool operator<= (const UnicodeString& text) const; /** * Compare the characters bitwise in this UnicodeString to - * the characters in text. + * the characters in `text`. * @param text The UnicodeString to compare to this one. * @return The result of bitwise character comparison: 0 if this - * contains the same characters as text, -1 if the characters in - * this are bitwise less than the characters in text, +1 if the + * contains the same characters as `text`, -1 if the characters in + * this are bitwise less than the characters in `text`, +1 if the * characters in this are bitwise greater than the characters - * in text. + * in `text`. * @stable ICU 2.0 */ inline int8_t compare(const UnicodeString& text) const; /** * Compare the characters bitwise in the range - * [start, start + length) with the characters - * in text + * [`start`, `start + length`) with the characters + * in the **entire string** `text`. + * (The parameters "start" and "length" are not applied to the other text "text".) * @param start the offset at which the compare operation begins * @param length the number of characters of text to compare. * @param text the other text to be compared against this string. 
* @return The result of bitwise character comparison: 0 if this - * contains the same characters as text, -1 if the characters in - * this are bitwise less than the characters in text, +1 if the + * contains the same characters as `text`, -1 if the characters in + * this are bitwise less than the characters in `text`, +1 if the * characters in this are bitwise greater than the characters - * in text. + * in `text`. * @stable ICU 2.0 */ inline int8_t compare(int32_t start, @@ -351,19 +405,19 @@ public: /** * Compare the characters bitwise in the range - * [start, start + length) with the characters - * in srcText in the range - * [srcStart, srcStart + srcLength). + * [`start`, `start + length`) with the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`). * @param start the offset at which the compare operation begins * @param length the number of characters in this to compare. * @param srcText the text to be compared - * @param srcStart the offset into srcText to start comparison - * @param srcLength the number of characters in src to compare + * @param srcStart the offset into `srcText` to start comparison + * @param srcLength the number of characters in `src` to compare * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcText, -1 if the characters in - * this are bitwise less than the characters in srcText, +1 if the + * contains the same characters as `srcText`, -1 if the characters in + * this are bitwise less than the characters in `srcText`, +1 if the * characters in this are bitwise greater than the characters - * in srcText. + * in `srcText`. * @stable ICU 2.0 */ inline int8_t compare(int32_t start, @@ -374,75 +428,75 @@ public: /** * Compare the characters bitwise in this UnicodeString with the first - * srcLength characters in srcChars. + * `srcLength` characters in `srcChars`. * @param srcChars The characters to compare to this UnicodeString. 
- * @param srcLength the number of characters in srcChars to compare + * @param srcLength the number of characters in `srcChars` to compare * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcChars, -1 if the characters in - * this are bitwise less than the characters in srcChars, +1 if the + * contains the same characters as `srcChars`, -1 if the characters in + * this are bitwise less than the characters in `srcChars`, +1 if the * characters in this are bitwise greater than the characters - * in srcChars. + * in `srcChars`. * @stable ICU 2.0 */ - inline int8_t compare(const UChar *srcChars, + inline int8_t compare(ConstChar16Ptr srcChars, int32_t srcLength) const; /** * Compare the characters bitwise in the range - * [start, start + length) with the first - * length characters in srcChars + * [`start`, `start + length`) with the first + * `length` characters in `srcChars` * @param start the offset at which the compare operation begins * @param length the number of characters to compare. * @param srcChars the characters to be compared * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcChars, -1 if the characters in - * this are bitwise less than the characters in srcChars, +1 if the + * contains the same characters as `srcChars`, -1 if the characters in + * this are bitwise less than the characters in `srcChars`, +1 if the * characters in this are bitwise greater than the characters - * in srcChars. + * in `srcChars`. * @stable ICU 2.0 */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare the characters bitwise in the range - * [start, start + length) with the characters - * in srcChars in the range - * [srcStart, srcStart + srcLength). + * [`start`, `start + length`) with the characters + * in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`). 
* @param start the offset at which the compare operation begins * @param length the number of characters in this to compare * @param srcChars the characters to be compared - * @param srcStart the offset into srcChars to start comparison - * @param srcLength the number of characters in srcChars to compare + * @param srcStart the offset into `srcChars` to start comparison + * @param srcLength the number of characters in `srcChars` to compare * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcChars, -1 if the characters in - * this are bitwise less than the characters in srcChars, +1 if the + * contains the same characters as `srcChars`, -1 if the characters in + * this are bitwise less than the characters in `srcChars`, +1 if the * characters in this are bitwise greater than the characters - * in srcChars. + * in `srcChars`. * @stable ICU 2.0 */ inline int8_t compare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; /** * Compare the characters bitwise in the range - * [start, limit) with the characters - * in srcText in the range - * [srcStart, srcLimit). + * [`start`, `limit`) with the characters + * in `srcText` in the range + * [`srcStart`, `srcLimit`). 
* @param start the offset at which the compare operation begins * @param limit the offset immediately following the compare operation * @param srcText the text to be compared - * @param srcStart the offset into srcText to start comparison - * @param srcLimit the offset into srcText to limit comparison + * @param srcStart the offset into `srcText` to start comparison + * @param srcLimit the offset into `srcText` to limit comparison * @return The result of bitwise character comparison: 0 if this - * contains the same characters as srcText, -1 if the characters in - * this are bitwise less than the characters in srcText, +1 if the + * contains the same characters as `srcText`, -1 if the characters in + * this are bitwise less than the characters in `srcText`, +1 if the * characters in this are bitwise greater than the characters - * in srcText. + * in `srcText`. * @stable ICU 2.0 */ inline int8_t compareBetween(int32_t start, @@ -538,7 +592,7 @@ public: * in code point order * @stable ICU 2.0 */ - inline int8_t compareCodePointOrder(const UChar *srcChars, + inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const; /** @@ -562,7 +616,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars) const; + const char16_t *srcChars) const; /** * Compare two Unicode strings in code point order. @@ -587,7 +641,7 @@ public: */ inline int8_t compareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -711,7 +765,7 @@ public: * @return A negative, zero, or positive integer indicating the comparison result. 
* @stable ICU 2.0 */ - inline int8_t caseCompare(const UChar *srcChars, + inline int8_t caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const; @@ -737,7 +791,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const; /** @@ -764,7 +818,7 @@ public: */ inline int8_t caseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; @@ -799,21 +853,21 @@ public: uint32_t options) const; /** - * Determine if this starts with the characters in text + * Determine if this starts with the characters in `text` * @param text The text to match. - * @return TRUE if this starts with the characters in text, + * @return TRUE if this starts with the characters in `text`, * FALSE otherwise * @stable ICU 2.0 */ inline UBool startsWith(const UnicodeString& text) const; /** - * Determine if this starts with the characters in srcText - * in the range [srcStart, srcStart + srcLength). + * Determine if this starts with the characters in `srcText` + * in the range [`srcStart`, `srcStart + srcLength`). * @param srcText The text to match. - * @param srcStart the offset into srcText to start matching - * @param srcLength the number of characters in srcText to match - * @return TRUE if this starts with the characters in text, + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcText` to match + * @return TRUE if this starts with the characters in `text`, * FALSE otherwise * @stable ICU 2.0 */ @@ -822,45 +876,45 @@ public: int32_t srcLength) const; /** - * Determine if this starts with the characters in srcChars + * Determine if this starts with the characters in `srcChars` * @param srcChars The characters to match. 
- * @param srcLength the number of characters in srcChars - * @return TRUE if this starts with the characters in srcChars, + * @param srcLength the number of characters in `srcChars` + * @return TRUE if this starts with the characters in `srcChars`, * FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** - * Determine if this ends with the characters in srcChars - * in the range [srcStart, srcStart + srcLength). + * Determine if this ends with the characters in `srcChars` + * in the range [`srcStart`, `srcStart + srcLength`). * @param srcChars The characters to match. - * @param srcStart the offset into srcText to start matching - * @param srcLength the number of characters in srcChars to match - * @return TRUE if this ends with the characters in srcChars, FALSE otherwise + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcChars` to match + * @return TRUE if this ends with the characters in `srcChars`, FALSE otherwise * @stable ICU 2.0 */ - inline UBool startsWith(const UChar *srcChars, + inline UBool startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; /** - * Determine if this ends with the characters in text + * Determine if this ends with the characters in `text` * @param text The text to match. - * @return TRUE if this ends with the characters in text, + * @return TRUE if this ends with the characters in `text`, * FALSE otherwise * @stable ICU 2.0 */ inline UBool endsWith(const UnicodeString& text) const; /** - * Determine if this ends with the characters in srcText - * in the range [srcStart, srcStart + srcLength). + * Determine if this ends with the characters in `srcText` + * in the range [`srcStart`, `srcStart + srcLength`). * @param srcText The text to match. 
- * @param srcStart the offset into srcText to start matching - * @param srcLength the number of characters in srcText to match - * @return TRUE if this ends with the characters in text, + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcText` to match + * @return TRUE if this ends with the characters in `text`, * FALSE otherwise * @stable ICU 2.0 */ @@ -869,27 +923,27 @@ public: int32_t srcLength) const; /** - * Determine if this ends with the characters in srcChars + * Determine if this ends with the characters in `srcChars` * @param srcChars The characters to match. - * @param srcLength the number of characters in srcChars - * @return TRUE if this ends with the characters in srcChars, + * @param srcLength the number of characters in `srcChars` + * @return TRUE if this ends with the characters in `srcChars`, * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const; /** - * Determine if this ends with the characters in srcChars - * in the range [srcStart, srcStart + srcLength). + * Determine if this ends with the characters in `srcChars` + * in the range [`srcStart`, `srcStart + srcLength`). * @param srcChars The characters to match. 
- * @param srcStart the offset into srcText to start matching - * @param srcLength the number of characters in srcChars to match - * @return TRUE if this ends with the characters in srcChars, + * @param srcStart the offset into `srcText` to start matching + * @param srcLength the number of characters in `srcChars` to match + * @return TRUE if this ends with the characters in `srcChars`, * FALSE otherwise * @stable ICU 2.0 */ - inline UBool endsWith(const UChar *srcChars, + inline UBool endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -897,21 +951,21 @@ public: /* Searching - bitwise only */ /** - * Locate in this the first occurrence of the characters in text, + * Locate in this the first occurrence of the characters in `text`, * using bitwise comparison. * @param text The text to search for. - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ inline int32_t indexOf(const UnicodeString& text) const; /** - * Locate in this the first occurrence of the characters in text - * starting at offset start, using bitwise comparison. + * Locate in this the first occurrence of the characters in `text` + * starting at offset `start`, using bitwise comparison. * @param text The text to search for. * @param start The offset at which searching will start. - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ @@ -920,12 +974,12 @@ public: /** * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in text, using bitwise comparison. + * [`start`, `start + length`) of the characters + * in `text`, using bitwise comparison. * @param text The text to search for. * @param start The offset at which searching will start. 
* @param length The number of characters to search - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ @@ -935,17 +989,17 @@ public: /** * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in srcText in the range - * [srcStart, srcStart + srcLength), + * [`start`, `start + length`) of the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`), * using bitwise comparison. * @param srcText The text to search for. - * @param srcStart the offset into srcText at which + * @param srcStart the offset into `srcText` at which * to start matching - * @param srcLength the number of characters in srcText to match + * @param srcLength the number of characters in `srcText` to match * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ @@ -957,123 +1011,123 @@ public: /** * Locate in this the first occurrence of the characters in - * srcChars - * starting at offset start, using bitwise comparison. + * `srcChars` + * starting at offset `start`, using bitwise comparison. * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars to match + * @param srcLength the number of characters in `srcChars` to match * @param start the offset into this at which to start matching - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. 
* @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; /** * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in srcChars, using bitwise comparison. + * [`start`, `start + length`) of the characters + * in `srcChars`, using bitwise comparison. * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars + * @param srcLength the number of characters in `srcChars` * @param start The offset at which searching will start. * @param length The number of characters to search - * @return The offset into this of the start of srcChars, + * @return The offset into this of the start of `srcChars`, * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(const UChar *srcChars, + inline int32_t indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; /** * Locate in this the first occurrence in the range - * [start, start + length) of the characters - * in srcChars in the range - * [srcStart, srcStart + srcLength), + * [`start`, `start + length`) of the characters + * in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`), * using bitwise comparison. * @param srcChars The text to search for. - * @param srcStart the offset into srcChars at which + * @param srcStart the offset into `srcChars` at which * to start matching - * @param srcLength the number of characters in srcChars to match + * @param srcLength the number of characters in `srcChars` to match * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. 
* @stable ICU 2.0 */ - int32_t indexOf(const UChar *srcChars, + int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const; /** - * Locate in this the first occurrence of the BMP code point c, + * Locate in this the first occurrence of the BMP code point `c`, * using bitwise comparison. * @param c The code unit to search for. - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c) const; + inline int32_t indexOf(char16_t c) const; /** - * Locate in this the first occurrence of the code point c, + * Locate in this the first occurrence of the code point `c`, * using bitwise comparison. * * @param c The code point to search for. - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ inline int32_t indexOf(UChar32 c) const; /** - * Locate in this the first occurrence of the BMP code point c, - * starting at offset start, using bitwise comparison. + * Locate in this the first occurrence of the BMP code point `c`, + * starting at offset `start`, using bitwise comparison. * @param c The code unit to search for. * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start) const; /** - * Locate in this the first occurrence of the code point c - * starting at offset start, using bitwise comparison. + * Locate in this the first occurrence of the code point `c` + * starting at offset `start`, using bitwise comparison. * * @param c The code point to search for. * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. 
+ * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ inline int32_t indexOf(UChar32 c, int32_t start) const; /** - * Locate in this the first occurrence of the BMP code point c - * in the range [start, start + length), + * Locate in this the first occurrence of the BMP code point `c` + * in the range [`start`, `start + length`), * using bitwise comparison. * @param c The code unit to search for. * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t indexOf(UChar c, + inline int32_t indexOf(char16_t c, int32_t start, int32_t length) const; /** - * Locate in this the first occurrence of the code point c - * in the range [start, start + length), + * Locate in this the first occurrence of the code point `c` + * in the range [`start`, `start + length`), * using bitwise comparison. * * @param c The code point to search for. * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ inline int32_t indexOf(UChar32 c, @@ -1081,21 +1135,21 @@ public: int32_t length) const; /** - * Locate in this the last occurrence of the characters in text, + * Locate in this the last occurrence of the characters in `text`, * using bitwise comparison. * @param text The text to search for. - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ inline int32_t lastIndexOf(const UnicodeString& text) const; /** - * Locate in this the last occurrence of the characters in text - * starting at offset start, using bitwise comparison. 
+ * Locate in this the last occurrence of the characters in `text` + * starting at offset `start`, using bitwise comparison. * @param text The text to search for. * @param start The offset at which searching will start. - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ @@ -1104,12 +1158,12 @@ public: /** * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in text, using bitwise comparison. + * [`start`, `start + length`) of the characters + * in `text`, using bitwise comparison. * @param text The text to search for. * @param start The offset at which searching will start. * @param length The number of characters to search - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ @@ -1119,17 +1173,17 @@ public: /** * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in srcText in the range - * [srcStart, srcStart + srcLength), + * [`start`, `start + length`) of the characters + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`), * using bitwise comparison. * @param srcText The text to search for. - * @param srcStart the offset into srcText at which + * @param srcStart the offset into `srcText` at which * to start matching - * @param srcLength the number of characters in srcText to match + * @param srcLength the number of characters in `srcText` to match * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. 
* @stable ICU 2.0 */ @@ -1140,123 +1194,123 @@ public: int32_t length) const; /** - * Locate in this the last occurrence of the characters in srcChars - * starting at offset start, using bitwise comparison. + * Locate in this the last occurrence of the characters in `srcChars` + * starting at offset `start`, using bitwise comparison. * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars to match + * @param srcLength the number of characters in `srcChars` to match * @param start the offset into this at which to start matching - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const; /** * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in srcChars, using bitwise comparison. + * [`start`, `start + length`) of the characters + * in `srcChars`, using bitwise comparison. * @param srcChars The text to search for. - * @param srcLength the number of characters in srcChars + * @param srcLength the number of characters in `srcChars` * @param start The offset at which searching will start. * @param length The number of characters to search - * @return The offset into this of the start of srcChars, + * @return The offset into this of the start of `srcChars`, * or -1 if not found. 
* @stable ICU 2.0 */ - inline int32_t lastIndexOf(const UChar *srcChars, + inline int32_t lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const; /** * Locate in this the last occurrence in the range - * [start, start + length) of the characters - * in srcChars in the range - * [srcStart, srcStart + srcLength), + * [`start`, `start + length`) of the characters + * in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`), * using bitwise comparison. * @param srcChars The text to search for. - * @param srcStart the offset into srcChars at which + * @param srcStart the offset into `srcChars` at which * to start matching - * @param srcLength the number of characters in srcChars to match + * @param srcLength the number of characters in `srcChars` to match * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of the start of text, + * @return The offset into this of the start of `text`, * or -1 if not found. * @stable ICU 2.0 */ - int32_t lastIndexOf(const UChar *srcChars, + int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const; /** - * Locate in this the last occurrence of the BMP code point c, + * Locate in this the last occurrence of the BMP code point `c`, * using bitwise comparison. * @param c The code unit to search for. - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c) const; + inline int32_t lastIndexOf(char16_t c) const; /** - * Locate in this the last occurrence of the code point c, + * Locate in this the last occurrence of the code point `c`, * using bitwise comparison. * * @param c The code point to search for. - * @return The offset into this of c, or -1 if not found. 
+ * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ inline int32_t lastIndexOf(UChar32 c) const; /** - * Locate in this the last occurrence of the BMP code point c - * starting at offset start, using bitwise comparison. + * Locate in this the last occurrence of the BMP code point `c` + * starting at offset `start`, using bitwise comparison. * @param c The code unit to search for. * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start) const; /** - * Locate in this the last occurrence of the code point c - * starting at offset start, using bitwise comparison. + * Locate in this the last occurrence of the code point `c` + * starting at offset `start`, using bitwise comparison. * * @param c The code point to search for. * @param start The offset at which searching will start. - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ inline int32_t lastIndexOf(UChar32 c, int32_t start) const; /** - * Locate in this the last occurrence of the BMP code point c - * in the range [start, start + length), + * Locate in this the last occurrence of the BMP code point `c` + * in the range [`start`, `start + length`), * using bitwise comparison. * @param c The code unit to search for. * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. 
* @stable ICU 2.0 */ - inline int32_t lastIndexOf(UChar c, + inline int32_t lastIndexOf(char16_t c, int32_t start, int32_t length) const; /** - * Locate in this the last occurrence of the code point c - * in the range [start, start + length), + * Locate in this the last occurrence of the code point `c` + * in the range [`start`, `start + length`), * using bitwise comparison. * * @param c The code point to search for. * @param start the offset into this at which to start matching * @param length the number of characters in this to search - * @return The offset into this of c, or -1 if not found. + * @return The offset into this of `c`, or -1 if not found. * @stable ICU 2.0 */ inline int32_t lastIndexOf(UChar32 c, @@ -1267,32 +1321,32 @@ public: /* Character access */ /** - * Return the code unit at offset offset. + * Return the code unit at offset `offset`. * If the offset is not valid (0..length()-1) then U+ffff is returned. * @param offset a valid offset into the text - * @return the code unit at offset offset + * @return the code unit at offset `offset` * or 0xffff if the offset is not valid for this string * @stable ICU 2.0 */ - inline UChar charAt(int32_t offset) const; + inline char16_t charAt(int32_t offset) const; /** - * Return the code unit at offset offset. + * Return the code unit at offset `offset`. * If the offset is not valid (0..length()-1) then U+ffff is returned. * @param offset a valid offset into the text - * @return the code unit at offset offset + * @return the code unit at offset `offset` * @stable ICU 2.0 */ - inline UChar operator[] (int32_t offset) const; + inline char16_t operator[] (int32_t offset) const; /** * Return the code point that contains the code unit - * at offset offset. + * at offset `offset`. * If the offset is not valid (0..length()-1) then U+ffff is returned. 
* @param offset a valid offset into the text * that indicates the text offset of any of the code units * that will be assembled into a code point (21-bit value) and returned - * @return the code point of text at offset + * @return the code point of text at `offset` * or 0xffff if the offset is not valid for this string * @stable ICU 2.0 */ @@ -1349,33 +1403,33 @@ public: * This behaves like CharacterIterator::move32(delta, kCurrent). * * Behavior for out-of-bounds indexes: - * moveIndex32 pins the input index to 0..length(), i.e., + * `moveIndex32` pins the input index to 0..length(), i.e., * if the input index<0 then it is pinned to 0; * if it is index>length() then it is pinned to length(). - * Afterwards, the index is moved by delta code points + * Afterwards, the index is moved by `delta` code points * forward or backward, * but no further backward than to 0 and no further forward than to length(). * The resulting index return value will be in between 0 and length(), inclusively. * * Examples: - *
-   * // s has code points 'a' U+10000 'b' U+10ffff U+2029
-   * UnicodeString s=UNICODE_STRING("a\\U00010000b\\U0010ffff\\u2029", 31).unescape();
+   * \code
+   *     // s has code points 'a' U+10000 'b' U+10ffff U+2029
+   *     UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
    *
-   * // initial index: position of U+10000
-   * int32_t index=1;
+   *     // initial index: position of U+10000
+   *     int32_t index=1;
    *
-   * // the following examples will all result in index==4, position of U+10ffff
+   *     // the following examples will all result in index==4, position of U+10ffff
    *
-   * // skip 2 code points from some position in the string
-   * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+   *     // skip 2 code points from some position in the string
+   *     index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
    *
-   * // go to the 3rd code point from the start of s (0-based)
-   * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+   *     // go to the 3rd code point from the start of s (0-based)
+   *     index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
    *
-   * // go to the next-to-last code point of s
-   * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
-   * 
+ * // go to the next-to-last code point of s + * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff + * \endcode * * @param index input code unit index * @param delta (signed) code point count to move the index forward or backward @@ -1389,22 +1443,22 @@ public: /** * Copy the characters in the range - * [start, start + length) into the array dst, - * beginning at dstStart. - * If the string aliases to dst itself as an external buffer, + * [`start`, `start + length`) into the array `dst`, + * beginning at `dstStart`. + * If the string aliases to `dst` itself as an external buffer, * then extract() will not copy the contents. * * @param start offset of first character which will be copied into the array * @param length the number of characters to extract - * @param dst array in which to copy characters. The length of dst - * must be at least (dstStart + length). - * @param dstStart the offset in dst where the first character + * @param dst array in which to copy characters. The length of `dst` + * must be at least (`dstStart + length`). + * @param dstStart the offset in `dst` where the first character * will be extracted * @stable ICU 2.0 */ inline void extract(int32_t start, int32_t length, - UChar *dst, + Char16Ptr dst, int32_t dstStart = 0) const; /** @@ -1419,27 +1473,26 @@ public: * If the string itself does not fit into dest * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR. * - * If the string aliases to dest itself as an external buffer, + * If the string aliases to `dest` itself as an external buffer, * then extract() will not copy the contents. * * @param dest Destination string buffer. - * @param destCapacity Number of UChars available at dest. + * @param destCapacity Number of char16_ts available at dest. * @param errorCode ICU error code. 
* @return length() * @stable ICU 2.0 */ int32_t - extract(UChar *dest, int32_t destCapacity, + extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const; /** * Copy the characters in the range - * [start, start + length) into the UnicodeString - * target. + * [`start`, `start + length`) into the UnicodeString + * `target`. * @param start offset of first character which will be copied * @param length the number of characters to extract * @param target UnicodeString into which to copy characters. - * @return A reference to target * @stable ICU 2.0 */ inline void extract(int32_t start, @@ -1447,28 +1500,27 @@ public: UnicodeString& target) const; /** - * Copy the characters in the range [start, limit) - * into the array dst, beginning at dstStart. + * Copy the characters in the range [`start`, `limit`) + * into the array `dst`, beginning at `dstStart`. * @param start offset of first character which will be copied into the array * @param limit offset immediately following the last character to be copied - * @param dst array in which to copy characters. The length of dst - * must be at least (dstStart + (limit - start)). - * @param dstStart the offset in dst where the first character + * @param dst array in which to copy characters. The length of `dst` + * must be at least (`dstStart + (limit - start)`). + * @param dstStart the offset in `dst` where the first character * will be extracted * @stable ICU 2.0 */ inline void extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart = 0) const; /** - * Copy the characters in the range [start, limit) - * into the UnicodeString target. Replaceable API. + * Copy the characters in the range [`start`, `limit`) + * into the UnicodeString `target`. Replaceable API. * @param start offset of first character which will be copied * @param limit offset immediately following the last character to be copied * @param target UnicodeString into which to copy characters. 
- * @return A reference to target * @stable ICU 2.0 */ virtual void extractBetween(int32_t start, @@ -1476,12 +1528,12 @@ public: UnicodeString& target) const; /** - * Copy the characters in the range - * [start, start + length) into an array of characters. + * Copy the characters in the range + * [`start`, `start + startLength`) into an array of characters. * All characters must be invariant (see utypes.h). * Use US_INV as the last, signature-distinguishing parameter. * - * This function does not write any more than targetLength + * This function does not write any more than `targetCapacity` * characters but returns the length of the entire output string * so that one can allocate a larger buffer and call the function again * if necessary. @@ -1506,9 +1558,9 @@ public: /** * Copy the characters in the range - * [start, start + length) into an array of characters + * [`start`, `start + length`) into an array of characters * in the platform's default codepage. - * This function does not write any more than targetLength + * This function does not write any more than `targetLength` * characters but returns the length of the entire output string * so that one can allocate a larger buffer and call the function again * if necessary. @@ -1518,8 +1570,8 @@ public: * @param startLength the number of characters to extract * @param target the target buffer for extraction * @param targetLength the length of the target buffer - * If target is NULL, then the number of bytes required for - * target is returned. + * If `target` is NULL, then the number of bytes required for + * `target` is returned. * @return the output string length, not including the terminating NUL * @stable ICU 2.0 */ @@ -1534,7 +1586,7 @@ public: /** * Copy the characters in the range - * [start, start + length) into an array of characters + * [`start`, `start + length`) into an array of characters * in a specified codepage. * The output string is NUL-terminated. 
* @@ -1548,11 +1600,11 @@ public: * @param target the target buffer for extraction * @param codepage the desired codepage for the characters. 0 has * the special meaning of the default codepage - * If codepage is an empty string (""), + * If `codepage` is an empty string (`""`), * then a simple conversion is performed on the codepage-invariant * subset ("invariant characters") of the platform encoding. See utypes.h. - * If target is NULL, then the number of bytes required for - * target is returned. It is assumed that the target is big enough + * If `target` is NULL, then the number of bytes required for + * `target` is returned. It is assumed that the target is big enough * to fit all of the characters. * @return the output string length, not including the terminating NUL * @stable ICU 2.0 @@ -1564,9 +1616,9 @@ public: /** * Copy the characters in the range - * [start, start + length) into an array of characters + * [`start`, `start + length`) into an array of characters * in a specified codepage. - * This function does not write any more than targetLength + * This function does not write any more than `targetLength` * characters but returns the length of the entire output string * so that one can allocate a larger buffer and call the function again * if necessary. @@ -1583,11 +1635,11 @@ public: * @param targetLength the length of the target buffer * @param codepage the desired codepage for the characters. 0 has * the special meaning of the default codepage - * If codepage is an empty string (""), + * If `codepage` is an empty string (`""`), * then a simple conversion is performed on the codepage-invariant * subset ("invariant characters") of the platform encoding. See utypes.h. - * If target is NULL, then the number of bytes required for - * target is returned. + * If `target` is NULL, then the number of bytes required for + * `target` is returned. 
* @return the output string length, not including the terminating NUL * @stable ICU 2.0 */ @@ -1660,8 +1712,6 @@ public: */ void toUTF8(ByteSink &sink) const; -#if U_HAVE_STD_STRING - /** * Convert the UnicodeString to UTF-8 and append the result * to a standard string. @@ -1676,13 +1726,11 @@ public: */ template StringClass &toUTF8String(StringClass &result) const { - StringByteSink sbs(&result); + StringByteSink sbs(&result, length()); toUTF8(sbs); return result; } -#endif - /** * Convert the UnicodeString to UTF-32. * Unpaired surrogates are replaced with U+FFFD. @@ -1704,7 +1752,7 @@ public: /** * Return the length of the UnicodeString object. - * The length is the number of UChar code units are in the UnicodeString. + * The length is the number of char16_t code units are in the UnicodeString. * If you want the number of code points, please use countChar32(). * @return the length of the UnicodeString object * @see countChar32 @@ -1713,14 +1761,14 @@ public: inline int32_t length(void) const; /** - * Count Unicode code points in the length UChar code units of the string. - * A code point may occupy either one or two UChar code units. + * Count Unicode code points in the length char16_t code units of the string. + * A code point may occupy either one or two char16_t code units. * Counting code points involves reading all code units. * * This functions is basically the inverse of moveIndex32(). * * @param start the index of the first code unit to check - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * @return the number of code points in the specified code units * @see length * @stable ICU 2.0 @@ -1729,7 +1777,7 @@ public: countChar32(int32_t start=0, int32_t length=INT32_MAX) const; /** - * Check if the length UChar code units of the string + * Check if the length char16_t code units of the string * contain more Unicode code points than a certain number. 
* This is more efficient than counting all code points in this part of the string * and comparing that number with a threshold. @@ -1737,10 +1785,10 @@ public: * falls within a certain range, and * never needs to count more than 'number+1' code points. * Logically equivalent to (countChar32(start, length)>number). - * A Unicode code point may occupy either one or two UChar code units. + * A Unicode code point may occupy either one or two char16_t code units. * * @param start the index of the first code unit to check (0 for the entire string) - * @param length the number of UChar code units to check + * @param length the number of char16_t code units to check * (use INT32_MAX for the entire string; remember that start/length * values are pinned) * @param number The number of code points in the (sub)string is compared against @@ -1766,7 +1814,7 @@ public: * This is useful together with the getBuffer functions. * See there for details. * - * @return the number of UChars available in the internal buffer + * @return the number of char16_ts available in the internal buffer * @see getBuffer * @stable ICU 2.0 */ @@ -1789,7 +1837,7 @@ public: * For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and * length() returns 0. * - * @return TRUE if the string is valid, FALSE otherwise + * @return TRUE if the string is bogus/invalid, FALSE otherwise * @see setToBogus() * @stable ICU 2.0 */ @@ -1804,28 +1852,44 @@ public: /** * Assignment operator. Replace the characters in this UnicodeString - * with the characters from srcText. + * with the characters from `srcText`. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * By contrast, the fastCopyFrom() function implements the old, + * more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. 
+ * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * * @param srcText The text containing the characters to replace * @return a reference to this * @stable ICU 2.0 + * @see fastCopyFrom */ UnicodeString &operator=(const UnicodeString &srcText); /** * Almost the same as the assignment operator. * Replace the characters in this UnicodeString - * with the characters from srcText. + * with the characters from `srcText`. + * + * This function works the same as the assignment operator + * for all strings except for ones that are readonly aliases. * - * This function works the same for all strings except for ones that - * are readonly aliases. * Starting with ICU 2.4, the assignment operator and the copy constructor * allocate a new buffer and copy the buffer contents even for readonly aliases. * This function implements the old, more efficient but less safe behavior * of making this string also a readonly alias to the same buffer. + * * The fastCopyFrom function must be used only if it is known that the lifetime of - * this UnicodeString is at least as long as the lifetime of the aliased buffer + * this UnicodeString does not exceed the lifetime of the aliased buffer * including its contents, for example for strings from resource bundles - * or aliases to string contents. + * or aliases to string constants. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. * * @param src The text containing the characters to replace. * @return a reference to this @@ -1833,18 +1897,46 @@ public: */ UnicodeString &fastCopyFrom(const UnicodeString &src); + /** + * Move assignment operator; might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * The behavior is undefined if *this and src are the same object. 
+ * @param src source string + * @return *this + * @stable ICU 56 + */ + UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT; + + /** + * Swap strings. + * @param other other string + * @stable ICU 56 + */ + void swap(UnicodeString &other) U_NOEXCEPT; + + /** + * Non-member UnicodeString swap function. + * @param s1 will get s2's contents and state + * @param s2 will get s1's contents and state + * @stable ICU 56 + */ + friend inline void U_EXPORT2 + swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT { + s1.swap(s2); + } + /** * Assignment operator. Replace the characters in this UnicodeString - * with the code unit ch. + * with the code unit `ch`. * @param ch the code unit to replace * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator= (UChar ch); + inline UnicodeString& operator= (char16_t ch); /** * Assignment operator. Replace the characters in this UnicodeString - * with the code point ch. + * with the code point `ch`. * @param ch the code point to replace * @return a reference to this * @stable ICU 2.0 @@ -1853,11 +1945,11 @@ public: /** * Set the text in the UnicodeString object to the characters - * in srcText in the range - * [srcStart, srcText.length()). - * srcText is not modified. + * in `srcText` in the range + * [`srcStart`, `srcText.length()`). + * `srcText` is not modified. * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters + * @param srcStart the offset into `srcText` where new characters * will be obtained * @return a reference to this * @stable ICU 2.2 @@ -1867,13 +1959,13 @@ public: /** * Set the text in the UnicodeString object to the characters - * in srcText in the range - * [srcStart, srcStart + srcLength). - * srcText is not modified. + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`). + * `srcText` is not modified. 
* @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters + * @param srcStart the offset into `srcText` where new characters * will be obtained - * @param srcLength the number of characters in srcText in the + * @param srcLength the number of characters in `srcText` in the * replace string. * @return a reference to this * @stable ICU 2.0 @@ -1884,8 +1976,8 @@ public: /** * Set the text in the UnicodeString object to the characters in - * srcText. - * srcText is not modified. + * `srcText`. + * `srcText` is not modified. * @param srcText the source for the new characters * @return a reference to this * @stable ICU 2.0 @@ -1894,61 +1986,64 @@ public: /** * Set the characters in the UnicodeString object to the characters - * in srcChars. srcChars is not modified. + * in `srcChars`. `srcChars` is not modified. * @param srcChars the source for the new characters * @param srcLength the number of Unicode characters in srcChars. * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& setTo(const UChar *srcChars, + inline UnicodeString& setTo(const char16_t *srcChars, int32_t srcLength); /** * Set the characters in the UnicodeString object to the code unit - * srcChar. + * `srcChar`. * @param srcChar the code unit which becomes the UnicodeString's character * content * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& setTo(UChar srcChar); + inline UnicodeString& setTo(char16_t srcChar); /** * Set the characters in the UnicodeString object to the code point - * srcChar. + * `srcChar`. * @param srcChar the code point which becomes the UnicodeString's character * content * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& setTo(UChar32 srcChar); + inline UnicodeString& setTo(UChar32 srcChar); /** - * Aliasing setTo() function, analogous to the readonly-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. 
* The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: * When the string is modified, then the buffer is first copied into * newly allocated memory. * The aliased buffer is never modified. - * In an assignment to another UnicodeString, the text will be aliased again, + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, * so that both strings then alias the same readonly-text. * - * @param isTerminated specifies if text is NUL-terminated. - * This must be true if textLength==-1. + * @param isTerminated specifies if `text` is `NUL`-terminated. + * This must be true if `textLength==-1`. * @param text The characters to alias for the UnicodeString. - * @param textLength The number of Unicode characters in text to alias. + * @param textLength The number of Unicode characters in `text` to alias. * If -1, then this constructor will determine the length - * by calling u_strlen(). + * by calling `u_strlen()`. * @return a reference to this * @stable ICU 2.0 */ UnicodeString &setTo(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** - * Aliasing setTo() function, analogous to the writable-aliasing UChar* constructor. + * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -1957,16 +2052,16 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. 
- * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. - * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffLength The number of Unicode characters in `buffer` to alias. + * @param buffCapacity The size of `buffer` in char16_ts. * @return a reference to this * @stable ICU 2.0 */ - UnicodeString &setTo(UChar *buffer, + UnicodeString &setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); @@ -2002,7 +2097,7 @@ public: * s.truncate(0); // set to an empty string (complete truncation), or * s=UnicodeString(); // assign an empty string, or * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or - * static const UChar nul=0; + * static const char16_t nul=0; * s.setTo(&nul, 0); // set to an empty C Unicode string * } * \endcode @@ -2020,22 +2115,22 @@ public: * @stable ICU 2.0 */ UnicodeString& setCharAt(int32_t offset, - UChar ch); + char16_t ch); /* Append operations */ /** - * Append operator. Append the code unit ch to the UnicodeString + * Append operator. Append the code unit `ch` to the UnicodeString * object. * @param ch the code unit to be appended * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& operator+= (UChar ch); + inline UnicodeString& operator+= (char16_t ch); /** - * Append operator. Append the code point ch to the UnicodeString + * Append operator. Append the code point `ch` to the UnicodeString * object. * @param ch the code point to be appended * @return a reference to this @@ -2044,8 +2139,8 @@ public: inline UnicodeString& operator+= (UChar32 ch); /** - * Append operator. Append the characters in srcText to the - * UnicodeString object. 
srcText is not modified. + * Append operator. Append the characters in `srcText` to the + * UnicodeString object. `srcText` is not modified. * @param srcText the source for the new characters * @return a reference to this * @stable ICU 2.0 @@ -2054,14 +2149,14 @@ public: /** * Append the characters - * in srcText in the range - * [srcStart, srcStart + srcLength) to the - * UnicodeString object at offset start. srcText + * in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`) to the + * UnicodeString object at offset `start`. `srcText` * is not modified. * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters + * @param srcStart the offset into `srcText` where new characters * will be obtained - * @param srcLength the number of characters in srcText in + * @param srcLength the number of characters in `srcText` in * the append string * @return a reference to this * @stable ICU 2.0 @@ -2071,8 +2166,8 @@ public: int32_t srcLength); /** - * Append the characters in srcText to the UnicodeString object. - * srcText is not modified. + * Append the characters in `srcText` to the UnicodeString object. + * `srcText` is not modified. * @param srcText the source for the new characters * @return a reference to this * @stable ICU 2.0 @@ -2080,44 +2175,44 @@ public: inline UnicodeString& append(const UnicodeString& srcText); /** - * Append the characters in srcChars in the range - * [srcStart, srcStart + srcLength) to the UnicodeString + * Append the characters in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`) to the UnicodeString * object at offset - * start. srcChars is not modified. + * `start`. `srcChars` is not modified. 
* @param srcChars the source for the new characters - * @param srcStart the offset into srcChars where new characters + * @param srcStart the offset into `srcChars` where new characters * will be obtained - * @param srcLength the number of characters in srcChars in - * the append string; can be -1 if srcChars is NUL-terminated + * @param srcLength the number of characters in `srcChars` in + * the append string; can be -1 if `srcChars` is NUL-terminated * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); /** - * Append the characters in srcChars to the UnicodeString object - * at offset start. srcChars is not modified. + * Append the characters in `srcChars` to the UnicodeString object + * at offset `start`. `srcChars` is not modified. * @param srcChars the source for the new characters - * @param srcLength the number of Unicode characters in srcChars; - * can be -1 if srcChars is NUL-terminated + * @param srcLength the number of Unicode characters in `srcChars`; + * can be -1 if `srcChars` is NUL-terminated * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(const UChar *srcChars, + inline UnicodeString& append(ConstChar16Ptr srcChars, int32_t srcLength); /** - * Append the code unit srcChar to the UnicodeString object. + * Append the code unit `srcChar` to the UnicodeString object. * @param srcChar the code unit to append * @return a reference to this * @stable ICU 2.0 */ - inline UnicodeString& append(UChar srcChar); + inline UnicodeString& append(char16_t srcChar); /** - * Append the code point srcChar to the UnicodeString object. + * Append the code point `srcChar` to the UnicodeString object. 
* @param srcChar the code point to append * @return a reference to this * @stable ICU 2.0 @@ -2128,14 +2223,14 @@ public: /* Insert operations */ /** - * Insert the characters in srcText in the range - * [srcStart, srcStart + srcLength) into the UnicodeString - * object at offset start. srcText is not modified. + * Insert the characters in `srcText` in the range + * [`srcStart`, `srcStart + srcLength`) into the UnicodeString + * object at offset `start`. `srcText` is not modified. * @param start the offset where the insertion begins * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters + * @param srcStart the offset into `srcText` where new characters * will be obtained - * @param srcLength the number of characters in srcText in + * @param srcLength the number of characters in `srcText` in * the insert string * @return a reference to this * @stable ICU 2.0 @@ -2146,8 +2241,8 @@ public: int32_t srcLength); /** - * Insert the characters in srcText into the UnicodeString object - * at offset start. srcText is not modified. + * Insert the characters in `srcText` into the UnicodeString object + * at offset `start`. `srcText` is not modified. * @param start the offset where the insertion begins * @param srcText the source for the new characters * @return a reference to this @@ -2157,26 +2252,26 @@ public: const UnicodeString& srcText); /** - * Insert the characters in srcChars in the range - * [srcStart, srcStart + srcLength) into the UnicodeString - * object at offset start. srcChars is not modified. + * Insert the characters in `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`) into the UnicodeString + * object at offset `start`. `srcChars` is not modified. 
* @param start the offset at which the insertion begins * @param srcChars the source for the new characters - * @param srcStart the offset into srcChars where new characters + * @param srcStart the offset into `srcChars` where new characters * will be obtained - * @param srcLength the number of characters in srcChars + * @param srcLength the number of characters in `srcChars` * in the insert string * @return a reference to this * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); /** - * Insert the characters in srcChars into the UnicodeString object - * at offset start. srcChars is not modified. + * Insert the characters in `srcChars` into the UnicodeString object + * at offset `start`. `srcChars` is not modified. * @param start the offset where the insertion begins * @param srcChars the source for the new characters * @param srcLength the number of Unicode characters in srcChars. @@ -2184,23 +2279,23 @@ public: * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** - * Insert the code unit srcChar into the UnicodeString object at - * offset start. + * Insert the code unit `srcChar` into the UnicodeString object at + * offset `start`. * @param start the offset at which the insertion occurs * @param srcChar the code unit to insert * @return a reference to this * @stable ICU 2.0 */ inline UnicodeString& insert(int32_t start, - UChar srcChar); + char16_t srcChar); /** - * Insert the code point srcChar into the UnicodeString object at - * offset start. + * Insert the code point `srcChar` into the UnicodeString object at + * offset `start`. 
* @param start the offset at which the insertion occurs * @param srcChar the code point to insert * @return a reference to this @@ -2214,22 +2309,22 @@ public: /** * Replace the characters in the range - * [start, start + length) with the characters in - * srcText in the range - * [srcStart, srcStart + srcLength). - * srcText is not modified. + * [`start`, `start + length`) with the characters in + * `srcText` in the range + * [`srcStart`, `srcStart + srcLength`). + * `srcText` is not modified. * @param start the offset at which the replace operation begins * @param length the number of characters to replace. The character at - * start + length is not modified. + * `start + length` is not modified. * @param srcText the source for the new characters - * @param srcStart the offset into srcText where new characters + * @param srcStart the offset into `srcText` where new characters * will be obtained - * @param srcLength the number of characters in srcText in + * @param srcLength the number of characters in `srcText` in * the replace string * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& replace(int32_t start, + inline UnicodeString& replace(int32_t start, int32_t length, const UnicodeString& srcText, int32_t srcStart, @@ -2237,50 +2332,50 @@ public: /** * Replace the characters in the range - * [start, start + length) - * with the characters in srcText. srcText is + * [`start`, `start + length`) + * with the characters in `srcText`. `srcText` is * not modified. * @param start the offset at which the replace operation begins * @param length the number of characters to replace. The character at - * start + length is not modified. + * `start + length` is not modified. 
* @param srcText the source for the new characters * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& replace(int32_t start, + inline UnicodeString& replace(int32_t start, int32_t length, const UnicodeString& srcText); /** * Replace the characters in the range - * [start, start + length) with the characters in - * srcChars in the range - * [srcStart, srcStart + srcLength). srcChars + * [`start`, `start + length`) with the characters in + * `srcChars` in the range + * [`srcStart`, `srcStart + srcLength`). `srcChars` * is not modified. * @param start the offset at which the replace operation begins * @param length the number of characters to replace. The character at - * start + length is not modified. + * `start + length` is not modified. * @param srcChars the source for the new characters - * @param srcStart the offset into srcChars where new characters + * @param srcStart the offset into `srcChars` where new characters * will be obtained - * @param srcLength the number of characters in srcChars + * @param srcLength the number of characters in `srcChars` * in the replace string * @return a reference to this * @stable ICU 2.0 */ - UnicodeString& replace(int32_t start, + inline UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); /** * Replace the characters in the range - * [start, start + length) with the characters in - * srcChars. srcChars is not modified. + * [`start`, `start + length`) with the characters in + * `srcChars`. `srcChars` is not modified. * @param start the offset at which the replace operation begins * @param length number of characters to replace. The character at - * start + length is not modified. + * `start + length` is not modified. 
* @param srcChars the source for the new characters * @param srcLength the number of Unicode characters in srcChars * @return a reference to this @@ -2288,31 +2383,31 @@ public: */ inline UnicodeString& replace(int32_t start, int32_t length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength); /** * Replace the characters in the range - * [start, start + length) with the code unit - * srcChar. + * [`start`, `start + length`) with the code unit + * `srcChar`. * @param start the offset at which the replace operation begins * @param length the number of characters to replace. The character at - * start + length is not modified. + * `start + length` is not modified. * @param srcChar the new code unit * @return a reference to this * @stable ICU 2.0 */ inline UnicodeString& replace(int32_t start, int32_t length, - UChar srcChar); + char16_t srcChar); /** * Replace the characters in the range - * [start, start + length) with the code point - * srcChar. + * [`start`, `start + length`) with the code point + * `srcChar`. * @param start the offset at which the replace operation begins * @param length the number of characters to replace. The character at - * start + length is not modified. + * `start + length` is not modified. * @param srcChar the new code point * @return a reference to this * @stable ICU 2.0 @@ -2320,8 +2415,8 @@ public: UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar); /** - * Replace the characters in the range [start, limit) - * with the characters in srcText. srcText is not modified. + * Replace the characters in the range [`start`, `limit`) + * with the characters in `srcText`. `srcText` is not modified. 
* @param start the offset at which the replace operation begins * @param limit the offset immediately following the replace range * @param srcText the source for the new characters @@ -2333,16 +2428,16 @@ public: const UnicodeString& srcText); /** - * Replace the characters in the range [start, limit) - * with the characters in srcText in the range - * [srcStart, srcLimit). srcText is not modified. + * Replace the characters in the range [`start`, `limit`) + * with the characters in `srcText` in the range + * [`srcStart`, `srcLimit`). `srcText` is not modified. * @param start the offset at which the replace operation begins * @param limit the offset immediately following the replace range * @param srcText the source for the new characters - * @param srcStart the offset into srcChars where new characters + * @param srcStart the offset into `srcText` where new characters * will be obtained * @param srcLimit the offset immediately following the range to copy - * in srcText + * in `srcText` * @return a reference to this * @stable ICU 2.0 */ @@ -2354,12 +2449,9 @@ public: /** * Replace a substring of this object with the given text. - * @param start the beginning index, inclusive; 0 <= start - * <= limit. - * @param limit the ending index, exclusive; start <= limit - * <= length(). - * @param text the text to replace characters start - * to limit - 1 + * @param start the beginning index, inclusive; `0 <= start <= limit`. + * @param limit the ending index, exclusive; `start <= limit <= length()`. + * @param text the text to replace characters `start` to `limit - 1` * @stable ICU 2.0 */ virtual void handleReplaceBetween(int32_t start, @@ -2378,14 +2470,12 @@ public: * information. This method is used to duplicate or reorder substrings. * The destination index must not overlap the source range. * - * @param start the beginning index, inclusive; 0 <= start <= - * limit. - * @param limit the ending index, exclusive; start <= limit <= - * length().
+ * @param start the beginning index, inclusive; `0 <= start <= limit`. + * @param limit the ending index, exclusive; `start <= limit <= length()`. * @param dest the destination index. The characters from - * start..limit-1 will be copied to dest. - * Implementations of this method may assume that dest <= start || - * dest >= limit. + * `start..limit-1` will be copied to `dest`. + * Implementations of this method may assume that `dest <= start || + * dest >= limit`. * @stable ICU 2.0 */ virtual void copy(int32_t start, int32_t limit, int32_t dest); @@ -2406,7 +2496,7 @@ public: /** * Replace all occurrences of characters in oldText with characters * in newText - * in the range [start, start + length). + * in the range [`start`, `start + length`). * @param start the start of the range in which replace will performed * @param length the length of the range in which replace will be performed * @param oldText the text containing the search text @@ -2421,18 +2511,18 @@ public: /** * Replace all occurrences of characters in oldText in the range - * [oldStart, oldStart + oldLength) with the characters + * [`oldStart`, `oldStart + oldLength`) with the characters * in newText in the range - * [newStart, newStart + newLength) - * in the range [start, start + length). + * [`newStart`, `newStart + newLength`) + * in the range [`start`, `start + length`). 
* @param start the start of the range in which replace will performed * @param length the length of the range in which replace will be performed * @param oldText the text containing the search text - * @param oldStart the start of the search range in oldText - * @param oldLength the length of the search range in oldText + * @param oldStart the start of the search range in `oldText` + * @param oldLength the length of the search range in `oldText` * @param newText the text containing the replacement text - * @param newStart the start of the replacement range in newText - * @param newLength the length of the replacement range in newText + * @param newStart the start of the replacement range in `newText` + * @param newLength the length of the replacement range in `newText` * @return a reference to this * @stable ICU 2.0 */ @@ -2457,7 +2547,7 @@ public: /** * Remove the characters in the range - * [start, start + length) from the UnicodeString object. + * [`start`, `start + length`) from the UnicodeString object. * @param start the offset of the first character to remove * @param length the number of characters to remove * @return a reference to this @@ -2468,7 +2558,7 @@ public: /** * Remove the characters in the range - * [start, limit) from the UnicodeString object. + * [`start`, `limit`) from the UnicodeString object. * @param start the offset of the first character to remove * @param limit the offset immediately following the range to remove * @return a reference to this @@ -2479,8 +2569,8 @@ public: /** * Retain only the characters in the range - * [start, limit) from the UnicodeString object. - * Removes characters before start and at and after limit. + * [`start`, `limit`) from the UnicodeString object. + * Removes characters before `start` and at and after `limit`. 
* @param start the offset of the first character to retain * @param limit the offset immediately following the range to retain * @return a reference to this @@ -2491,7 +2581,7 @@ public: /* Length operations */ /** - * Pad the start of this UnicodeString with the character padChar. + * Pad the start of this UnicodeString with the character `padChar`. * If the length of this UnicodeString is less than targetLength, * targetLength - length() copies of padChar will be added to the * beginning of this UnicodeString. @@ -2502,10 +2592,10 @@ public: * @stable ICU 2.0 */ UBool padLeading(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** - * Pad the end of this UnicodeString with the character padChar. + * Pad the end of this UnicodeString with the character `padChar`. * If the length of this UnicodeString is less than targetLength, * targetLength - length() copies of padChar will be added to the * end of this UnicodeString. @@ -2516,10 +2606,10 @@ public: * @stable ICU 2.0 */ UBool padTrailing(int32_t targetLength, - UChar padChar = 0x0020); + char16_t padChar = 0x0020); /** - * Truncate this UnicodeString to the targetLength. + * Truncate this UnicodeString to the `targetLength`. * @param targetLength the desired length of this UnicodeString. * @return TRUE if the text was truncated, FALSE otherwise * @stable ICU 2.0 @@ -2544,7 +2634,7 @@ public: inline UnicodeString& reverse(void); /** - * Reverse the range [start, start + length) in + * Reverse the range [`start`, `start + length`) in * this UnicodeString. * @param start the start of the range to reverse * @param length the number of characters to reverse @@ -2671,11 +2761,11 @@ public: * break iterator is opened. * Otherwise the provided iterator is set to the string's text. * @param locale The locale to consider. + * @param options Options bit set, usually 0.
See U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param options Options bit set, see ucasemap_open(). * @return A reference to this. - * @see U_TITLECASE_NO_LOWERCASE - * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @see ucasemap_open * @stable ICU 3.8 */ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); @@ -2683,10 +2773,12 @@ public: #endif /** - * Case-fold the characters in this string. + * Case-folds the characters in this string. + * * Case-folding is locale-independent and not context-sensitive, * but there is an option for whether to include or exclude mappings for dotted I - * and dotless i that are marked with 'I' in CaseFolding.txt. + * and dotless i that are marked with 'T' in CaseFolding.txt. + * * The result may be longer or shorter than the original. * * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I @@ -2701,7 +2793,7 @@ public: /** * Get a read/write pointer to the internal buffer. - * The buffer is guaranteed to be large enough for at least minCapacity UChars, + * The buffer is guaranteed to be large enough for at least minCapacity char16_ts, * writable, and is still owned by the UnicodeString object. * Calls to getBuffer(minCapacity) must not be nested, and * must be matched with calls to releaseBuffer(newLength). @@ -2727,22 +2819,22 @@ public: * If the length() was greater than minCapacity, then any contents after minCapacity * may be lost. * The buffer contents is not NUL-terminated by getBuffer(). - * If length()(s.length(). + * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`. * (See getTerminatedBuffer().) * * The buffer may reside in read-only memory. Its contents must not * be modified. 
* * @return a read-only pointer to the internal string buffer, - * or 0 if the string is empty or bogus + * or nullptr if the string is empty or bogus * * @see getBuffer(int32_t minCapacity) * @see getTerminatedBuffer() * @stable ICU 2.0 */ - inline const UChar *getBuffer() const; + inline const char16_t *getBuffer() const; /** * Get a read-only pointer to the internal buffer, @@ -2831,7 +2923,7 @@ public: * @see getBuffer() * @stable ICU 2.2 */ - inline const UChar *getTerminatedBuffer(); + const char16_t *getTerminatedBuffer(); //======================================== // Constructors @@ -2840,11 +2932,11 @@ public: /** Construct an empty UnicodeString. * @stable ICU 2.0 */ - UnicodeString(); + inline UnicodeString(); /** - * Construct a UnicodeString with capacity to hold capacity UChars - * @param capacity the number of UChars this UnicodeString should hold + * Construct a UnicodeString with capacity to hold `capacity` char16_ts + * @param capacity the number of char16_ts this UnicodeString should hold * before a resize is necessary; if count is greater than 0 and count * code points c take up more space than capacity, then capacity is adjusted * accordingly. @@ -2856,21 +2948,21 @@ public: UnicodeString(int32_t capacity, UChar32 c, int32_t count); /** - * Single UChar (code unit) constructor. + * Single char16_t (code unit) constructor. * * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_CHAR_EXPLICIT=explicit + * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit` * on the compiler command line or similar. * @param ch the character to place in the UnicodeString * @stable ICU 2.0 */ - UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar ch); + UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch); /** * Single UChar32 (code point) constructor. * * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_CHAR_EXPLICIT=explicit + * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit` * on the compiler command line or similar. 
* @param ch the character to place in the UnicodeString * @stable ICU 2.0 @@ -2878,52 +2970,132 @@ public: UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch); /** - * UChar* constructor. + * char16_t* constructor. * * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` * on the compiler command line or similar. - * @param text The characters to place in the UnicodeString. text + * @param text The characters to place in the UnicodeString. `text` * must be NULL (U+0000) terminated. * @stable ICU 2.0 */ - UNISTR_FROM_STRING_EXPLICIT UnicodeString(const UChar *text); + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text); + +#if !U_CHAR16_IS_TYPEDEF + /** + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *). + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. + * @param text NUL-terminated UTF-16 string + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) : + UnicodeString(ConstChar16Ptr(text)) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * + * It is recommended to mark this constructor "explicit" by + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` + * on the compiler command line or similar. 
+ * @param text nullptr + * @stable ICU 59 + */ + UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text); /** - * UChar* constructor. + * char16_t* constructor. * @param text The characters to place in the UnicodeString. - * @param textLength The number of Unicode characters in text + * @param textLength The number of Unicode characters in `text` * to copy. * @stable ICU 2.0 */ - UnicodeString(const UChar *text, + UnicodeString(const char16_t *text, int32_t textLength); +#if !U_CHAR16_IS_TYPEDEF /** - * Readonly-aliasing UChar* constructor. + * uint16_t * constructor. + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text UTF-16 string + * @param length string length + * @stable ICU 59 + */ + UnicodeString(const uint16_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(const char16_t *, int32_t). + * @param text NUL-terminated UTF-16 string + * @param length string length + * @stable ICU 59 + */ + UnicodeString(const wchar_t *text, int32_t length) : + UnicodeString(ConstChar16Ptr(text), length) {} +#endif + + /** + * nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param text nullptr + * @param length ignored + * @stable ICU 59 + */ + inline UnicodeString(const std::nullptr_t text, int32_t length); + + /** + * Readonly-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has copy-on-write semantics: * When the string is modified, then the buffer is first copied into * newly allocated memory. * The aliased buffer is never modified. 
- * In an assignment to another UnicodeString, the text will be aliased again, + * + * In an assignment to another UnicodeString, when using the copy constructor + * or the assignment operator, the text will be copied. + * When using fastCopyFrom(), the text will be aliased again, * so that both strings then alias the same readonly-text. * - * @param isTerminated specifies if text is NUL-terminated. - * This must be true if textLength==-1. + * @param isTerminated specifies if `text` is `NUL`-terminated. + * This must be true if `textLength==-1`. * @param text The characters to alias for the UnicodeString. - * @param textLength The number of Unicode characters in text to alias. + * @param textLength The number of Unicode characters in `text` to alias. * If -1, then this constructor will determine the length - * by calling u_strlen(). + * by calling `u_strlen()`. * @stable ICU 2.0 */ UnicodeString(UBool isTerminated, - const UChar *text, + ConstChar16Ptr text, int32_t textLength); /** - * Writable-aliasing UChar* constructor. + * Writable-aliasing char16_t* constructor. * The text will be used for the UnicodeString object, but * it will not be released when the UnicodeString is destroyed. * This has write-through semantics: @@ -2932,15 +3104,52 @@ public: * a new buffer will be allocated and the contents copied as with regularly * constructed strings. * In an assignment to another UnicodeString, the buffer will be copied. - * The extract(UChar *dst) function detects whether the dst pointer is the same + * The extract(Char16Ptr dst) function detects whether the dst pointer is the same * as the string buffer itself and will in this case not copy the contents. * * @param buffer The characters to alias for the UnicodeString. - * @param buffLength The number of Unicode characters in buffer to alias. - * @param buffCapacity The size of buffer in UChars. + * @param buffLength The number of Unicode characters in `buffer` to alias. 
+ * @param buffCapacity The size of `buffer` in char16_ts. * @stable ICU 2.0 */ - UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity); + UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity); + +#if !U_CHAR16_IS_TYPEDEF + /** + * Writable-aliasing uint16_t * constructor. + * Delegates to UnicodeString(char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @stable ICU 59 + */ + UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + +#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN) + /** + * Writable-aliasing wchar_t * constructor. + * (Only defined if U_SIZEOF_WCHAR_T==2.) + * Delegates to UnicodeString(char16_t *, int32_t, int32_t). + * @param buffer writable buffer of/for UTF-16 text + * @param buffLength length of the current buffer contents + * @param buffCapacity buffer capacity + * @stable ICU 59 + */ + UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) : + UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {} +#endif + + /** + * Writable-aliasing nullptr_t constructor. + * Effectively the same as the default constructor, makes an empty string object. + * @param buffer nullptr + * @param buffLength ignored + * @param buffCapacity ignored + * @stable ICU 59 + */ + inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity); #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION @@ -2955,7 +3164,7 @@ public: * UNICODE_STRING_SIMPLE. * * It is recommended to mark this constructor "explicit" by - * -DUNISTR_FROM_STRING_EXPLICIT=explicit + * `-DUNISTR_FROM_STRING_EXPLICIT=explicit` * on the compiler command line or similar. * @param codepageData an array of bytes, null-terminated, * in the platform's default codepage.
@@ -2970,7 +3179,7 @@ public: * Uses the default converter (and thus depends on the ICU conversion code) * unless U_CHARSET_IS_UTF8 is set to 1. * @param codepageData an array of bytes in the platform's default codepage. - * @param dataLength The number of bytes in codepageData. + * @param dataLength The number of bytes in `codepageData`. * @stable ICU 2.0 */ UnicodeString(const char *codepageData, int32_t dataLength); @@ -2982,11 +3191,11 @@ public: /** * char* constructor. * @param codepageData an array of bytes, null-terminated - * @param codepage the encoding of codepageData. The special - * value 0 for codepage indicates that the text is in the + * @param codepage the encoding of `codepageData`. The special + * value 0 for `codepage` indicates that the text is in the * platform's default codepage. * - * If codepage is an empty string (""), + * If `codepage` is an empty string (`""`), * then a simple conversion is performed on the codepage-invariant * subset ("invariant characters") of the platform encoding. See utypes.h. * Recommendation: For invariant-character strings use the constructor @@ -3001,11 +3210,11 @@ public: /** * char* constructor. * @param codepageData an array of bytes. - * @param dataLength The number of bytes in codepageData. - * @param codepage the encoding of codepageData. The special - * value 0 for codepage indicates that the text is in the + * @param dataLength The number of bytes in `codepageData`. + * @param codepage the encoding of `codepageData`. The special + * value 0 for `codepage` indicates that the text is in the * platform's default codepage. - * If codepage is an empty string (""), + * If `codepage` is an empty string (`""`), * then a simple conversion is performed on the codepage-invariant * subset ("invariant characters") of the platform encoding. See utypes.h. 
* Recommendation: For invariant-character strings use the constructor @@ -3056,12 +3265,11 @@ public: * * For example: * \code - * void fn(const char *s) { - * UnicodeString ustr(s, -1, US_INV); - * // use ustr ... - * } + * void fn(const char *s) { + * UnicodeString ustr(s, -1, US_INV); + * // use ustr ... + * } * \endcode - * * @param src String using only invariant characters. * @param length Length of src, or -1 if NUL-terminated. * @param inv Signature-distinguishing paramater, use US_INV. @@ -3074,15 +3282,34 @@ public: /** * Copy constructor. + * + * Starting with ICU 2.4, the assignment operator and the copy constructor + * allocate a new buffer and copy the buffer contents even for readonly aliases. + * By contrast, the fastCopyFrom() function implements the old, + * more efficient but less safe behavior + * of making this string also a readonly alias to the same buffer. + * + * If the source object has an "open" buffer from getBuffer(minCapacity), + * then the copy is an empty string. + * * @param that The UnicodeString object to copy. * @stable ICU 2.0 + * @see fastCopyFrom */ UnicodeString(const UnicodeString& that); + /** + * Move constructor; might leave src in bogus state. + * This string will have the same contents and state that the source string had. + * @param src source string + * @stable ICU 56 + */ + UnicodeString(UnicodeString &&src) U_NOEXCEPT; + /** * 'Substring' constructor from tail of source string. * @param src The UnicodeString object to copy. - * @param srcStart The offset into src at which to start copying. + * @param srcStart The offset into `src` at which to start copying. * @stable ICU 2.2 */ UnicodeString(const UnicodeString& src, int32_t srcStart); @@ -3090,8 +3317,8 @@ public: /** * 'Substring' constructor from subrange of source string. * @param src The UnicodeString object to copy. - * @param srcStart The offset into src at which to start copying. - * @param srcLength The number of characters from src to copy. 
+ * @param srcStart The offset into `src` at which to start copying. + * @param srcLength The number of characters from `src` to copy. * @stable ICU 2.2 */ UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength); @@ -3132,7 +3359,7 @@ public: * @see toUTF8String * @stable ICU 4.2 */ - static UnicodeString fromUTF8(const StringPiece &utf8); + static UnicodeString fromUTF8(StringPiece utf8); /** * Create a UnicodeString from a UTF-32 string. @@ -3163,7 +3390,7 @@ public: * * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A, * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B, - * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C + * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C * * Anything else following a backslash is generically escaped. For * example, "[a\\-z]" returns "[a-z]". @@ -3190,7 +3417,7 @@ public: * character. See unescape() for a listing of the recognized escape * sequences. The character at offset-1 is assumed (without * checking) to be a backslash. If the escape sequence is - * ill-formed, or the offset is out of range, (UChar32)0xFFFFFFFF is + * ill-formed, or the offset is out of range, U_SENTINEL=-1 is * returned. * * @param offset an input output parameter. On input, it is the @@ -3198,7 +3425,7 @@ public: * after the initial backslash. On output, it is advanced after the * last character parsed. On error, it is not advanced at all. * @return the character represented by the escape sequence at - * offset, or (UChar32)0xFFFFFFFF on error. + * offset, or U_SENTINEL=-1 on error. * @see UnicodeString#unescape() * @see u_unescape() * @see u_unescapeAt() @@ -3236,7 +3463,7 @@ protected: * UnicodeString::charAt() to be inline again (see jitterbug 709). 
* @stable ICU 2.4 */ - virtual UChar getCharAt(int32_t offset) const; + virtual char16_t getCharAt(int32_t offset) const; /** * The change in Replaceable to use virtual getChar32At() allows @@ -3247,7 +3474,7 @@ protected: private: // For char* constructors. Could be made public. - UnicodeString &setToUTF8(const StringPiece &utf8); + UnicodeString &setToUTF8(StringPiece utf8); // For extract(char*). // We could make a toUTF8(target, capacity, errorCode) public but not // this version: New API will be cleaner if we make callers create substrings @@ -3257,6 +3484,11 @@ private: toUTF8(int32_t start, int32_t len, char *target, int32_t capacity) const; + /** + * Internal string contents comparison, called by operator==. + * Requires: this & text not bogus and have same lengths. + */ + UBool doEquals(const UnicodeString &text, int32_t len) const; inline int8_t doCompare(int32_t start, @@ -3267,7 +3499,7 @@ private: int8_t doCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3280,7 +3512,7 @@ private: int8_t doCompareCodePointOrder(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const; @@ -3295,12 +3527,12 @@ private: int8_t doCaseCompare(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const; - int32_t doIndexOf(UChar c, + int32_t doIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3308,7 +3540,7 @@ private: int32_t start, int32_t length) const; - int32_t doLastIndexOf(UChar c, + int32_t doLastIndexOf(char16_t c, int32_t start, int32_t length) const; @@ -3318,14 +3550,14 @@ private: void doExtract(int32_t start, int32_t length, - UChar *dst, + char16_t *dst, int32_t dstStart) const; inline void doExtract(int32_t start, int32_t length, UnicodeString& target) const; - inline UChar doCharAt(int32_t offset) const; 
+ inline char16_t doCharAt(int32_t offset) const; UnicodeString& doReplace(int32_t start, int32_t length, @@ -3335,10 +3567,13 @@ private: UnicodeString& doReplace(int32_t start, int32_t length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength); + UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength); + UnicodeString& doReverse(int32_t start, int32_t length); @@ -3347,8 +3582,11 @@ private: // get pointer to start of array // these do not check for kOpenGetBuffer, unlike the public getBuffer() function - inline UChar* getArrayStart(void); - inline const UChar* getArrayStart(void) const; + inline char16_t* getArrayStart(void); + inline const char16_t* getArrayStart(void) const; + + inline UBool hasShortLength() const; + inline int32_t getShortLength() const; // A UnicodeString object (not necessarily its current buffer) // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity). @@ -3358,13 +3596,16 @@ private: inline UBool isBufferWritable() const; // None of the following does releaseArray(). 
- inline void setLength(int32_t len); // sets only fShortLength and fLength - inline void setToEmpty(); // sets fFlags=kShortString - inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags + inline void setZeroLength(); + inline void setShortLength(int32_t len); + inline void setLength(int32_t len); + inline void setToEmpty(); + inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags - // allocate the array; result may be fStackBuffer + // allocate the array; result may be the stack buffer // sets refCount to 1 if appropriate - // sets fArray, fCapacity, and fFlags + // sets fArray, fCapacity, and flags + // sets length to 0 // returns boolean for success or failure UBool allocate(int32_t capacity); @@ -3377,6 +3618,9 @@ private: // implements assigment operator, copy constructor, and fastCopyFrom() UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE); + // Copies just the fields without memory management. + void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT; + // Pin start and limit to acceptable values. inline void pinIndex(int32_t& start) const; inline void pinIndices(int32_t& start, @@ -3394,9 +3638,9 @@ private: * Real constructor for converting from codepage data. * It assumes that it is called with !fRefCounted. * - * If codepage==0, then the default converter + * If `codepage==0`, then the default converter * is used for the platform encoding. - * If codepage is an empty string (""), + * If `codepage` is an empty string (`""`), * then a simple conversion is performed on the codepage-invariant * subset ("invariant characters") of the platform encoding. See utypes.h. */ @@ -3439,7 +3683,11 @@ private: * as in ustr_imp.h for ustrcase_map().
*/ UnicodeString & - caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper); + caseMap(int32_t caseLocale, uint32_t options, +#if !UCONFIG_NO_BREAK_ITERATION + BreakIterator *iter, +#endif + UStringCaseMapper *stringCaseMapper); // ref counting void addRef(void); @@ -3448,21 +3696,29 @@ private: // constants enum { - // Set the stack buffer size so that sizeof(UnicodeString) is, - // naturally (without padding), a multiple of sizeof(pointer). - US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings - kInvalidUChar=0xffff, // invalid UChar index - kGrowSize=128, // grow size for this buffer + /** + * Size of stack buffer for short strings. + * Must be at least U16_MAX_LENGTH for the single-code point constructor to work. + * @see UNISTR_OBJECT_SIZE + */ + US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR, + kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index) kInvalidHashCode=0, // invalid hash code kEmptyHashCode=1, // hash code for empty string - // bit flag values for fFlags + // bit flag values for fLengthAndFlags kIsBogus=1, // this string is bogus, i.e., not valid or NULL - kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields + kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields kRefCounted=4, // there is a refCount field before the characters in fArray kBufferIsReadonly=8,// do not write to this buffer kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"), // and releaseBuffer(newLength) must be called + kAllStorageFlags=0x1f, + + kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long + kLength1=1<127; else undefined + int32_t fCapacity; // capacity of fArray (in char16_ts) + // array pointer last to minimize padding for machines with P128 data model + // or pointer sizes that are not a power of 2 + char16_t *fArray; // the Unicode data } fFields; } fUnion; - UChar 
fRestOfStackBuffer[US_STACKBUF_SIZE-8]; - int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength - uint8_t fFlags; // bit flags: see constants above }; /** @@ -3575,24 +3839,64 @@ UnicodeString::pinIndices(int32_t& start, } } -inline UChar* -UnicodeString::getArrayStart() -{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } +inline char16_t* +UnicodeString::getArrayStart() { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; +} -inline const UChar* -UnicodeString::getArrayStart() const -{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } +inline const char16_t* +UnicodeString::getArrayStart() const { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + fUnion.fStackFields.fBuffer : fUnion.fFields.fArray; +} + +//======================================== +// Default constructor +//======================================== + +inline +UnicodeString::UnicodeString() { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} + +inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) { + fUnion.fStackFields.fLengthAndFlags=kShortString; +} //======================================== // Read-only implementation methods //======================================== +inline UBool +UnicodeString::hasShortLength() const { + return fUnion.fFields.fLengthAndFlags>=0; +} + +inline int32_t +UnicodeString::getShortLength() const { + // fLengthAndFlags must be non-negative -> short length >= 0 + // and arithmetic or logical shift does not matter. 
+ return fUnion.fFields.fLengthAndFlags>>kLengthShift; +} + inline int32_t -UnicodeString::length() const -{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } +UnicodeString::length() const { + return hasShortLength() ? getShortLength() : fUnion.fFields.fLength; +} inline int32_t -UnicodeString::getCapacity() const -{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } +UnicodeString::getCapacity() const { + return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ? + US_STACKBUF_SIZE : fUnion.fFields.fCapacity; +} inline int32_t UnicodeString::hashCode() const @@ -3600,26 +3904,26 @@ UnicodeString::hashCode() const inline UBool UnicodeString::isBogus() const -{ return (UBool)(fFlags & kIsBogus); } +{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); } inline UBool UnicodeString::isWritable() const -{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } +{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); } inline UBool UnicodeString::isBufferWritable() const { return (UBool)( - !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && - (!(fFlags&kRefCounted) || refCount()==1)); + !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && + (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1)); } -inline const UChar * +inline const char16_t * UnicodeString::getBuffer() const { - if(fFlags&(kIsBogus|kOpenGetBuffer)) { - return 0; - } else if(fFlags&kUsingStackBuffer) { - return fUnion.fStackBuffer; + if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) { + return nullptr; + } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) { + return fUnion.fStackFields.fBuffer; } else { return fUnion.fFields.fArray; } @@ -3650,10 +3954,7 @@ UnicodeString::operator== (const UnicodeString& text) const return text.isBogus(); } else { int32_t len = length(), textLength = text.length(); - return - !text.isBogus() && - len == textLength && - doCompare(0, 
len, text, 0, textLength) == 0; + return !text.isBogus() && len == textLength && doEquals(text, len); } } @@ -3688,7 +3989,7 @@ UnicodeString::compare(int32_t start, { return doCompare(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compare(const UChar *srcChars, +UnicodeString::compare(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompare(0, length(), srcChars, 0, srcLength); } @@ -3703,13 +4004,13 @@ UnicodeString::compare(int32_t start, inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompare(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompare(start, _length, srcChars, srcStart, srcLength); } @@ -3749,7 +4050,7 @@ UnicodeString::compareCodePointOrder(int32_t start, { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); } inline int8_t -UnicodeString::compareCodePointOrder(const UChar *srcChars, +UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars, int32_t srcLength) const { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); } @@ -3764,13 +4065,13 @@ UnicodeString::compareCodePointOrder(int32_t start, inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars) const + const char16_t *srcChars) const { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); } inline int8_t UnicodeString::compareCodePointOrder(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); } @@ -3814,7 +4115,7 @@ UnicodeString::caseCompare(int32_t start, } inline int8_t -UnicodeString::caseCompare(const UChar 
*srcChars, +UnicodeString::caseCompare(ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const { return doCaseCompare(0, length(), srcChars, 0, srcLength, options); @@ -3833,7 +4134,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, uint32_t options) const { return doCaseCompare(start, _length, srcChars, 0, _length, options); } @@ -3841,7 +4142,7 @@ UnicodeString::caseCompare(int32_t start, inline int8_t UnicodeString::caseCompare(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const { @@ -3892,7 +4193,7 @@ UnicodeString::indexOf(const UnicodeString& text, { return indexOf(text, 0, text.length(), start, _length); } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -3900,14 +4201,14 @@ UnicodeString::indexOf(const UChar *srcChars, } inline int32_t -UnicodeString::indexOf(const UChar *srcChars, +UnicodeString::indexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const { return indexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start, int32_t _length) const { return doIndexOf(c, start, _length); } @@ -3919,7 +4220,7 @@ UnicodeString::indexOf(UChar32 c, { return doIndexOf(c, start, _length); } inline int32_t -UnicodeString::indexOf(UChar c) const +UnicodeString::indexOf(char16_t c) const { return doIndexOf(c, 0, length()); } inline int32_t @@ -3927,7 +4228,7 @@ UnicodeString::indexOf(UChar32 c) const { return indexOf(c, 0, length()); } inline int32_t -UnicodeString::indexOf(UChar c, +UnicodeString::indexOf(char16_t c, int32_t start) const { pinIndex(start); return doIndexOf(c, start, length() - start); @@ 
-3941,14 +4242,14 @@ UnicodeString::indexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t _length) const { return lastIndexOf(srcChars, 0, srcLength, start, _length); } inline int32_t -UnicodeString::lastIndexOf(const UChar *srcChars, +UnicodeString::lastIndexOf(const char16_t *srcChars, int32_t srcLength, int32_t start) const { pinIndex(start); @@ -3989,7 +4290,7 @@ UnicodeString::lastIndexOf(const UnicodeString& text) const { return lastIndexOf(text, 0, text.length(), 0, length()); } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start, int32_t _length) const { return doLastIndexOf(c, start, _length); } @@ -4002,7 +4303,7 @@ UnicodeString::lastIndexOf(UChar32 c, } inline int32_t -UnicodeString::lastIndexOf(UChar c) const +UnicodeString::lastIndexOf(char16_t c) const { return doLastIndexOf(c, 0, length()); } inline int32_t @@ -4011,7 +4312,7 @@ UnicodeString::lastIndexOf(UChar32 c) const { } inline int32_t -UnicodeString::lastIndexOf(UChar c, +UnicodeString::lastIndexOf(char16_t c, int32_t start) const { pinIndex(start); return doLastIndexOf(c, start, length() - start); @@ -4035,17 +4336,17 @@ UnicodeString::startsWith(const UnicodeString& srcText, { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const { +UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, 0, srcLength) == 0; } inline UBool -UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const { +UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = 
u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0; } @@ -4065,21 +4366,21 @@ UnicodeString::endsWith(const UnicodeString& srcText, } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(ConstChar16Ptr srcChars, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars); + srcLength = u_strlen(toUCharPtr(srcChars)); } return doCompare(length() - srcLength, srcLength, srcChars, 0, srcLength) == 0; } inline UBool -UnicodeString::endsWith(const UChar *srcChars, +UnicodeString::endsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const { if(srcLength < 0) { - srcLength = u_strlen(srcChars + srcStart); + srcLength = u_strlen(toUCharPtr(srcChars + srcStart)); } return doCompare(length() - srcLength, srcLength, srcChars, srcStart, srcLength) == 0; @@ -4105,14 +4406,14 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, _length, srcChars, 0, srcLength); } inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, _length, srcChars, srcStart, srcLength); } @@ -4120,7 +4421,7 @@ UnicodeString::replace(int32_t start, inline UnicodeString& UnicodeString::replace(int32_t start, int32_t _length, - UChar srcChar) + char16_t srcChar) { return doReplace(start, _length, &srcChar, 0, 1); } inline UnicodeString& @@ -4163,7 +4464,7 @@ UnicodeString::doExtract(int32_t start, inline void UnicodeString::extract(int32_t start, int32_t _length, - UChar *target, + Char16Ptr target, int32_t targetStart) const { doExtract(start, _length, target, targetStart); } @@ -4191,7 +4492,7 @@ UnicodeString::extract(int32_t start, inline void 
UnicodeString::extractBetween(int32_t start, int32_t limit, - UChar *dst, + char16_t *dst, int32_t dstStart) const { pinIndex(start); pinIndex(limit); @@ -4203,7 +4504,7 @@ UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const { return tempSubString(start, limit - start); } -inline UChar +inline char16_t UnicodeString::doCharAt(int32_t offset) const { if((uint32_t)offset < (uint32_t)length()) { @@ -4213,89 +4514,59 @@ UnicodeString::doCharAt(int32_t offset) const } } -inline UChar +inline char16_t UnicodeString::charAt(int32_t offset) const { return doCharAt(offset); } -inline UChar +inline char16_t UnicodeString::operator[] (int32_t offset) const { return doCharAt(offset); } inline UBool UnicodeString::isEmpty() const { - return fShortLength == 0; + // Arithmetic or logical right shift does not matter: only testing for 0. + return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0; } //======================================== // Write implementation methods //======================================== +inline void +UnicodeString::setZeroLength() { + fUnion.fFields.fLengthAndFlags &= kAllStorageFlags; +} + +inline void +UnicodeString::setShortLength(int32_t len) { + // requires 0 <= len <= kMaxShortLength + fUnion.fFields.fLengthAndFlags = + (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift)); +} + inline void UnicodeString::setLength(int32_t len) { - if(len <= 127) { - fShortLength = (int8_t)len; + if(len <= kMaxShortLength) { + setShortLength(len); } else { - fShortLength = (int8_t)-1; + fUnion.fFields.fLengthAndFlags |= kLengthIsLarge; fUnion.fFields.fLength = len; } } inline void UnicodeString::setToEmpty() { - fShortLength = 0; - fFlags = kShortString; + fUnion.fFields.fLengthAndFlags = kShortString; } inline void -UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { +UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) { setLength(len); fUnion.fFields.fArray = array; 
fUnion.fFields.fCapacity = capacity; } -inline const UChar * -UnicodeString::getTerminatedBuffer() { - if(!isWritable()) { - return 0; - } else { - UChar *array = getArrayStart(); - int32_t len = length(); - if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) { - /* - * kRefCounted: Do not write the NUL if the buffer is shared. - * That is mostly safe, except when the length of one copy was modified - * without copy-on-write, e.g., via truncate(newLength) or remove(void). - * Then the NUL would be written into the middle of another copy's string. - */ - if(!(fFlags&kBufferIsReadonly)) { - /* - * We must not write to a readonly buffer, but it is known to be - * NUL-terminated if len<capacity. - * A shared, allocated buffer (refCount()>1) must not have its contents - * modified, but the NUL at [len] is beyond the string contents, - * and multiple string objects and threads writing the same NUL into the - * same location is harmless. - * In all other cases, the buffer is fully writable and it is anyway safe - * to write the NUL. - * - * Note: An earlier version of this code tested whether there is a NUL - * at [len] already, but, while safe, it generated lots of warnings from - * tools like valgrind and Purify.
- */ - array[len] = 0; - } - return array; - } else if(cloneArrayIfNeeded(len+1)) { - array = getArrayStart(); - array[len] = 0; - return array; - } else { - return 0; - } - } -} - inline UnicodeString& -UnicodeString::operator= (UChar ch) +UnicodeString::operator= (char16_t ch) { return doReplace(0, length(), &ch, 0, 1); } inline UnicodeString& @@ -4327,7 +4598,7 @@ UnicodeString::setTo(const UnicodeString& srcText) } inline UnicodeString& -UnicodeString::setTo(const UChar *srcChars, +UnicodeString::setTo(const char16_t *srcChars, int32_t srcLength) { unBogus(); @@ -4335,7 +4606,7 @@ UnicodeString::setTo(const UChar *srcChars, } inline UnicodeString& -UnicodeString::setTo(UChar srcChar) +UnicodeString::setTo(char16_t srcChar) { unBogus(); return doReplace(0, length(), &srcChar, 0, 1); @@ -4352,30 +4623,30 @@ inline UnicodeString& UnicodeString::append(const UnicodeString& srcText, int32_t srcStart, int32_t srcLength) -{ return doReplace(length(), 0, srcText, srcStart, srcLength); } +{ return doAppend(srcText, srcStart, srcLength); } inline UnicodeString& UnicodeString::append(const UnicodeString& srcText) -{ return doReplace(length(), 0, srcText, 0, srcText.length()); } +{ return doAppend(srcText, 0, srcText.length()); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) -{ return doReplace(length(), 0, srcChars, srcStart, srcLength); } +{ return doAppend(srcChars, srcStart, srcLength); } inline UnicodeString& -UnicodeString::append(const UChar *srcChars, +UnicodeString::append(ConstChar16Ptr srcChars, int32_t srcLength) -{ return doReplace(length(), 0, srcChars, 0, srcLength); } +{ return doAppend(srcChars, 0, srcLength); } inline UnicodeString& -UnicodeString::append(UChar srcChar) -{ return doReplace(length(), 0, &srcChar, 0, 1); } +UnicodeString::append(char16_t srcChar) +{ return doAppend(&srcChar, 0, 1); } inline UnicodeString& -UnicodeString::operator+= 
(UChar ch) -{ return doReplace(length(), 0, &ch, 0, 1); } +UnicodeString::operator+= (char16_t ch) +{ return doAppend(&ch, 0, 1); } inline UnicodeString& UnicodeString::operator+= (UChar32 ch) { @@ -4384,7 +4655,7 @@ UnicodeString::operator+= (UChar32 ch) { inline UnicodeString& UnicodeString::operator+= (const UnicodeString& srcText) -{ return doReplace(length(), 0, srcText, 0, srcText.length()); } +{ return doAppend(srcText, 0, srcText.length()); } inline UnicodeString& UnicodeString::insert(int32_t start, @@ -4400,20 +4671,20 @@ UnicodeString::insert(int32_t start, inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + const char16_t *srcChars, int32_t srcStart, int32_t srcLength) { return doReplace(start, 0, srcChars, srcStart, srcLength); } inline UnicodeString& UnicodeString::insert(int32_t start, - const UChar *srcChars, + ConstChar16Ptr srcChars, int32_t srcLength) { return doReplace(start, 0, srcChars, 0, srcLength); } inline UnicodeString& UnicodeString::insert(int32_t start, - UChar srcChar) + char16_t srcChar) { return doReplace(start, 0, &srcChar, 0, 1); } inline UnicodeString& @@ -4426,12 +4697,10 @@ inline UnicodeString& UnicodeString::remove() { // remove() of a bogus string makes the string empty and non-bogus - // we also un-alias a read-only alias to deal with NUL-termination - // issues with getTerminatedBuffer() - if(fFlags & (kIsBogus|kBufferIsReadonly)) { + if(isBogus()) { setToEmpty(); } else { - fShortLength = 0; + setZeroLength(); } return *this; } @@ -4467,9 +4736,6 @@ UnicodeString::truncate(int32_t targetLength) return FALSE; } else if((uint32_t)targetLength < (uint32_t)length()) { setLength(targetLength); - if(fFlags&kBufferIsReadonly) { - fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more - } return TRUE; } else { return FALSE; @@ -4486,5 +4752,6 @@ UnicodeString::reverse(int32_t start, { return doReverse(start, _length); } U_NAMESPACE_END +#endif // U_SHOW_CPLUSPLUS_API #endif