X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/51004dcb01e06fef634b61be77ed73dd61cb6db9..a62d09fcbc8ca9da27887e04112ec143e19b1caf:/icuSources/common/unicode/unistr.h?ds=sidebyside diff --git a/icuSources/common/unicode/unistr.h b/icuSources/common/unicode/unistr.h index e70af40e..6710d5a7 100644 --- a/icuSources/common/unicode/unistr.h +++ b/icuSources/common/unicode/unistr.h @@ -1,6 +1,6 @@ /* ********************************************************************** -* Copyright (C) 1998-2013, International Business Machines +* Copyright (C) 1998-2016, International Business Machines * Corporation and others. All Rights Reserved. ********************************************************************** * @@ -22,8 +22,8 @@ #define UNISTR_H /** - * \file - * \brief C++ API: Unicode String + * \file + * \brief C++ API: Unicode String */ #include "unicode/utypes.h" @@ -34,7 +34,6 @@ #include "unicode/ucasemap.h" struct UConverter; // unicode/ucnv.h -class StringThreadTest; #ifndef U_COMPARE_CODE_POINT_ORDER /* see also ustring.h and unorm.h */ @@ -54,12 +53,10 @@ U_STABLE int32_t U_EXPORT2 u_strlen(const UChar *s); #endif -#ifndef U_HIDE_INTERNAL_API /** * \def U_STRING_CASE_MAPPER_DEFINED * @internal */ - #ifndef U_STRING_CASE_MAPPER_DEFINED #define U_STRING_CASE_MAPPER_DEFINED @@ -74,7 +71,6 @@ UStringCaseMapper(const UCaseMap *csm, UErrorCode *pErrorCode); #endif -#endif /* U_HIDE_INTERNAL_API */ U_NAMESPACE_BEGIN @@ -176,20 +172,69 @@ class UnicodeStringAppendable; // unicode/appendable.h # endif #endif +/* Cannot make the following #ifndef U_HIDE_DRAFT_API, + it is used to construct other non-internal constants */ +/** + * \def UNISTR_OBJECT_SIZE + * Desired sizeof(UnicodeString) in bytes. + * It should be a multiple of sizeof(pointer) to avoid unusable space for padding. + * The object size may want to be a multiple of 16 bytes, + * which is a common granularity for heap allocation. 
+ * + * Any space inside the object beyond sizeof(vtable pointer) + 2 + * is available for storing short strings inside the object. + * The bigger the object, the longer a string that can be stored inside the object, + * without additional heap allocation. + * + * Depending on a platform's pointer size, pointer alignment requirements, + * and struct padding, the compiler will usually round up sizeof(UnicodeString) + * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models), + * to hold the fields for heap-allocated strings. + * Such a minimum size also ensures that the object is easily large enough + * to hold at least 2 UChars, for one supplementary code point (U16_MAX_LENGTH). + * + * sizeof(UnicodeString) >= 48 should work for all known platforms. + * + * For example, on a 64-bit machine where sizeof(vtable pointer) is 8, + * sizeof(UnicodeString) = 64 would leave space for + * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27 + * UChars stored inside the object. + * + * The minimum object size on a 64-bit machine would be + * 4 * sizeof(pointer) = 4 * 8 = 32 bytes, + * and the internal buffer would hold up to 11 UChars in that case. + * + * @see U16_MAX_LENGTH + * @draft ICU 56 + */ +#ifndef UNISTR_OBJECT_SIZE +# define UNISTR_OBJECT_SIZE 64 +#endif + /** * UnicodeString is a string class that stores Unicode characters directly and provides - * similar functionality as the Java String and StringBuffer classes. + * similar functionality as the Java String and StringBuffer/StringBuilder classes. * It is a concrete implementation of the abstract class Replaceable (for transliteration). * + * A UnicodeString may also "alias" an external array of characters + * (that is, point to it, rather than own the array) + * whose lifetime must then at least match the lifetime of the aliasing object. 
+ * This aliasing may be preserved when returning a UnicodeString by value, + * depending on the compiler and the function implementation, + * via Return Value Optimization (RVO) or the move assignment operator. + * (However, the copy assignment operator does not preserve aliasing.) + * For details see the description of storage models at the end of the class API docs + * and in the User Guide chapter linked from there. + * * The UnicodeString class is not suitable for subclassing. * *
For an overview of Unicode strings in C and C++ see the - * User Guide Strings chapter.
+ * User Guide Strings chapter. * *In ICU, a Unicode string consists of 16-bit Unicode code units. * A Unicode character may be stored with either one code unit * (the most common case) or with a matched pair of special code units - * ("surrogates"). The data type for code units is UChar. + * ("surrogates"). The data type for code units is UChar. * For single-character handling, a Unicode character code point is a value * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
* @@ -239,7 +284,7 @@ class UnicodeStringAppendable; // unicode/appendable.h * significant performance improvements. * Also, the internal buffer is accessible via special functions. * For details see the - * User Guide Strings chapter. + * User Guide Strings chapter. * * @see utf.h * @see CharacterIterator @@ -341,7 +386,8 @@ public: /** * Compare the characters bitwise in the range * [start, start + length) with the characters - * in text + * in the entire string text. + * (The parameters "start" and "length" are not applied to the other text "text".) * @param start the offset at which the compare operation begins * @param length the number of characters of text to compare. * @param text the other text to be compared against this string. @@ -1483,12 +1529,12 @@ public: UnicodeString& target) const; /** - * Copy the characters in the range - * [start, start + length) into an array of characters. + * Copy the characters in the range + * [start, start + startLength) into an array of characters. * All characters must be invariant (see utypes.h). * Use US_INV as the last, signature-distinguishing parameter. * - * This function does not write any more thantargetLength
+ * This function does not write any more than targetCapacity
* characters but returns the length of the entire output string
* so that one can allocate a larger buffer and call the function again
* if necessary.
@@ -1796,7 +1842,7 @@ public:
* For a bogus string, getBuffer() and getTerminatedBuffer() return NULL, and
* length() returns 0.
*
- * @return TRUE if the string is valid, FALSE otherwise
+ * @return TRUE if the string is bogus/invalid, FALSE otherwise
* @see setToBogus()
* @stable ICU 2.0
*/
@@ -1812,9 +1858,20 @@ public:
/**
* Assignment operator. Replace the characters in this UnicodeString
* with the characters from srcText.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * By contrast, the fastCopyFrom() function implements the old,
+ * more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
* @param srcText The text containing the characters to replace
* @return a reference to this
* @stable ICU 2.0
+ * @see fastCopyFrom
*/
UnicodeString &operator=(const UnicodeString &srcText);
@@ -1836,12 +1893,60 @@ public:
* including its contents, for example for strings from resource bundles
* or aliases to string constants.
*
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
* @param src The text containing the characters to replace.
* @return a reference to this
* @stable ICU 2.4
*/
UnicodeString &fastCopyFrom(const UnicodeString &src);
+#ifndef U_HIDE_DRAFT_API
+#if U_HAVE_RVALUE_REFERENCES
+ /**
+ * Move assignment operator; might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source string; passed on to moveFrom(), which does the actual work
+ * @return *this (the reference returned by moveFrom(src))
+ * @draft ICU 56
+ */
+ UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
+ return moveFrom(src);
+ }
+#endif
+ /**
+ * Move assignment, might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * The behavior is undefined if *this and src are the same object.
+ *
+ * Can be called explicitly, does not need C++11 support.
+ * @param src source string
+ * @return *this
+ * @draft ICU 56
+ */
+ UnicodeString &moveFrom(UnicodeString &src) U_NOEXCEPT;
+
+ /**
+ * Swap strings.
+ * @param other other string
+ * @draft ICU 56
+ */
+ void swap(UnicodeString &other) U_NOEXCEPT;
+
+ /**
+ * Non-member UnicodeString swap function; forwards to the member function s1.swap(s2).
+ * @param s1 will get s2's contents and state
+ * @param s2 will get s1's contents and state
+ * @draft ICU 56
+ */
+ friend U_COMMON_API inline void U_EXPORT2
+ swap(UnicodeString &s1, UnicodeString &s2) U_NOEXCEPT {
+ s1.swap(s2);
+ }
+#endif /* U_HIDE_DRAFT_API -- also covers the friend swap(), which calls the draft member swap() */
+
/**
* Assignment operator. Replace the characters in this UnicodeString
* with the code unit ch.
@@ -2845,7 +2950,7 @@ public:
* @see getBuffer()
* @stable ICU 2.2
*/
- inline const UChar *getTerminatedBuffer();
+ const UChar *getTerminatedBuffer();
//========================================
// Constructors
@@ -3091,11 +3196,34 @@ public:
/**
* Copy constructor.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * By contrast, the fastCopyFrom() function implements the old,
+ * more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
* @param that The UnicodeString object to copy.
* @stable ICU 2.0
+ * @see fastCopyFrom
*/
UnicodeString(const UnicodeString& that);
+#ifndef U_HIDE_DRAFT_API
+#if U_HAVE_RVALUE_REFERENCES
+ /**
+ * Move constructor, might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * @param src source string
+ * @draft ICU 56
+ */
+ UnicodeString(UnicodeString &&src) U_NOEXCEPT;
+#endif
+#endif /* U_HIDE_DRAFT_API */
+
/**
* 'Substring' constructor from tail of source string.
* @param src The UnicodeString object to copy.
@@ -3361,6 +3489,9 @@ private:
int32_t srcStart,
int32_t srcLength);
+ UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+ UnicodeString& doAppend(const UChar *srcChars, int32_t srcStart, int32_t srcLength);
+
UnicodeString& doReverse(int32_t start,
int32_t length);
@@ -3372,6 +3503,9 @@ private:
inline UChar* getArrayStart(void);
inline const UChar* getArrayStart(void) const;
+ inline UBool hasShortLength() const;
+ inline int32_t getShortLength() const;
+
// A UnicodeString object (not necessarily its current buffer)
// is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
inline UBool isWritable() const;
@@ -3380,13 +3514,16 @@ private:
inline UBool isBufferWritable() const;
// None of the following does releaseArray().
- inline void setLength(int32_t len); // sets only fShortLength and fLength
- inline void setToEmpty(); // sets fFlags=kShortString
- inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+ inline void setZeroLength();
+ inline void setShortLength(int32_t len);
+ inline void setLength(int32_t len);
+ inline void setToEmpty();
+ inline void setArray(UChar *array, int32_t len, int32_t capacity); // sets length but not flags
- // allocate the array; result may be fStackBuffer
+ // allocate the array; result may be the stack buffer
// sets refCount to 1 if appropriate
- // sets fArray, fCapacity, and fFlags
+ // sets fArray, fCapacity, and flags
+ // sets length to 0
// returns boolean for success or failure
UBool allocate(int32_t capacity);
@@ -3399,6 +3536,9 @@ private:
// implements assigment operator, copy constructor, and fastCopyFrom()
UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
+ // Copies just the fields without memory management.
+ void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) U_NOEXCEPT;
+
// Pin start and limit to acceptable values.
inline void pinIndex(int32_t& start) const;
inline void pinIndices(int32_t& start,
@@ -3470,21 +3610,29 @@ private:
// constants
enum {
- // Set the stack buffer size so that sizeof(UnicodeString) is,
- // naturally (without padding), a multiple of sizeof(pointer).
- US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
- kInvalidUChar=0xffff, // invalid UChar index
- kGrowSize=128, // grow size for this buffer
+ /**
+ * Size of stack buffer for short strings.
+ * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
+ * @see UNISTR_OBJECT_SIZE
+ */
+ US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
+ kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
kInvalidHashCode=0, // invalid hash code
kEmptyHashCode=1, // hash code for empty string
- // bit flag values for fFlags
+ // bit flag values for fLengthAndFlags
kIsBogus=1, // this string is bogus, i.e., not valid or NULL
- kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
+ kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
kRefCounted=4, // there is a refCount field before the characters in fArray
kBufferIsReadonly=8,// do not write to this buffer
kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
// and releaseBuffer(newLength) must be called
+ kAllStorageFlags=0x1f,
+
+ kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
+ kLength1=1<