/*
**********************************************************************
-* Copyright (C) 1998-2004, International Business Machines
+* Copyright (C) 1998-2008, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*
#ifndef UNISTR_H
#define UNISTR_H
+/**
+ * \file
+ * \brief C++ API: Unicode String
+ */
+
#include "unicode/rep.h"
struct UConverter; // unicode/ucnv.h
#endif
#ifndef USTRING_H
-/* see ustring.h */
+/**
+ * \ingroup ustring_ustrlen
+ */
U_STABLE int32_t U_EXPORT2
u_strlen(const UChar *s);
#endif
* therefore recommended over ones taking a charset name string
* (where the empty string "" indicates invariant-character conversion).
*
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
-#define US_INV UnicodeString::kInvariant
+#define US_INV U_NAMESPACE_QUALIFIER UnicodeString::kInvariant
/**
* Unicode String literals in C++.
* such string variable before it is used.
* @stable ICU 2.0
*/
-#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)L ## cs, _length)
+#if defined(U_DECLARE_UTF16)
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)L ## cs, _length)
#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define UNICODE_STRING(cs, _length) UnicodeString(TRUE, (const UChar *)cs, _length)
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(TRUE, (const UChar *)cs, _length)
#else
-# define UNICODE_STRING(cs, _length) UnicodeString(cs, _length, US_INV)
+# define UNICODE_STRING(cs, _length) U_NAMESPACE_QUALIFIER UnicodeString(cs, _length, US_INV)
#endif
/**
* The string parameter must be a C string literal.
* @stable ICU 2.0
*/
-#if U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)L ## cs, -1)
-#elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
-# define UNICODE_STRING_SIMPLE(cs) UnicodeString(TRUE, (const UChar *)cs, -1)
-#else
-# define UNICODE_STRING_SIMPLE(cs) UnicodeString(cs, -1, US_INV)
-#endif
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
/**
* UnicodeString is a string class that stores Unicode characters directly and provides
* The UnicodeString class is not suitable for subclassing.
*
* <p>For an overview of Unicode strings in C and C++ see the
- * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
*
* <p>In ICU, a Unicode string consists of 16-bit Unicode <em>code units</em>.
- * A Unicode character may be stored with either
- * one code unit — which is the most common case — or with a matched pair of
- * special code units ("surrogates").
- * The data type for code units is UChar.<br>
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is UChar.
* For single-character handling, a Unicode character code <em>point</em> is a value
* in the range 0..0x10ffff. ICU uses the UChar32 type for code points.</p>
*
* significant performance improvements.
* Also, the internal buffer is accessible via special functions.
* For details see the
- * <a href="http://oss.software.ibm.com/icu/userguide/strings.html">User Guide Strings chapter</a>.</p>
+ * <a href="http://icu-project.org/userguide/strings.html">User Guide Strings chapter</a>.</p>
*
* @see utf.h
* @see CharacterIterator
* Use the macro US_INV instead of the full qualification for this value.
*
* @see US_INV
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
enum EInvariant {
/**
* @see EInvariant
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
kInvariant
};
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
/**
* Compare two Unicode strings in code point order.
- * This is different in UTF-16 from how compare(), operator==, startsWith() etc. work
+ * The result may be different from the results of compare(), operator<, etc.
* if supplementary characters are present:
*
* In UTF-16, supplementary characters (with code points U+10000 and above) are
* @param targetCapacity the length of the target buffer
* @param inv Signature-distinguishing paramater, use US_INV.
* @return the output string length, not including the terminating NUL
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
int32_t extract(int32_t start,
int32_t startLength,
* The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21.
*
- * This function uses only the first() and next() methods of the
+ * This function uses only the setText(), first() and next() methods of the
* provided break iterator.
*
* @param titleIter A break iterator to find the first characters of words
* The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21.
*
- * This function uses only the first() and next() methods of the
+ * This function uses only the setText(), first() and next() methods of the
* provided break iterator.
*
* @param titleIter A break iterator to find the first characters of words
*/
UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+ /**
+ * Titlecase this string, with options.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options.)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @param options Options bit set, see ucasemap_open().
+ * @return A reference to this.
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @see ucasemap_open
+ * @stable ICU 4.0
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
#endif
/**
* @param inv Signature-distinguishing paramater, use US_INV.
*
* @see US_INV
- * @draft ICU 3.2
+ * @stable ICU 3.2
*/
UnicodeString(const char *src, int32_t length, enum EInvariant inv);
int32_t doHashCode(void) const;
// get pointer to start of array
+ // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
inline UChar* getArrayStart(void);
inline const UChar* getArrayStart(void) const;
+ // A UnicodeString object (not necessarily its current buffer)
+ // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+ inline UBool isWritable() const;
+
+ // Is the current buffer writable?
+ inline UBool isBufferWritable() const;
+
+ // None of the following does releaseArray().
+ inline void setLength(int32_t len); // sets only fShortLength and fLength
+ inline void setToEmpty(); // sets fFlags=kShortString
+ inline void setToStackBuffer(int32_t len); // sets fFlags=kShortString
+ inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
+
// allocate the array; result may be fStackBuffer
// sets refCount to 1 if appropriate
// sets fArray, fCapacity, and fFlags
// constants
enum {
- US_STACKBUF_SIZE=7, // Size of stack buffer for small strings
+ // Set the stack buffer size so that sizeof(UnicodeString) is a multiple of sizeof(pointer):
+ // 32-bit pointers: 4+1+1+13*2 = 32 bytes
+ // 64-bit pointers: 8+1+1+15*2 = 40 bytes
+ US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for small strings
kInvalidUChar=0xffff, // invalid UChar index
kGrowSize=128, // grow size for this buffer
kInvalidHashCode=0, // invalid hash code
kWritableAlias=0
};
- friend class StringCharacterIterator;
friend class StringThreadTest;
+ union StackBufferOrFields; // forward declaration necessary before friend declaration
+ friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
/*
* The following are all the class fields that are stored
* in each UnicodeString object.
* on 64-bit machines (8-byte pointers), it should be 40 bytes.
*/
// (implicit) *vtable;
- int32_t fLength; // number of characters in fArray
- int32_t fCapacity; // sizeof fArray
- UChar *fArray; // the Unicode data
- uint16_t fFlags; // bit flags: see constants above
- UChar fStackBuffer [ US_STACKBUF_SIZE ]; // buffer for small strings
-
+ int8_t fShortLength; // >=0: the string length itself (0..127); <0: the real length is in fUnion.fFields.fLength
+ uint8_t fFlags; // bit flags: see constants above
+ union StackBufferOrFields {
+ // The two members overlay each other:
+ // fStackBuffer is used iff (fFlags&kUsingStackBuffer), else fFields is used
+ UChar fStackBuffer [US_STACKBUF_SIZE]; // buffer for small strings
+ struct {
+ uint16_t fPadding; // align the following field at 8B (32b pointers) or 12B (64b)
+ int32_t fLength; // length in UChars; set by setLength() only when the length is >127, else undefined
+ UChar *fArray; // the Unicode data (aligned at 12B (32b pointers) or 16B (64b))
+ int32_t fCapacity; // capacity of fArray, counted in UChars (returned by getCapacity())
+ } fFields;
+ } fUnion;
};
/**
* @param s1 The first string to be copied to the new one.
* @param s2 The second string to be copied to the new one, after s1.
* @return UnicodeString(s1).append(s2)
- * @draft ICU 2.8
+ * @stable ICU 2.8
*/
U_COMMON_API UnicodeString U_EXPORT2
operator+ (const UnicodeString &s1, const UnicodeString &s2);
-U_NAMESPACE_END
-
-// inline implementations -------------------------------------------------- ***
-
-//========================================
-// Array copying
-//========================================
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-inline void
-uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t count)
-{ while(count-- > 0) *dst++ = *src++; }
-
-/**
- * Copy an array of UnicodeString OBJECTS (not pointers).
- * @internal
- */
-inline void
-uprv_arrayCopy(const U_NAMESPACE_QUALIFIER UnicodeString *src, int32_t srcStart,
- U_NAMESPACE_QUALIFIER UnicodeString *dst, int32_t dstStart, int32_t count)
-{ uprv_arrayCopy(src+srcStart, dst+dstStart, count); }
-
-U_NAMESPACE_BEGIN
-
//========================================
// Inline members
//========================================
// pin index
if(start < 0) {
start = 0;
- } else if(start > fLength) {
- start = fLength;
+ } else if(start > length()) {
+ start = length();
}
}
int32_t& _length) const
{
// pin indices
+ int32_t len = length();
if(start < 0) {
start = 0;
- } else if(start > fLength) {
- start = fLength;
+ } else if(start > len) {
+ start = len;
}
if(_length < 0) {
_length = 0;
- } else if(_length > (fLength - start)) {
- _length = (fLength - start);
+ } else if(_length > (len - start)) {
+ _length = (len - start);
}
}
inline UChar*
UnicodeString::getArrayStart()
-{ return fArray; }
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } // picks the active union member; no kOpenGetBuffer/kIsBogus check is made here
inline const UChar*
UnicodeString::getArrayStart() const
-{ return fArray; }
+{ return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; } // const overload: same selection between stack buffer and fFields.fArray
//========================================
// Read-only implementation methods
//========================================
inline int32_t
UnicodeString::length() const
-{ return fLength; }
+{ return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; } // a negative fShortLength means the real length is in fUnion.fFields.fLength
inline int32_t
UnicodeString::getCapacity() const
-{ return fCapacity; }
+{ return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; } // the stack buffer always holds US_STACKBUF_SIZE UChars; otherwise report the stored capacity
inline int32_t
UnicodeString::hashCode() const
UnicodeString::isBogus() const
{ return (UBool)(fFlags & kIsBogus); }
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); } // TRUE only if neither kOpenGetBuffer nor kIsBogus is set in fFlags
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{ // reports whether the current buffer itself may be written to
+ return (UBool)(
+ !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) && // none of these three flags may be set
+ (!(fFlags&kRefCounted) || refCount()==1)); // and a kRefCounted buffer must have refCount()==1
+}
+
inline const UChar *
UnicodeString::getBuffer() const {
- if(!(fFlags&(kIsBogus|kOpenGetBuffer))) {
- return fArray;
- } else {
+ if(fFlags&(kIsBogus|kOpenGetBuffer)) { // bogus string or open getBuffer(minCapacity): return 0 instead of a pointer
return 0;
+ } else if(fFlags&kUsingStackBuffer) { // text lives in the in-object buffer
+ return fUnion.fStackBuffer;
+ } else { // text lives in the array referenced by fFields
+ return fUnion.fFields.fArray;
}
}
//========================================
inline int8_t
UnicodeString::doCompare(int32_t start,
- int32_t length,
+ int32_t thisLength,
const UnicodeString& srcText,
int32_t srcStart,
int32_t srcLength) const
return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
- return doCompare(start, length, srcText.fArray, srcStart, srcLength);
+ return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
}
}
if(isBogus()) {
return text.isBogus();
} else {
+ int32_t len = length(), textLength = text.length();
return
!text.isBogus() &&
- fLength == text.fLength &&
- doCompare(0, fLength, text, 0, text.fLength) == 0;
+ len == textLength &&
+ doCompare(0, len, text, 0, textLength) == 0;
}
}
inline UBool
UnicodeString::operator> (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) == 1; }
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
inline UBool
UnicodeString::operator< (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) == -1; }
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
inline UBool
UnicodeString::operator>= (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) != -1; }
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
inline UBool
UnicodeString::operator<= (const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength) != 1; }
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
inline int8_t
UnicodeString::compare(const UnicodeString& text) const
-{ return doCompare(0, fLength, text, 0, text.fLength); }
+{ return doCompare(0, length(), text, 0, text.length()); }
inline int8_t
UnicodeString::compare(int32_t start,
int32_t _length,
const UnicodeString& srcText) const
-{ return doCompare(start, _length, srcText, 0, srcText.fLength); }
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
inline int8_t
UnicodeString::compare(const UChar *srcChars,
int32_t srcLength) const
-{ return doCompare(0, fLength, srcChars, 0, srcLength); }
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
inline int8_t
UnicodeString::compare(int32_t start,
inline int8_t
UnicodeString::doCompareCodePointOrder(int32_t start,
- int32_t length,
+ int32_t thisLength,
const UnicodeString& srcText,
int32_t srcStart,
int32_t srcLength) const
return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
- return doCompareCodePointOrder(start, length, srcText.fArray, srcStart, srcLength);
+ return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
}
}
inline int8_t
UnicodeString::compareCodePointOrder(const UnicodeString& text) const
-{ return doCompareCodePointOrder(0, fLength, text, 0, text.fLength); }
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
int32_t _length,
const UnicodeString& srcText) const
-{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.fLength); }
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
inline int8_t
UnicodeString::compareCodePointOrder(const UChar *srcChars,
int32_t srcLength) const
-{ return doCompareCodePointOrder(0, fLength, srcChars, 0, srcLength); }
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
inline int8_t
UnicodeString::compareCodePointOrder(int32_t start,
inline int8_t
UnicodeString::doCaseCompare(int32_t start,
- int32_t length,
+ int32_t thisLength,
const UnicodeString &srcText,
int32_t srcStart,
int32_t srcLength,
return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
} else {
srcText.pinIndices(srcStart, srcLength);
- return doCaseCompare(start, length, srcText.fArray, srcStart, srcLength, options);
+ return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
}
}
inline int8_t
UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
- return doCaseCompare(0, fLength, text, 0, text.fLength, options);
+ return doCaseCompare(0, length(), text, 0, text.length(), options);
}
inline int8_t
int32_t _length,
const UnicodeString &srcText,
uint32_t options) const {
- return doCaseCompare(start, _length, srcText, 0, srcText.fLength, options);
+ return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
}
inline int8_t
UnicodeString::caseCompare(const UChar *srcChars,
int32_t srcLength,
uint32_t options) const {
- return doCaseCompare(0, fLength, srcChars, 0, srcLength, options);
+ return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
}
inline int8_t
inline int32_t
UnicodeString::indexOf(const UnicodeString& text) const
-{ return indexOf(text, 0, text.fLength, 0, fLength); }
+{ return indexOf(text, 0, text.length(), 0, length()); }
inline int32_t
UnicodeString::indexOf(const UnicodeString& text,
int32_t start) const {
pinIndex(start);
- return indexOf(text, 0, text.fLength, start, fLength - start);
+ return indexOf(text, 0, text.length(), start, length() - start);
}
inline int32_t
UnicodeString::indexOf(const UnicodeString& text,
int32_t start,
int32_t _length) const
-{ return indexOf(text, 0, text.fLength, start, _length); }
+{ return indexOf(text, 0, text.length(), start, _length); }
inline int32_t
UnicodeString::indexOf(const UChar *srcChars,
int32_t srcLength,
int32_t start) const {
pinIndex(start);
- return indexOf(srcChars, 0, srcLength, start, fLength - start);
+ return indexOf(srcChars, 0, srcLength, start, length() - start);
}
inline int32_t
inline int32_t
UnicodeString::indexOf(UChar c) const
-{ return doIndexOf(c, 0, fLength); }
+{ return doIndexOf(c, 0, length()); }
inline int32_t
UnicodeString::indexOf(UChar32 c) const
-{ return indexOf(c, 0, fLength); }
+{ return indexOf(c, 0, length()); }
inline int32_t
UnicodeString::indexOf(UChar c,
int32_t start) const {
pinIndex(start);
- return doIndexOf(c, start, fLength - start);
+ return doIndexOf(c, start, length() - start);
}
inline int32_t
UnicodeString::indexOf(UChar32 c,
int32_t start) const {
pinIndex(start);
- return indexOf(c, start, fLength - start);
+ return indexOf(c, start, length() - start);
}
inline int32_t
int32_t srcLength,
int32_t start) const {
pinIndex(start);
- return lastIndexOf(srcChars, 0, srcLength, start, fLength - start);
+ return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
}
inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text,
int32_t start,
int32_t _length) const
-{ return lastIndexOf(text, 0, text.fLength, start, _length); }
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text,
int32_t start) const {
pinIndex(start);
- return lastIndexOf(text, 0, text.fLength, start, fLength - start);
+ return lastIndexOf(text, 0, text.length(), start, length() - start);
}
inline int32_t
UnicodeString::lastIndexOf(const UnicodeString& text) const
-{ return lastIndexOf(text, 0, text.fLength, 0, fLength); }
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
inline int32_t
UnicodeString::lastIndexOf(UChar c,
inline int32_t
UnicodeString::lastIndexOf(UChar c) const
-{ return doLastIndexOf(c, 0, fLength); }
+{ return doLastIndexOf(c, 0, length()); }
inline int32_t
UnicodeString::lastIndexOf(UChar32 c) const {
- return lastIndexOf(c, 0, fLength);
+ return lastIndexOf(c, 0, length());
}
inline int32_t
UnicodeString::lastIndexOf(UChar c,
int32_t start) const {
pinIndex(start);
- return doLastIndexOf(c, start, fLength - start);
+ return doLastIndexOf(c, start, length() - start);
}
inline int32_t
UnicodeString::lastIndexOf(UChar32 c,
int32_t start) const {
pinIndex(start);
- return lastIndexOf(c, start, fLength - start);
+ return lastIndexOf(c, start, length() - start);
}
inline UBool
UnicodeString::startsWith(const UnicodeString& text) const
-{ return compare(0, text.fLength, text, 0, text.fLength) == 0; }
+{ return compare(0, text.length(), text, 0, text.length()) == 0; }
inline UBool
UnicodeString::startsWith(const UnicodeString& srcText,
inline UBool
UnicodeString::endsWith(const UnicodeString& text) const
-{ return doCompare(fLength - text.fLength, text.fLength,
- text, 0, text.fLength) == 0; }
+{ return doCompare(length() - text.length(), text.length(),
+ text, 0, text.length()) == 0; }
inline UBool
UnicodeString::endsWith(const UnicodeString& srcText,
int32_t srcStart,
int32_t srcLength) const {
srcText.pinIndices(srcStart, srcLength);
- return doCompare(fLength - srcLength, srcLength,
+ return doCompare(length() - srcLength, srcLength,
srcText, srcStart, srcLength) == 0;
}
if(srcLength < 0) {
srcLength = u_strlen(srcChars);
}
- return doCompare(fLength - srcLength, srcLength,
+ return doCompare(length() - srcLength, srcLength,
srcChars, 0, srcLength) == 0;
}
if(srcLength < 0) {
srcLength = u_strlen(srcChars + srcStart);
}
- return doCompare(fLength - srcLength, srcLength,
+ return doCompare(length() - srcLength, srcLength,
srcChars, srcStart, srcLength) == 0;
}
UnicodeString::replace(int32_t start,
int32_t _length,
const UnicodeString& srcText)
-{ return doReplace(start, _length, srcText, 0, srcText.fLength); }
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::replace(int32_t start,
UnicodeString::replaceBetween(int32_t start,
int32_t limit,
const UnicodeString& srcText)
-{ return doReplace(start, limit - start, srcText, 0, srcText.fLength); }
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::replaceBetween(int32_t start,
inline UnicodeString&
UnicodeString::findAndReplace(const UnicodeString& oldText,
const UnicodeString& newText)
-{ return findAndReplace(0, fLength, oldText, 0, oldText.fLength,
- newText, 0, newText.fLength); }
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+ newText, 0, newText.length()); }
inline UnicodeString&
UnicodeString::findAndReplace(int32_t start,
int32_t _length,
const UnicodeString& oldText,
const UnicodeString& newText)
-{ return findAndReplace(start, _length, oldText, 0, oldText.fLength,
- newText, 0, newText.fLength); }
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+ newText, 0, newText.length()); }
// ============================
// extract
UnicodeString::doExtract(int32_t start,
int32_t _length,
UnicodeString& target) const
-{ target.replace(0, target.fLength, *this, start, _length); }
+{ target.replace(0, target.length(), *this, start, _length); }
inline void
UnicodeString::extract(int32_t start,
inline UChar
UnicodeString::doCharAt(int32_t offset) const
{
- if((uint32_t)offset < (uint32_t)fLength) {
- return fArray[offset];
+ if((uint32_t)offset < (uint32_t)length()) {
+ return getArrayStart()[offset];
} else {
return kInvalidUChar;
}
inline UChar32
UnicodeString::char32At(int32_t offset) const
{
- if((uint32_t)offset < (uint32_t)fLength) {
+ int32_t len = length();
+ if((uint32_t)offset < (uint32_t)len) {
+ const UChar *array = getArrayStart();
UChar32 c;
- U16_GET(fArray, 0, offset, fLength, c);
+ U16_GET(array, 0, offset, len, c);
return c;
} else {
return kInvalidUChar;
inline int32_t
UnicodeString::getChar32Start(int32_t offset) const {
- if((uint32_t)offset < (uint32_t)fLength) {
- U16_SET_CP_START(fArray, 0, offset);
+ if((uint32_t)offset < (uint32_t)length()) {
+ const UChar *array = getArrayStart();
+ U16_SET_CP_START(array, 0, offset);
return offset;
} else {
return 0;
inline int32_t
UnicodeString::getChar32Limit(int32_t offset) const {
- if((uint32_t)offset < (uint32_t)fLength) {
- U16_SET_CP_LIMIT(fArray, 0, offset, fLength);
+ int32_t len = length();
+ if((uint32_t)offset < (uint32_t)len) {
+ const UChar *array = getArrayStart();
+ U16_SET_CP_LIMIT(array, 0, offset, len);
return offset;
} else {
- return fLength;
+ return len;
}
}
inline UBool
UnicodeString::isEmpty() const {
- return fLength == 0;
+ return fShortLength == 0; // enough on its own: lengths up to 127 are stored here directly, longer ones as a negative marker (see setLength())
}
//========================================
// Write implementation methods
//========================================
+inline void
+UnicodeString::setLength(int32_t len) { // records len; writes only fShortLength and, for long strings, fUnion.fFields.fLength
+ if(len <= 127) { // NOTE(review): a negative len would also take this branch; presumably callers never pass one
+ fShortLength = (int8_t)len; // fits into the int8_t field; fUnion.fFields.fLength is left as it was
+ } else {
+ fShortLength = (int8_t)-1; // negative marker: length() then reads fUnion.fFields.fLength
+ fUnion.fFields.fLength = len;
+ }
+}
+
+inline void
+UnicodeString::setToEmpty() { // sets length 0 and resets fFlags to kShortString; fUnion is not touched
+ fShortLength = 0;
+ fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setToStackBuffer(int32_t len) { // sets the length to len and fFlags to kShortString; fUnion is not touched
+ fShortLength = (int8_t)len; // no range check; NOTE(review): presumably len <= US_STACKBUF_SIZE, confirm at call sites
+ fFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) { // stores array, len and capacity; fFlags is left unchanged
+ setLength(len);
+ fUnion.fFields.fArray = array; // writes fFields; NOTE(review): presumably the caller makes sure kUsingStackBuffer is not set, confirm
+ fUnion.fFields.fCapacity = capacity;
+}
+
inline const UChar *
UnicodeString::getTerminatedBuffer() {
- if(fFlags&(kIsBogus|kOpenGetBuffer)) {
+ if(!isWritable()) { // kIsBogus or kOpenGetBuffer is set: return 0 instead of a pointer
return 0;
- } else if(fLength<fCapacity && fArray[fLength]==0) {
- return fArray;
- } else if(cloneArrayIfNeeded(fLength+1)) {
- fArray[fLength]=0;
- return fArray;
} else {
- return 0;
+ UChar *array = getArrayStart();
+ int32_t len = length();
+ if(len < getCapacity() && array[len] == 0) { // a NUL already follows the text: nothing to write
+ return array;
+ } else if(cloneArrayIfNeeded(len+1)) { // NOTE(review): presumably yields a writable buffer with capacity >= len+1; confirm in cloneArrayIfNeeded()
+ array = getArrayStart(); // fetch again; NOTE(review): the buffer may have been replaced by the call above
+ array[len] = 0; // write the terminator just past the text; the stored length is not changed here
+ return array;
+ } else { // cloneArrayIfNeeded() returned false
+ return 0;
+ }
}
}
inline UnicodeString&
UnicodeString::operator= (UChar ch)
-{ return doReplace(0, fLength, &ch, 0, 1); }
+{ return doReplace(0, length(), &ch, 0, 1); }
inline UnicodeString&
UnicodeString::operator= (UChar32 ch)
-{ return replace(0, fLength, ch); }
+{ return replace(0, length(), ch); }
inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText,
int32_t srcLength)
{
unBogus();
- return doReplace(0, fLength, srcText, srcStart, srcLength);
+ return doReplace(0, length(), srcText, srcStart, srcLength);
}
inline UnicodeString&
{
unBogus();
srcText.pinIndex(srcStart);
- return doReplace(0, fLength, srcText, srcStart, srcText.fLength - srcStart);
+ return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
}
inline UnicodeString&
UnicodeString::setTo(const UnicodeString& srcText)
{
unBogus();
- return doReplace(0, fLength, srcText, 0, srcText.fLength);
+ return doReplace(0, length(), srcText, 0, srcText.length());
}
inline UnicodeString&
int32_t srcLength)
{
unBogus();
- return doReplace(0, fLength, srcChars, 0, srcLength);
+ return doReplace(0, length(), srcChars, 0, srcLength);
}
inline UnicodeString&
UnicodeString::setTo(UChar srcChar)
{
unBogus();
- return doReplace(0, fLength, &srcChar, 0, 1);
+ return doReplace(0, length(), &srcChar, 0, 1);
}
inline UnicodeString&
UnicodeString::setTo(UChar32 srcChar)
{
unBogus();
- return replace(0, fLength, srcChar);
+ return replace(0, length(), srcChar);
}
-inline UnicodeString&
-UnicodeString::operator+= (UChar ch)
-{ return doReplace(fLength, 0, &ch, 0, 1); }
-
-inline UnicodeString&
-UnicodeString::operator+= (UChar32 ch) {
- UChar buffer[U16_MAX_LENGTH];
- int32_t _length = 0;
- UBool isError = FALSE;
- U16_APPEND(buffer, _length, U16_MAX_LENGTH, ch, isError);
- return doReplace(fLength, 0, buffer, 0, _length);
-}
-
-inline UnicodeString&
-UnicodeString::operator+= (const UnicodeString& srcText)
-{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
-
inline UnicodeString&
UnicodeString::append(const UnicodeString& srcText,
int32_t srcStart,
int32_t srcLength)
-{ return doReplace(fLength, 0, srcText, srcStart, srcLength); }
+{ return doReplace(length(), 0, srcText, srcStart, srcLength); }
inline UnicodeString&
UnicodeString::append(const UnicodeString& srcText)
-{ return doReplace(fLength, 0, srcText, 0, srcText.fLength); }
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::append(const UChar *srcChars,
int32_t srcStart,
int32_t srcLength)
-{ return doReplace(fLength, 0, srcChars, srcStart, srcLength); }
+{ return doReplace(length(), 0, srcChars, srcStart, srcLength); }
inline UnicodeString&
UnicodeString::append(const UChar *srcChars,
int32_t srcLength)
-{ return doReplace(fLength, 0, srcChars, 0, srcLength); }
+{ return doReplace(length(), 0, srcChars, 0, srcLength); }
inline UnicodeString&
UnicodeString::append(UChar srcChar)
-{ return doReplace(fLength, 0, &srcChar, 0, 1); }
+{ return doReplace(length(), 0, &srcChar, 0, 1); }
inline UnicodeString&
UnicodeString::append(UChar32 srcChar) {
int32_t _length = 0;
UBool isError = FALSE;
U16_APPEND(buffer, _length, U16_MAX_LENGTH, srcChar, isError);
- return doReplace(fLength, 0, buffer, 0, _length);
+ return doReplace(length(), 0, buffer, 0, _length);
}
+inline UnicodeString&
+UnicodeString::operator+= (UChar ch)
+{ return doReplace(length(), 0, &ch, 0, 1); } // same doReplace() call as append(UChar): start=length(), one UChar taken from &ch
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) { // delegates to append(UChar32) instead of repeating its U16_APPEND logic
+ return append(ch);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doReplace(length(), 0, srcText, 0, srcText.length()); } // same doReplace() call as append(const UnicodeString&)
+
inline UnicodeString&
UnicodeString::insert(int32_t start,
const UnicodeString& srcText,
inline UnicodeString&
UnicodeString::insert(int32_t start,
const UnicodeString& srcText)
-{ return doReplace(start, 0, srcText, 0, srcText.fLength); }
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
inline UnicodeString&
UnicodeString::insert(int32_t start,
if(isBogus()) {
unBogus();
} else {
- fLength = 0;
+ setLength(0);
}
return *this;
}
UnicodeString::remove(int32_t start,
int32_t _length)
{
- if(start <= 0 && _length == INT32_MAX) {
- // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
- return remove();
- } else {
+ if(start <= 0 && _length == INT32_MAX) {
+ // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+ return remove();
+ }
return doReplace(start, _length, NULL, 0, 0);
- }
}
inline UnicodeString&
// truncate(0) of a bogus string makes the string empty and non-bogus
unBogus();
return FALSE;
- } else if((uint32_t)targetLength < (uint32_t)fLength) {
- fLength = targetLength;
+ } else if((uint32_t)targetLength < (uint32_t)length()) {
+ setLength(targetLength);
return TRUE;
} else {
return FALSE;
inline UnicodeString&
UnicodeString::reverse()
-{ return doReverse(0, fLength); }
+{ return doReverse(0, length()); }
inline UnicodeString&
UnicodeString::reverse(int32_t start,