+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2005-2009, International Business Machines
+* Copyright (C) 2005-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utext.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/utext.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "cmemory.h"
#include "cstring.h"
#include "uassert.h"
+#include "putilimp.h"
U_NAMESPACE_USE
U_CAPI void U_EXPORT2
utext_setNativeIndex(UText *ut, int64_t index) {
+ // Apple note, at entry ut->chunkContents may be 0, not necessarily a problem
+ // (CF funcs will have set chunkNativeStart/Limit to 0 forcing call to access)
if(index<ut->chunkNativeStart || index>=ut->chunkNativeLimit) {
// The desired position is outside of the current chunk.
// Access the new position. Assume a forward iteration from here,
// utf-16 indexing.
ut->chunkOffset=(int32_t)(index-ut->chunkNativeStart);
} else {
- ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
+ ut->chunkOffset=ut->pFuncs->mapNativeIndexToUTF16(ut, index);
}
+ // Apple note, it can still be valid to have ut->chunkContents==0 at this
+ // point (just not inside the next block), see <rdar://problem/53610517>
+
// The convention is that the index must always be on a code point boundary.
// Adjust the index position if it is in the middle of a surrogate pair.
if (ut->chunkOffset<ut->chunkLength) {
UChar c= ut->chunkContents[ut->chunkOffset];
- if (UTF16_IS_TRAIL(c)) {
+ if (U16_IS_TRAIL(c)) {
if (ut->chunkOffset==0) {
ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
}
if (ut->chunkOffset>0) {
UChar lead = ut->chunkContents[ut->chunkOffset-1];
- if (UTF16_IS_LEAD(lead)) {
+ if (U16_IS_LEAD(lead)) {
ut->chunkOffset--;
}
}
U_CAPI UText * U_EXPORT2
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
- UText *result;
- result = src->pFuncs->clone(dest, src, deep, status);
+ if (U_FAILURE(*status)) {
+ return dest;
+ }
+ UText *result = src->pFuncs->clone(dest, src, deep, status);
+ if (U_FAILURE(*status)) {
+ return result;
+ }
+ if (result == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return result;
+ }
if (readOnly) {
utext_freeze(result);
}
adjustPointer(dest, &dest->p, src);
adjustPointer(dest, &dest->q, src);
adjustPointer(dest, &dest->r, src);
- adjustPointer(dest, (const void **)&dest->chunkContents, src);
+ adjustPointer(dest, (const void **)&dest->chunkContents, src);
+
+ // The newly shallow-cloned UText does _not_ own the underlying storage for the text.
+ // (The source for the clone may or may not have owned the text.)
+
+ dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
return dest;
}
//------------------------------------------------------------------------------
// Chunk size.
-// Must be less than 85, because of byte mapping from UChar indexes to native indexes.
+// Must be less than 85 (256/3), because of byte mapping from UChar indexes to native indexes.
// Worst case is three native bytes to one UChar. (Supplemenaries are 4 native bytes
// to two UChars.)
+// The longest illegal byte sequence treated as a single error (and converted to U+FFFD)
+// is a three-byte sequence (truncated four-byte sequence).
//
enum { UTF8_TEXT_CHUNK_SIZE=32 };
if (ix>length) {
if (length>=0) {
ix=length;
- } else if (ix>ut->c) {
+ } else if (ix>=ut->c) {
// Zero terminated string, and requested index is beyond
// the region that has already been scanned.
// Scan up to either the end of the string or to the
// Requested index is in this buffer.
u8b = (UTF8Buf *)ut->p; // the current buffer
mapIndex = ix - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
ut->chunkOffset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
return TRUE;
// Swap the UText buffers.
// We want to fill what was previously the alternate buffer,
// and make what was the current buffer be the new alternate.
- UTF8Buf *u8b = (UTF8Buf *)ut->q;
+ UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
ut->q = ut->p;
- ut->p = u8b;
+ ut->p = u8b_swap;
int32_t strLen = ut->b;
UBool nulTerminated = FALSE;
nulTerminated = TRUE;
}
- UChar *buf = u8b->buf;
- uint8_t *mapToNative = u8b->mapToNative;
- uint8_t *mapToUChars = u8b->mapToUChars;
+ UChar *buf = u8b_swap->buf;
+ uint8_t *mapToNative = u8b_swap->mapToNative;
+ uint8_t *mapToUChars = u8b_swap->mapToUChars;
int32_t destIx = 0;
int32_t srcIx = ix;
UBool seenNonAscii = FALSE;
- UChar32 c;
+ UChar32 c = 0;
// Fill the chunk buffer and mapping arrays.
while (destIx<UTF8_TEXT_CHUNK_SIZE) {
if (c>0 && c<0x80) {
// Special case ASCII range for speed.
// zero is excluded to simplify bounds checking.
- buf[destIx] = c;
- mapToNative[destIx] = srcIx - ix;
- mapToUChars[srcIx-ix] = destIx;
+ buf[destIx] = (UChar)c;
+ mapToNative[destIx] = (uint8_t)(srcIx - ix);
+ mapToUChars[srcIx-ix] = (uint8_t)destIx;
srcIx++;
destIx++;
} else {
// General case, handle everything.
if (seenNonAscii == FALSE) {
seenNonAscii = TRUE;
- u8b->bufNILimit = destIx;
+ u8b_swap->bufNILimit = destIx;
}
int32_t cIx = srcIx;
int32_t dIx = destIx;
int32_t dIxSaved = destIx;
- U8_NEXT(s8, srcIx, strLen, c);
+ U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
if (c==0 && nulTerminated) {
srcIx--;
break;
}
- if (c<0) {
- // Illegal UTF-8. Replace with sub character.
- c = 0x0fffd;
- }
U16_APPEND_UNSAFE(buf, destIx, c);
do {
- mapToNative[dIx++] = cIx - ix;
+ mapToNative[dIx++] = (uint8_t)(cIx - ix);
} while (dIx < destIx);
do {
- mapToUChars[cIx++ - ix] = dIxSaved;
+ mapToUChars[cIx++ - ix] = (uint8_t)dIxSaved;
} while (cIx < srcIx);
}
if (srcIx>=strLen) {
// store Native <--> Chunk Map entries for the end of the buffer.
// There is no actual character here, but the index position is valid.
- mapToNative[destIx] = srcIx - ix;
- mapToUChars[srcIx - ix] = destIx;
+ mapToNative[destIx] = (uint8_t)(srcIx - ix);
+ mapToUChars[srcIx - ix] = (uint8_t)destIx;
// fill in Buffer descriptor
- u8b->bufNativeStart = ix;
- u8b->bufNativeLimit = srcIx;
- u8b->bufStartIdx = 0;
- u8b->bufLimitIdx = destIx;
+ u8b_swap->bufNativeStart = ix;
+ u8b_swap->bufNativeLimit = srcIx;
+ u8b_swap->bufStartIdx = 0;
+ u8b_swap->bufLimitIdx = destIx;
if (seenNonAscii == FALSE) {
- u8b->bufNILimit = destIx;
+ u8b_swap->bufNILimit = destIx;
}
- u8b->toUCharsMapStart = u8b->bufNativeStart;
+ u8b_swap->toUCharsMapStart = u8b_swap->bufNativeStart;
// Set UText chunk to refer to this buffer.
ut->chunkContents = buf;
ut->chunkOffset = 0;
- ut->chunkLength = u8b->bufLimitIdx;
- ut->chunkNativeStart = u8b->bufNativeStart;
- ut->chunkNativeLimit = u8b->bufNativeLimit;
- ut->nativeIndexingLimit = u8b->bufNILimit;
+ ut->chunkLength = u8b_swap->bufLimitIdx;
+ ut->chunkNativeStart = u8b_swap->bufNativeStart;
+ ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
+ ut->nativeIndexingLimit = u8b_swap->bufNILimit;
// For zero terminated strings, keep track of the maximum point
// scanned so far.
// Can only do this if the incoming index is somewhere in the interior of the string.
// If index is at the end, there is no character there to look at.
if (ix != ut->b) {
+ // Note: this macro will only move the index back if it is on a trail byte,
+ // there is a preceding lead byte, and the sequence from the lead
+ // through this trail could be part of a valid UTF-8 sequence.
+ // Otherwise the index remains unchanged.
U8_SET_CP_START(s8, 0, ix);
}
// Swap the UText buffers.
// We want to fill what was previously the alternate buffer,
// and make what was the current buffer be the new alternate.
- UTF8Buf *u8b = (UTF8Buf *)ut->q;
+ UTF8Buf *u8b_swap = (UTF8Buf *)ut->q;
ut->q = ut->p;
- ut->p = u8b;
-
- UChar *buf = u8b->buf;
- uint8_t *mapToNative = u8b->mapToNative;
- uint8_t *mapToUChars = u8b->mapToUChars;
- int32_t toUCharsMapStart = ix - (UTF8_TEXT_CHUNK_SIZE*3 + 1);
+ ut->p = u8b_swap;
+
+ UChar *buf = u8b_swap->buf;
+ uint8_t *mapToNative = u8b_swap->mapToNative;
+ uint8_t *mapToUChars = u8b_swap->mapToUChars;
+ int32_t toUCharsMapStart = ix - sizeof(UTF8Buf::mapToUChars) + 1;
+ // Note that toUCharsMapStart can be negative. Happens when the remaining
+ // text from current position to the beginning is less than the buffer size.
+ // + 1 because mapToUChars must have a slot at the end for the bufNativeLimit entry.
int32_t destIx = UTF8_TEXT_CHUNK_SIZE+2; // Start in the overflow region
// at end of buffer to leave room
// for a surrogate pair at the
// Map to/from Native Indexes, fill in for the position at the end of
// the buffer.
//
- mapToNative[destIx] = srcIx - toUCharsMapStart;
- mapToUChars[srcIx - toUCharsMapStart] = destIx;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
// Fill the chunk buffer
// Work backwards, filling from the end of the buffer towards the front.
c = s8[srcIx];
if (c<0x80) {
// Special case ASCII range for speed.
- buf[destIx] = c;
- mapToUChars[srcIx - toUCharsMapStart] = destIx;
- mapToNative[destIx] = srcIx - toUCharsMapStart;
+ buf[destIx] = (UChar)c;
+ U_ASSERT(toUCharsMapStart <= srcIx);
+ mapToUChars[srcIx - toUCharsMapStart] = (uint8_t)destIx;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
} else {
// General case, handle everything non-ASCII.
int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char
// Get the full character from the UTF8 string.
- // use code derived from tbe macros in utf.8
+ // use code derived from the macros in utf8.h
// Leaves srcIx pointing at the first byte of the UTF-8 char.
//
- if (c<=0xbf) {
- c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -1);
- // leaves srcIx at first byte of the multi-byte char.
- } else {
- c=0x0fffd;
- }
+ c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);
+ // leaves srcIx at first byte of the multi-byte char.
// Store the character in UTF-16 buffer.
if (c<0x10000) {
- buf[destIx] = c;
- mapToNative[destIx] = srcIx - toUCharsMapStart;
+ buf[destIx] = (UChar)c;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
} else {
buf[destIx] = U16_TRAIL(c);
- mapToNative[destIx] = srcIx - toUCharsMapStart;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
buf[--destIx] = U16_LEAD(c);
- mapToNative[destIx] = srcIx - toUCharsMapStart;
+ mapToNative[destIx] = (uint8_t)(srcIx - toUCharsMapStart);
}
// Fill in the map from native indexes to UChars buf index.
do {
- mapToUChars[sIx-- - toUCharsMapStart] = destIx;
+ mapToUChars[sIx-- - toUCharsMapStart] = (uint8_t)destIx;
} while (sIx >= srcIx);
+ U_ASSERT(toUCharsMapStart <= (srcIx+1));
// Set native indexing limit to be the current position.
// We are processing a non-ascii, non-native-indexing char now;
bufNILimit = destIx;
}
}
- u8b->bufNativeStart = srcIx;
- u8b->bufNativeLimit = ix;
- u8b->bufStartIdx = destIx;
- u8b->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2;
- u8b->bufNILimit = bufNILimit - u8b->bufStartIdx;
- u8b->toUCharsMapStart = toUCharsMapStart;
-
- ut->chunkContents = &buf[u8b->bufStartIdx];
- ut->chunkLength = u8b->bufLimitIdx - u8b->bufStartIdx;
+ u8b_swap->bufNativeStart = srcIx;
+ u8b_swap->bufNativeLimit = ix;
+ u8b_swap->bufStartIdx = destIx;
+ u8b_swap->bufLimitIdx = UTF8_TEXT_CHUNK_SIZE+2;
+ u8b_swap->bufNILimit = bufNILimit - u8b_swap->bufStartIdx;
+ u8b_swap->toUCharsMapStart = toUCharsMapStart;
+
+ ut->chunkContents = &buf[u8b_swap->bufStartIdx];
+ ut->chunkLength = u8b_swap->bufLimitIdx - u8b_swap->bufStartIdx;
ut->chunkOffset = ut->chunkLength;
- ut->chunkNativeStart = u8b->bufNativeStart;
- ut->chunkNativeLimit = u8b->bufNativeLimit;
- ut->nativeIndexingLimit = u8b->bufNILimit;
+ ut->chunkNativeStart = u8b_swap->bufNativeStart;
+ ut->chunkNativeLimit = u8b_swap->bufNativeLimit;
+ ut->nativeIndexingLimit = u8b_swap->bufNILimit;
return TRUE;
}
{
UChar *pDest = dest;
- UChar *pDestLimit = dest+destCapacity;
+ UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
UChar32 ch=0;
int32_t index = 0;
int32_t reqLength = 0;
if(ch <=0x7f){
*pDest++=(UChar)ch;
}else{
- ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
- if(ch<0){
- ch = 0xfffd;
- }
- if(ch<=0xFFFF){
+ ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
+ if(U_IS_BMP(ch)){
*(pDest++)=(UChar)ch;
}else{
- *(pDest++)=UTF16_LEAD(ch);
+ *(pDest++)=U16_LEAD(ch);
if(pDest<pDestLimit){
- *(pDest++)=UTF16_TRAIL(ch);
+ *(pDest++)=U16_TRAIL(ch);
}else{
reqLength++;
break;
if(ch <= 0x7f){
reqLength++;
}else{
- ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
- if(ch<0){
- ch = 0xfffd;
- }
- reqLength+=UTF_CHAR_LENGTH(ch);
+ ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
+ reqLength+=U16_LENGTH(ch);
}
}
int i;
if (start32 < ut->chunkNativeLimit) {
for (i=0; i<3; i++) {
- if (U8_IS_LEAD(buf[start32]) || start32==0) {
+ if (U8_IS_SINGLE(buf[start32]) || U8_IS_LEAD(buf[start32]) || start32==0) {
break;
}
start32--;
if (limit32 < ut->chunkNativeLimit) {
for (i=0; i<3; i++) {
- if (U8_IS_LEAD(buf[limit32]) || limit32==0) {
+ if (U8_IS_SINGLE(buf[limit32]) || U8_IS_LEAD(buf[limit32]) || limit32==0) {
break;
}
limit32--;
utext_strFromUTF8(dest, destCapacity, &destLength,
(const char *)ut->context+start32, limit32-start32,
pErrorCode);
+ utf8TextAccess(ut, limit32, TRUE);
return destLength;
}
U_ASSERT(index>=ut->chunkNativeStart+ut->nativeIndexingLimit);
U_ASSERT(index<=ut->chunkNativeLimit);
int32_t mapIndex = index - u8b->toUCharsMapStart;
+ U_ASSERT(mapIndex < (int32_t)sizeof(UTF8Buf::mapToUChars));
int32_t offset = u8b->mapToUChars[mapIndex] - u8b->bufStartIdx;
U_ASSERT(offset>=0 && offset<=ut->chunkLength);
return offset;
U_CDECL_END
-static const struct UTextFuncs utf8Funcs =
+static const struct UTextFuncs utf8Funcs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
};
+static const char gEmptyString[] = {0};
+
U_CAPI UText * U_EXPORT2
utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status) {
if(U_FAILURE(*status)) {
return NULL;
}
+ if(s==NULL && length==0) {
+ s = gEmptyString;
+ }
+
if(s==NULL || length<-1 || length>INT32_MAX) {
*status=U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
UnicodeString buffer(dest, 0, destCapacity); // writable alias
rep->extractBetween(start32, limit32, buffer);
+ repTextAccess(ut, limit32, TRUE);
+
return u_terminateUChars(dest, destCapacity, length, status);
}
repTextAccess(ut, nativeIterIndex, TRUE);
}
-static const struct UTextFuncs repFuncs =
+static const struct UTextFuncs repFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
repTextLength,
repTextAccess,
repTextExtract,
- repTextReplace,
- repTextCopy,
+ repTextReplace,
+ repTextCopy,
NULL, // MapOffsetToNative,
NULL, // MapIndexToUTF16,
repTextClose,
return NULL;
}
ut = utext_setup(ut, sizeof(ReplExtra), status);
+ if(U_FAILURE(*status)) {
+ return ut;
+ }
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
if(rep->hasMetaData()) {
trimmedLength=destCapacity;
}
us->extract(start32, trimmedLength, dest);
+ t->chunkOffset = start32+trimmedLength;
+ } else {
+ t->chunkOffset = start32;
}
u_terminateUChars(dest, destCapacity, length, pErrorCode);
return length;
}
if(move) {
- // move: copy to destIndex, then replace original with nothing
+ // move: copy to destIndex, then remove original
int32_t segLength=limit32-start32;
us->copy(start32, limit32, destIndex32);
if(destIndex32<start32) {
start32+=segLength;
}
- us->replace(start32, segLength, NULL, 0);
+ us->remove(start32, segLength);
} else {
// copy
us->copy(start32, limit32, destIndex32);
}
-static const struct UTextFuncs unistrFuncs =
+static const struct UTextFuncs unistrFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
unistrTextLength,
unistrTextAccess,
unistrTextExtract,
- unistrTextReplace,
- unistrTextCopy,
+ unistrTextReplace,
+ unistrTextCopy,
NULL, // MapOffsetToNative,
NULL, // MapIndexToUTF16,
unistrTextClose,
U_CAPI UText * U_EXPORT2
utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
- // TODO: use openConstUnicodeString, then add in the differences.
- //
- ut = utext_setup(ut, 0, status);
+ ut = utext_openConstUnicodeString(ut, s, status);
if (U_SUCCESS(*status)) {
- ut->pFuncs = &unistrFuncs;
- ut->context = s;
- ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)|
- I32_FLAG(UTEXT_PROVIDER_WRITABLE);
-
- ut->chunkContents = s->getBuffer();
- ut->chunkLength = s->length();
- ut->chunkNativeStart = 0;
- ut->chunkNativeLimit = ut->chunkLength;
- ut->nativeIndexingLimit = ut->chunkLength;
+ ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
}
return ut;
}
U_CAPI UText * U_EXPORT2
utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) {
+ if (U_SUCCESS(*status) && s->isBogus()) {
+ // The UnicodeString is bogus, but we still need to detach the UText
+ // from whatever it was hooked to before, if anything.
+ utext_openUChars(ut, NULL, 0, status);
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return ut;
+ }
ut = utext_setup(ut, 0, status);
// note: use the standard (writable) function table for UnicodeString.
// The flag settings disable writing, so having the functions in
return 0;
}
- const UChar *s=(const UChar *)ut->context;
+ //const UChar *s=(const UChar *)ut->context;
int32_t si, di;
int32_t start32;
// Pins 'start' to the length of the string, if it came in out-of-bounds.
// Snaps 'start' to the beginning of a code point.
ucstrTextAccess(ut, start, TRUE);
- U_ASSERT(start <= INT32_MAX);
- start32 = (int32_t)start;
+ const UChar *s=ut->chunkContents;
+ start32 = ut->chunkOffset;
int32_t strLength=(int32_t)ut->a;
if (strLength >= 0) {
} else {
limit32 = pinIndex(limit, INT32_MAX);
}
-
di = 0;
for (si=start32; si<limit32; si++) {
if (strLength<0 && s[si]==0) {
ut->chunkLength = si;
ut->nativeIndexingLimit = si;
strLength = si;
+ limit32 = si;
break;
}
+ U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
if (di<destCapacity) {
// only store if there is space.
dest[di] = s[si];
if (strLength>=0) {
// We have filled the destination buffer, and the string length is known.
// Cut the loop short. There is no need to scan string termination.
- di = strLength;
+ di = limit32 - start32;
si = limit32;
break;
}
// If the limit index points to a lead surrogate of a pair,
// add the corresponding trail surrogate to the destination.
if (si>0 && U16_IS_LEAD(s[si-1]) &&
- ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si])))
+ ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si])))
{
if (di<destCapacity) {
// store only if there is space in the output buffer.
- dest[di++] = s[si++];
+ dest[di++] = s[si];
}
+ si++;
}
// Put iteration position at the point just following the extracted text
- ut->chunkOffset = si;
+ if (si <= ut->chunkNativeLimit) {
+ ut->chunkOffset = si;
+ } else {
+ ucstrTextAccess(ut, si, TRUE);
+ }
// Add a terminating NUL if space in the buffer permits,
// and set the error status as required.
return di;
}
-static const struct UTextFuncs ucstrFuncs =
+static const struct UTextFuncs ucstrFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
U_CDECL_END
+static const UChar gEmptyUString[] = {0};
U_CAPI UText * U_EXPORT2
utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status) {
if (U_FAILURE(*status)) {
return NULL;
}
- if (length < -1 || length>INT32_MAX) {
+ if(s==NULL && length==0) {
+ s = gEmptyUString;
+ }
+ if (s==NULL || length < -1 || length>INT32_MAX) {
*status = U_ILLEGAL_ARGUMENT_ERROR;
return NULL;
}
CharacterIterator *srcCI =(CharacterIterator *)src->context;
srcCI = srcCI->clone();
dest = utext_openCharacterIterator(dest, srcCI, status);
+ if (U_FAILURE(*status)) {
+ return dest;
+ }
// cast off const on getNativeIndex.
// For CharacterIterator based UTexts, this is safe, the operation is const.
int64_t ix = utext_getNativeIndex((UText *)src);
int32_t limit32 = pinIndex(limit, length);
int32_t desti = 0;
int32_t srci;
+ int32_t copyLimit;
CharacterIterator *ci = (CharacterIterator *)ut->context;
ci->setIndex32(start32); // Moves ix to lead of surrogate pair, if needed.
srci = ci->getIndex();
+ copyLimit = srci;
while (srci<limit32) {
UChar32 c = ci->next32PostInc();
int32_t len = U16_LENGTH(c);
+ U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds MAX_INT32, which must not happen logically */
if (desti+len <= destCapacity) {
U16_APPEND_UNSAFE(dest, desti, c);
+ copyLimit = srci+len;
} else {
desti += len;
*status = U_BUFFER_OVERFLOW_ERROR;
srci += len;
}
+ charIterTextAccess(ut, copyLimit, TRUE);
+
u_terminateUChars(dest, destCapacity, desti, status);
return desti;
}
-static const struct UTextFuncs charIterFuncs =
+static const struct UTextFuncs charIterFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
}
return ut;
}
-
-
-