/*
*******************************************************************************
*
-* Copyright (C) 2005-2011, International Business Machines
+* Copyright (C) 2005-2016, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "unicode/unistr.h"
#include "unicode/chariter.h"
#include "unicode/utext.h"
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
#include "ustr_imp.h"
#include "cmemory.h"
#include "cstring.h"
// Adjust the index position if it is in the middle of a surrogate pair.
if (ut->chunkOffset<ut->chunkLength) {
UChar c= ut->chunkContents[ut->chunkOffset];
- if (UTF16_IS_TRAIL(c)) {
+ if (U16_IS_TRAIL(c)) {
if (ut->chunkOffset==0) {
ut->pFuncs->access(ut, ut->chunkNativeStart, FALSE);
}
if (ut->chunkOffset>0) {
UChar lead = ut->chunkContents[ut->chunkOffset-1];
- if (UTF16_IS_LEAD(lead)) {
+ if (U16_IS_LEAD(lead)) {
ut->chunkOffset--;
}
}
return TRUE;
}
-U_CAPI int32_t U_EXPORT2
-utext_compare(UText *s1, int32_t length1,
- UText *s2, int32_t length2) {
- UChar32 c1 = 0, c2 = 0;
-
- if(length1<0 && length2<0) {
- /* strcmp style, go until end of string */
- for(;;) {
- c1 = UTEXT_NEXT32(s1);
- c2 = UTEXT_NEXT32(s2);
- if(c1 != c2) {
- break;
- } else if(c1 == U_SENTINEL) {
- return 0;
- }
- }
- } else {
- if(length1 < 0) {
- length1 = INT32_MIN;
- } else if (length2 < 0) {
- length2 = INT32_MIN;
- }
-
- /* memcmp/UnicodeString style, both length-specified */
- while((length1 > 0 || length1 == INT32_MIN) && (length2 > 0 || length2 == INT32_MIN)) {
- c1 = UTEXT_NEXT32(s1);
- c2 = UTEXT_NEXT32(s2);
-
- if(c1 != c2) {
- break;
- } else if(c1 == U_SENTINEL) {
- return 0;
- }
-
- if (length1 != INT32_MIN) {
- length1 -= 1;
- }
- if (length2 != INT32_MIN) {
- length2 -= 1;
- }
- }
-
- if(length1 <= 0 && length1 != INT32_MIN) {
- if(length2 <= 0) {
- return 0;
- } else {
- return -1;
- }
- } else if(length2 <= 0 && length2 != INT32_MIN) {
- if (length1 <= 0) {
- return 0;
- } else {
- return 1;
- }
- }
- }
-
- return (int32_t)c1-(int32_t)c2;
-}
-
-U_CAPI int32_t U_EXPORT2
-utext_compareNativeLimit(UText *s1, int64_t limit1,
- UText *s2, int64_t limit2) {
- UChar32 c1, c2;
-
- if(limit1<0 && limit2<0) {
- /* strcmp style, go until end of string */
- for(;;) {
- c1 = UTEXT_NEXT32(s1);
- c2 = UTEXT_NEXT32(s2);
- if(c1 != c2) {
- return (int32_t)c1-(int32_t)c2;
- } else if(c1 == U_SENTINEL) {
- return 0;
- }
- }
- } else {
- /* memcmp/UnicodeString style, both length-specified */
- int64_t index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0);
- int64_t index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0);
-
- while((limit1 < 0 || index1 < limit1) && (limit2 < 0 || index2 < limit2)) {
- c1 = UTEXT_NEXT32(s1);
- c2 = UTEXT_NEXT32(s2);
-
- if(c1 != c2) {
- return (int32_t)c1-(int32_t)c2;
- } else if(c1 == U_SENTINEL) {
- return 0;
- }
-
- if (limit1 >= 0) {
- index1 = UTEXT_GETNATIVEINDEX(s1);
- }
- if (limit2 >= 0) {
- index2 = UTEXT_GETNATIVEINDEX(s2);
- }
- }
-
- if(limit1 >= 0 && index1 >= limit1) {
- if(index2 >= limit2) {
- return 0;
- } else {
- return -1;
- }
- } else {
- if(index1 >= limit1) {
- return 0;
- } else {
- return 1;
- }
- }
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-utext_caseCompare(UText *s1, int32_t length1,
- UText *s2, int32_t length2,
- uint32_t options, UErrorCode *pErrorCode) {
- const UCaseProps *csp;
-
- /* case folding variables */
- const UChar *p;
- int32_t length;
-
- /* case folding buffers, only use current-level start/limit */
- UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
- int32_t foldOffset1, foldOffset2, foldLength1, foldLength2;
-
- /* current code points */
- UChar32 c1, c2;
- uint8_t cLength1, cLength2;
-
- /* argument checking */
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(s1==NULL || s2==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- csp=ucase_getSingleton();
-
- /* for variable-length strings */
- if(length1 < 0) {
- length1 = INT32_MIN;
- }
- if (length2 < 0) {
- length2 = INT32_MIN;
- }
-
- /* initialize */
- foldOffset1 = foldOffset2 = foldLength1 = foldLength2 = 0;
-
- /* comparison loop */
- while((foldOffset1 < foldLength1 || length1 > 0 || length1 == INT32_MIN) &&
- (foldOffset2 < foldLength2 || length2 > 0 || length2 == INT32_MIN)) {
- if(foldOffset1 < foldLength1) {
- U16_NEXT_UNSAFE(fold1, foldOffset1, c1);
- cLength1 = 0;
- } else {
- c1 = UTEXT_NEXT32(s1);
- if (c1 != U_SENTINEL) {
- cLength1 = U16_LENGTH(c1);
-
- length = ucase_toFullFolding(csp, c1, &p, options);
- if(length >= 0) {
- if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings
- u_memcpy(fold1, p, length);
- foldOffset1 = 0;
- foldLength1 = length;
- U16_NEXT_UNSAFE(fold1, foldOffset1, c1);
- } else {
- c1 = length;
- }
- }
- }
-
- if(length1 != INT32_MIN) {
- length1 -= 1;
- }
- }
-
- if(foldOffset2 < foldLength2) {
- U16_NEXT_UNSAFE(fold2, foldOffset2, c2);
- cLength2 = 0;
- } else {
- c2 = UTEXT_NEXT32(s2);
- if (c2 != U_SENTINEL) {
- cLength2 = U16_LENGTH(c2);
-
- length = ucase_toFullFolding(csp, c2, &p, options);
- if(length >= 0) {
- if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings
- u_memcpy(fold2, p, length);
- foldOffset2 = 0;
- foldLength2 = length;
- U16_NEXT_UNSAFE(fold2, foldOffset2, c2);
- } else {
- c2 = length;
- }
- }
- } else if(c1 == U_SENTINEL) {
- return 0; // end of both strings at once
- }
-
- if(length2 != INT32_MIN) {
- length2 -= 1;
- }
- }
-
- if(c1 != c2) {
- return (int32_t)c1-(int32_t)c2;
- }
- }
-
- /* By now at least one of the strings is out of characters */
- length1 += foldLength1 - foldOffset1;
- length2 += foldLength2 - foldOffset2;
-
- if(length1 <= 0 && length1 != INT32_MIN) {
- if(length2 <= 0) {
- return 0;
- } else {
- return -1;
- }
- } else {
- if (length1 <= 0) {
- return 0;
- } else {
- return 1;
- }
- }
-}
-
-U_CAPI int32_t U_EXPORT2
-utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
- UText *s2, int64_t limit2,
- uint32_t options, UErrorCode *pErrorCode) {
- const UCaseProps *csp;
-
- /* case folding variables */
- const UChar *p;
- int32_t length;
-
- /* case folding buffers, only use current-level start/limit */
- UChar fold1[UCASE_MAX_STRING_LENGTH+1], fold2[UCASE_MAX_STRING_LENGTH+1];
- int32_t foldOffset1, foldOffset2, foldLength1, foldLength2;
-
- /* current code points */
- UChar32 c1, c2;
-
- /* native indexes into s1 and s2 */
- int64_t index1, index2;
-
- /* argument checking */
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
- if(s1==NULL || s2==NULL) {
- *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- csp=ucase_getSingleton();
-
- /* initialize */
- index1 = (limit1 >= 0 ? UTEXT_GETNATIVEINDEX(s1) : 0);
- index2 = (limit2 >= 0 ? UTEXT_GETNATIVEINDEX(s2) : 0);
-
- foldOffset1 = foldOffset2 = foldLength1 = foldLength2 = 0;
-
- /* comparison loop */
- while((foldOffset1 < foldLength1 || limit1 < 0 || index1 < limit1) &&
- (foldOffset2 < foldLength2 || limit2 < 0 || index2 < limit2)) {
- if(foldOffset1 < foldLength1) {
- U16_NEXT_UNSAFE(fold1, foldOffset1, c1);
- } else {
- c1 = UTEXT_NEXT32(s1);
- if (c1 != U_SENTINEL) {
- length = ucase_toFullFolding(csp, c1, &p, options);
- if(length >= 0) {
- if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings
- u_memcpy(fold1, p, length);
- foldOffset1 = 0;
- foldLength1 = length;
- U16_NEXT_UNSAFE(fold1, foldOffset1, c1);
- } else {
- c1 = length;
- }
- }
- }
-
- if (limit1 >= 0) {
- index1 = UTEXT_GETNATIVEINDEX(s1);
- }
- }
-
- if(foldOffset2 < foldLength2) {
- U16_NEXT_UNSAFE(fold2, foldOffset2, c2);
- } else {
- c2 = UTEXT_NEXT32(s2);
- if (c2 != U_SENTINEL) {
- length = ucase_toFullFolding(csp, c2, &p, options);
- if(length >= 0) {
- if(length <= UCASE_MAX_STRING_LENGTH) { // !!!: Does not correctly handle 0-length folded-case strings
- u_memcpy(fold2, p, length);
- foldOffset2 = 0;
- foldLength2 = length;
- U16_NEXT_UNSAFE(fold2, foldOffset2, c2);
- } else {
- c2 = length;
- }
- }
- } else if(c1 == U_SENTINEL) {
- return 0;
- }
-
- if (limit2 >= 0) {
- index2 = UTEXT_GETNATIVEINDEX(s2);
- }
- }
-
- if(c1 != c2) {
- return (int32_t)c1-(int32_t)c2;
- }
- }
-
- /* By now at least one of the strings is out of characters */
- index1 -= foldLength1 - foldOffset1;
- index2 -= foldLength2 - foldOffset2;
-
- if(limit1 >= 0 && index1 >= limit1) {
- if(index2 >= limit2) {
- return 0;
- } else {
- return -1;
- }
- } else {
- if(index1 >= limit1) {
- return 0;
- } else {
- return 1;
- }
- }
-}
-
-
U_CAPI UBool U_EXPORT2
utext_isWritable(const UText *ut)
{
U_CAPI UText * U_EXPORT2
utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status) {
- UText *result;
- result = src->pFuncs->clone(dest, src, deep, status);
+ if (U_FAILURE(*status)) {
+ return dest;
+ }
+ UText *result = src->pFuncs->clone(dest, src, deep, status);
+ if (U_FAILURE(*status)) {
+ return result;
+ }
+ if (result == NULL) {
+ *status = U_MEMORY_ALLOCATION_ERROR;
+ return result;
+ }
if (readOnly) {
utext_freeze(result);
}
adjustPointer(dest, &dest->r, src);
adjustPointer(dest, (const void **)&dest->chunkContents, src);
+ // The newly shallow-cloned UText does _not_ own the underlying storage for the text.
+ // (The source for the clone may or may not have owned the text.)
+
+ dest->providerProperties &= ~I32_FLAG(UTEXT_PROVIDER_OWNS_TEXT);
+
return dest;
}
int32_t cIx = srcIx;
int32_t dIx = destIx;
int32_t dIxSaved = destIx;
- U8_NEXT(s8, srcIx, strLen, c);
+ U8_NEXT_OR_FFFD(s8, srcIx, strLen, c);
if (c==0 && nulTerminated) {
srcIx--;
break;
}
- if (c<0) {
- // Illegal UTF-8. Replace with sub character.
- c = 0x0fffd;
- }
U16_APPEND_UNSAFE(buf, destIx, c);
do {
int32_t sIx = srcIx; // ix of last byte of multi-byte u8 char
// Get the full character from the UTF8 string.
- // use code derived from tbe macros in utf.8
+ // use code derived from the macros in utf8.h
// Leaves srcIx pointing at the first byte of the UTF-8 char.
//
- if (c<=0xbf) {
- c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -1);
- // leaves srcIx at first byte of the multi-byte char.
- } else {
- c=0x0fffd;
- }
+ c=utf8_prevCharSafeBody(s8, 0, &srcIx, c, -3);
+ // leaves srcIx at first byte of the multi-byte char.
// Store the character in UTF-16 buffer.
if (c<0x10000) {
{
UChar *pDest = dest;
- UChar *pDestLimit = dest+destCapacity;
+ UChar *pDestLimit = (dest!=NULL)?(dest+destCapacity):NULL;
UChar32 ch=0;
int32_t index = 0;
int32_t reqLength = 0;
if(ch <=0x7f){
*pDest++=(UChar)ch;
}else{
- ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
- if(ch<0){
- ch = 0xfffd;
- }
+ ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
if(U_IS_BMP(ch)){
*(pDest++)=(UChar)ch;
}else{
- *(pDest++)=UTF16_LEAD(ch);
+ *(pDest++)=U16_LEAD(ch);
if(pDest<pDestLimit){
- *(pDest++)=UTF16_TRAIL(ch);
+ *(pDest++)=U16_TRAIL(ch);
}else{
reqLength++;
break;
if(ch <= 0x7f){
reqLength++;
}else{
- ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -1);
- if(ch<0){
- ch = 0xfffd;
- }
+ ch=utf8_nextCharSafeBody(pSrc, &index, srcLength, ch, -3);
reqLength+=U16_LENGTH(ch);
}
}
U_CDECL_END
-static const struct UTextFuncs utf8Funcs =
+static const struct UTextFuncs utf8Funcs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
UnicodeString buffer(dest, 0, destCapacity); // writable alias
rep->extractBetween(start32, limit32, buffer);
repTextAccess(ut, limit32, TRUE);
-
+
return u_terminateUChars(dest, destCapacity, length, status);
}
repTextAccess(ut, nativeIterIndex, TRUE);
}
-static const struct UTextFuncs repFuncs =
+static const struct UTextFuncs repFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
repTextLength,
repTextAccess,
repTextExtract,
- repTextReplace,
- repTextCopy,
+ repTextReplace,
+ repTextCopy,
NULL, // MapOffsetToNative,
NULL, // MapIndexToUTF16,
repTextClose,
return NULL;
}
ut = utext_setup(ut, sizeof(ReplExtra), status);
+ if(U_FAILURE(*status)) {
+ return ut;
+ }
ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_WRITABLE);
if(rep->hasMetaData()) {
}
-static const struct UTextFuncs unistrFuncs =
+static const struct UTextFuncs unistrFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
unistrTextLength,
unistrTextAccess,
unistrTextExtract,
- unistrTextReplace,
- unistrTextCopy,
+ unistrTextReplace,
+ unistrTextCopy,
NULL, // MapOffsetToNative,
NULL, // MapIndexToUTF16,
unistrTextClose,
U_CAPI UText * U_EXPORT2
utext_openUnicodeString(UText *ut, UnicodeString *s, UErrorCode *status) {
- // TODO: use openConstUnicodeString, then add in the differences.
- //
- ut = utext_setup(ut, 0, status);
+ ut = utext_openConstUnicodeString(ut, s, status);
if (U_SUCCESS(*status)) {
- ut->pFuncs = &unistrFuncs;
- ut->context = s;
- ut->providerProperties = I32_FLAG(UTEXT_PROVIDER_STABLE_CHUNKS)|
- I32_FLAG(UTEXT_PROVIDER_WRITABLE);
-
- ut->chunkContents = s->getBuffer();
- ut->chunkLength = s->length();
- ut->chunkNativeStart = 0;
- ut->chunkNativeLimit = ut->chunkLength;
- ut->nativeIndexingLimit = ut->chunkLength;
+ ut->providerProperties |= I32_FLAG(UTEXT_PROVIDER_WRITABLE);
}
return ut;
}
U_CAPI UText * U_EXPORT2
utext_openConstUnicodeString(UText *ut, const UnicodeString *s, UErrorCode *status) {
+ if (U_SUCCESS(*status) && s->isBogus()) {
+ // The UnicodeString is bogus, but we still need to detach the UText
+ // from whatever it was hooked to before, if anything.
+ utext_openUChars(ut, NULL, 0, status);
+ *status = U_ILLEGAL_ARGUMENT_ERROR;
+ return ut;
+ }
ut = utext_setup(ut, 0, status);
// note: use the standard (writable) function table for UnicodeString.
// The flag settings disable writing, so having the functions in
} else {
limit32 = pinIndex(limit, INT32_MAX);
}
-
di = 0;
for (si=start32; si<limit32; si++) {
if (strLength<0 && s[si]==0) {
ut->chunkLength = si;
ut->nativeIndexingLimit = si;
strLength = si;
+ limit32 = si;
break;
}
+ U_ASSERT(di>=0); /* to ensure di never exceeds INT32_MAX, which must not happen logically */
if (di<destCapacity) {
// only store if there is space.
dest[di] = s[si];
// If the limit index points to a lead surrogate of a pair,
// add the corresponding trail surrogate to the destination.
if (si>0 && U16_IS_LEAD(s[si-1]) &&
- ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si])))
+ ((si<strLength || strLength<0) && U16_IS_TRAIL(s[si])))
{
if (di<destCapacity) {
// store only if there is space in the output buffer.
- dest[di++] = s[si++];
+ dest[di++] = s[si];
}
+ si++;
}
// Put iteration position at the point just following the extracted text
- ut->chunkOffset = uprv_min(strLength, start32 + destCapacity);
+ if (si <= ut->chunkNativeLimit) {
+ ut->chunkOffset = si;
+ } else {
+ ucstrTextAccess(ut, si, TRUE);
+ }
// Add a terminating NUL if space in the buffer permits,
// and set the error status as required.
return di;
}
-static const struct UTextFuncs ucstrFuncs =
+static const struct UTextFuncs ucstrFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
CharacterIterator *srcCI =(CharacterIterator *)src->context;
srcCI = srcCI->clone();
dest = utext_openCharacterIterator(dest, srcCI, status);
+ if (U_FAILURE(*status)) {
+ return dest;
+ }
// cast off const on getNativeIndex.
// For CharacterIterator based UTexts, this is safe, the operation is const.
int64_t ix = utext_getNativeIndex((UText *)src);
while (srci<limit32) {
UChar32 c = ci->next32PostInc();
int32_t len = U16_LENGTH(c);
+ U_ASSERT(desti+len>0); /* to ensure desti+len never exceeds INT32_MAX, which must not happen logically */
if (desti+len <= destCapacity) {
U16_APPEND_UNSAFE(dest, desti, c);
copyLimit = srci+len;
}
srci += len;
}
-
+
charIterTextAccess(ut, copyLimit, TRUE);
u_terminateUChars(dest, destCapacity, desti, status);
return desti;
}
-static const struct UTextFuncs charIterFuncs =
+static const struct UTextFuncs charIterFuncs =
{
sizeof(UTextFuncs),
0, 0, 0, // Reserved alignment padding
}
return ut;
}
-
-
-