/*
*******************************************************************************
*
-* Copyright (C) 2001-2004, International Business Machines
+* Copyright (C) 2001-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
#include "unicode/utypes.h"
#include "unicode/uloc.h"
#include "unicode/ustring.h"
+#include "unicode/ucasemap.h"
#include "unicode/ubrk.h"
#include "cmemory.h"
#include "ucase.h"
-#include "unormimp.h"
#include "ustr_imp.h"
/* string casing ------------------------------------------------------------ */
return U_SENTINEL;
}
-typedef int32_t U_CALLCONV
-UCaseMapFull(const UCaseProps *csp, UChar32 c,
- UCaseContextIterator *iter, void *context,
- const UChar **pString,
- const char *locale, int32_t *locCache);
-
/*
- * Lowercases [srcStart..srcLimit[ but takes
+ * Case-maps [srcStart..srcLimit[ but takes
* context [0..srcLength[ into account.
*/
static int32_t
-_caseMap(UCaseProps *csp, UCaseMapFull *map,
+_caseMap(const UCaseMap *csm, UCaseMapFull *map,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc,
int32_t srcStart, int32_t srcLimit,
- const char *locale, int32_t *locCache,
UErrorCode *pErrorCode) {
const UChar *s;
- UChar32 c;
+ UChar32 c, c2 = 0;
int32_t srcIndex, destIndex;
+ int32_t locCache;
+
+ locCache=csm->locCache;
/* case mapping loop */
srcIndex=srcStart;
csc->cpStart=srcIndex;
U16_NEXT(src, srcIndex, srcLimit, c);
csc->cpLimit=srcIndex;
- c=map(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
- destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ c=map(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &locCache);
+ if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
+ /* fast path version of appendResult() for BMP results */
+ dest[destIndex++]=(UChar)c2;
+ } else {
+ destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ }
}
if(destIndex>destCapacity) {
return destIndex;
}
+static void
+setTempCaseMapLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
+ /*
+ * Store the initial language subtag of locale in csm->locale.
+ *
+ * We could call ucasemap_setLocale(), but here we really only care about
+ * the initial language subtag, we need not return the real string via
+ * ucasemap_getLocale(), and we don't care about only getting "x" from
+ * "x-some-thing" etc.
+ *
+ * We ignore locales with a longer-than-3 initial subtag.
+ *
+ * We also do not fill in the locCache because it is rarely used,
+ * and not worth setting unless we reuse it for many case mapping operations.
+ * (That's why UCaseMap was created.)
+ *
+ * What the code below does:
+ * - A NULL locale is replaced by uloc_getDefault().
+ * - Characters are copied into csm->locale until a NUL, a hyphen or an
+ *   underscore is seen, or until 4 characters have been copied.
+ * - If at most 3 characters were copied, csm->locale is NUL-terminated
+ *   right after them; if 4 were copied, csm->locale becomes the empty string.
+ * - pErrorCode is neither read nor written here.
+ */
+ int i;
+ char c;
+
+ /* the internal functions require locale!=NULL */
+ if(locale==NULL) {
+ locale=uloc_getDefault();
+ }
+ for(i=0; i<4 && (c=locale[i])!=0 && c!='-' && c!='_'; ++i) {
+ csm->locale[i]=c;
+ }
+ if(i<=3) {
+ csm->locale[i]=0; /* Up to 3 non-separator characters. */
+ } else {
+ csm->locale[0]=0; /* Longer-than-3 initial subtag: Ignore. */
+ }
+}
+
+/*
+ * Set parameters on an empty UCaseMap, for UCaseMap-less API functions.
+ * Do this fast because it is called with every function call.
+ *
+ * - csm->csp is set to ucase_getSingleton() only while it is still NULL,
+ *   so a value stored by the caller beforehand is kept.
+ * - A non-NULL, empty locale string makes csm->locale empty right here;
+ *   every other locale value, including NULL, is handed to
+ *   setTempCaseMapLocale().
+ */
+static U_INLINE void
+setTempCaseMap(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode) {
+ if(csm->csp==NULL) {
+ csm->csp=ucase_getSingleton();
+ }
+ if(locale!=NULL && locale[0]==0) {
+ csm->locale[0]=0;
+ } else {
+ setTempCaseMapLocale(csm, locale, pErrorCode);
+ }
+}
+
#if !UCONFIG_NO_BREAK_ITERATION
/*
* Internal titlecasing function.
- *
- * Must get titleIter!=NULL.
*/
static int32_t
-_toTitle(UCaseProps *csp,
+_toTitle(UCaseMap *csm,
UChar *dest, int32_t destCapacity,
const UChar *src, UCaseContext *csc,
int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale, int32_t *locCache,
UErrorCode *pErrorCode) {
const UChar *s;
UChar32 c;
- int32_t prev, index, destIndex;
+ int32_t prev, titleStart, titleLimit, idx, destIndex, length;
UBool isFirstIndex;
+ if(csm->iter!=NULL) {
+ ubrk_setText(csm->iter, src, srcLength, pErrorCode);
+ } else {
+ csm->iter=ubrk_open(UBRK_WORD, csm->locale,
+ src, srcLength,
+ pErrorCode);
+ }
+ if(U_FAILURE(*pErrorCode)) {
+ return 0;
+ }
+
/* set up local variables */
destIndex=0;
prev=0;
/* find next index where to titlecase */
if(isFirstIndex) {
isFirstIndex=FALSE;
- index=ubrk_first(titleIter);
+ idx=ubrk_first(csm->iter);
} else {
- index=ubrk_next(titleIter);
+ idx=ubrk_next(csm->iter);
}
- if(index==UBRK_DONE || index>srcLength) {
- index=srcLength;
+ if(idx==UBRK_DONE || idx>srcLength) {
+ idx=srcLength;
}
- /* lowercase [prev..index[ */
- if(prev<index) {
- destIndex+=
- _caseMap(
- csp, ucase_toFullLower,
- dest+destIndex, destCapacity-destIndex,
- src, csc,
- prev, index,
- locale, locCache,
- pErrorCode);
- }
+ /*
+ * Unicode 4 & 5 section 3.13 Default Case Operations:
+ *
+ * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
+ * #29, "Text Boundaries." Between each pair of word boundaries, find the first
+ * cased character F. If F exists, map F to default_title(F); then map each
+ * subsequent character C to default_lower(C).
+ *
+ * In this implementation, segment [prev..idx[ into 3 parts:
+ * a) uncased characters (copy as-is) [prev..titleStart[
+ * b) first cased letter (titlecase) [titleStart..titleLimit[
+ * c) subsequent characters (lowercase) [titleLimit..idx[
+ */
+ if(prev<idx) {
+ /* find and copy uncased characters [prev..titleStart[ */
+ titleStart=titleLimit=prev;
+ U16_NEXT(src, titleLimit, idx, c);
+ if((csm->options&U_TITLECASE_NO_BREAK_ADJUSTMENT)==0 && UCASE_NONE==ucase_getType(csm->csp, c)) {
+ /* Adjust the titlecasing index (titleStart) to the next cased character. */
+ for(;;) {
+ titleStart=titleLimit;
+ if(titleLimit==idx) {
+ /*
+ * only uncased characters in [prev..idx[
+ * stop with titleStart==titleLimit==idx
+ */
+ break;
+ }
+ U16_NEXT(src, titleLimit, idx, c);
+ if(UCASE_NONE!=ucase_getType(csm->csp, c)) {
+ break; /* cased letter at [titleStart..titleLimit[ */
+ }
+ }
+ length=titleStart-prev;
+ if(length>0) {
+ if((destIndex+length)<=destCapacity) {
+ uprv_memcpy(dest+destIndex, src+prev, length*U_SIZEOF_UCHAR);
+ }
+ destIndex+=length;
+ }
+ }
- if(index>=srcLength) {
- break;
- }
+ if(titleStart<titleLimit) {
+ /* titlecase c which is from [titleStart..titleLimit[ */
+ csc->cpStart=titleStart;
+ csc->cpLimit=titleLimit;
+ c=ucase_toFullTitle(csm->csp, c, utf16_caseContextIterator, csc, &s, csm->locale, &csm->locCache);
+ destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+
+ /* Special case Dutch IJ titlecasing */
+ if ( titleStart+1 < idx &&
+ ucase_getCaseLocale(csm->locale,&csm->locCache) == UCASE_LOC_DUTCH &&
+ ( src[titleStart] == (UChar32) 0x0049 || src[titleStart] == (UChar32) 0x0069 ) &&
+ ( src[titleStart+1] == (UChar32) 0x004A || src[titleStart+1] == (UChar32) 0x006A )) {
+ c=(UChar32) 0x004A;
+ destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ titleLimit++;
+ }
- /* titlecase the character at the found index */
- csc->cpStart=index;
- U16_NEXT(src, index, srcLength, c);
- csc->cpLimit=index;
- c=ucase_toFullTitle(csp, c, utf16_caseContextIterator, csc, &s, locale, locCache);
- destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ /* lowercase [titleLimit..idx[ */
+ if(titleLimit<idx) {
+ if((csm->options&U_TITLECASE_NO_LOWERCASE)==0) {
+ /* Normal operation: Lowercase the rest of the word. */
+ destIndex+=
+ _caseMap(
+ csm, ucase_toFullLower,
+ dest+destIndex, destCapacity-destIndex,
+ src, csc,
+ titleLimit, idx,
+ pErrorCode);
+ } else {
+ /* Optionally just copy the rest of the word unchanged. */
+ length=idx-titleLimit;
+ if((destIndex+length)<=destCapacity) {
+ uprv_memcpy(dest+destIndex, src+titleLimit, length*U_SIZEOF_UCHAR);
+ }
+ destIndex+=length;
+ }
+ }
+ }
+ }
- prev=index;
+ prev=idx;
}
if(destIndex>destCapacity) {
return destIndex;
}
+#endif
+
+/* functions available in the common library (for unistr_case.cpp) */
+
U_CFUNC int32_t
-ustr_toTitle(UCaseProps *csp,
+ustr_toLower(const UCaseProps *csp, /* Lowercases src into dest via a temporary UCaseMap built from csp and locale. */
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode) {
+ UCaseMap csm={ NULL }; /* temporary UCaseMap, local to this call */
UCaseContext csc={ NULL };
- int32_t locCache;
+ csm.csp=csp; /* use the caller-supplied case properties */
+ setTempCaseMap(&csm, locale, pErrorCode); /* supplies a default csp if NULL and derives csm.locale from locale */
csc.p=(void *)src;
csc.limit=srcLength;
- locCache=0;
- return _toTitle(csp,
+ return _caseMap(&csm, ucase_toFullLower, /* ucase_toFullLower is the per-code-point mapping function */
dest, destCapacity,
- src, &csc, srcLength,
- titleIter, locale, &locCache, pErrorCode);
+ src, &csc, 0, srcLength, /* map the whole string: [0..srcLength[ */
+ pErrorCode);
}
-#endif
-
-/* functions available in the common library (for unistr_case.cpp) */
-
U_CFUNC int32_t
-ustr_toLower(UCaseProps *csp,
+ustr_toUpper(const UCaseProps *csp, /* Uppercases src into dest via a temporary UCaseMap built from csp and locale. */
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
+ UCaseMap csm={ NULL }; /* temporary UCaseMap, local to this call */
UCaseContext csc={ NULL };
- int32_t locCache;
+ csm.csp=csp; /* use the caller-supplied case properties */
+ setTempCaseMap(&csm, locale, pErrorCode); /* supplies a default csp if NULL and derives csm.locale from locale */
csc.p=(void *)src;
csc.limit=srcLength;
- locCache=0;
- return _caseMap(csp, ucase_toFullLower,
+ return _caseMap(&csm, ucase_toFullUpper, /* ucase_toFullUpper is the per-code-point mapping function */
dest, destCapacity,
src, &csc, 0, srcLength,
- locale, &locCache, pErrorCode);
+ pErrorCode); /* the locale and its cache now travel inside csm */
}
+#if !UCONFIG_NO_BREAK_ITERATION
+
U_CFUNC int32_t
-ustr_toUpper(UCaseProps *csp,
+ustr_toTitle(const UCaseProps *csp, /* Titlecases src into dest via a temporary UCaseMap built from the arguments. */
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
- const char *locale,
+ UBreakIterator *titleIter, /* may be NULL; see the cleanup at the end of this function */
+ const char *locale, uint32_t options, /* _toTitle() tests options for U_TITLECASE_NO_BREAK_ADJUSTMENT and U_TITLECASE_NO_LOWERCASE */
UErrorCode *pErrorCode) {
+ UCaseMap csm={ NULL }; /* temporary UCaseMap, local to this call */
UCaseContext csc={ NULL };
- int32_t locCache;
+ int32_t length;
+ csm.csp=csp; /* use the caller-supplied case properties */
+ csm.iter=titleIter; /* if NULL, _toTitle() opens a UBRK_WORD iterator and stores it in csm.iter */
+ csm.options=options;
+ setTempCaseMap(&csm, locale, pErrorCode); /* supplies a default csp if NULL and derives csm.locale from locale */
csc.p=(void *)src;
csc.limit=srcLength;
- locCache=0;
- return _caseMap(csp, ucase_toFullUpper,
+ length=_toTitle(&csm, /* result is kept so that cleanup can run before returning */
dest, destCapacity,
- src, &csc, 0, srcLength,
- locale, &locCache, pErrorCode);
+ src, &csc, srcLength,
+ pErrorCode);
+ if(titleIter==NULL && csm.iter!=NULL) { /* close only an iterator opened internally, never the caller's */
+ ubrk_close(csm.iter);
+ }
+ return length;
}
+#endif
+
U_CFUNC int32_t
-ustr_foldCase(UCaseProps *csp,
+ustr_foldCase(const UCaseProps *csp,
UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
uint32_t options,
int32_t srcIndex, destIndex;
const UChar *s;
- UChar32 c;
+ UChar32 c, c2 = 0;
/* case mapping loop */
srcIndex=destIndex=0;
while(srcIndex<srcLength) {
U16_NEXT(src, srcIndex, srcLength, c);
c=ucase_toFullFolding(csp, c, &s, options);
- destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ if((destIndex<destCapacity) && (c<0 ? (c2=~c)<=0xffff : UCASE_MAX_STRING_LENGTH<c && (c2=c)<=0xffff)) {
+ /* fast path version of appendResult() for BMP results */
+ dest[destIndex++]=(UChar)c2;
+ } else {
+ destIndex=appendResult(dest, destIndex, destCapacity, c, s);
+ }
}
if(destIndex>destCapacity) {
* Implement argument checking and buffer handling
* for string case mapping as a common function.
*/
-enum {
- TO_LOWER,
- TO_UPPER,
- TO_TITLE,
- FOLD_CASE
-};
/* common internal function for public API functions */
static int32_t
-caseMap(UChar *dest, int32_t destCapacity,
+caseMap(const UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
- UBreakIterator *titleIter,
- const char *locale,
- uint32_t options,
int32_t toWhichCase,
UErrorCode *pErrorCode) {
UChar buffer[300];
UChar *temp;
- UCaseProps *csp;
-
int32_t destLength;
- UBool ownTitleIter;
/* check argument values */
if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
return 0;
}
- csp=ucase_getSingleton(pErrorCode);
- if(U_FAILURE(*pErrorCode)) {
- return 0;
- }
-
/* get the string length */
if(srcLength==-1) {
srcLength=u_strlen(src);
temp=dest;
}
- ownTitleIter=FALSE;
destLength=0;
if(toWhichCase==FOLD_CASE) {
- destLength=ustr_foldCase(csp, temp, destCapacity, src, srcLength,
- options, pErrorCode);
+ destLength=ustr_foldCase(csm->csp, temp, destCapacity, src, srcLength,
+ csm->options, pErrorCode);
} else {
UCaseContext csc={ NULL };
- int32_t locCache;
csc.p=(void *)src;
csc.limit=srcLength;
- locCache=0;
-
- /* the internal functions require locale!=NULL */
- if(locale==NULL) {
- locale=uloc_getDefault();
- }
if(toWhichCase==TO_LOWER) {
- destLength=_caseMap(csp, ucase_toFullLower,
+ destLength=_caseMap(csm, ucase_toFullLower,
temp, destCapacity,
src, &csc,
0, srcLength,
- locale, &locCache, pErrorCode);
+ pErrorCode);
} else if(toWhichCase==TO_UPPER) {
- destLength=_caseMap(csp, ucase_toFullUpper,
+ destLength=_caseMap(csm, ucase_toFullUpper,
temp, destCapacity,
src, &csc,
0, srcLength,
- locale, &locCache, pErrorCode);
+ pErrorCode);
} else /* if(toWhichCase==TO_TITLE) */ {
- #if UCONFIG_NO_BREAK_ITERATION
+#if UCONFIG_NO_BREAK_ITERATION
*pErrorCode=U_UNSUPPORTED_ERROR;
- #else
- if(titleIter==NULL) {
- titleIter=ubrk_open(UBRK_WORD, locale,
- src, srcLength,
- pErrorCode);
- ownTitleIter=(UBool)U_SUCCESS(*pErrorCode);
- }
- if(U_SUCCESS(*pErrorCode)) {
- destLength=_toTitle(csp, temp, destCapacity,
- src, &csc, srcLength,
- titleIter, locale, &locCache, pErrorCode);
- }
- #endif
+#else
+ /* UCaseMap is actually non-const in toTitle() APIs. */
+ destLength=_toTitle((UCaseMap *)csm, temp, destCapacity,
+ src, &csc, srcLength,
+ pErrorCode);
+#endif
}
}
if(temp!=dest) {
}
}
-#if !UCONFIG_NO_BREAK_ITERATION
- if(ownTitleIter) {
- ubrk_close(titleIter);
- }
-#endif
-
return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
}
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
- return caseMap(dest, destCapacity,
+ UCaseMap csm={ NULL };
+ setTempCaseMap(&csm, locale, pErrorCode);
+ return caseMap(&csm,
+ dest, destCapacity,
src, srcLength,
- NULL, locale, 0,
TO_LOWER, pErrorCode);
}
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode) {
- return caseMap(dest, destCapacity,
+ UCaseMap csm={ NULL };
+ setTempCaseMap(&csm, locale, pErrorCode);
+ return caseMap(&csm,
+ dest, destCapacity,
src, srcLength,
- NULL, locale, 0,
TO_UPPER, pErrorCode);
}
UBreakIterator *titleIter,
const char *locale,
UErrorCode *pErrorCode) {
- return caseMap(dest, destCapacity,
+ UCaseMap csm={ NULL };
+ int32_t length;
+
+ csm.iter=titleIter;
+ setTempCaseMap(&csm, locale, pErrorCode);
+ length=caseMap(&csm,
+ dest, destCapacity,
+ src, srcLength,
+ TO_TITLE, pErrorCode);
+ if(titleIter==NULL && csm.iter!=NULL) {
+ ubrk_close(csm.iter);
+ }
+ return length;
+}
+
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm, /* non-const: _toTitle() may open a break iterator into csm->iter or retarget the existing one */
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode) {
+ return caseMap(csm, /* shared argument checking and buffer handling; csm is passed through unchanged */
+ dest, destCapacity,
src, srcLength,
- titleIter, locale, 0,
TO_TITLE, pErrorCode);
}
const UChar *src, int32_t srcLength,
uint32_t options,
UErrorCode *pErrorCode) {
- return caseMap(dest, destCapacity,
+ UCaseMap csm={ NULL };
+ csm.csp=ucase_getSingleton();
+ csm.options=options;
+ return caseMap(&csm,
+ dest, destCapacity,
src, srcLength,
- NULL, NULL, options,
FOLD_CASE, pErrorCode);
}
const UChar *s2, int32_t length2,
uint32_t options,
UErrorCode *pErrorCode) {
- UCaseProps *csp;
+ const UCaseProps *csp;
/* current-level start/limit - s1/s2 as current */
const UChar *start1, *start2, *limit1, *limit2;
* assume that at least the option U_COMPARE_IGNORE_CASE is set
* otherwise this function would have to behave exactly as uprv_strCompare()
*/
- csp=ucase_getSingleton(pErrorCode);
+ csp=ucase_getSingleton();
if(U_FAILURE(*pErrorCode)) {
return 0;
}