+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2004-2010, International Business Machines
+* Copyright (C) 2004-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utext.h
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* @stable ICU 3.4
*/
U_STABLE UText * U_EXPORT2
-utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
/**
* @stable ICU 3.4
*/
U_STABLE UText * U_EXPORT2
-utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
/**
* @stable ICU 3.4
*/
U_STABLE UText * U_EXPORT2
-utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
+utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
/**
* Open a UText implementation over an ICU CharacterIterator.
* @stable ICU 3.4
*/
U_STABLE UText * U_EXPORT2
-utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
+utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
#endif
/*****************************************************************************
*
- * Functions to work with the text represeted by a UText wrapper
+ * Functions to work with the text represented by a UText wrapper
*
*****************************************************************************/
*
* The iteration position will be set to the start of the returned code point.
*
- * This function is roughly equivalent to the the sequence
+ * This function is roughly equivalent to the sequence
* utext_setNativeIndex(index);
* utext_current32();
* (There is a subtle difference if the index is out of bounds by being less than zero -
utext_setNativeIndex(UText *ut, int64_t nativeIndex);
/**
- * Move the iterator postion by delta code points. The number of code points
+ * Move the iterator position by delta code points. The number of code points
* is a signed number; a negative delta will move the iterator backwards,
* towards the start of the text.
* <p>
utext_moveIndex32(UText *ut, int32_t delta);
/**
- * Get the native index of the character preceeding the current position.
+ * Get the native index of the character preceding the current position.
* If the iteration position is already at the start of the text, zero
* is returned.
* The value returned is the same as that obtained from the following sequence,
* native index of the character most recently returned from utext_next().
*
* @param ut the text to be accessed
- * @return the native index of the character preceeding the current index position,
+ * @return the native index of the character preceding the current index position,
* or zero if the current position is at the start of the text.
* @stable ICU 3.6
*/
* @param ut the UText from which to extract data.
* @param nativeStart the native index of the first character to extract.\
* If the specified index is out of range,
- * it will be pinned to to be within 0 <= index <= textLength
+ * it will be pinned to be within 0 <= index <= textLength
* @param nativeLimit the native string index of the position following the last
* character to extract. If the specified index is out of range,
- * it will be pinned to to be within 0 <= index <= textLength.
+ * it will be pinned to be within 0 <= index <= textLength.
* nativeLimit must be >= nativeStart.
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
* @param destCapacity The size, in UChars, of the destination buffer. May be zero
UErrorCode *status);
-/**
- * Compare two UTexts (binary order). The comparison begins at each source text's
- * iteration position. The iteration position of each UText will be left following
- * the last character compared.
- *
- * The comparison is done in code point order; unlike u_strCompare, you
- * cannot choose to use code unit order. This is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A length argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both length arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string in UTF-32 code points.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string in UTF-32 code points.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_compare(UText *s1, int32_t length1,
- UText *s2, int32_t length2);
-
-/**
- * Compare two UTexts (binary order). The comparison begins at each source text's
- * iteration position. The iteration position of each UText will be left following
- * the last character compared. This method differs from utext_compare in that
- * it accepts native limits rather than lengths for each string.
- *
- * The comparison is done in code point order; unlike u_strCompare, you
- * cannot choose to use code unit order. This is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A limit argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both limit arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param limit1 Native index of the last character in the first source string to be considered.
- *
- * @param s2 Second source string.
- * @param limit2 Native index of the last character in the second source string to be considered.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_compareNativeLimit(UText *s1, int64_t limit1,
- UText *s2, int64_t limit2);
-
-/**
- * Compare two UTexts case-insensitively using full case folding. The comparison
- * begins at each source text's iteration position. The iteration position of each
- * UText will be left following the last character compared.
- *
- * The comparison is done in code point order; this is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A length argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both length arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string in UTF-32 code points.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string in UTF-32 code points.
- *
- * @param options A bit set of options:
- * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- * Comparison in code point order with default case folding.
- *
- * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @param pErrorCode Must be a valid pointer to an error code value,
- * which must not indicate a failure before the function call.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_caseCompare(UText *s1, int32_t length1,
- UText *s2, int32_t length2,
- uint32_t options, UErrorCode *pErrorCode);
-
-/**
- * Compare two UTexts case-insensitively using full case folding. The comparison
- * begins at each source text's iteration position. The iteration position of each
- * UText will be left following the last character compared. This method differs from
- * utext_caseCompare in that it accepts native limits rather than lengths for each
- * string.
- *
- * The comparison is done in code point order; this is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A limit argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both length arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param limit1 Native index of the last character in the first source string to be considered.
- *
- * @param s2 Second source string.
- * @param limit2 Native index of the last character in the second source string to be considered.
- *
- * @param options A bit set of options:
- * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- * Comparison in code point order with default case folding.
- *
- * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @param pErrorCode Must be a valid pointer to an error code value,
- * which must not indicate a failure before the function call.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
- UText *s2, int64_t limit2,
- uint32_t options, UErrorCode *pErrorCode);
-
/************************************************************************************
*
*
************************************************************************************/
+#ifndef U_HIDE_INTERNAL_API
/**
* inline version of utext_current32(), for performance-critical situations.
*
#define UTEXT_CURRENT32(ut) \
((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
+#endif /* U_HIDE_INTERNAL_API */
/**
* inline version of utext_next32(), for performance-critical situations.
*
* @stable ICU 3.8
*/
-#define UTEXT_SETNATIVEINDEX(ut, ix) \
- { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
- if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
- (ut)->chunkOffset=(int32_t)__offset; \
- } else { \
- utext_setNativeIndex((ut), (ix)); } }
+#if LOG_UTEXT_SETNATIVEINDEX
+/* Add logging for <rdar://problem/44884660> */
+#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
+ int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+ if ((ut)->chunkContents!=0 && __offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+ (ut)->chunkOffset=(int32_t)__offset; \
+ } else if ((ut)->chunkContents==0 && __offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit) { \
+ os_log(OS_LOG_DEFAULT, "# UTEXT_SETNATIVEINDEX (ut) %p, (ut)->chunkContents 0, __offset %lld", (ut), __offset); \
+ } else { \
+ utext_setNativeIndex((ut), (ix));
+ } \
+} UPRV_BLOCK_MACRO_END
+#else
+#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
+ int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+ if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+ (ut)->chunkOffset=(int32_t)__offset; \
+ } else { \
+ utext_setNativeIndex((ut), (ix)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+#endif
* Caution: freezing a UText will disable changes made via the specific
* frozen UText wrapper only; it will not have any effect on the ability to
* directly modify the text by bypassing the UText. Any such backdoor modifications
- * are always an error while UText access is occuring because the underlying
+ * are always an error while UText access is occurring because the underlying
* text can get out of sync with UText's buffering.
* </p>
*
* be NUL-terminated if there is sufficient space in the destination buffer.
*
* @param ut the UText from which to extract data.
- * @param nativeStart the native index of the first characer to extract.
+ * @param nativeStart the native index of the first character to extract.
* @param nativeLimit the native string index of the position following the last
* character to extract.
* @param dest the UChar (UTF-16) buffer into which the extracted text is placed
struct UTextFuncs {
/**
* (public) Function table size, sizeof(UTextFuncs)
- * Intended for use should the table grow to accomodate added
+ * Intended for use should the table grow to accommodate added
* functions in the future, to allow tests for older format
* function tables that do not contain the extensions.
*
struct UText {
/**
* (private) Magic. Used to help detect when UText functions are handed
- * invalid or unitialized UText structs.
+ * invalid or uninitialized UText structs.
* utext_openXYZ() functions take an initialized,
* but not necessarily open, UText struct as an
* optional fill-in parameter. This magic field
/**
- * Text provider properties. This set of flags is maintainted by the
+ * Text provider properties. This set of flags is maintained by the
* text provider implementation.
* @stable ICU 3.4
*/
void *pExtra;
/**
- * (protected) Pointer to string or text-containin object or similar.
+ * (protected) Pointer to string or text-containing object or similar.
* This is the source of the text that this UText is wrapping, in a format
* that is known to the text provider functions.
* @stable ICU 3.4
U_STABLE UText * U_EXPORT2
utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
+// do not use #ifndef U_HIDE_INTERNAL_API around the following!
/**
* @internal
* Value used to help identify correctly initialized UText structs.