X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/729e4ab9bc6618bc3d8a898e575df7f4019e29ca..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/common/unicode/utext.h diff --git a/icuSources/common/unicode/utext.h b/icuSources/common/unicode/utext.h index 088a5327..94d1275e 100644 --- a/icuSources/common/unicode/utext.h +++ b/icuSources/common/unicode/utext.h @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * -* Copyright (C) 2004-2010, International Business Machines +* Copyright (C) 2004-2012, International Business Machines * Corporation and others. All Rights Reserved. * ******************************************************************************* * file name: utext.h -* encoding: US-ASCII +* encoding: UTF-8 * tab size: 8 (not used) * indentation:4 * @@ -257,7 +259,7 @@ utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status); * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 -utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status); +utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status); /** @@ -273,7 +275,7 @@ utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErro * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 -utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status); +utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status); /** @@ -289,7 +291,7 @@ utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeStrin * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 -utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status); +utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status); /** * Open a UText implementation over an ICU CharacterIterator. @@ -304,7 +306,7 @@ utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorC * @stable ICU 3.4 */ U_STABLE UText * U_EXPORT2 -utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status); +utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status); #endif @@ -387,7 +389,7 @@ utext_equals(const UText *a, const UText *b); /***************************************************************************** * - * Functions to work with the text represeted by a UText wrapper + * Functions to work with the text represented by a UText wrapper * *****************************************************************************/ @@ -431,7 +433,7 @@ utext_isLengthExpensive(const UText *ut); * * The iteration position will be set to the start of the returned code point. * - * This function is roughly equivalent to the the sequence + * This function is roughly equivalent to the sequence * utext_setNativeIndex(index); * utext_current32(); * (There is a subtle difference if the index is out of bounds by being less than zero - @@ -590,7 +592,7 @@ U_STABLE void U_EXPORT2 utext_setNativeIndex(UText *ut, int64_t nativeIndex); /** - * Move the iterator postion by delta code points. The number of code points + * Move the iterator position by delta code points. The number of code points * is a signed number; a negative delta will move the iterator backwards, * towards the start of the text. *

@@ -609,7 +611,7 @@ U_STABLE UBool U_EXPORT2 utext_moveIndex32(UText *ut, int32_t delta); /** - * Get the native index of the character preceeding the current position. + * Get the native index of the character preceding the current position. * If the iteration position is already at the start of the text, zero * is returned. * The value returned is the same as that obtained from the following sequence, @@ -626,7 +628,7 @@ utext_moveIndex32(UText *ut, int32_t delta); * native index of the character most recently returned from utext_next(). * * @param ut the text to be accessed - * @return the native index of the character preceeding the current index position, + * @return the native index of the character preceding the current index position, * or zero if the current position is at the start of the text. * @stable ICU 3.6 */ @@ -653,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut); * @param ut the UText from which to extract data. * @param nativeStart the native index of the first character to extract.\ * If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength + * it will be pinned to be within 0 <= index <= textLength * @param nativeLimit the native string index of the position following the last * character to extract. If the specified index is out of range, - * it will be pinned to to be within 0 <= index <= textLength. + * it will be pinned to be within 0 <= index <= textLength. * nativeLimit must be >= nativeStart. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed * @param destCapacity The size, in UChars, of the destination buffer. May be zero @@ -675,147 +677,6 @@ utext_extract(UText *ut, UErrorCode *status); -/** - * Compare two UTexts (binary order). The comparison begins at each source text's - * iteration position. The iteration position of each UText will be left following - * the last character compared. - * - * The comparison is done in code point order; unlike u_strCompare, you - * cannot choose to use code unit order. This is because the characters - * in a UText are accessed one code point at a time, and may not be from a UTF-16 - * context. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * A length argument of -1 signifies that as much of the string should be used as - * is necessary to compare with the other string. If both length arguments are -1, - * the entire remaining portionss of both strings are used. - * - * @param s1 First source string. - * @param length1 Length of first source string in UTF-32 code points. - * - * @param s2 Second source string. - * @param length2 Length of second source string in UTF-32 code points. - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @internal ICU 4.4 technology preview - */ -U_INTERNAL int32_t U_EXPORT2 -utext_compare(UText *s1, int32_t length1, - UText *s2, int32_t length2); - -/** - * Compare two UTexts (binary order). The comparison begins at each source text's - * iteration position. The iteration position of each UText will be left following - * the last character compared. This method differs from utext_compare in that - * it accepts native limits rather than lengths for each string. - * - * The comparison is done in code point order; unlike u_strCompare, you - * cannot choose to use code unit order. This is because the characters - * in a UText are accessed one code point at a time, and may not be from a UTF-16 - * context. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * A limit argument of -1 signifies that as much of the string should be used as - * is necessary to compare with the other string. If both limit arguments are -1, - * the entire remaining portionss of both strings are used. - * - * @param s1 First source string. - * @param limit1 Native index of the last character in the first source string to be considered. - * - * @param s2 Second source string. - * @param limit2 Native index of the last character in the second source string to be considered. - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @internal ICU 4.4 technology preview - */ -U_INTERNAL int32_t U_EXPORT2 -utext_compareNativeLimit(UText *s1, int64_t limit1, - UText *s2, int64_t limit2); - -/** - * Compare two UTexts case-insensitively using full case folding. The comparison - * begins at each source text's iteration position. The iteration position of each - * UText will be left following the last character compared. - * - * The comparison is done in code point order; this is because the characters - * in a UText are accessed one code point at a time, and may not be from a UTF-16 - * context. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * A length argument of -1 signifies that as much of the string should be used as - * is necessary to compare with the other string. If both length arguments are -1, - * the entire remaining portionss of both strings are used. - * - * @param s1 First source string. - * @param length1 Length of first source string in UTF-32 code points. - * - * @param s2 Second source string. - * @param length2 Length of second source string in UTF-32 code points. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code point order with default case folding. - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @internal ICU 4.4 technology preview - */ -U_INTERNAL int32_t U_EXPORT2 -utext_caseCompare(UText *s1, int32_t length1, - UText *s2, int32_t length2, - uint32_t options, UErrorCode *pErrorCode); - -/** - * Compare two UTexts case-insensitively using full case folding. The comparison - * begins at each source text's iteration position. The iteration position of each - * UText will be left following the last character compared. This method differs from - * utext_caseCompare in that it accepts native limits rather than lengths for each - * string. - * - * The comparison is done in code point order; this is because the characters - * in a UText are accessed one code point at a time, and may not be from a UTF-16 - * context. - * - * This functions works with strings of different explicitly specified lengths - * unlike the ANSI C-like u_strcmp() and u_memcmp() etc. - * A limit argument of -1 signifies that as much of the string should be used as - * is necessary to compare with the other string. If both length arguments are -1, - * the entire remaining portionss of both strings are used. - * - * @param s1 First source string. - * @param limit1 Native index of the last character in the first source string to be considered. - * - * @param s2 Second source string. - * @param limit2 Native index of the last character in the second source string to be considered. - * - * @param options A bit set of options: - * - U_FOLD_CASE_DEFAULT or 0 is used for default options: - * Comparison in code point order with default case folding. - * - * - U_FOLD_CASE_EXCLUDE_SPECIAL_I - * - * @param pErrorCode Must be a valid pointer to an error code value, - * which must not indicate a failure before the function call. - * - * @return <0 or 0 or >0 as usual for string comparisons - * - * @internal ICU 4.4 technology preview - */ -U_INTERNAL int32_t U_EXPORT2 -utext_caseCompareNativeLimit(UText *s1, int64_t limit1, - UText *s2, int64_t limit2, - uint32_t options, UErrorCode *pErrorCode); - /************************************************************************************ * @@ -832,6 +693,7 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1, * ************************************************************************************/ +#ifndef U_HIDE_INTERNAL_API /** * inline version of utext_current32(), for performance-critical situations. * @@ -844,6 +706,7 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1, #define UTEXT_CURRENT32(ut) \ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \ ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut)) +#endif /* U_HIDE_INTERNAL_API */ /** * inline version of utext_next32(), for performance-critical situations. @@ -903,12 +766,28 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1, * * @stable ICU 3.8 */ -#define UTEXT_SETNATIVEINDEX(ut, ix) \ - { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ - if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ - (ut)->chunkOffset=(int32_t)__offset; \ - } else { \ - utext_setNativeIndex((ut), (ix)); } } +#if LOG_UTEXT_SETNATIVEINDEX +/* Add logging for */ +#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ + int64_t __offset = (ix) - (ut)->chunkNativeStart; \ + if ((ut)->chunkContents!=0 && __offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ + (ut)->chunkOffset=(int32_t)__offset; \ + } else if ((ut)->chunkContents==0 && __offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit) { \ + os_log(OS_LOG_DEFAULT, "# UTEXT_SETNATIVEINDEX (ut) %p, (ut)->chunkContents 0, __offset %lld", (ut), __offset); \ + } else { \ + utext_setNativeIndex((ut), (ix)); + } \ +} UPRV_BLOCK_MACRO_END +#else +#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \ + int64_t __offset = (ix) - (ut)->chunkNativeStart; \ + if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ + (ut)->chunkOffset=(int32_t)__offset; \ + } else { \ + utext_setNativeIndex((ut), (ix)); \ + } \ +} UPRV_BLOCK_MACRO_END +#endif @@ -1043,7 +922,7 @@ utext_copy(UText *ut, * Caution: freezing a UText will disable changes made via the specific * frozen UText wrapper only; it will not have any effect on the ability to * directly modify the text by bypassing the UText. Any such backdoor modifications - * are always an error while UText access is occuring because the underlying + * are always an error while UText access is occurring because the underlying * text can get out of sync with UText's buffering. *

* @@ -1191,7 +1070,7 @@ UTextAccess(UText *ut, int64_t nativeIndex, UBool forward); * be NUL-terminated if there is sufficient space in the destination buffer. * * @param ut the UText from which to extract data. - * @param nativeStart the native index of the first characer to extract. + * @param nativeStart the native index of the first character to extract. * @param nativeLimit the native string index of the position following the last * character to extract. * @param dest the UChar (UTF-16) buffer into which the extracted text is placed @@ -1348,7 +1227,7 @@ UTextClose(UText *ut); struct UTextFuncs { /** * (public) Function table size, sizeof(UTextFuncs) - * Intended for use should the table grow to accomodate added + * Intended for use should the table grow to accommodate added * functions in the future, to allow tests for older format * function tables that do not contain the extensions. * @@ -1482,7 +1361,7 @@ typedef struct UTextFuncs UTextFuncs; struct UText { /** * (private) Magic. Used to help detect when UText functions are handed - * invalid or unitialized UText structs. + * invalid or uninitialized UText structs. * utext_openXYZ() functions take an initialized, * but not necessarily open, UText struct as an * optional fill-in parameter. This magic field @@ -1504,7 +1383,7 @@ struct UText { /** - * Text provider properties. This set of flags is maintainted by the + * Text provider properties. This set of flags is maintained by the * text provider implementation. * @stable ICU 3.4 */ @@ -1589,7 +1468,7 @@ struct UText { void *pExtra; /** - * (protected) Pointer to string or text-containin object or similar. + * (protected) Pointer to string or text-containing object or similar. * This is the source of the text that this UText is wrapping, in a format * that is known to the text provider functions. * @stable ICU 3.4 @@ -1692,6 +1571,7 @@ struct UText { U_STABLE UText * U_EXPORT2 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status); +// do not use #ifndef U_HIDE_INTERNAL_API around the following! /** * @internal * Value used to help identify correctly initialized UText structs.