ICU-66108.tar.gz

[apple/icu.git] / icuSources / common / unicode / utext.h
diff --git a/icuSources/common/unicode/utext.h b/icuSources/common/unicode/utext.h

index 088a5327397fd6cf85eb3412057eff5d90de012f..94d1275e72eb4f8582156811f932a43095dca73f 100644 (file)
--- a/icuSources/common/unicode/utext.h
+++ b/icuSources/common/unicode/utext.h
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
  /*
  *******************************************************************************
  *
-*   Copyright (C) 2004-2010, International Business Machines
+*   Copyright (C) 2004-2012, International Business Machines
  *   Corporation and others.  All Rights Reserved.
  *
  *******************************************************************************
  *   file name:  utext.h
-*   encoding:   US-ASCII
+*   encoding:   UTF-8
  *   tab size:   8 (not used)
  *   indentation:4
  *
@@ -257,7 +259,7 @@ utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
   * @stable ICU 3.4
   */
  U_STABLE UText * U_EXPORT2
-utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
  
  
  /**
@@ -273,7 +275,7 @@ utext_openUnicodeString(UText *ut, U_NAMESPACE_QUALIFIER UnicodeString *s, UErro
   * @stable ICU 3.4
   */
  U_STABLE UText * U_EXPORT2
-utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeString *s, UErrorCode *status);
+utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
  
  
  /**
@@ -289,7 +291,7 @@ utext_openConstUnicodeString(UText *ut, const U_NAMESPACE_QUALIFIER UnicodeStrin
   * @stable ICU 3.4
   */
  U_STABLE UText * U_EXPORT2
-utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorCode *status);
+utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
  
  /**
   * Open a  UText implementation over an ICU CharacterIterator.
@@ -304,7 +306,7 @@ utext_openReplaceable(UText *ut, U_NAMESPACE_QUALIFIER Replaceable *rep, UErrorC
   * @stable ICU 3.4
   */
  U_STABLE UText * U_EXPORT2
-utext_openCharacterIterator(UText *ut, U_NAMESPACE_QUALIFIER CharacterIterator *ic, UErrorCode *status);
+utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
  
  #endif
  
@@ -387,7 +389,7 @@ utext_equals(const UText *a, const UText *b);
  
  /*****************************************************************************
   *
- *   Functions to work with the text represeted by a UText wrapper
+ *   Functions to work with the text represented by a UText wrapper
   *
   *****************************************************************************/
  
@@ -431,7 +433,7 @@ utext_isLengthExpensive(const UText *ut);
   *
   * The iteration position will be set to the start of the returned code point.
   *
- * This function is roughly equivalent to the the sequence
+ * This function is roughly equivalent to the sequence
   *    utext_setNativeIndex(index);
   *    utext_current32();
   * (There is a subtle difference if the index is out of bounds by being less than zero - 
@@ -590,7 +592,7 @@ U_STABLE void U_EXPORT2
  utext_setNativeIndex(UText *ut, int64_t nativeIndex);
  
  /**
- * Move the iterator postion by delta code points.  The number of code points
+ * Move the iterator position by delta code points.  The number of code points
   * is a signed number; a negative delta will move the iterator backwards,
   * towards the start of the text.
   * <p>
@@ -609,7 +611,7 @@ U_STABLE UBool U_EXPORT2
  utext_moveIndex32(UText *ut, int32_t delta);
  
  /**
- * Get the native index of the character preceeding the current position.
+ * Get the native index of the character preceding the current position.
   * If the iteration position is already at the start of the text, zero
   * is returned.
   * The value returned is the same as that obtained from the following sequence,
@@ -626,7 +628,7 @@ utext_moveIndex32(UText *ut, int32_t delta);
   *   native index of the character most recently returned from utext_next().
   *
   * @param ut the text to be accessed
- * @return the native index of the character preceeding the current index position,
+ * @return the native index of the character preceding the current index position,
   *         or zero if the current position is at the start of the text.
   * @stable ICU 3.6
   */
@@ -653,10 +655,10 @@ utext_getPreviousNativeIndex(UText *ut);
   * @param  ut    the UText from which to extract data.
   * @param  nativeStart the native index of the first character to extract.\
   *               If the specified index is out of range,
- *               it will be pinned to to be within 0 <= index <= textLength
+ *               it will be pinned to be within 0 <= index <= textLength
   * @param  nativeLimit the native string index of the position following the last
   *               character to extract.  If the specified index is out of range,
- *               it will be pinned to to be within 0 <= index <= textLength.
+ *               it will be pinned to be within 0 <= index <= textLength.
   *               nativeLimit must be >= nativeStart.
   * @param  dest  the UChar (UTF-16) buffer into which the extracted text is placed
   * @param  destCapacity  The size, in UChars, of the destination buffer.  May be zero
@@ -675,147 +677,6 @@ utext_extract(UText *ut,
               UErrorCode *status);
  
  
-/**
- * Compare two UTexts (binary order). The comparison begins at each source text's
- * iteration position. The iteration position of each UText will be left following
- * the last character compared.
- *
- * The comparison is done in code point order; unlike u_strCompare, you
- * cannot choose to use code unit order. This is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A length argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both length arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string in UTF-32 code points.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string in UTF-32 code points.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_compare(UText *s1, int32_t length1,
-              UText *s2, int32_t length2);    
-
-/**
- * Compare two UTexts (binary order). The comparison begins at each source text's
- * iteration position. The iteration position of each UText will be left following
- * the last character compared. This method differs from utext_compare in that
- * it accepts native limits rather than lengths for each string.
- *
- * The comparison is done in code point order; unlike u_strCompare, you
- * cannot choose to use code unit order. This is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A limit argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both limit arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param limit1 Native index of the last character in the first source string to be considered.
- *
- * @param s2 Second source string.
- * @param limit2 Native index of the last character in the second source string to be considered.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_compareNativeLimit(UText *s1, int64_t limit1,
-                         UText *s2, int64_t limit2);    
-
-/**
- * Compare two UTexts case-insensitively using full case folding. The comparison
- * begins at each source text's iteration position. The iteration position of each
- * UText will be left following the last character compared.
- *
- * The comparison is done in code point order; this is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A length argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both length arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param length1 Length of first source string in UTF-32 code points.
- *
- * @param s2 Second source string.
- * @param length2 Length of second source string in UTF-32 code points.
- *
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Comparison in code point order with default case folding.
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_caseCompare(UText *s1, int32_t length1,
-                  UText *s2, int32_t length2,
-                  uint32_t options, UErrorCode *pErrorCode);    
-
-/**
- * Compare two UTexts case-insensitively using full case folding. The comparison
- * begins at each source text's iteration position. The iteration position of each
- * UText will be left following the last character compared. This method differs from
- * utext_caseCompare in that it accepts native limits rather than lengths for each
- * string.
- *
- * The comparison is done in code point order; this is because the characters
- * in a UText are accessed one code point at a time, and may not be from a UTF-16
- * context.
- *
- * This functions works with strings of different explicitly specified lengths
- * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
- * A limit argument of -1 signifies that as much of the string should be used as
- * is necessary to compare with the other string. If both length arguments are -1,
- * the entire remaining portionss of both strings are used.
- *
- * @param s1 First source string.
- * @param limit1 Native index of the last character in the first source string to be considered.
- *
- * @param s2 Second source string.
- * @param limit2 Native index of the last character in the second source string to be considered.
- *
- * @param options A bit set of options:
- *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
- *     Comparison in code point order with default case folding.
- *
- *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
- *
- * @param pErrorCode Must be a valid pointer to an error code value,
- *                  which must not indicate a failure before the function call.
- *
- * @return <0 or 0 or >0 as usual for string comparisons
- *
- * @internal ICU 4.4 technology preview
- */
-U_INTERNAL int32_t U_EXPORT2
-utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
-                             UText *s2, int64_t limit2,
-                             uint32_t options, UErrorCode *pErrorCode);    
-
  
  /************************************************************************************
   *
@@ -832,6 +693,7 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
   *
   ************************************************************************************/
  
+#ifndef U_HIDE_INTERNAL_API
  /**
   * inline version of utext_current32(), for performance-critical situations.
   *
@@ -844,6 +706,7 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
  #define UTEXT_CURRENT32(ut)  \
      ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
      ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
+#endif  /* U_HIDE_INTERNAL_API */
  
  /**
   * inline version of utext_next32(), for performance-critical situations.
@@ -903,12 +766,28 @@ utext_caseCompareNativeLimit(UText *s1, int64_t limit1,
    *
    * @stable ICU 3.8
    */
-#define UTEXT_SETNATIVEINDEX(ut, ix)                       \
-    { int64_t __offset = (ix) - (ut)->chunkNativeStart; \
-      if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
-          (ut)->chunkOffset=(int32_t)__offset; \
-      } else { \
-          utext_setNativeIndex((ut), (ix)); } }
+#if LOG_UTEXT_SETNATIVEINDEX
+/* Add logging for <rdar://problem/44884660>: a null chunkContents with an in-chunk offset is only logged; the index is left unchanged. */
+#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
+    int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+    if ((ut)->chunkContents!=0 && __offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+        (ut)->chunkOffset=(int32_t)__offset; \
+    } else if ((ut)->chunkContents==0 && __offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit) { \
+        os_log(OS_LOG_DEFAULT, "# UTEXT_SETNATIVEINDEX (ut) %p, (ut)->chunkContents 0, __offset %lld", (ut), __offset); \
+    } else { \
+        utext_setNativeIndex((ut), (ix)); \
+    } \
+} UPRV_BLOCK_MACRO_END
+#else
+#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
+    int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+    if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+        (ut)->chunkOffset=(int32_t)__offset; \
+    } else { \
+        utext_setNativeIndex((ut), (ix)); \
+    } \
+} UPRV_BLOCK_MACRO_END
+#endif
  
  
  
@@ -1043,7 +922,7 @@ utext_copy(UText *ut,
    *  Caution:  freezing a UText will disable changes made via the specific
    *   frozen UText wrapper only; it will not have any effect on the ability to
    *   directly modify the text by bypassing the UText.  Any such backdoor modifications
-  *   are always an error while UText access is occuring because the underlying
+  *   are always an error while UText access is occurring because the underlying
    *   text can get out of sync with UText's buffering.
    *  </p>
    *
@@ -1191,7 +1070,7 @@ UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
   * be NUL-terminated if there is sufficient space in the destination buffer.
   *
   * @param  ut            the UText from which to extract data.
- * @param  nativeStart   the native index of the first characer to extract.
+ * @param  nativeStart   the native index of the first character to extract.
   * @param  nativeLimit   the native string index of the position following the last
   *                       character to extract.
   * @param  dest          the UChar (UTF-16) buffer into which the extracted text is placed
@@ -1348,7 +1227,7 @@ UTextClose(UText *ut);
  struct UTextFuncs {
      /**
       *   (public)  Function table size, sizeof(UTextFuncs)
-     *             Intended for use should the table grow to accomodate added
+     *             Intended for use should the table grow to accommodate added
       *             functions in the future, to allow tests for older format
       *             function tables that do not contain the extensions.
       *
@@ -1482,7 +1361,7 @@ typedef struct UTextFuncs UTextFuncs;
  struct UText {
      /**
       *     (private)  Magic.  Used to help detect when UText functions are handed
-     *                        invalid or unitialized UText structs.
+     *                        invalid or uninitialized UText structs.
       *                        utext_openXYZ() functions take an initialized,
       *                        but not necessarily open, UText struct as an
       *                        optional fill-in parameter.  This magic field
@@ -1504,7 +1383,7 @@ struct UText {
  
  
      /**
-      *  Text provider properties.  This set of flags is maintainted by the
+      *  Text provider properties.  This set of flags is maintained by the
        *                             text provider implementation.
        *  @stable ICU 3.4
        */
@@ -1589,7 +1468,7 @@ struct UText {
      void          *pExtra;
  
      /**
-     * (protected) Pointer to string or text-containin object or similar.
+     * (protected) Pointer to string or text-containing object or similar.
       * This is the source of the text that this UText is wrapping, in a format
       *  that is known to the text provider functions.
       * @stable ICU 3.4
@@ -1692,6 +1571,7 @@ struct UText {
  U_STABLE UText * U_EXPORT2
  utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
  
+// do not use #ifndef U_HIDE_INTERNAL_API around the following!
  /**
    * @internal
    *  Value used to help identify correctly initialized UText structs.