icuSources/i18n/unicode/uspoof.h

   1 // © 2016 and later: Unicode, Inc. and others.
   2 // License & terms of use: http://www.unicode.org/copyright.html
   3 /*
   4 ***************************************************************************
   5 * Copyright (C) 2008-2016, International Business Machines Corporation
   6 * and others. All Rights Reserved.
   7 ***************************************************************************
   8 *   file name:  uspoof.h
   9 *   encoding:   UTF-8
  10 *   tab size:   8 (not used)
  11 *   indentation:4
  12 *
  13 *   created on: 2008Feb13
  14 *   created by: Andy Heninger
  15 *
  16 *   Unicode Spoof Detection
  17 */
  18
  19 #ifndef USPOOF_H
  20 #define USPOOF_H
  21
  22 #include "unicode/utypes.h"
  23 #include "unicode/uset.h"
  24 #include "unicode/parseerr.h"
  25 #include "unicode/localpointer.h"
  26
  27 #if !UCONFIG_NO_NORMALIZATION
  28
  29
  30 #if U_SHOW_CPLUSPLUS_API
  31 #include "unicode/unistr.h"
  32 #include "unicode/uniset.h"
  33 #endif // U_SHOW_CPLUSPLUS_API
  34
  35
  36 /**
  37  * \file
  38  * \brief Unicode Security and Spoofing Detection, C API.
  39  *
  40  * <p>
  41  * This class, based on <a href="http://unicode.org/reports/tr36">Unicode Technical Report #36</a> and
  42  * <a href="http://unicode.org/reports/tr39">Unicode Technical Standard #39</a>, has two main functions:
  43  *
  44  * <ol>
  45  * <li>Checking whether two strings are visually <em>confusable</em> with each other, such as "Harvest" and
  46  * &quot;&Eta;arvest&quot;, where the second string starts with the Greek capital letter Eta.</li>
  47  * <li>Checking whether an individual string is likely to be an attempt at confusing the reader (<em>spoof
  48  * detection</em>), such as "paypal" with some Latin characters substituted with Cyrillic look-alikes.</li>
  49  * </ol>
  50  *
  51  * <p>
  52  * Although originally designed as a method for flagging suspicious identifier strings such as URLs,
  53  * <code>USpoofChecker</code> has a number of other practical use cases, such as preventing attempts to evade bad-word
  54  * content filters.
  55  *
  56  * <p>
  57  * The functions of this class are exposed as C API, with a handful of syntactical conveniences for C++.
  58  *
  59  * <h2>Confusables</h2>
  60  *
  61  * <p>
  62  * The following example shows how to use <code>USpoofChecker</code> to check for confusability between two strings:
  63  *
  64  * \code{.c}
  65  * UErrorCode status = U_ZERO_ERROR;
  66  * UChar* str1 = (UChar*) u"Harvest";
  67  * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
  68  *
  69  * USpoofChecker* sc = uspoof_open(&status);
  70  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
  71  *
  72  * int32_t bitmask = uspoof_areConfusable(sc, str1, -1, str2, -1, &status);
  73  * UBool result = bitmask != 0;
  74  * // areConfusable: 1 (status: U_ZERO_ERROR)
  75  * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
  76  * uspoof_close(sc);
  77  * \endcode
  78  *
  79  * <p>
  80  * The call to {@link uspoof_open} creates a <code>USpoofChecker</code> object; the call to {@link uspoof_setChecks}
  81  * enables confusable checking and disables all other checks; the call to {@link uspoof_areConfusable} performs the
  82  * confusability test; and the following line extracts the result out of the return value. For best performance,
  83  * the instance should be created once (e.g., upon application startup), and the efficient
  84  * {@link uspoof_areConfusable} method can be used at runtime.
  85  *
  86  * <p>
  87  * The type {@link LocalUSpoofCheckerPointer} is exposed for C++ programmers.  It will automatically call
  88  * {@link uspoof_close} when the object goes out of scope:
  89  *
  90  * \code{.cpp}
  91  * UErrorCode status = U_ZERO_ERROR;
  92  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
  93  * uspoof_setChecks(sc.getAlias(), USPOOF_CONFUSABLE, &status);
  94  * // ...
  95  * \endcode
  96  *
  97  * UTS 39 defines two strings to be <em>confusable</em> if they map to the same <em>skeleton string</em>. A skeleton can
  98  * be thought of as a "hash code". {@link uspoof_getSkeleton} computes the skeleton for a particular string, so
  99  * the following snippet is equivalent to the example above:
 100  *
 101  * \code{.c}
 102  * UErrorCode status = U_ZERO_ERROR;
 103  * UChar* str1 = (UChar*) u"Harvest";
 104  * UChar* str2 = (UChar*) u"\u0397arvest";  // with U+0397 GREEK CAPITAL LETTER ETA
 105  *
 106  * USpoofChecker* sc = uspoof_open(&status);
 107  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
 108  *
 109  * // Get skeleton 1
 110  * int32_t skel1Len = uspoof_getSkeleton(sc, 0, str1, -1, NULL, 0, &status);
 111  * UChar* skel1 = (UChar*) malloc(++skel1Len * sizeof(UChar));
 112  * status = U_ZERO_ERROR;
 113  * uspoof_getSkeleton(sc, 0, str1, -1, skel1, skel1Len, &status);
 114  *
 115  * // Get skeleton 2
 116  * int32_t skel2Len = uspoof_getSkeleton(sc, 0, str2, -1, NULL, 0, &status);
 117  * UChar* skel2 = (UChar*) malloc(++skel2Len * sizeof(UChar));
 118  * status = U_ZERO_ERROR;
 119  * uspoof_getSkeleton(sc, 0, str2, -1, skel2, skel2Len, &status);
 120  *
 121  * // Are the skeletons the same?
 122  * UBool result = u_strcmp(skel1, skel2) == 0;
 123  * // areConfusable: 1 (status: U_ZERO_ERROR)
 124  * printf("areConfusable: %d (status: %s)\n", result, u_errorName(status));
 125  * uspoof_close(sc);
 126  * free(skel1);
 127  * free(skel2);
 128  * \endcode
 129  *
 130  * If you need to check if a string is confusable with any string in a dictionary of many strings, rather than calling
 131  * {@link uspoof_areConfusable} many times in a loop, {@link uspoof_getSkeleton} can be used instead, as shown below:
 132  *
 133  * \code{.c}
 134  * UErrorCode status = U_ZERO_ERROR;
 135  * #define DICTIONARY_LENGTH 2
 136  * UChar* dictionary[DICTIONARY_LENGTH] = { (UChar*) u"lorem", (UChar*) u"ipsum" };
 137  * UChar* skeletons[DICTIONARY_LENGTH];
 138  * UChar* str = (UChar*) u"1orern";
 139  *
 140  * // Setup:
 141  * USpoofChecker* sc = uspoof_open(&status);
 142  * uspoof_setChecks(sc, USPOOF_CONFUSABLE, &status);
 143  * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
 144  *     UChar* word = dictionary[i];
 145  *     int32_t len = uspoof_getSkeleton(sc, 0, word, -1, NULL, 0, &status);
 146  *     skeletons[i] = (UChar*) malloc(++len * sizeof(UChar));
 147  *     status = U_ZERO_ERROR;
 148  *     uspoof_getSkeleton(sc, 0, word, -1, skeletons[i], len, &status);
 149  * }
 150  *
 151  * // Live Check:
 152  * {
 153  *     int32_t len = uspoof_getSkeleton(sc, 0, str, -1, NULL, 0, &status);
 154  *     UChar* skel = (UChar*) malloc(++len * sizeof(UChar));
 155  *     status = U_ZERO_ERROR;
 156  *     uspoof_getSkeleton(sc, 0, str, -1, skel, len, &status);
 157  *     UBool result = FALSE;
 158  *     for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
 159  *         result = u_strcmp(skel, skeletons[i]) == 0;
 160  *         if (result == TRUE) { break; }
 161  *     }
 162  *     // Has confusable in dictionary: 1 (status: U_ZERO_ERROR)
 163  *     printf("Has confusable in dictionary: %d (status: %s)\n", result, u_errorName(status));
 164  *     free(skel);
 165  * }
 166  *
 167  * for (size_t i=0; i<DICTIONARY_LENGTH; i++) {
 168  *     free(skeletons[i]);
 169  * }
 170  * uspoof_close(sc);
 171  * \endcode
 172  *
 173  * <b>Note:</b> Since the Unicode confusables mapping table is frequently updated, confusable skeletons are <em>not</em>
 174  * guaranteed to be the same between ICU releases. We therefore recommend that you always compute confusable skeletons
 175  * at runtime and do not rely on creating a permanent, or difficult to update, database of skeletons.
 176  *
 177  * <h2>Spoof Detection</h2>
 178  *
 179  * The following snippet shows a minimal example of using <code>USpoofChecker</code> to perform spoof detection on a
 180  * string:
 181  *
 182  * \code{.c}
 183  * UErrorCode status = U_ZERO_ERROR;
 184  * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
 185  *
 186  * // Get the default set of allowable characters:
 187  * USet* allowed = uset_openEmpty();
 188  * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
 189  * uset_addAll(allowed, uspoof_getInclusionSet(&status));
 190  *
 191  * USpoofChecker* sc = uspoof_open(&status);
 192  * uspoof_setAllowedChars(sc, allowed, &status);
 193  * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
 194  *
 195  * int32_t bitmask = uspoof_check(sc, str, -1, NULL, &status);
 196  * UBool result = bitmask != 0;
 197  * // fails checks: 1 (status: U_ZERO_ERROR)
 198  * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
 199  * uspoof_close(sc);
 200  * uset_close(allowed);
 201  * \endcode
 202  *
 203  * As in the case for confusability checking, it is good practice to create one <code>USpoofChecker</code> instance at
 204  * startup, and call the cheaper {@link uspoof_check} online. We specify the set of
 205  * allowed characters to be those with type RECOMMENDED or INCLUSION, according to the recommendation in UTS 39.
 206  *
 207  * In addition to {@link uspoof_check}, the function {@link uspoof_checkUTF8} is exposed for UTF8-encoded char* strings,
 208  * and {@link uspoof_checkUnicodeString} is exposed for C++ programmers.
 209  *
 210  * If the {@link USPOOF_AUX_INFO} check is enabled, a limited amount of information on why a string failed the checks
 211  * is available in the returned bitmask.  For complete information, use the {@link uspoof_check2} class of functions
 212  * with a {@link USpoofCheckResult} parameter:
 213  *
 214  * \code{.c}
 215  * UErrorCode status = U_ZERO_ERROR;
 216  * UChar* str = (UChar*) u"p\u0430ypal";  // with U+0430 CYRILLIC SMALL LETTER A
 217  *
 218  * // Get the default set of allowable characters:
 219  * USet* allowed = uset_openEmpty();
 220  * uset_addAll(allowed, uspoof_getRecommendedSet(&status));
 221  * uset_addAll(allowed, uspoof_getInclusionSet(&status));
 222  *
 223  * USpoofChecker* sc = uspoof_open(&status);
 224  * uspoof_setAllowedChars(sc, allowed, &status);
 225  * uspoof_setRestrictionLevel(sc, USPOOF_MODERATELY_RESTRICTIVE);
 226  *
 227  * USpoofCheckResult* checkResult = uspoof_openCheckResult(&status);
 228  * int32_t bitmask = uspoof_check2(sc, str, -1, checkResult, &status);
 229  *
 230  * int32_t failures1 = bitmask;
 231  * int32_t failures2 = uspoof_getCheckResultChecks(checkResult, &status);
 232  * assert(failures1 == failures2);
 233  * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
 234  * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
 235  *
 236  * // Cleanup:
 237  * uspoof_close(sc);
 238  * uset_close(allowed);
 239  * uspoof_closeCheckResult(checkResult);
 240  * \endcode
 241  *
 242  * C++ users can take advantage of a few syntactical conveniences.  The following snippet is functionally
 243  * equivalent to the one above:
 244  *
 245  * \code{.cpp}
 246  * UErrorCode status = U_ZERO_ERROR;
 247  * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
 248  *
 249  * // Get the default set of allowable characters:
 250  * UnicodeSet allowed;
 251  * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
 252  * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
 253  *
 254  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
 255  * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
 256  * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
 257  *
 258  * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
 259  * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
 260  *
 261  * int32_t failures1 = bitmask;
 262  * int32_t failures2 = uspoof_getCheckResultChecks(checkResult.getAlias(), &status);
 263  * assert(failures1 == failures2);
 264  * // checks that failed: 0x00000010 (status: U_ZERO_ERROR)
 265  * printf("checks that failed: %#010x (status: %s)\n", failures1, u_errorName(status));
 266  *
 267  * // Explicit cleanup not necessary.
 268  * \endcode
 269  *
 270  * The return value is a bitmask of the checks that failed. In this case, there was one check that failed:
 271  * {@link USPOOF_RESTRICTION_LEVEL}, corresponding to the fifth bit (16). The possible checks are:
 272  *
 273  * <ul>
 274  * <li><code>RESTRICTION_LEVEL</code>: flags strings that violate the
 275  * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">Restriction Level</a> test as specified in UTS
 276  * 39; in most cases, this means flagging strings that contain characters from multiple different scripts.</li>
 277  * <li><code>INVISIBLE</code>: flags strings that contain invisible characters, such as zero-width spaces, or character
 278  * sequences that are likely not to display, such as multiple occurrences of the same non-spacing mark.</li>
 279  * <li><code>CHAR_LIMIT</code>: flags strings that contain characters outside of a specified set of acceptable
 280  * characters. See {@link uspoof_setAllowedChars} and {@link uspoof_setAllowedLocales}.</li>
 281  * <li><code>MIXED_NUMBERS</code>: flags strings that contain digits from multiple different numbering systems.</li>
 282  * </ul>
 283  *
 284  * <p>
 285  * These checks can be enabled independently of each other. For example, if you were interested in checking for only the
 286  * INVISIBLE and MIXED_NUMBERS conditions, you could do:
 287  *
 288  * \code{.c}
 289  * UErrorCode status = U_ZERO_ERROR;
 290  * UChar* str = (UChar*) u"8\u09EA";  // 8 mixed with U+09EA BENGALI DIGIT FOUR
 291  *
 292  * USpoofChecker* sc = uspoof_open(&status);
 293  * uspoof_setChecks(sc, USPOOF_INVISIBLE | USPOOF_MIXED_NUMBERS, &status);
 294  *
 295  * int32_t bitmask = uspoof_check2(sc, str, -1, NULL, &status);
 296  * UBool result = bitmask != 0;
 297  * // fails checks: 1 (status: U_ZERO_ERROR)
 298  * printf("fails checks: %d (status: %s)\n", result, u_errorName(status));
 299  * uspoof_close(sc);
 300  * \endcode
 301  *
 302  * Here is an example in C++ showing how to compute the restriction level of a string:
 303  *
 304  * \code{.cpp}
 305  * UErrorCode status = U_ZERO_ERROR;
 306  * UnicodeString str((UChar*) u"p\u0430ypal");  // with U+0430 CYRILLIC SMALL LETTER A
 307  *
 308  * // Get the default set of allowable characters:
 309  * UnicodeSet allowed;
 310  * allowed.addAll(*uspoof_getRecommendedUnicodeSet(&status));
 311  * allowed.addAll(*uspoof_getInclusionUnicodeSet(&status));
 312  *
 313  * LocalUSpoofCheckerPointer sc(uspoof_open(&status));
 314  * uspoof_setAllowedChars(sc.getAlias(), allowed.toUSet(), &status);
 315  * uspoof_setRestrictionLevel(sc.getAlias(), USPOOF_MODERATELY_RESTRICTIVE);
 316  * uspoof_setChecks(sc.getAlias(), USPOOF_RESTRICTION_LEVEL | USPOOF_AUX_INFO, &status);
 317  *
 318  * LocalUSpoofCheckResultPointer checkResult(uspoof_openCheckResult(&status));
 319  * int32_t bitmask = uspoof_check2UnicodeString(sc.getAlias(), str, checkResult.getAlias(), &status);
 320  *
 321  * URestrictionLevel restrictionLevel = uspoof_getCheckResultRestrictionLevel(checkResult.getAlias(), &status);
 322  * // Since USPOOF_AUX_INFO was enabled, the restriction level is also available in the upper bits of the bitmask:
 323  * assert((restrictionLevel & bitmask) == restrictionLevel);
 324  * // Restriction level: 0x50000000 (status: U_ZERO_ERROR)
 325  * printf("Restriction level: %#010x (status: %s)\n", restrictionLevel, u_errorName(status));
 326  * \endcode
 327  *
 328  * The code '0x50000000' corresponds to the restriction level USPOOF_MINIMALLY_RESTRICTIVE.  Since
 329  * USPOOF_MINIMALLY_RESTRICTIVE is weaker than USPOOF_MODERATELY_RESTRICTIVE, the string fails the check.
 330  *
 331  * <b>Note:</b> The Restriction Level is the most powerful of the checks. The full logic is documented in
 332  * <a href="http://unicode.org/reports/tr39/#Restriction_Level_Detection">UTS 39</a>, but the basic idea is that strings
 333  * are restricted to contain characters from only a single script, <em>except</em> that most scripts are allowed to have
 334  * Latin characters interspersed. Although the default restriction level is <code>HIGHLY_RESTRICTIVE</code>, it is
 335  * recommended that users set their restriction level to <code>MODERATELY_RESTRICTIVE</code>, which allows Latin mixed
 336  * with all other scripts except Cyrillic, Greek, and Cherokee, with which it is often confusable. For more details on
 337  * the levels, see UTS 39 or {@link URestrictionLevel}. The Restriction Level test is aware of the set of
 338  * allowed characters set in {@link uspoof_setAllowedChars}. Note that characters which have script code
 339  * COMMON or INHERITED, such as numbers and punctuation, are ignored when computing whether a string has multiple
 340  * scripts.
 341  *
 342  * <h2>Additional Information</h2>
 343  *
 344  * A <code>USpoofChecker</code> instance may be used repeatedly to perform checks on any number of identifiers.
 345  *
 346  * <b>Thread Safety:</b> The test functions for checking a single identifier, or for testing whether
 347  * two identifiers are possibly confusable, are thread safe. They may be called concurrently, from multiple threads,
 348  * using the same USpoofChecker instance.
 349  *
 350  * More generally, the standard ICU thread safety rules apply: functions that take a const USpoofChecker parameter are
 351  * thread safe. Those that take a non-const USpoofChecker are not thread safe.
 352  *
 353  * @stable ICU 4.6
 354  */
 355
 356 struct USpoofChecker;
 357 /** Spoof checker object type; instances are created with uspoof_open() and released with uspoof_close().
 358  * @stable ICU 4.2
 359  */
 360 typedef struct USpoofChecker USpoofChecker; /**< typedef for C of USpoofChecker */
 361
 362 struct USpoofCheckResult;
 363 /** Result object type passed to the uspoof_check2 family of functions to receive detailed check results.
 364  * @see uspoof_openCheckResult
 365  * @stable ICU 58
 366  */
 367 typedef struct USpoofCheckResult USpoofCheckResult;
 368
 369 /**
 370  * Enum for the kinds of checks that USpoofChecker can perform.
 371  * These enum values are used both to select the set of checks that
 372  * will be performed, and to report results from the check function.
 373  *
 374  * @stable ICU 4.2
 375  */
 376 typedef enum USpoofChecks {
 377     /**
 378      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
 379      * that the two strings are visually confusable and that they are from the same script, according to UTS 39 section
 380      * 4.
 381      *
 382      * @see uspoof_areConfusable
 383      * @stable ICU 4.2
 384      */
 385     USPOOF_SINGLE_SCRIPT_CONFUSABLE =   1,
 386
 387     /**
 388      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
 389      * that the two strings are visually confusable and that they are <b>not</b> from the same script, according to UTS
 390      * 39 section 4.
 391      *
 392      * @see uspoof_areConfusable
 393      * @stable ICU 4.2
 394      */
 395     USPOOF_MIXED_SCRIPT_CONFUSABLE  =   2,
 396
 397     /**
 398      * When performing the two-string {@link uspoof_areConfusable} test, this flag in the return value indicates
 399      * that the two strings are visually confusable and that they are not from the same script but both of them are
 400      * single-script strings, according to UTS 39 section 4.
 401      *
 402      * @see uspoof_areConfusable
 403      * @stable ICU 4.2
 404      */
 405     USPOOF_WHOLE_SCRIPT_CONFUSABLE  =   4,
 406
 407     /**
 408      * Enable this flag in {@link uspoof_setChecks} to turn on all types of confusables.  You may set
 409      * the checks to some subset of SINGLE_SCRIPT_CONFUSABLE, MIXED_SCRIPT_CONFUSABLE, or WHOLE_SCRIPT_CONFUSABLE to
 410      * make {@link uspoof_areConfusable} return only those types of confusables.
 411      *
 412      * @see uspoof_areConfusable
 413      * @see uspoof_getSkeleton
 414      * @stable ICU 58
 415      */
 416     USPOOF_CONFUSABLE               =   USPOOF_SINGLE_SCRIPT_CONFUSABLE | USPOOF_MIXED_SCRIPT_CONFUSABLE | USPOOF_WHOLE_SCRIPT_CONFUSABLE,
 417
 418 #ifndef U_HIDE_DEPRECATED_API
 419     /**
 420       * This flag is deprecated and no longer affects the behavior of SpoofChecker.
 421       *
 422       * @deprecated ICU 58  Any case confusable mappings were removed from UTS 39; the corresponding ICU API was deprecated.
 423       */
 424     USPOOF_ANY_CASE                 =   8,
 425 #endif  /* U_HIDE_DEPRECATED_API */
 426
 427     /**
 428       * Check that an identifier is no looser than the specified RestrictionLevel.
 429       * The default if {@link uspoof_setRestrictionLevel} is not called is HIGHLY_RESTRICTIVE.
 430       *
 431       * If USPOOF_AUX_INFO is enabled the actual restriction level of the
 432       * identifier being tested will also be returned by uspoof_check().
 433       *
 434       * @see URestrictionLevel
 435       * @see uspoof_setRestrictionLevel
 436       * @see USPOOF_AUX_INFO
 437       *
 438       * @stable ICU 51
 439       */
 440     USPOOF_RESTRICTION_LEVEL        = 16,
 441
 442 #ifndef U_HIDE_DEPRECATED_API
 443     /** Check that an identifier contains only characters from a
 444       * single script (plus chars from the common and inherited scripts.)
 445       * Applies to checks of a single identifier check only.
 446       * @deprecated ICU 51  Use RESTRICTION_LEVEL instead.
 447       */
 448     USPOOF_SINGLE_SCRIPT            =  USPOOF_RESTRICTION_LEVEL,
 449 #endif  /* U_HIDE_DEPRECATED_API */
 450
 451     /** Check an identifier for the presence of invisible characters,
 452       * such as zero-width spaces, or character sequences that are
 453       * likely not to display, such as multiple occurrences of the same
 454       * non-spacing mark.  This check does not test the input string as a whole
 455       * for conformance to any particular syntax for identifiers.
 456       */
 457     USPOOF_INVISIBLE                =  32,
 458
 459     /** Check that an identifier contains only characters from a specified set
 460       * of acceptable characters.  See {@link uspoof_setAllowedChars} and
 461       * {@link uspoof_setAllowedLocales}.  Note that a string that fails this check
 462       * will also fail the {@link USPOOF_RESTRICTION_LEVEL} check.
 463       */
 464     USPOOF_CHAR_LIMIT               =  64,
 465
 466     /**
 467      * Check that an identifier does not mix numbers from different numbering systems.
 468      * For more information, see UTS 39 section 5.3.
 469      *
 470      * @stable ICU 51
 471      */
 472     USPOOF_MIXED_NUMBERS            = 128,
 473
 474 #ifndef U_HIDE_DRAFT_API
 475     /**
 476      * Check that an identifier does not have a combining character following a character in which that
 477      * combining character would be hidden; for example 'i' followed by a U+0307 combining dot.
 478      *
 479      * More specifically, the following characters are forbidden from preceding a U+0307:
 480      * <ul>
 481      * <li>Those with the Soft_Dotted Unicode property (which includes 'i' and 'j')</li>
 482      * <li>Latin lowercase letter 'l'</li>
 483      * <li>Dotless 'i' and 'j' ('ı' and 'ȷ', U+0131 and U+0237)</li>
 484      * <li>Any character whose confusable prototype ends with such a character
 485      * (Soft_Dotted, 'l', 'ı', or 'ȷ')</li>
 486      * </ul>
 487      * In addition, combining characters are allowed between the above characters and U+0307 except those
 488      * with combining class 0 or combining class "Above" (230, same class as U+0307).
 489      *
 490      * This list and the number of combining characters considered by this check may grow over time.
 491      *
 492      * @draft ICU 62
 493      */
 494     USPOOF_HIDDEN_OVERLAY            = 256,
 495 #endif  /* U_HIDE_DRAFT_API */
 496
 497    /**
 498      * Enable all spoof checks.  The mask covers only the low 16 bits, so it does not include USPOOF_AUX_INFO.
 499      *
 500      * @stable ICU 4.6
 501      */
 502     USPOOF_ALL_CHECKS               = 0xFFFF,
 503
 504     /**
 505       * Enable the return of auxiliary (non-error) information in the
 506       * upper bits of the check results value.
 507       *
 508       * If this "check" is not enabled, the results of {@link uspoof_check} will be
 509       * zero when an identifier passes all of the enabled checks.
 510       *
 511       * If this "check" is enabled, (uspoof_check() & {@link USPOOF_ALL_CHECKS}) will
 512       * be zero when an identifier passes all checks.
 513       *
 514       * @stable ICU 51
 515       */
 516     USPOOF_AUX_INFO                  = 0x40000000
 517
 518     } USpoofChecks;
 519
 520
 521     /**
 522      * Constants from UTS #39 for use in {@link uspoof_setRestrictionLevel}, and
 523      * for returned identifier restriction levels in check results.
 524      *
 525      * @stable ICU 51
 526      *
 527      * @see uspoof_setRestrictionLevel
 528      * @see uspoof_check
 529      */
 530     typedef enum URestrictionLevel {
 531         /**
 532          * All characters in the string are in the identifier profile and all characters in the string are in the
 533          * ASCII range.
 534          *
 535          * @stable ICU 51
 536          */
 537         USPOOF_ASCII = 0x10000000,
 538         /**
 539          * The string classifies as ASCII-Only, or all characters in the string are in the identifier profile and
 540          * the string is single-script, according to the definition in UTS 39 section 5.1.
 541          *
 542          * @stable ICU 53
 543          */
 544         USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
 545         /**
 546          * The string classifies as Single Script, or all characters in the string are in the identifier profile and
 547          * the string is covered by any of the following sets of scripts, according to the definition in UTS 39
 548          * section 5.1:
 549          * <ul>
 550          *   <li>Latin + Han + Bopomofo (or equivalently: Latn + Hanb)</li>
 551          *   <li>Latin + Han + Hiragana + Katakana (or equivalently: Latn + Jpan)</li>
 552          *   <li>Latin + Han + Hangul (or equivalently: Latn + Kore)</li>
 553          * </ul>
 554          * This is the default restriction in ICU.
 555          *
 556          * @stable ICU 51
 557          */
 558         USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
 559         /**
 560          * The string classifies as Highly Restrictive, or all characters in the string are in the identifier profile
 561          * and the string is covered by Latin and any one other Recommended or Aspirational script, except Cyrillic,
 562          * Greek, and Cherokee.
 563          *
 564          * @stable ICU 51
 565          */
 566         USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
 567         /**
 568          * All characters in the string are in the identifier profile.  Allow arbitrary mixtures of scripts.
 569          *
 570          * @stable ICU 51
 571          */
 572         USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
 573         /**
 574          * Any valid identifiers, including characters outside of the Identifier Profile.
 575          *
 576          * @stable ICU 51
 577          */
 578         USPOOF_UNRESTRICTIVE = 0x60000000,
 579         /**
 580          * Mask for selecting the Restriction Level bits from the return value of {@link uspoof_check}.
 581          *
 582          * @stable ICU 53
 583          */
 584         USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000,
 585 #ifndef U_HIDE_INTERNAL_API
 586         /**
 587          * An undefined restriction level.
 588          * @internal
 589          */
 590         USPOOF_UNDEFINED_RESTRICTIVE = -1
 591 #endif  /* U_HIDE_INTERNAL_API */
 592     } URestrictionLevel;
 593
 594 /**
 595  *  Create a Unicode Spoof Checker, configured to perform all checks except for
 596  *  USPOOF_LOCALE_LIMIT and USPOOF_CHAR_LIMIT.  Note that additional checks may be added
 597  *  in the future, resulting in changes to the default checking behavior.
 598  *  NOTE(review): USPOOF_LOCALE_LIMIT is not an enumerator of USpoofChecks in this header - confirm the name.
 599  *
 600  *  @param status  The error code, set if this function encounters a problem.
 601  *  @return        the newly created Spoof Checker; close it with uspoof_close() when done
 602  *  @stable ICU 4.2
 603  */
 604 U_STABLE USpoofChecker * U_EXPORT2
 605 uspoof_open(UErrorCode *status);
 606
 607
 608 /**
 609  * Open a Spoof Checker from its serialized form, stored in 32-bit-aligned memory.
 610  * Inverse of uspoof_serialize().
 611  * The memory containing the serialized data must remain valid and unchanged
 612  * as long as the spoof checker, or any cloned copies of the spoof checker,
 613  * are in use.  Ownership of the memory remains with the caller.
 614  * The spoof checker (and any clones) must be closed prior to deleting the
 615  * serialized data.
 616  *
 617  * @param data a pointer to 32-bit-aligned memory containing the serialized form of spoof data
 618  * @param length the number of bytes available at data;
 619  *               can be more than necessary
 620  * @param pActualLength receives the actual number of bytes at data taken up by the data;
 621  *                      can be NULL
 622  * @param pErrorCode ICU error code
 623  * @return the Spoof Checker opened from the serialized data.
 624  *
 625  * @see uspoof_open
 626  * @see uspoof_serialize
 627  * @stable ICU 4.2
 628  */
 629 U_STABLE USpoofChecker * U_EXPORT2
 630 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
 631                           UErrorCode *pErrorCode);
 632
 633 /**
 634   * Open a Spoof Checker from the source form of the spoof data.
 635   * The input corresponds to the Unicode data file confusables.txt
 636   * as described in Unicode UTS #39.  The syntax of the source data
 637   * is as described in UTS #39 for this file, and the content of
 638   * this file is acceptable input.
 639   *
 640   * The character encoding of the (char *) input text is UTF-8.
 641   *
 642   * @param confusables a pointer to the confusable characters definitions,
 643   *                    as found in file confusables.txt from unicode.org.
 644   * @param confusablesLen The length of the confusables text, or -1 if the
 645   *                    input string is zero terminated.
 646   * @param confusablesWholeScript
 647   *                    Deprecated in ICU 58.  No longer used.
 648   * @param confusablesWholeScriptLen
 649   *                    Deprecated in ICU 58.  No longer used.
 650   * @param errType     In the event of an error in the input, indicates
 651   *                    which of the input files contains the error.
 652   *                    The value is one of USPOOF_SINGLE_SCRIPT_CONFUSABLE or
 653   *                    USPOOF_WHOLE_SCRIPT_CONFUSABLE, or
 654   *                    zero if no errors are found.
 655   * @param pe          In the event of an error in the input, receives the position
 656   *                    in the input text (line, offset) of the error.
 657   * @param status      an in/out ICU UErrorCode.  Among the possible errors is
 658   *                    U_PARSE_ERROR, which is used to report syntax errors
 659   *                    in the input.
 660   * @return            A spoof checker that uses the rules from the input files.
 661   * @stable ICU 4.2
 662   */
 663 U_STABLE USpoofChecker * U_EXPORT2
 664 uspoof_openFromSource(const char *confusables,  int32_t confusablesLen,
 665                       const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
 666                       int32_t *errType, UParseError *pe, UErrorCode *status);
 667
 668
 669 /**
 670   * Close a Spoof Checker, freeing any memory that was being held by its implementation.
 671   * @param sc   The Spoof Checker to be closed.
 672   * @stable ICU 4.2
 673   */
 674 U_STABLE void U_EXPORT2
 675 uspoof_close(USpoofChecker *sc);
 676
 677 #if U_SHOW_CPLUSPLUS_API
 678
 679 U_NAMESPACE_BEGIN
 680
 681 /**
 682  * \class LocalUSpoofCheckerPointer
 683  * "Smart pointer" class, closes a USpoofChecker via uspoof_close().
 684  * For most methods see the LocalPointerBase base class.
 685  *
 686  * @see LocalPointerBase
 687  * @see LocalPointer
 688  * @stable ICU 4.4
 689  */
 690 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckerPointer, USpoofChecker, uspoof_close);
 691
 692 U_NAMESPACE_END
 693
 694 #endif // U_SHOW_CPLUSPLUS_API
 695
 696 /**
 697  * Clone a Spoof Checker.  The clone will be set to perform the same checks
 698  *   as the original source.
 699  *
 700  * @param sc       The source USpoofChecker
 701  * @param status   The error code, set if this function encounters a problem.
 702  * @return         The newly created clone of the source USpoofChecker.
 703  * @stable ICU 4.2
 704  */
 705 U_STABLE USpoofChecker * U_EXPORT2
 706 uspoof_clone(const USpoofChecker *sc, UErrorCode *status);
 707
 708
 709 /**
 710  * Specify the bitmask of checks that will be performed by {@link uspoof_check}. Calling this method
 711  * overwrites any checks that may have already been enabled. By default, all checks are enabled.
 712  *
 713  * To enable specific checks and disable all others, the "whitelisted" checks should be ORed together. For
 714  * example, to fail strings containing characters outside of the set specified by {@link uspoof_setAllowedChars} and
 715  * also strings that contain digits from mixed numbering systems:
 716  *
 717  * <pre>
 718  * {@code
 719  * uspoof_setChecks(sc, USPOOF_CHAR_LIMIT | USPOOF_MIXED_NUMBERS, &status);
 720  * }
 721  * </pre>
 722  *
 723  * To disable specific checks and enable all others, the "blacklisted" checks should be ANDed away from
 724  * USPOOF_ALL_CHECKS. For example, if you are not planning to use the {@link uspoof_areConfusable} functionality,
 725  * it is good practice to disable the CONFUSABLE check:
 726  *
 727  * <pre>
 728  * {@code
 729  * uspoof_setChecks(sc, USPOOF_ALL_CHECKS & ~USPOOF_CONFUSABLE, &status);
 730  * }
 731  * </pre>
 732  *
 733  * Note that methods such as {@link uspoof_setAllowedChars}, {@link uspoof_setAllowedLocales}, and
 734  * {@link uspoof_setRestrictionLevel} will enable certain checks when called. Those methods will OR the check they
 735  * enable onto the existing bitmask specified by this method. For more details, see the documentation of those
 736  * methods.
 737  *
 738  * @param sc       The USpoofChecker
 739  * @param checks   The set of checks that this spoof checker will perform.
 740  *                 The value is a bit set, obtained by OR-ing together
 741  *                 values from enum USpoofChecks.
 742  * @param status   The error code, set if this function encounters a problem.
 743  * @stable ICU 4.2
 744  *
 745  */
 746 U_STABLE void U_EXPORT2
 747 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
 748
 749 /**
 750  * Get the set of checks that this Spoof Checker has been configured to perform.
 751  *
 752  * @param sc       The USpoofChecker
 753  * @param status   The error code, set if this function encounters a problem.
 754  * @return         The set of checks that this spoof checker will perform.
 755  *                 The value is a bit set, obtained by OR-ing together
 756  *                 values from enum USpoofChecks.
 757  * @stable ICU 4.2
 758  *
 759  */
 760 U_STABLE int32_t U_EXPORT2
 761 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
 762
 763 /**
 764  * Set the loosest restriction level allowed for strings. The default if this is not called is
 765  * {@link USPOOF_HIGHLY_RESTRICTIVE}. Calling this method enables the {@link USPOOF_RESTRICTION_LEVEL} and
 766  * {@link USPOOF_MIXED_NUMBERS} checks, corresponding to Sections 5.1 and 5.2 of UTS 39. To customize which checks are
 767  * to be performed by {@link uspoof_check}, see {@link uspoof_setChecks}.
 768  *
 769  * @param sc       The USpoofChecker
 770  * @param restrictionLevel The loosest restriction level allowed.
 771  * @see URestrictionLevel
 772  * @stable ICU 51
 773  */
 774 U_STABLE void U_EXPORT2
 775 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
 776
 777
 778 /**
 779   * Get the Restriction Level that will be tested if the checks include {@link USPOOF_RESTRICTION_LEVEL}.
 780   * @param sc The USpoofChecker
 781   * @return The restriction level
 782   * @see URestrictionLevel
 783   * @stable ICU 51
 784   */
 785 U_STABLE URestrictionLevel U_EXPORT2
 786 uspoof_getRestrictionLevel(const USpoofChecker *sc);
 787
 788 /**
 789  * Limit characters that are acceptable in identifiers being checked to those
 790  * normally used with the languages associated with the specified locales.
 791  * Any previously specified list of locales is replaced by the new settings.
 792  *
 793  * A set of languages is determined from the locale(s), and
 794  * from those a set of acceptable Unicode scripts is determined.
 795  * Characters from this set of scripts, along with characters from
 796  * the "common" and "inherited" Unicode Script categories
 797  * will be permitted.
 798  *
 799  * Supplying an empty string removes all restrictions;
 800  * characters from any script will be allowed.
 801  *
 802  * The {@link USPOOF_CHAR_LIMIT} test is automatically enabled for this
 803  * USpoofChecker when calling this function with a non-empty list
 804  * of locales.
 805  *
 806  * The Unicode Set of characters that will be allowed is accessible
 807  * via the uspoof_getAllowedChars() function.  uspoof_setAllowedLocales()
 808  * will <i>replace</i> any previously applied set of allowed characters.
 809  *
 810  * Adjustments, such as additions or deletions of certain classes of characters,
 811  * can be made to the result of uspoof_setAllowedLocales() by
 812  * fetching the resulting set with uspoof_getAllowedChars(),
 813  * manipulating it with the Unicode Set API, then resetting the
 814  * spoof detector's limits with uspoof_setAllowedChars().
 815  *
 816  * @param sc           The USpoofChecker
 817  * @param localesList  A list of locales, from which the language
 818  *                     and associated script are extracted.  The locales
 819  *                     are comma-separated if there is more than one.
 820  *                     White space may not appear within an individual locale,
 821  *                     but is ignored otherwise.
 822  *                     The locales are syntactically like those from the
 823  *                     HTTP Accept-Language header.
 824  *                     If the localesList is empty, no restrictions will be placed on
 825  *                     the allowed characters.
 826  *
 827  * @param status       The error code, set if this function encounters a problem.
 828  * @stable ICU 4.2
 829  */
 830 U_STABLE void U_EXPORT2
 831 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
 832
 833 /**
 834  * Get a list of locales for the scripts that are acceptable in strings
 835  *  to be checked.  If no limitations on scripts have been specified,
 836  *  an empty string will be returned.
 837  *
 838  *  uspoof_setAllowedChars() will reset the list of allowed locales to be empty.
 839  *
 840  *  The format of the returned list is the same as that supplied to
 841  *  uspoof_setAllowedLocales(), but the returned list may not be identical
 842  *  to the originally specified string; the string may be reformatted,
 843  *  and information other than languages from
 844  *  the originally specified locales may be omitted.
 845  *
 846  * @param sc           The USpoofChecker
 847  * @param status       The error code, set if this function encounters a problem.
 848  * @return             A string containing a list of locales corresponding
 849  *                     to the acceptable scripts, formatted like an
 850  *                     HTTP Accept-Language header value.
 851  *
 852  * @stable ICU 4.2
 853  */
 854 U_STABLE const char * U_EXPORT2
 855 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status);
 856
 857
 858 /**
 859  * Limit the acceptable characters to those specified by a Unicode Set.
 860  *   Any previously specified character limit is
 861  *   replaced by the new settings.  This includes limits on
 862  *   characters that were set with the uspoof_setAllowedLocales() function.
 863  *
 864  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
 865  * USpoofChecker by this function.
 866  *
 867  * @param sc       The USpoofChecker
 868  * @param chars    A Unicode Set containing the list of
 869  *                 characters that are permitted.  Ownership of the set
 870  *                 remains with the caller.  The incoming set is cloned by
 871  *                 this function, so there are no restrictions on modifying
 872  *                 or deleting the USet after calling this function.
 873  * @param status   The error code, set if this function encounters a problem.
 874  * @stable ICU 4.2
 875  */
 876 U_STABLE void U_EXPORT2
 877 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status);
 878
 879
 880 /**
 881  * Get a USet for the characters permitted in an identifier.
 882  * This corresponds to the limits imposed by the Set Allowed Characters
 883  * functions. Limitations imposed by other checks will not be
 884  * reflected in the set returned by this function.
 885  *
 886  * The returned set will be frozen, meaning that it cannot be modified
 887  * by the caller.
 888  *
 889  * Ownership of the returned set remains with the Spoof Detector.  The
 890  * returned set will become invalid if the spoof detector is closed,
 891  * or if a new set of allowed characters is specified.
 892  *
 893  *
 894  * @param sc       The USpoofChecker
 895  * @param status   The error code, set if this function encounters a problem.
 896  * @return         A USet containing the characters that are permitted by
 897  *                 the USPOOF_CHAR_LIMIT test.
 898  * @stable ICU 4.2
 899  */
 900 U_STABLE const USet * U_EXPORT2
 901 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status);
 902
 903
 904 #if U_SHOW_CPLUSPLUS_API
 905 /**
 906  * Limit the acceptable characters to those specified by a Unicode Set.
 907  *   Any previously specified character limit is
 908  *   replaced by the new settings.  This includes limits on
 909  *   characters that were set with the uspoof_setAllowedLocales() function.
 910  *
 911  * The USPOOF_CHAR_LIMIT test is automatically enabled for this
 912  * USpoofChecker by this function.
 913  *
 914  * @param sc       The USpoofChecker
 915  * @param chars    A Unicode Set containing the list of
 916  *                 characters that are permitted.  Ownership of the set
 917  *                 remains with the caller.  The incoming set is cloned by
 918  *                 this function, so there are no restrictions on modifying
 919  *                 or deleting the UnicodeSet after calling this function.
 920  * @param status   The error code, set if this function encounters a problem.
 921  * @stable ICU 4.2
 922  */
 923 U_STABLE void U_EXPORT2
 924 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status);
 925
 926
 927 /**
 928  * Get a UnicodeSet for the characters permitted in an identifier.
 929  * This corresponds to the limits imposed by the Set Allowed Characters /
 930  * UnicodeSet functions. Limitations imposed by other checks will not be
 931  * reflected in the set returned by this function.
 932  *
 933  * The returned set will be frozen, meaning that it cannot be modified
 934  * by the caller.
 935  *
 936  * Ownership of the returned set remains with the Spoof Detector.  The
 937  * returned set will become invalid if the spoof detector is closed,
 938  * or if a new set of allowed characters is specified.
 939  *
 940  *
 941  * @param sc       The USpoofChecker
 942  * @param status   The error code, set if this function encounters a problem.
 943  * @return         A UnicodeSet containing the characters that are permitted by
 944  *                 the USPOOF_CHAR_LIMIT test.
 945  * @stable ICU 4.2
 946  */
 947 U_STABLE const icu::UnicodeSet * U_EXPORT2
 948 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status);
 949 #endif // U_SHOW_CPLUSPLUS_API
 950
 951
 952 /**
 953  * Check the specified string for possible security issues.
 954  * The text to be checked will typically be an identifier of some sort.
 955  * The set of checks to be performed is specified with uspoof_setChecks().
 956  *
 957  * \note
 958  *   Consider using the newer API, {@link uspoof_check2}, instead.
 959  *   The newer API exposes additional information from the check procedure
 960  *   and is otherwise identical to this method.
 961  *
 962  * @param sc      The USpoofChecker
 963  * @param id      The identifier to be checked for possible security issues,
 964  *                in UTF-16 format.
 965  * @param length  the length of the string to be checked, expressed in
 966  *                16 bit UTF-16 code units, or -1 if the string is
 967  *                zero terminated.
 968  * @param position  Deprecated in ICU 51.  Always returns zero.
 969  *                Originally, an out parameter for the index of the first
 970  *                string position that failed a check.
 971  *                This parameter may be NULL.
 972  * @param status  The error code, set if an error occurred while attempting to
 973  *                perform the check.
 974  *                Spoofing or security issues detected with the input string are
 975  *                not reported here, but through the function's return value.
 976  * @return        An integer value with bits set for any potential security
 977  *                or spoofing issues detected.  The bits are defined by
 978  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
 979  *                will be zero if the input string passes all of the
 980  *                enabled checks.
 981  * @see uspoof_check2
 982  * @stable ICU 4.2
 983  */
 984 U_STABLE int32_t U_EXPORT2
 985 uspoof_check(const USpoofChecker *sc,
 986                          const UChar *id, int32_t length,
 987                          int32_t *position,
 988                          UErrorCode *status);
 989
 990
 991 /**
 992  * Check the specified string for possible security issues.
 993  * The text to be checked will typically be an identifier of some sort.
 994  * The set of checks to be performed is specified with uspoof_setChecks().
 995  *
 996  * \note
 997  *   Consider using the newer API, {@link uspoof_check2UTF8}, instead.
 998  *   The newer API exposes additional information from the check procedure
 999  *   and is otherwise identical to this method.
1000  *
1001  * @param sc      The USpoofChecker
1002  * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
1003  * @param length  the length of the string to be checked, or -1 if the string is
1004  *                zero terminated.
1005  * @param position  Deprecated in ICU 51.  Always returns zero.
1006  *                Originally, an out parameter for the index of the first
1007  *                string position that failed a check.
1008  *                This parameter may be NULL.
1009  * @param status  The error code, set if an error occurred while attempting to
1010  *                perform the check.
1011  *                Spoofing or security issues detected with the input string are
1012  *                not reported here, but through the function's return value.
1013  *                If the input contains invalid UTF-8 sequences,
1014  *                a status of U_INVALID_CHAR_FOUND will be returned.
1015  * @return        An integer value with bits set for any potential security
1016  *                or spoofing issues detected.  The bits are defined by
1017  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
1018  *                will be zero if the input string passes all of the
1019  *                enabled checks.
1020  * @see uspoof_check2UTF8
1021  * @stable ICU 4.2
1022  */
1023 U_STABLE int32_t U_EXPORT2
1024 uspoof_checkUTF8(const USpoofChecker *sc,
1025                  const char *id, int32_t length,
1026                  int32_t *position,
1027                  UErrorCode *status);
1028
1029
1030 #if U_SHOW_CPLUSPLUS_API
1031 /**
1032  * Check the specified string for possible security issues.
1033  * The text to be checked will typically be an identifier of some sort.
1034  * The set of checks to be performed is specified with uspoof_setChecks().
1035  *
1036  * \note
1037  *   Consider using the newer API, {@link uspoof_check2UnicodeString}, instead.
1038  *   The newer API exposes additional information from the check procedure
1039  *   and is otherwise identical to this method.
1040  *
1041  * @param sc      The USpoofChecker
1042  * @param id      An identifier to be checked for possible security issues.
1043  * @param position  Deprecated in ICU 51.  Always returns zero.
1044  *                Originally, an out parameter for the index of the first
1045  *                string position that failed a check.
1046  *                This parameter may be NULL.
1047  * @param status  The error code, set if an error occurred while attempting to
1048  *                perform the check.
1049  *                Spoofing or security issues detected with the input string are
1050  *                not reported here, but through the function's return value.
1051  * @return        An integer value with bits set for any potential security
1052  *                or spoofing issues detected.  The bits are defined by
1053  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
1054  *                will be zero if the input string passes all of the
1055  *                enabled checks.
1056  * @see uspoof_check2UnicodeString
1057  * @stable ICU 4.2
1058  */
1059 U_STABLE int32_t U_EXPORT2
1060 uspoof_checkUnicodeString(const USpoofChecker *sc,
1061                           const icu::UnicodeString &id,
1062                           int32_t *position,
1063                           UErrorCode *status);
1064 #endif // U_SHOW_CPLUSPLUS_API
1065
1066
1067 /**
1068  * Check the specified string for possible security issues.
1069  * The text to be checked will typically be an identifier of some sort.
1070  * The set of checks to be performed is specified with uspoof_setChecks().
1071  *
1072  * @param sc      The USpoofChecker
1073  * @param id      The identifier to be checked for possible security issues,
1074  *                in UTF-16 format.
1075  * @param length  the length of the string to be checked, or -1 if the string is
1076  *                zero terminated.
1077  * @param checkResult  An instance of USpoofCheckResult to be filled with
1078  *                details about the identifier.  Can be NULL.
1079  * @param status  The error code, set if an error occurred while attempting to
1080  *                perform the check.
1081  *                Spoofing or security issues detected with the input string are
1082  *                not reported here, but through the function's return value.
1083  * @return        An integer value with bits set for any potential security
1084  *                or spoofing issues detected.  The bits are defined by
1085  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
1086  *                will be zero if the input string passes all of the
1087  *                enabled checks.  Any information in this bitmask will be
1088  *                consistent with the information saved in the optional
1089  *                checkResult parameter.
1090  * @see uspoof_openCheckResult
1091  * @see uspoof_check2UTF8
1092  * @see uspoof_check2UnicodeString
1093  * @stable ICU 58
1094  */
1095 U_STABLE int32_t U_EXPORT2
1096 uspoof_check2(const USpoofChecker *sc,
1097     const UChar* id, int32_t length,
1098     USpoofCheckResult* checkResult,
1099     UErrorCode *status);
1100
1101 /**
1102  * Check the specified string for possible security issues.
1103  * The text to be checked will typically be an identifier of some sort.
1104  * The set of checks to be performed is specified with uspoof_setChecks().
1105  *
1106  * This version of {@link uspoof_check} accepts a USpoofCheckResult, which
1107  * returns additional information about the identifier.  For more
1108  * information, see {@link uspoof_openCheckResult}.
1109  *
1110  * @param sc      The USpoofChecker
1111  * @param id      An identifier to be checked for possible security issues, in UTF-8 format.
1112  * @param length  the length of the string to be checked, or -1 if the string is
1113  *                zero terminated.
1114  * @param checkResult  An instance of USpoofCheckResult to be filled with
1115  *                details about the identifier.  Can be NULL.
1116  * @param status  The error code, set if an error occurred while attempting to
1117  *                perform the check.
1118  *                Spoofing or security issues detected with the input string are
1119  *                not reported here, but through the function's return value.
1120  * @return        An integer value with bits set for any potential security
1121  *                or spoofing issues detected.  The bits are defined by
1122  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
1123  *                will be zero if the input string passes all of the
1124  *                enabled checks.  Any information in this bitmask will be
1125  *                consistent with the information saved in the optional
1126  *                checkResult parameter.
1127  * @see uspoof_openCheckResult
1128  * @see uspoof_check2
1129  * @see uspoof_check2UnicodeString
1130  * @stable ICU 58
1131  */
1132 U_STABLE int32_t U_EXPORT2
1133 uspoof_check2UTF8(const USpoofChecker *sc,
1134     const char *id, int32_t length,
1135     USpoofCheckResult* checkResult,
1136     UErrorCode *status);
1137
1138 #if U_SHOW_CPLUSPLUS_API
1139 /**
1140  * Check the specified string for possible security issues.
1141  * The text to be checked will typically be an identifier of some sort.
1142  * The set of checks to be performed is specified with uspoof_setChecks().
1143  *
1144  * @param sc      The USpoofChecker
1145  * @param id      An identifier to be checked for possible security issues.
1146  * @param checkResult  An instance of USpoofCheckResult to be filled with
1147  *                details about the identifier.  Can be NULL.
1148  * @param status  The error code, set if an error occurred while attempting to
1149  *                perform the check.
1150  *                Spoofing or security issues detected with the input string are
1151  *                not reported here, but through the function's return value.
1152  * @return        An integer value with bits set for any potential security
1153  *                or spoofing issues detected.  The bits are defined by
1154  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
1155  *                will be zero if the input string passes all of the
1156  *                enabled checks.  Any information in this bitmask will be
1157  *                consistent with the information saved in the optional
1158  *                checkResult parameter.
1159  * @see uspoof_openCheckResult
1160  * @see uspoof_check2
1161  * @see uspoof_check2UTF8
1162  * @stable ICU 58
1163  */
1164 U_STABLE int32_t U_EXPORT2
1165 uspoof_check2UnicodeString(const USpoofChecker *sc,
1166     const icu::UnicodeString &id,
1167     USpoofCheckResult* checkResult,
1168     UErrorCode *status);
1169 #endif // U_SHOW_CPLUSPLUS_API
1170
1171 /**
1172  * Create a USpoofCheckResult, used by the {@link uspoof_check2} class of functions to return
1173  * information about the identifier.  Information includes:
1174  * <ul>
1175  *   <li>A bitmask of the checks that failed</li>
1176  *   <li>The identifier's restriction level (UTS 39 section 5.2)</li>
1177  *   <li>The set of numerics in the string (UTS 39 section 5.3)</li>
1178  * </ul>
1179  * The data held in a USpoofCheckResult is cleared whenever it is passed into a new call
1180  * of {@link uspoof_check2}.
1181  *
1182  * @param status  The error code, set if this function encounters a problem.
1183  * @return        the newly created USpoofCheckResult
1184  * @see uspoof_check2
1185  * @see uspoof_check2UTF8
1186  * @see uspoof_check2UnicodeString
1187  * @stable ICU 58
1188  */
1189 U_STABLE USpoofCheckResult* U_EXPORT2
1190 uspoof_openCheckResult(UErrorCode *status);
1191
1192 /**
1193  * Close a USpoofCheckResult, freeing any memory that was being held by
1194  *   its implementation.
1195  *
1196  * @param checkResult  The instance of USpoofCheckResult to close
1197  * @stable ICU 58
1198  */
1199 U_STABLE void U_EXPORT2
1200 uspoof_closeCheckResult(USpoofCheckResult *checkResult);
1201
1202 #if U_SHOW_CPLUSPLUS_API
1203
1204 U_NAMESPACE_BEGIN
1205
1206 /**
1207  * \class LocalUSpoofCheckResultPointer
1208  * "Smart pointer" class, closes a USpoofCheckResult via `uspoof_closeCheckResult()`.
1209  * For most methods see the LocalPointerBase base class.
1210  *
1211  * @see LocalPointerBase
1212  * @see LocalPointer
1213  * @stable ICU 58
1214  */
1215
1216 /**
1217  * \cond
1218  * Note: Doxygen is giving a bogus warning on this U_DEFINE_LOCAL_OPEN_POINTER.
1219  *       For now, suppress with a Doxygen cond
1220  */
1221 U_DEFINE_LOCAL_OPEN_POINTER(LocalUSpoofCheckResultPointer, USpoofCheckResult, uspoof_closeCheckResult);
1222 /** \endcond */
1223
1224 U_NAMESPACE_END
1225
1226 #endif // U_SHOW_CPLUSPLUS_API
1227
1228 /**
1229  * Indicates which of the spoof check(s) have failed. The value is a bitwise OR of the constants for the tests
1230  * in question: USPOOF_RESTRICTION_LEVEL, USPOOF_CHAR_LIMIT, and so on.
1231  *
1232  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1233  * @param status       The error code, set if an error occurred.
1234  * @return        An integer value with bits set for any potential security
1235  *                or spoofing issues detected.  The bits are defined by
1236  *                enum USpoofChecks.  (returned_value & USPOOF_ALL_CHECKS)
1237  *                will be zero if the input string passes all of the
1238  *                enabled checks.
1239  * @see uspoof_setChecks
1240  * @stable ICU 58
1241  */
1242 U_STABLE int32_t U_EXPORT2
1243 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status);
1244
1245 /**
1246  * Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check
1247  * was enabled; otherwise, undefined.
1248  *
1249  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1250  * @param status       The error code, set if an error occurred.
1251  * @return             The restriction level contained in the USpoofCheckResult
1252  * @see uspoof_setRestrictionLevel
1253  * @stable ICU 58
1254  */
1255 U_STABLE URestrictionLevel U_EXPORT2
1256 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status);
1257
1258 /**
1259  * Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled;
1260  * otherwise, undefined.  The set will contain the zero digit from each decimal number system found
1261  * in the input string.  Ownership of the returned USet remains with the USpoofCheckResult.
1262  * The USet will be freed when {@link uspoof_closeCheckResult} is called.
1263  *
1264  * @param checkResult  The instance of USpoofCheckResult created by {@link uspoof_openCheckResult}
1265  * @return             The set of numerics contained in the USpoofCheckResult
1266  * @param status       The error code, set if an error occurred.
1267  * @stable ICU 58
1268  */
1269 U_STABLE const USet* U_EXPORT2
1270 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status);
1271
1272
1273 /**
1274  * Check whether two specified strings are visually confusable.
1275  *
1276  * If the strings are confusable, the return value will be nonzero, as long as
1277  * {@link USPOOF_CONFUSABLE} was enabled in uspoof_setChecks().
1278  *
1279  * The bits in the return value correspond to flags for each of the classes of
1280  * confusables applicable to the two input strings.  According to UTS 39
1281  * section 4, the possible flags are:
1282  *
1283  * <ul>
1284  *   <li>{@link USPOOF_SINGLE_SCRIPT_CONFUSABLE}</li>
1285  *   <li>{@link USPOOF_MIXED_SCRIPT_CONFUSABLE}</li>
1286  *   <li>{@link USPOOF_WHOLE_SCRIPT_CONFUSABLE}</li>
1287  * </ul>
1288  *
1289  * If one or more of the above flags were not listed in uspoof_setChecks(), this
1290  * function will never report that class of confusable.  The check
1291  * {@link USPOOF_CONFUSABLE} enables all three flags.
1292  *
1293  *
1294  * @param sc      The USpoofChecker
1295  * @param id1     The first of the two identifiers to be compared for
1296  *                confusability.  The strings are in UTF-16 format.
1297  * @param length1 the length of the first identifier, expressed in
1298  *                16 bit UTF-16 code units, or -1 if the string is
1299  *                nul terminated.
1300  * @param id2     The second of the two identifiers to be compared for
1301  *                confusability.  The identifiers are in UTF-16 format.
1302  * @param length2 The length of the second identifier, expressed in
1303  *                16 bit UTF-16 code units, or -1 if the string is
1304  *                nul terminated.
1305  * @param status  The error code, set if an error occurred while attempting to
1306  *                perform the check.
1307  *                Confusability of the identifiers is not reported here,
1308  *                but through this function's return value.
1309  * @return        An integer value with bit(s) set corresponding to
1310  *                the type of confusability found, as defined by
1311  *                enum USpoofChecks.  Zero is returned if the identifiers
1312  *                are not confusable.
1313  *
1314  * @stable ICU 4.2
1315  */
1316 U_STABLE int32_t U_EXPORT2
1317 uspoof_areConfusable(const USpoofChecker *sc,
1318                      const UChar *id1, int32_t length1,
1319                      const UChar *id2, int32_t length2,
1320                      UErrorCode *status);
1321
1322
1323
1324 /**
1325  * A version of {@link uspoof_areConfusable} accepting strings in UTF-8 format.
1326  *
1327  * @param sc      The USpoofChecker
1328  * @param id1     The first of the two identifiers to be compared for
1329  *                confusability.  The strings are in UTF-8 format.
1330  * @param length1 the length of the first identifier, in bytes, or -1
1331  *                if the string is nul terminated.
1332  * @param id2     The second of the two identifiers to be compared for
1333  *                confusability.  The strings are in UTF-8 format.
1334  * @param length2 The length of the second string in bytes, or -1
1335  *                if the string is nul terminated.
1336  * @param status  The error code, set if an error occurred while attempting to
1337  *                perform the check.
1338  *                Confusability of the strings is not reported here,
1339  *                but through this function's return value.
1340  * @return        An integer value with bit(s) set corresponding to
1341  *                the type of confusability found, as defined by
1342  *                enum USpoofChecks.  Zero is returned if the strings
1343  *                are not confusable.
1344  *
1345  * @stable ICU 4.2
1346  *
1347  * @see uspoof_areConfusable
1348  */
1349 U_STABLE int32_t U_EXPORT2
1350 uspoof_areConfusableUTF8(const USpoofChecker *sc,
1351                          const char *id1, int32_t length1,
1352                          const char *id2, int32_t length2,
1353                          UErrorCode *status);
1354
1355
1356
1357
1358 #if U_SHOW_CPLUSPLUS_API
1359 /**
1360  * A version of {@link uspoof_areConfusable} accepting UnicodeStrings.
1361  *
1362  * @param sc      The USpoofChecker
1363  * @param s1      The first of the two identifiers to be compared for
1364  *                confusability, supplied as a UnicodeString.
1365  * @param s2      The second of the two identifiers to be compared for
1366  *                confusability, supplied as a UnicodeString.
1367  * @param status  The error code, set if an error occurred while attempting to
1368  *                perform the check.
1369  *                Confusability of the identifiers is not reported here,
1370  *                but through this function's return value.
1371  * @return        An integer value with bit(s) set corresponding to
1372  *                the type of confusability found, as defined by
1373  *                enum USpoofChecks.  Zero is returned if the identifiers
1374  *                are not confusable.
1375  *
1376  * @stable ICU 4.2
1377  *
1378  * @see uspoof_areConfusable
1379  */
1380 U_STABLE int32_t U_EXPORT2
1381 uspoof_areConfusableUnicodeString(const USpoofChecker *sc,
1382                                   const icu::UnicodeString &s1,
1383                                   const icu::UnicodeString &s2,
1384                                   UErrorCode *status);
1385 #endif // U_SHOW_CPLUSPLUS_API
1386
1387
1388 /**
1389  *  Get the "skeleton" for an identifier.
1390  *  Skeletons are a transformation of the input identifier;
1391  *  two identifiers are confusable if their skeletons are identical.
1392  *  See Unicode UTS #39 for additional information.
1393  *
1394  *  Using skeletons directly makes it possible to quickly check
1395  *  whether an identifier is confusable with any of some large
1396  *  set of existing identifiers, by creating an efficiently
1397  *  searchable collection of the skeletons.
1398  *
1399  * @param sc      The USpoofChecker
1400  * @param type    Deprecated in ICU 58.  You may pass any number.
1401  *                Originally, controlled which of the Unicode confusable data
1402  *                tables to use.
1403  * @param id      The input identifier whose skeleton will be computed.
1404  * @param length  The length of the input identifier, expressed in 16-bit
1405  *                UTF-16 code units, or -1 if the string is zero terminated.
1406  * @param dest    The output buffer, to receive the skeleton string.
1407  * @param destCapacity  The length of the output buffer, in 16-bit units.
1408  *                The destCapacity may be zero, in which case the function will
1409  *                return the actual length of the skeleton.
1410  * @param status  The error code, set if an error occurred while attempting to
1411  *                perform the check.
1412  * @return        The length of the skeleton string.  The returned length
1413  *                is always that of the complete skeleton, even when the
1414  *                supplied buffer is too small (or of zero length).
1415  *
1416  * @stable ICU 4.2
1417  * @see uspoof_areConfusable
1418  */
1419 U_STABLE int32_t U_EXPORT2
1420 uspoof_getSkeleton(const USpoofChecker *sc,
1421                    uint32_t type,
1422                    const UChar *id,  int32_t length,
1423                    UChar *dest, int32_t destCapacity,
1424                    UErrorCode *status);
1425
1426 /**
1427  *  Get the "skeleton" for an identifier supplied in UTF-8 format.
1428  *  Skeletons are a transformation of the input identifier;
1429  *  two identifiers are confusable if their skeletons are identical.
1430  *  See Unicode UTS #39 for additional information.
1431  *
1432  *  Using skeletons directly makes it possible to quickly check
1433  *  whether an identifier is confusable with any of some large
1434  *  set of existing identifiers, by creating an efficiently
1435  *  searchable collection of the skeletons.
1436  *
1437  * @param sc      The USpoofChecker
1438  * @param type    Deprecated in ICU 58.  You may pass any number.
1439  *                Originally, controlled which of the Unicode confusable data
1440  *                tables to use.
1441  * @param id      The UTF-8 format identifier whose skeleton will be computed.
1442  * @param length  The length of the input string, in bytes,
1443  *                or -1 if the string is zero terminated.
1444  * @param dest    The output buffer, to receive the skeleton string.
1445  * @param destCapacity  The length of the output buffer, in bytes.
1446  *                The destCapacity may be zero, in which case the function will
1447  *                return the actual length of the skeleton.
1448  * @param status  The error code, set if an error occurred while attempting to
1449  *                perform the check.  Possible errors include
1450  *                U_INVALID_CHAR_FOUND for invalid UTF-8 sequences, and
1451  *                U_BUFFER_OVERFLOW_ERROR if the destination buffer is too small
1452  *                to hold the complete skeleton.
1453  * @return        The length of the skeleton string, in bytes.  The returned length
1454  *                is always that of the complete skeleton, even when the
1455  *                supplied buffer is too small (or of zero length).
1456  *
1457  * @stable ICU 4.2
1458  */
1459 U_STABLE int32_t U_EXPORT2
1460 uspoof_getSkeletonUTF8(const USpoofChecker *sc,
1461                        uint32_t type,
1462                        const char *id,  int32_t length,
1463                        char *dest, int32_t destCapacity,
1464                        UErrorCode *status);
1465
1466 #if U_SHOW_CPLUSPLUS_API
1467 /**
1468  *  Get the "skeleton" for an identifier supplied as a UnicodeString.
1469  *  Skeletons are a transformation of the input identifier;
1470  *  two identifiers are confusable if their skeletons are identical.
1471  *  See Unicode UTS #39 for additional information.
1472  *
1473  *  Using skeletons directly makes it possible to quickly check
1474  *  whether an identifier is confusable with any of some large
1475  *  set of existing identifiers, by creating an efficiently
1476  *  searchable collection of the skeletons.
1477  *
1478  * @param sc      The USpoofChecker.
1479  * @param type    Deprecated in ICU 58.  You may pass any number.
1480  *                Originally, controlled which of the Unicode confusable data
1481  *                tables to use.
1482  * @param id      The input identifier whose skeleton will be computed.
1483  * @param dest    The output string, which receives the skeleton.
1484  * @param status  The error code, set if an error occurred while attempting to
1485  *                perform the check.
1486  * @return        A reference to the destination (skeleton) string.
1487  *
1488  * @stable ICU 4.2
1489  */
1490 U_I18N_API icu::UnicodeString & U_EXPORT2
1491 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc,
1492                                 uint32_t type,
1493                                 const icu::UnicodeString &id,
1494                                 icu::UnicodeString &dest,
1495                                 UErrorCode *status);
1496 #endif // U_SHOW_CPLUSPLUS_API
1497
1498 /**
1499   * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
1500   * in http://unicode.org/Public/security/latest/xidmodifications.txt
1501   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1502   *
1503   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1504   * be deleted by the caller.
1505   *
1506   * @param status The error code, set if a problem occurs while creating the set.
1507   * @return       The frozen inclusion set, as a USet; the caller must not delete it.
1508   * @stable ICU 51
1509   */
1510 U_STABLE const USet * U_EXPORT2
1511 uspoof_getInclusionSet(UErrorCode *status);
1512
1513 /**
1514   * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
1515   * in http://unicode.org/Public/security/latest/xidmodifications.txt
1516   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1517   *
1518   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1519   * be deleted by the caller.
1520   *
1521   * @param status The error code, set if a problem occurs while creating the set.
1522   * @return       The frozen recommended set, as a USet; the caller must not delete it.
1523   * @stable ICU 51
1524   */
1525 U_STABLE const USet * U_EXPORT2
1526 uspoof_getRecommendedSet(UErrorCode *status);
1527
1528 #if U_SHOW_CPLUSPLUS_API
1529
1530 /**
1531   * Get the set of Candidate Characters for Inclusion in Identifiers, as defined
1532   * in http://unicode.org/Public/security/latest/xidmodifications.txt
1533   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1534   *
1535   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1536   * be deleted by the caller.
1537   *
1538   * @param status The error code, set if a problem occurs while creating the set.
1539   * @return       The frozen inclusion set, as a UnicodeSet; the caller must not delete it.
1540   * @stable ICU 51
1541   */
1542 U_STABLE const icu::UnicodeSet * U_EXPORT2
1543 uspoof_getInclusionUnicodeSet(UErrorCode *status);
1544
1545 /**
1546   * Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined
1547   * in http://unicode.org/Public/security/latest/xidmodifications.txt
1548   * and documented in http://www.unicode.org/reports/tr39/, Unicode Security Mechanisms.
1549   *
1550   * The returned set is frozen. Ownership of the set remains with the ICU library; it must not
1551   * be deleted by the caller.
1552   *
1553   * @param status The error code, set if a problem occurs while creating the set.
1554   * @return       The frozen recommended set, as a UnicodeSet; the caller must not delete it.
1555   * @stable ICU 51
1556   */
1557 U_STABLE const icu::UnicodeSet * U_EXPORT2
1558 uspoof_getRecommendedUnicodeSet(UErrorCode *status);
1559
1560 #endif // U_SHOW_CPLUSPLUS_API
1561
1562 /**
1563  * Serialize the data for a spoof detector into a chunk of memory.
1564  * The flattened spoof detection tables can later be used to efficiently
1565  * instantiate a new Spoof Detector.
1566  *
1567  * The serialized spoof checker includes only the data compiled from the
1568  * Unicode data tables by uspoof_openFromSource(); it does not
1569  * include any other state or configuration that may have been set.
1570  *
1571  * @param sc   the Spoof Detector whose data is to be serialized.
1572  * @param data a pointer to 32-bit-aligned memory to be filled with the data,
1573  *             can be NULL if capacity==0
1574  * @param capacity the number of bytes available at data,
1575  *                 or 0 for preflighting
1576  * @param status an in/out ICU UErrorCode; possible errors include:
1577  * - U_BUFFER_OVERFLOW_ERROR if the data storage block is too small for serialization
1578  * - U_ILLEGAL_ARGUMENT_ERROR if the data or capacity parameters are bad
1579  * @return the number of bytes written or needed for the spoof data
1580  *
1581  * @see uspoof_openFromSerialized()
1582  * @stable ICU 4.2
1583  */
1584 U_STABLE int32_t U_EXPORT2
1585 uspoof_serialize(USpoofChecker *sc,
1586                  void *data, int32_t capacity,
1587                  UErrorCode *status);
1588
1589
1590 #endif
1591
1592 #endif   /* USPOOF_H */