/*
***************************************************************************
-* Copyright (C) 2008-2013, International Business Machines Corporation
+* Copyright (C) 2008-2015, International Business Machines Corporation
* and others. All Rights Reserved.
***************************************************************************
* file name: uspoof.h
* identifying new pairs or sequences of characters that are visually
* confusable, and thus must be mapped to the same skeleton character(s).
*
+ * Skeletons are computed using the algorithm and data described in Unicode UAX 39.
+ * The latest proposed update, UAX 39 Version 8 draft 1, says "the tables SL, SA, and ML
+ * were still problematic, and discouraged from use in [Unicode] 7.0.
+ * They were thus removed from version 8.0"
+ *
+ * In light of this, the default mapping data included with ICU 55 uses the
+ * Unicode 7 MA (Multi Script, Any Case) table data for the other type options
+ * (Single Script, Any Case), (Single Script, Lower Case) and (Multi Script, Lower Case).
*/
struct USpoofChecker;
*/
USPOOF_CHAR_LIMIT = 64,
-#ifndef U_HIDE_DRAFT_API
/**
* Check that an identifier does not include decimal digits from
* more than one numbering system.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
USPOOF_MIXED_NUMBERS = 128,
-#endif /* U_HIDE_DRAFT_API */
/**
* Enable all spoof checks.
*/
USPOOF_ALL_CHECKS = 0xFFFF,
-#ifndef U_HIDE_DRAFT_API
/**
* Enable the return of auxillary (non-error) information in the
* upper bits of the check results value.
* If this "check" is enabled, (uspoof_check() & USPOOF_ALL_CHECKS) will be zero
* when an identifier passes all checks.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
USPOOF_AUX_INFO = 0x40000000
-#endif /* U_HIDE_DRAFT_API */
} USpoofChecks;
-#ifndef U_HIDE_DRAFT_API
/**
* Constants from UAX #39 for use in setRestrictionLevel(), and
* for returned identifier restriction levels in check results.
- * @draft ICU 51
+ * @stable ICU 51
*/
typedef enum URestrictionLevel {
/**
* Only ASCII characters: U+0000..U+007F
*
- * @draft ICU 51
+ * @stable ICU 51
*/
USPOOF_ASCII = 0x10000000,
+ /**
+ * All characters in each identifier must be from a single script.
+ *
+ * @stable ICU 53
+ */
+ USPOOF_SINGLE_SCRIPT_RESTRICTIVE = 0x20000000,
/**
* All characters in each identifier must be from a single script, or from the combinations: Latin + Han +
* Hiragana + Katakana; Latin + Han + Bopomofo; or Latin + Han + Hangul. Note that this level will satisfy the
* vast majority of Latin-script users; also that TR36 has ASCII instead of Latin.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
- USPOOF_HIGHLY_RESTRICTIVE = 0x20000000,
+ USPOOF_HIGHLY_RESTRICTIVE = 0x30000000,
/**
* Allow Latin with other scripts except Cyrillic, Greek, Cherokee Otherwise, the same as Highly Restrictive
*
- * @draft ICU 51
+ * @stable ICU 51
*/
- USPOOF_MODERATELY_RESTRICTIVE = 0x30000000,
+ USPOOF_MODERATELY_RESTRICTIVE = 0x40000000,
/**
* Allow arbitrary mixtures of scripts. Otherwise, the same as Moderately Restrictive.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
- USPOOF_MINIMALLY_RESTRICTIVE = 0x40000000,
+ USPOOF_MINIMALLY_RESTRICTIVE = 0x50000000,
/**
* Any valid identifiers, including characters outside of the Identifier Profile.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
- USPOOF_UNRESTRICTIVE = 0x50000000
+ USPOOF_UNRESTRICTIVE = 0x60000000,
+ /**
+ * Mask for selecting the Restriction Level bits from the return value of uspoof_check().
+ *
+ * @stable ICU 53
+ */
+ USPOOF_RESTRICTION_LEVEL_MASK = 0x7F000000
} URestrictionLevel;
-#endif /* U_HIDE_DRAFT_API */
/**
* Create a Unicode Spoof Checker, configured to perform all
/**
- * Open a Spoof checker from its serialized from, stored in 32-bit-aligned memory.
+ * Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
* Inverse of uspoof_serialize().
* The memory containing the serialized data must remain valid and unchanged
* as long as the spoof checker, or any cloned copies of the spoof checker,
/**
* Open a Spoof Checker from the source form of the spoof data.
- * The Three inputs correspond to the Unicode data files confusables.txt
- * confusablesWholeScript.txt and xidmdifications.txt as described in
- * Unicode UAX #39. The syntax of the source data is as described in UAX #39 for
+ * The two inputs correspond to the Unicode data files confusables.txt
+ * and confusablesWholeScript.txt as described in Unicode UAX #39.
+ * The syntax of the source data is as described in UAX #39 for
* these files, and the content of these files is acceptable input.
*
* The character encoding of the (char *) input text is UTF-8.
U_STABLE int32_t U_EXPORT2
uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status);
-#ifndef U_HIDE_DRAFT_API
/**
* Set the loosest restriction level allowed. The default if this function
* is not called is HIGHLY_RESTRICTIVE.
* Calling this function also enables the RESTRICTION_LEVEL check.
* @param restrictionLevel The loosest restriction level allowed.
* @see URestrictionLevel
- * @draft ICU 51
+ * @stable ICU 51
*/
-U_DRAFT void U_EXPORT2
+U_STABLE void U_EXPORT2
uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel);
*
* @return The restriction level
* @see URestrictionLevel
- * @draft ICU 51
+ * @stable ICU 51
*/
-U_DRAFT URestrictionLevel U_EXPORT2
+U_STABLE URestrictionLevel U_EXPORT2
uspoof_getRestrictionLevel(const USpoofChecker *sc);
-#endif /* U_HIDE_DRAFT_API */
/**
* Limit characters that are acceptable in identifiers being checked to those
* USpoofChecker.
*
* @param sc The USpoofChecker
- * @param id1 The first of the two identifiers to be compared for
+ * @param s1 The first of the two identifiers to be compared for
* confusability. The strings are in UTF-8 format.
- * @param id2 The second of the two identifiers to be compared for
+ * @param s2 The second of the two identifiers to be compared for
* confusability. The strings are in UTF-8 format.
* @param status The error code, set if an error occurred while attempting to
* perform the check.
* of the Unicode confusable data tables to use.
* The default is Mixed-Script, Lowercase.
* Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
- * USPOOF_ANY_CASE_CONFUSABLE. The two flags may be ORed.
+ * USPOOF_ANY_CASE. The two flags may be ORed.
* @param id The input identifier whose skeleton will be computed.
* @param length The length of the input identifier, expressed in 16 bit
* UTF-16 code units, or -1 if the string is zero terminated.
* of the Unicode confusable data tables to use.
* The default is Mixed-Script, Lowercase.
* Allowed options are USPOOF_SINGLE_SCRIPT_CONFUSABLE and
- * USPOOF_ANY_CASE_CONFUSABLE. The two flags may be ORed.
+ * USPOOF_ANY_CASE. The two flags may be ORed.
* @param id The input identifier whose skeleton will be computed.
* @param dest The output identifier, to receive the skeleton string.
* @param status The error code, set if an error occurred while attempting to
UErrorCode *status);
#endif /* U_SHOW_CPLUSPLUS_API */
-
-#ifndef U_HIDE_DRAFT_API
/**
* Get the set of Candidate Characters for Inclusion in Identifiers, as defined
* in Unicode UAX #31, http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
*
* @param status The error code, set if a problem occurs while creating the set.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
-U_DRAFT const USet * U_EXPORT2
+U_STABLE const USet * U_EXPORT2
uspoof_getInclusionSet(UErrorCode *status);
/**
*
* @param status The error code, set if a problem occurs while creating the set.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
-U_DRAFT const USet * U_EXPORT2
+U_STABLE const USet * U_EXPORT2
uspoof_getRecommendedSet(UErrorCode *status);
#if U_SHOW_CPLUSPLUS_API
*
* @param status The error code, set if a problem occurs while creating the set.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
-U_DRAFT const icu::UnicodeSet * U_EXPORT2
+U_STABLE const icu::UnicodeSet * U_EXPORT2
uspoof_getInclusionUnicodeSet(UErrorCode *status);
/**
*
* @param status The error code, set if a problem occurs while creating the set.
*
- * @draft ICU 51
+ * @stable ICU 51
*/
-U_DRAFT const icu::UnicodeSet * U_EXPORT2
+U_STABLE const icu::UnicodeSet * U_EXPORT2
uspoof_getRecommendedUnicodeSet(UErrorCode *status);
#endif /* U_SHOW_CPLUSPLUS_API */
-#endif /* U_HIDE_DRAFT_API */
/**
* Serialize the data for a spoof detector into a chunk of memory.