1 // © 2016 and later: Unicode, Inc. and others. 
   2 // License & terms of use: http://www.unicode.org/copyright.html 
   4  ******************************************************************************* 
   6  *   Copyright (C) 2003-2014, International Business Machines 
   7  *   Corporation and others.  All Rights Reserved. 
   9  ******************************************************************************* 
  12  *   tab size:   8 (not used) 
  15  *   created on: 2003jul2 
  16  *   created by: Ram Viswanadha 
  24  * \brief C API: Implements the StringPrep algorithm. 
  27 #include "unicode/utypes.h" 
  28 #include "unicode/localpointer.h" 
  32  * StringPrep API implements the StringPrep framework as described by RFC 3454. 
  33  * StringPrep prepares Unicode strings for use in network protocols. 
  34  * Profiles of StringPrep are sets of rules and data according to which the 
  35  * Unicode Strings are prepared. Each profile contains tables which describe 
  36  * how a code point should be treated. The tables are broadly classified into 
  38  *     <li> Unassigned Table: Contains code points that are unassigned  
  39  *          in the Unicode Version supported by StringPrep. Currently  
  40  *          RFC 3454 supports Unicode 3.2. </li> 
  41  *     <li> Prohibited Table: Contains code points that are prohibited from 
  42  *          the output of the StringPrep processing function. </li> 
  43  *     <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li> 
  46  * The procedure for preparing Unicode strings: 
  48  *      <li> Map: For each character in the input, check if it has a mapping 
  49  *           and, if so, replace it with its mapping. </li> 
  50  *      <li> Normalize: Possibly normalize the result of step 1 using Unicode 
  51  *           normalization. </li> 
  52  *      <li> Prohibit: Check for any characters that are not allowed in the 
  53  *        output.  If any are found, return an error.</li> 
  54  *      <li> Check bidi: Possibly check for right-to-left characters, and if 
  55  *           any are found, make sure that the whole string satisfies the 
  56  *           requirements for bidirectional strings.  If the string does not 
  57  *           satisfy the requirements for bidirectional strings, return an 
  60  * @author Ram Viswanadha 
  64 #include "unicode/parseerr.h" 
  67  * The StringPrep profile 
  /* Handle type for a StringPrep profile. Only the struct tag is declared
   * here (no member list is visible in this section); the functions declared
   * in this header pass the profile around by pointer. */
  70 typedef struct UStringPrepProfile UStringPrepProfile
; 
  74  * Option to prohibit processing of unassigned code points in the input 
  /* Option value with no bits set (0x0000): processing of unassigned code
   * points in the input is prohibited. Intended for the `options` bit set
   * described in the usprep_prepare() documentation. */
  79 #define USPREP_DEFAULT 0x0000 
  82  * Option to allow processing of unassigned code points in the input 
  /* Option bit 0x0001: unassigned code points in the input are processed as
   * normal Unicode code points instead of being prohibited. Intended for the
   * `options` bit set described in the usprep_prepare() documentation. */
  87 #define USPREP_ALLOW_UNASSIGNED 0x0001 
  90  * enums for the standard stringprep profile types 
  91  * supported by usprep_openByType. 
  92  * @see usprep_openByType 
  /* Selects one of the standard StringPrep profiles; this is the type of the
   * `type` argument of usprep_openByType(). No enumerator carries an explicit
   * initializer, so values follow declaration order. */
  95 typedef enum UStringPrepProfileType 
{ 
 /* NOTE(review): no description is visible for this enumerator; by its name
  * it presumably selects the RFC 3491 Nameprep profile -- confirm. */
 100     USPREP_RFC3491_NAMEPREP
, 
 102      * RFC3530 nfs4_cs_prep 
 105         USPREP_RFC3530_NFS4_CS_PREP
, 
 107      * RFC3530 nfs4_cs_prep with case insensitive option 
 110         USPREP_RFC3530_NFS4_CS_PREP_CI
, 
 112      * RFC3530 nfs4_cis_prep 
 115         USPREP_RFC3530_NFS4_CIS_PREP
, 
 117      * RFC3530 nfs4_mixed_prep for prefix 
 120         USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX
, 
 122      * RFC3530 nfs4_mixed_prep for suffix 
 125         USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX
, 
 /* NOTE(review): no description is visible for this enumerator; by its name
  * it presumably selects the RFC 3722 iSCSI profile -- confirm. */
 130         USPREP_RFC3722_ISCSI
, 
 132      * RFC3920 XMPP Nodeprep 
 135         USPREP_RFC3920_NODEPREP
, 
 137      * RFC3920 XMPP Resourceprep 
 140         USPREP_RFC3920_RESOURCEPREP
, 
 142      * RFC4011 Policy MIB Stringprep 
 /* NOTE(review): the RFC4011 description directly above has no matching
  * enumerator in this view and does NOT describe the next enumerator, whose
  * name refers to RFC 4013 (presumably the SASLprep profile -- confirm).
  * Verify against the full file whether an RFC4011 enumerator is missing. */
 150     USPREP_RFC4013_SASLPREP
, 
 /* NOTE(review): no description is visible for this enumerator; by its name
  * it presumably selects the RFC 4505 trace profile -- confirm. */
 155         USPREP_RFC4505_TRACE
, 
 162      * RFC4518 LDAP for case ignore, numeric and stored prefix 
 166         USPREP_RFC4518_LDAP_CI
 
 167 } UStringPrepProfileType
; 
 170  * Creates a StringPrep profile from the data file. 
 172  * @param path      string containing the full path pointing to the directory 
 173  *                  where the profile resides, followed by the package name 
 174  *                  e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system. 
 175  *                  if NULL, ICU default data files will be used. 
 176  * @param fileName  name of the profile file to be opened 
 177  * @param status    ICU error code in/out parameter. Must not be NULL. 
 178  *                  Must fulfill U_SUCCESS before the function call. 
 179  * @return Pointer to UStringPrepProfile that is opened. Should be closed by 
 180  * calling usprep_close() 
 181  * @see usprep_close() 
 184 U_STABLE UStringPrepProfile
* U_EXPORT2
 
 185 usprep_open(const char* path
,  
 186             const char* fileName
, 
 190  * Creates a StringPrep profile for the specified profile type. 
 192  * @param type          The profile type 
 193  * @param status    ICU error code in/out parameter. Must not be NULL. 
 194  *                  Must fulfill U_SUCCESS before the function call. 
 195  * @return          Pointer to UStringPrepProfile that is opened. Should be closed by 
 196  *                  calling usprep_close() 
 197  * @see usprep_close() 
 200 U_STABLE UStringPrepProfile
* U_EXPORT2
 
 201 usprep_openByType(UStringPrepProfileType type
, 
 206  * @param profile The profile to close 
 /* Closes a StringPrep profile.
  *
  * @param profile The profile to close. This is the function callers are told
  *                to use on profiles returned by usprep_open() and
  *                usprep_openByType().
  *
  * NOTE(review): behaviour for a NULL `profile` is not stated in the visible
  * documentation -- confirm against the implementation. */
 209 U_STABLE 
void U_EXPORT2
 
 210 usprep_close(UStringPrepProfile
* profile
); 
 212 #if U_SHOW_CPLUSPLUS_API 
 217  * \class LocalUStringPrepProfilePointer 
 218  * "Smart pointer" class, closes a UStringPrepProfile via usprep_close(). 
 219  * For most methods see the LocalPointerBase base class. 
 221  * @see LocalPointerBase 
 /* Defines LocalUStringPrepProfilePointer, a "smart pointer" class for
  * UStringPrepProfile that closes the profile via usprep_close(). Only
  * compiled when U_SHOW_CPLUSPLUS_API is enabled (see the enclosing #if).
  * The macro is presumably provided by unicode/localpointer.h, which this
  * header includes -- confirm. */
 225 U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer
, UStringPrepProfile
, usprep_close
); 
 232  * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC), 
 233  * checks for prohibited and BiDi characters in the order defined by RFC 3454 
 234  * depending on the options specified in the profile. 
 236  * @param prep          The profile to use  
 237  * @param src           Pointer to UChar buffer containing the string to prepare 
 238  * @param srcLength     Number of characters in the source string 
 239  * @param dest          Pointer to the destination buffer to receive the output 
 240  * @param destCapacity  The capacity of destination array 
 241  * @param options       A bit set of options: 
 243  *  - USPREP_DEFAULT            Prohibit processing of unassigned code points in the input 
 245  *  - USPREP_ALLOW_UNASSIGNED   Treat the unassigned code points in the input  
 246  *                              as normal Unicode code points. 
 248  * @param parseError        Pointer to UParseError struct to receive information on position  
 249  *                          of error if an error is encountered. Can be NULL. 
 250  * @param status            ICU in/out error code parameter. 
 251  *                          U_INVALID_CHAR_FOUND if src contains 
 252  *                          unmatched single surrogates. 
 253  *                          U_INDEX_OUTOFBOUNDS_ERROR if src contains 
 254  *                          too many code points. 
 255  *                          U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 
 256  * @return The number of UChars in the destination buffer 
 /* Prepares `src` according to the profile `prep` and writes the result to
  * `dest`; returns the number of UChars in the destination buffer. Errors are
  * reported through `status`, and `parseError` (may be NULL) receives the
  * error position.
  *
  * NOTE(review): the documentation for this function lists an `options`
  * parameter (a bit set of USPREP_DEFAULT / USPREP_ALLOW_UNASSIGNED), but no
  * such parameter appears in the parameter list as seen here (expected
  * between `destCapacity` and `parseError`). Verify against the full file
  * before relying on this declaration. */
 260 U_STABLE 
int32_t U_EXPORT2
 
 261 usprep_prepare(   const UStringPrepProfile
* prep
, 
 262                   const UChar
* src
, int32_t srcLength
,  
 263                   UChar
* dest
, int32_t destCapacity
, 
 265                   UParseError
* parseError
, 
 266                   UErrorCode
* status 
); 
 269 #endif /* #if !UCONFIG_NO_IDNA */