2 *******************************************************************************
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
7 *******************************************************************************
10 * tab size: 8 (not used)
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
20 #include "unicode/utypes.h"
24 #include "unicode/uobject.h"
25 #include "unicode/uniset.h"
26 #include "unicode/parseerr.h"
32 * This API implements the RFC 3454 StringPrep standard.
34 * The steps for preparing strings are:
36 * 1) Map -- For each character in the input, check if it has a mapping
37 * and, if so, replace it with its mapping.
39 * <li>Delete certain codepoints from the input because their
40 * presence or absence in the protocol identifiers should not
41 * make two strings different</li>
43 * <br>If Normalization is turned off
44 * <br> Get mappings from case map tables
46 * <br> Get mappings from case map tables for normalization
47 * <br> Use u_getFC_NFKC_Closure for obtaining extra mappings
50 * 2) Normalize -- Possibly normalize the result of step 1 using Unicode normalization.
53 * 3) Prohibit -- Check for any characters that are not allowed in the
54 * output. If any are found, return an error.
56 * 4) Check bidi -- Possibly check for right-to-left characters, and if
57 * any are found, make sure that the whole string satisfies the
58 * requirements for bidirectional strings. If the string does not
59 * satisfy the requirements for bidirectional strings, return an error.
62 * Some StringPrep profiles:
63 * IDN: "Nameprep" http://www.ietf.org/rfc/rfc3491.txt
64 * XMPP Node Identifiers: "Nodeprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
65 * XMPP Resource Identifiers: "Resourceprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
66 * ANONYMOUS SASL tokens: "plain" http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
67 * iSCSI http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-03.txt
69 class StringPrep
: public UObject
{
72 UVersionInfo unicodeVersion
; /** The Character repertoire version of this profile */
73 UBool bidiCheck
; /** Option to turn BiDi checking on */
74 UBool doNFKC
; /** Option to turn NFKC on */
77 * Protected default constructor for subclasses
85 virtual inline ~StringPrep(){};
88 * Map every character in input stream with mapping character
89 * in the mapping table and populate the output stream.
90 * For any individual character the mapping table may specify
91 * that that a character be mapped to nothing, mapped to one
92 * other character or to a string of other characters.
94 * @param src Pointer to UChar buffer containing a single label
95 * @param srcLength Number of characters in the source label
96 * @param dest Pointer to the destination buffer to receive the output
97 * @param destCapacity The capacity of destination array
98 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
99 * If TRUE unassigned values are treated as normal Unicode code point.
100 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
101 * @param status ICU error code in/out parameter.
102 * Must fulfill U_SUCCESS before the function call.
103 * @return The number of UChars in the destination buffer
106 virtual int32_t map(const UChar
* src
, int32_t srcLength
,
107 UChar
* dest
, int32_t destCapacity
,
108 UBool allowUnassigned
,
109 UParseError
* parseError
,
110 UErrorCode
& status
);
113 * Normalize the input stream using Normalization Form KC (NFKC)
115 * @param src Pointer to UChar buffer containing a single label
116 * @param srcLength Number of characters in the source label
117 * @param dest Pointer to the destination buffer to receive the output
118 * @param destCapacity The capacity of destination array
119 * @param status ICU error code in/out parameter.
120 * Must fulfill U_SUCCESS before the function call.
121 * @return The number of UChars in the destination buffer
125 virtual int32_t normalize( const UChar
* src
, int32_t srcLength
,
126 UChar
* dest
, int32_t destCapacity
,
127 UErrorCode
& status
);
131 * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
132 * checks for prohited and BiDi characters in the order defined by RFC 3454
134 * @param src Pointer to UChar buffer containing a single label
135 * @param srcLength Number of characters in the source label
136 * @param dest Pointer to the destination buffer to receive the output
137 * @param destCapacity The capacity of destination array
138 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
139 * If TRUE unassigned values are treated as normal Unicode code point.
140 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
141 * @param status ICU error code in/out parameter.
142 * Must fulfill U_SUCCESS before the function call.
143 * @return The number of UChars in the destination buffer
147 virtual int32_t process(const UChar
* src
, int32_t srcLength
,
148 UChar
* dest
, int32_t destCapacity
,
149 UBool allowUnassigned
,
150 UParseError
* parseError
,
151 UErrorCode
& status
);
154 * Create a profile from prebuilt default Nameprep profile conforming to
155 * nameprep internet draft (http://www.ietf.org/html.charters/idn-charter.html).
156 * This is a built-in/unmodifiable profile.
158 * @param status ICU error code in/out parameter.
159 * Must fulfill U_SUCCESS before the function call.
160 * @return Pointer to StringPrep object that is created. Should be deleted by
165 static StringPrep
* createNameprepInstance(UErrorCode
& status
);
168 * Create a profile from prebuilt default StringPrep profile conforming to
169 * RFC 3454 (ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt).
170 * User defined profiles can be created by getting the default profile and
171 * adding mappings, removing mappings, turning options ON/OFF and prohibiting
172 * characters from the output.
174 * @param status ICU error code in/out parameter.
175 * Must fulfill U_SUCCESS before the function call.
176 * @return Pointer to StringPrep object that is created. Should be deleted by
181 static StringPrep
* createDefaultInstance(UErrorCode
& status
);
184 * Ascertain if the given code point is a Letter/Digit/Hyphen in the ASCII range
186 * @return TRUE is the code point is a Letter/Digit/Hyphen
190 static inline UBool
isLDHChar(UChar32 ch
);
193 * Ascertain if the given code point is a label separator as specified by IDNA
195 * @return TRUE is the code point is a label separator
199 virtual UBool
isLabelSeparator(UChar32 ch
, UErrorCode
& status
);
202 * Get the BiDi option of this profile
206 inline UBool
getCheckBiDi();
209 * Get the normalization (NFKC) option of this profile
211 * @return The normalization option
215 inline UBool
getNormalization();
218 * Get the Unicode version which this profile
223 inline void getUnicodeVersion(UVersionInfo
& info
);
232 StringPrep(const StringPrep
&);
235 * Assignment operator.
238 StringPrep
& operator=(const StringPrep
&);
241 * Return true if another object is semantically equal to this one.
243 * @param other the object to be compared with.
244 * @return true if another object is semantically equal to this one.
247 UBool
operator==(const StringPrep
& other
) const {return FALSE
;};
250 * Return true if another object is semantically unequal to this one.
252 * @param other the object to be compared with.
253 * @return true if another object is semantically unequal to this one.
256 UBool
operator!=(const StringPrep
& other
) const { return !operator==(other
); }
261 * ICU "poor man's RTTI", returns a UClassID for this class.
265 static inline UClassID
getStaticClassID();
268 * ICU "poor man's RTTI", returns a UClassID for the actual class.
272 virtual inline UClassID
getDynamicClassID() const;
277 * Sub classes that slightly modify the default profile
278 * implement this method to remove characters to
279 * the prohibited list. The default implementation does not
280 * check if the data is loaded or not. The caller is responsible
281 * for checking for data.
284 virtual UBool
isNotProhibited(UChar32 ch
);
287 * Sub classes that slightly modify the default profile
288 * implement this method to remove characters to
289 * the unassigned list. The default implementation does not
290 * check if the data is loaded or not. The caller is responsible
291 * for checking for data.
293 virtual UBool
isUnassigned(UChar32 ch
);
296 * Ascertains if uidna.icu data file is loaded.
297 * If data is not loaded, loads the data file.
301 static UBool
isDataLoaded(UErrorCode
& status
);
306 * The address of this static class variable serves as this class's ID
307 * for ICU "poor man's RTTI".
309 static const char fgClassID
;
313 inline UBool
StringPrep::getCheckBiDi(){
318 inline UBool
StringPrep::getNormalization(){
322 inline void StringPrep::getUnicodeVersion(UVersionInfo
& info
){
323 for(int32_t i
=0; i
< (int32_t)(sizeof(info
)/sizeof(info
[0])); i
++){
324 info
[i
] = unicodeVersion
[i
];
328 inline UClassID
StringPrep::getStaticClassID() {
329 return (UClassID
)&fgClassID
;
332 inline UClassID
StringPrep::getDynamicClassID() const {
333 return getStaticClassID();
336 inline UBool
StringPrep::isLDHChar(UChar32 ch
){
341 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
343 (0x0030 <= ch
&& ch
<= 0x0039) ||
344 (0x0041 <= ch
&& ch
<= 0x005A) ||
345 (0x0061 <= ch
&& ch
<= 0x007A)
354 #endif /* #if !UCONFIG_NO_IDNA */
359 * Hey, Emacs, please set the following:
362 * indent-tabs-mode: nil