]> git.saurik.com Git - apple/icu.git/blob - icuSources/common/strprep.h
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / common / strprep.h
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: strprep.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17 #ifndef STRPREP_H
18 #define STRPREP_H
19
20 #include "unicode/utypes.h"
21
22 #if !UCONFIG_NO_IDNA
23
24 #include "unicode/uobject.h"
25 #include "unicode/uniset.h"
26 #include "unicode/parseerr.h"
27
28 U_NAMESPACE_BEGIN
29
30 /**\file
31 *
32 * This API implements RF 3454 StringPrep standard.
33 *
34 * The steps for preparing strings are:
35 *
36 * 1) Map -- For each character in the input, check if it has a mapping
37 * and, if so, replace it with its mapping.
38 * <ul>
39 * <li>Delete certain codepoints from the input because their
40 * presence or absence in the protocol identifies should not
41 * make two strings different</li>
42 * <li>Case Mapings
43 * <br>If Normalization is turned off
44 * <br> Get mappings from case map tables
45 * <br>else
46 * <br> Get mappings from case map tables for normalization
47 * <br> Use u_getFC_NFKC_Closure for obtaining extra mappings
48 * </li>
49 * </ul>
50 * 2) Normalize -- Possibly normalize the result of step 1 using Unicode
51 * normalization NFKC.
52 *
53 * 3) Prohibit -- Check for any characters that are not allowed in the
54 * output. If any are found, return an error.
55 *
56 * 4) Check bidi -- Possibly check for right-to-left characters, and if
57 * any are found, make sure that the whole string satisfies the
58 * requirements for bidirectional strings. If the string does not
59 * satisfy the requirements for bidirectional strings, return an
60 * error.
61 *
62 * Some StringPrep profiles:
63 * IDN: "Nameprep" http://www.ietf.org/rfc/rfc3491.txt
64 * XMPP Node Identifiers: "Nodeprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-nodeprep-01.txt
65 * XMPP Resource Identifiers: "Resourceprep" http://www.ietf.org/internet-drafts/draft-ietf-xmpp-resourceprep-01.txt
66 * ANONYMOUS SASL tokens: "plain" http://www.ietf.org/internet-drafts/draft-ietf-sasl-anon-00.txt
67 * iSCSI http://www.ietf.org/internet-drafts/draft-ietf-ips-iscsi-string-prep-03.txt
68 */
69 class StringPrep : public UObject{
70
71 protected:
72 UVersionInfo unicodeVersion; /** The Character repertoire version of this profile */
73 UBool bidiCheck; /** Option to turn BiDi checking on */
74 UBool doNFKC; /** Option to turn NFKC on */
75
76 /**
77 * Protected default constructor sub classes
78 */
79 StringPrep(){};
80
81 public:
82 /**
83 * Destructor
84 */
85 virtual inline ~StringPrep(){};
86
87 /**
88 * Map every character in input stream with mapping character
89 * in the mapping table and populate the output stream.
90 * For any individual character the mapping table may specify
91 * that that a character be mapped to nothing, mapped to one
92 * other character or to a string of other characters.
93 *
94 * @param src Pointer to UChar buffer containing a single label
95 * @param srcLength Number of characters in the source label
96 * @param dest Pointer to the destination buffer to receive the output
97 * @param destCapacity The capacity of destination array
98 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
99 * If TRUE unassigned values are treated as normal Unicode code point.
100 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code.
101 * @param status ICU error code in/out parameter.
102 * Must fulfill U_SUCCESS before the function call.
103 * @return The number of UChars in the destination buffer
104 *
105 */
106 virtual int32_t map(const UChar* src, int32_t srcLength,
107 UChar* dest, int32_t destCapacity,
108 UBool allowUnassigned,
109 UParseError* parseError,
110 UErrorCode& status );
111
112 /**
113 * Normalize the input stream using Normalization Form KC (NFKC)
114 *
115 * @param src Pointer to UChar buffer containing a single label
116 * @param srcLength Number of characters in the source label
117 * @param dest Pointer to the destination buffer to receive the output
118 * @param destCapacity The capacity of destination array
119 * @param status ICU error code in/out parameter.
120 * Must fulfill U_SUCCESS before the function call.
121 * @return The number of UChars in the destination buffer
122 *
123 *
124 */
125 virtual int32_t normalize( const UChar* src, int32_t srcLength,
126 UChar* dest, int32_t destCapacity,
127 UErrorCode& status );
128
129
130 /**
131 * Prepare the input stream with for use. This operation maps, normalizes(NFKC),
132 * checks for prohited and BiDi characters in the order defined by RFC 3454
133 *
134 * @param src Pointer to UChar buffer containing a single label
135 * @param srcLength Number of characters in the source label
136 * @param dest Pointer to the destination buffer to receive the output
137 * @param destCapacity The capacity of destination array
138 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations
139 * If TRUE unassigned values are treated as normal Unicode code point.
140 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code.
141 * @param status ICU error code in/out parameter.
142 * Must fulfill U_SUCCESS before the function call.
143 * @return The number of UChars in the destination buffer
144 *
145 *
146 */
147 virtual int32_t process(const UChar* src, int32_t srcLength,
148 UChar* dest, int32_t destCapacity,
149 UBool allowUnassigned,
150 UParseError* parseError,
151 UErrorCode& status );
152
153 /**
154 * Create a profile from prebuilt default Nameprep profile conforming to
155 * nameprep internet draft (http://www.ietf.org/html.charters/idn-charter.html).
156 * This is a built-in/unmodifiable profile.
157 *
158 * @param status ICU error code in/out parameter.
159 * Must fulfill U_SUCCESS before the function call.
160 * @return Pointer to StringPrep object that is created. Should be deleted by
161 * by caller
162 *
163 *
164 */
165 static StringPrep* createNameprepInstance(UErrorCode& status);
166
167 /**
168 * Create a profile from prebuilt default StringPrep profile conforming to
169 * RFC 3454 (ftp://ftp.rfc-editor.org/in-notes/rfc3454.txt).
170 * User defined profiles can be created by getting the default profile and
171 * adding mappings, removing mappings, turning options ON/OFF and prohibiting
172 * characters from the output.
173 *
174 * @param status ICU error code in/out parameter.
175 * Must fulfill U_SUCCESS before the function call.
176 * @return Pointer to StringPrep object that is created. Should be deleted by
177 * the caller.
178 *
179 *
180 */
181 static StringPrep* createDefaultInstance(UErrorCode& status);
182
183 /**
184 * Ascertain if the given code point is a Letter/Digit/Hyphen in the ASCII range
185 *
186 * @return TRUE is the code point is a Letter/Digit/Hyphen
187 *
188 *
189 */
190 static inline UBool isLDHChar(UChar32 ch);
191
192 /**
193 * Ascertain if the given code point is a label separator as specified by IDNA
194 *
195 * @return TRUE is the code point is a label separator
196 *
197 *
198 */
199 virtual UBool isLabelSeparator(UChar32 ch, UErrorCode& status);
200
201 /**
202 * Get the BiDi option of this profile
203 *
204 *
205 */
206 inline UBool getCheckBiDi();
207
208 /**
209 * Get the normalization (NFKC) option of this profile
210 *
211 * @return The normalization option
212 *
213 *
214 */
215 inline UBool getNormalization();
216
217 /**
218 * Get the Unicode version which this profile
219 * conforms to
220 *
221 *
222 */
223 inline void getUnicodeVersion(UVersionInfo& info);
224
225 private:
226 // Boiler plate
227
228 /**
229 * Copy constructor.
230 *
231 */
232 StringPrep(const StringPrep&);
233
234 /**
235 * Assignment operator.
236 *
237 */
238 StringPrep& operator=(const StringPrep&);
239
240 /**
241 * Return true if another object is semantically equal to this one.
242 *
243 * @param other the object to be compared with.
244 * @return true if another object is semantically equal to this one.
245 *
246 */
247 UBool operator==(const StringPrep& other) const {return FALSE;};
248
249 /**
250 * Return true if another object is semantically unequal to this one.
251 *
252 * @param other the object to be compared with.
253 * @return true if another object is semantically unequal to this one.
254 *
255 */
256 UBool operator!=(const StringPrep& other) const { return !operator==(other); }
257
258 public:
259
260 /**
261 * ICU "poor man's RTTI", returns a UClassID for this class.
262 *
263 *
264 */
265 static inline UClassID getStaticClassID();
266
267 /**
268 * ICU "poor man's RTTI", returns a UClassID for the actual class.
269 *
270 *
271 */
272 virtual inline UClassID getDynamicClassID() const;
273
274 protected:
275
276 /**
277 * Sub classes that slightly modify the default profile
278 * implement this method to remove characters to
279 * the prohibited list. The default implementation does not
280 * check if the data is loaded or not. The caller is responsible
281 * for checking for data.
282 *
283 */
284 virtual UBool isNotProhibited(UChar32 ch);
285
286 /**
287 * Sub classes that slightly modify the default profile
288 * implement this method to remove characters to
289 * the unassigned list. The default implementation does not
290 * check if the data is loaded or not. The caller is responsible
291 * for checking for data.
292 */
293 virtual UBool isUnassigned(UChar32 ch);
294
295 /**
296 * Ascertains if uidna.icu data file is loaded.
297 * If data is not loaded, loads the data file.
298 *
299 *
300 */
301 static UBool isDataLoaded(UErrorCode& status);
302
303 private:
304
305 /**
306 * The address of this static class variable serves as this class's ID
307 * for ICU "poor man's RTTI".
308 */
309 static const char fgClassID;
310
311 };
312
313 inline UBool StringPrep::getCheckBiDi(){
314 return bidiCheck;
315 }
316
317
318 inline UBool StringPrep::getNormalization(){
319 return doNFKC;
320 }
321
322 inline void StringPrep::getUnicodeVersion(UVersionInfo& info){
323 for(int32_t i=0; i< (int32_t)(sizeof(info)/sizeof(info[0])); i++){
324 info[i] = unicodeVersion[i];
325 }
326 }
327
328 inline UClassID StringPrep::getStaticClassID() {
329 return (UClassID)&fgClassID;
330 }
331
332 inline UClassID StringPrep::getDynamicClassID() const {
333 return getStaticClassID();
334 }
335
336 inline UBool StringPrep::isLDHChar(UChar32 ch){
337 // high runner case
338 if(ch>0x007A){
339 return FALSE;
340 }
341 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A]
342 if( (ch==0x002D) ||
343 (0x0030 <= ch && ch <= 0x0039) ||
344 (0x0041 <= ch && ch <= 0x005A) ||
345 (0x0061 <= ch && ch <= 0x007A)
346 ){
347 return TRUE;
348 }
349 return FALSE;
350 }
351
352 U_NAMESPACE_END
353
354 #endif /* #if !UCONFIG_NO_IDNA */
355
356 #endif
357
358 /*
359 * Hey, Emacs, please set the following:
360 *
361 * Local Variables:
362 * indent-tabs-mode: nil
363 * End:
364 *
365 */