]>
Commit | Line | Data |
---|---|---|
1 | // © 2016 and later: Unicode, Inc. and others. | |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | /* | |
4 | ******************************************************************************* | |
5 | * | |
6 | * Copyright (C) 2003-2014, International Business Machines | |
7 | * Corporation and others. All Rights Reserved. | |
8 | * | |
9 | ******************************************************************************* | |
10 | * file name: nptrans.h | |
11 | * encoding: UTF-8 | |
12 | * tab size: 8 (not used) | |
13 | * indentation:4 | |
14 | * | |
15 | * created on: 2003feb1 | |
16 | * created by: Ram Viswanadha | |
17 | */ | |
18 | ||
19 | #include "unicode/utypes.h" | |
20 | ||
21 | #if !UCONFIG_NO_TRANSLITERATION | |
22 | #if !UCONFIG_NO_IDNA | |
23 | ||
24 | #include "nptrans.h" | |
25 | #include "unicode/resbund.h" | |
26 | #include "unicode/uniset.h" | |
27 | #include "sprpimpl.h" | |
28 | #include "cmemory.h" | |
29 | #include "ustr_imp.h" | |
30 | #include "intltest.h" | |
31 | ||
32 | #ifdef NPTRANS_DEBUG | |
33 | #include <stdio.h> | |
34 | #endif | |
35 | ||
36 | const char NamePrepTransform::fgClassID=0; | |
37 | ||
38 | //Factory method | |
39 | NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){ | |
40 | NamePrepTransform* transform = new NamePrepTransform(parseError, status); | |
41 | if(U_FAILURE(status)){ | |
42 | delete transform; | |
43 | return NULL; | |
44 | } | |
45 | return transform; | |
46 | } | |
47 | ||
48 | //constructor | |
49 | NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status) | |
50 | : mapping(nullptr), unassigned(), prohibited(), labelSeparatorSet(), bundle(nullptr) { | |
51 | ||
52 | LocalPointer<Transliterator> lmapping; | |
53 | LocalUResourceBundlePointer lbundle; | |
54 | ||
55 | const char* testDataName = IntlTest::loadTestData(status); | |
56 | ||
57 | if(U_FAILURE(status)){ | |
58 | return; | |
59 | } | |
60 | ||
61 | lbundle.adoptInstead(ures_openDirect(testDataName,"idna_rules",&status)); | |
62 | ||
63 | if(lbundle.isValid() && U_SUCCESS(status)){ | |
64 | // create the mapping transliterator | |
65 | int32_t ruleLen = 0; | |
66 | const UChar* ruleUChar = ures_getStringByKey(lbundle.getAlias(), "MapNFKC",&ruleLen, &status); | |
67 | int32_t mapRuleLen = 0; | |
68 | const UChar *mapRuleUChar = ures_getStringByKey(lbundle.getAlias(), "MapNoNormalization", &mapRuleLen, &status); | |
69 | UnicodeString rule(mapRuleUChar, mapRuleLen); | |
70 | rule.append(ruleUChar, ruleLen); | |
71 | ||
72 | lmapping.adoptInstead( Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule, | |
73 | UTRANS_FORWARD, parseError,status)); | |
74 | if(U_FAILURE(status)) { | |
75 | return; | |
76 | } | |
77 | ||
78 | //create the unassigned set | |
79 | int32_t patternLen =0; | |
80 | const UChar* pattern = ures_getStringByKey(lbundle.getAlias(),"UnassignedSet",&patternLen, &status); | |
81 | unassigned.applyPattern(UnicodeString(pattern, patternLen), status); | |
82 | ||
83 | //create prohibited set | |
84 | patternLen=0; | |
85 | pattern = ures_getStringByKey(lbundle.getAlias(),"ProhibitedSet",&patternLen, &status); | |
86 | UnicodeString test(pattern,patternLen); | |
87 | prohibited.applyPattern(test,status); | |
88 | #ifdef NPTRANS_DEBUG | |
89 | if(U_FAILURE(status)){ | |
90 | printf("Construction of Unicode set failed\n"); | |
91 | } | |
92 | ||
93 | if(U_SUCCESS(status)){ | |
94 | if(prohibited.contains((UChar) 0x644)){ | |
95 | printf("The string contains 0x644 ... !!\n"); | |
96 | } | |
97 | UnicodeString temp; | |
98 | prohibited.toPattern(temp,TRUE); | |
99 | ||
100 | for(int32_t i=0;i<temp.length();i++){ | |
101 | printf("%c", (char)temp.charAt(i)); | |
102 | } | |
103 | printf("\n"); | |
104 | } | |
105 | #endif | |
106 | ||
107 | //create label separator set | |
108 | patternLen=0; | |
109 | pattern = ures_getStringByKey(lbundle.getAlias(), "LabelSeparatorSet", &patternLen, &status); | |
110 | labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status); | |
111 | } | |
112 | ||
113 | if(U_SUCCESS(status) && (lmapping.isNull())) { | |
114 | status = U_MEMORY_ALLOCATION_ERROR; | |
115 | } | |
116 | if (U_FAILURE(status)) { | |
117 | return; | |
118 | } | |
119 | mapping = lmapping.orphan(); | |
120 | bundle = lbundle.orphan(); | |
121 | } | |
122 | ||
123 | ||
124 | UBool NamePrepTransform::isProhibited(UChar32 ch){ | |
125 | return (UBool)(ch != ASCII_SPACE); | |
126 | } | |
127 | ||
128 | NamePrepTransform::~NamePrepTransform(){ | |
129 | delete mapping; | |
130 | mapping = NULL; | |
131 | ||
132 | //close the bundle | |
133 | ures_close(bundle); | |
134 | bundle = NULL; | |
135 | } | |
136 | ||
137 | ||
138 | int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength, | |
139 | UChar* dest, int32_t destCapacity, | |
140 | UBool allowUnassigned, | |
141 | UParseError* /*parseError*/, | |
142 | UErrorCode& status ){ | |
143 | ||
144 | if(U_FAILURE(status)){ | |
145 | return 0; | |
146 | } | |
147 | //check arguments | |
148 | if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { | |
149 | status=U_ILLEGAL_ARGUMENT_ERROR; | |
150 | return 0; | |
151 | } | |
152 | ||
153 | UnicodeString rsource(src,srcLength); | |
154 | // map the code points | |
155 | // transliteration also performs NFKC | |
156 | mapping->transliterate(rsource); | |
157 | ||
158 | const UChar* buffer = rsource.getBuffer(); | |
159 | int32_t bufLen = rsource.length(); | |
160 | // check if unassigned | |
161 | if(allowUnassigned == FALSE){ | |
162 | int32_t bufIndex=0; | |
163 | UChar32 ch =0 ; | |
164 | for(;bufIndex<bufLen;){ | |
165 | U16_NEXT(buffer, bufIndex, bufLen, ch); | |
166 | if(unassigned.contains(ch)){ | |
167 | status = U_IDNA_UNASSIGNED_ERROR; | |
168 | return 0; | |
169 | } | |
170 | } | |
171 | } | |
172 | // check if there is enough room in the output | |
173 | if(bufLen < destCapacity){ | |
174 | u_memcpy(dest, buffer, bufLen); | |
175 | } | |
176 | ||
177 | return u_terminateUChars(dest, destCapacity, bufLen, &status); | |
178 | } | |
179 | ||
180 | ||
181 | #define MAX_BUFFER_SIZE 300 | |
182 | ||
183 | int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength, | |
184 | UChar* dest, int32_t destCapacity, | |
185 | UBool allowUnassigned, | |
186 | UParseError* parseError, | |
187 | UErrorCode& status ){ | |
188 | // check error status | |
189 | if(U_FAILURE(status)){ | |
190 | return 0; | |
191 | } | |
192 | ||
193 | //check arguments | |
194 | if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) { | |
195 | status=U_ILLEGAL_ARGUMENT_ERROR; | |
196 | return 0; | |
197 | } | |
198 | ||
199 | UnicodeString b1String; | |
200 | UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE); | |
201 | int32_t b1Len; | |
202 | ||
203 | int32_t b1Index = 0; | |
204 | UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT; | |
205 | UBool leftToRight=FALSE, rightToLeft=FALSE; | |
206 | ||
207 | b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status); | |
208 | b1String.releaseBuffer(b1Len); | |
209 | ||
210 | if(status == U_BUFFER_OVERFLOW_ERROR){ | |
211 | // redo processing of string | |
212 | /* we do not have enough room so grow the buffer*/ | |
213 | b1 = b1String.getBuffer(b1Len); | |
214 | status = U_ZERO_ERROR; // reset error | |
215 | b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status); | |
216 | b1String.releaseBuffer(b1Len); | |
217 | } | |
218 | ||
219 | if(U_FAILURE(status)){ | |
220 | b1Len = 0; | |
221 | goto CLEANUP; | |
222 | } | |
223 | ||
224 | ||
225 | for(; b1Index<b1Len; ){ | |
226 | ||
227 | UChar32 ch = 0; | |
228 | ||
229 | U16_NEXT(b1, b1Index, b1Len, ch); | |
230 | ||
231 | if(prohibited.contains(ch) && ch!=0x0020){ | |
232 | status = U_IDNA_PROHIBITED_ERROR; | |
233 | b1Len = 0; | |
234 | goto CLEANUP; | |
235 | } | |
236 | ||
237 | direction = u_charDirection(ch); | |
238 | if(firstCharDir==U_CHAR_DIRECTION_COUNT){ | |
239 | firstCharDir = direction; | |
240 | } | |
241 | if(direction == U_LEFT_TO_RIGHT){ | |
242 | leftToRight = TRUE; | |
243 | } | |
244 | if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){ | |
245 | rightToLeft = TRUE; | |
246 | } | |
247 | } | |
248 | ||
249 | // satisfy 2 | |
250 | if( leftToRight == TRUE && rightToLeft == TRUE){ | |
251 | status = U_IDNA_CHECK_BIDI_ERROR; | |
252 | b1Len = 0; | |
253 | goto CLEANUP; | |
254 | } | |
255 | ||
256 | //satisfy 3 | |
257 | if( rightToLeft == TRUE && | |
258 | !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) && | |
259 | (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC)) | |
260 | ){ | |
261 | status = U_IDNA_CHECK_BIDI_ERROR; | |
262 | return FALSE; | |
263 | } | |
264 | ||
265 | if(b1Len <= destCapacity){ | |
266 | u_memmove(dest, b1, b1Len); | |
267 | } | |
268 | ||
269 | CLEANUP: | |
270 | return u_terminateUChars(dest, destCapacity, b1Len, &status); | |
271 | } | |
272 | ||
273 | UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){ | |
274 | // check error status | |
275 | if(U_FAILURE(status)){ | |
276 | return FALSE; | |
277 | } | |
278 | ||
279 | return labelSeparatorSet.contains(ch); | |
280 | } | |
281 | ||
282 | #endif /* #if !UCONFIG_NO_IDNA */ | |
283 | #endif /* #if !UCONFIG_NO_TRANSLITERATION */ |