]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/nptrans.cpp
ICU-66108.tar.gz
[apple/icu.git] / icuSources / test / intltest / nptrans.cpp
CommitLineData
f3c0d7a5
A
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
b75a7d8f
A
3/*
4 *******************************************************************************
5 *
b331163b 6 * Copyright (C) 2003-2014, International Business Machines
b75a7d8f
A
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: nptrans.cpp
f3c0d7a5 11 * encoding: UTF-8
b75a7d8f
A
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2003feb1
16 * created by: Ram Viswanadha
17 */
18
19#include "unicode/utypes.h"
20
21#if !UCONFIG_NO_TRANSLITERATION
22#if !UCONFIG_NO_IDNA
23
24#include "nptrans.h"
25#include "unicode/resbund.h"
26#include "unicode/uniset.h"
27#include "sprpimpl.h"
28#include "cmemory.h"
29#include "ustr_imp.h"
30#include "intltest.h"
31
b331163b 32#ifdef NPTRANS_DEBUG
b75a7d8f
A
33#include <stdio.h>
34#endif
35
// Definition of the static class member fgClassID.
// NOTE(review): presumably its address serves as a unique class identifier,
// as is conventional in ICU -- confirm against the declaration in nptrans.h.
const char NamePrepTransform::fgClassID=0;
37
38//Factory method
39NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
40 NamePrepTransform* transform = new NamePrepTransform(parseError, status);
41 if(U_FAILURE(status)){
42 delete transform;
43 return NULL;
44 }
45 return transform;
46}
47
48//constructor
49NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
340931cb 50 : mapping(nullptr), unassigned(), prohibited(), labelSeparatorSet(), bundle(nullptr) {
b75a7d8f 51
340931cb
A
52 LocalPointer<Transliterator> lmapping;
53 LocalUResourceBundlePointer lbundle;
b75a7d8f
A
54
55 const char* testDataName = IntlTest::loadTestData(status);
56
57 if(U_FAILURE(status)){
58 return;
59 }
60
340931cb 61 lbundle.adoptInstead(ures_openDirect(testDataName,"idna_rules",&status));
b75a7d8f 62
340931cb 63 if(lbundle.isValid() && U_SUCCESS(status)){
b75a7d8f
A
64 // create the mapping transliterator
65 int32_t ruleLen = 0;
340931cb 66 const UChar* ruleUChar = ures_getStringByKey(lbundle.getAlias(), "MapNFKC",&ruleLen, &status);
374ca955 67 int32_t mapRuleLen = 0;
340931cb 68 const UChar *mapRuleUChar = ures_getStringByKey(lbundle.getAlias(), "MapNoNormalization", &mapRuleLen, &status);
374ca955
A
69 UnicodeString rule(mapRuleUChar, mapRuleLen);
70 rule.append(ruleUChar, ruleLen);
71
340931cb
A
72 lmapping.adoptInstead( Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
73 UTRANS_FORWARD, parseError,status));
b75a7d8f 74 if(U_FAILURE(status)) {
340931cb 75 return;
b75a7d8f
A
76 }
77
78 //create the unassigned set
79 int32_t patternLen =0;
340931cb 80 const UChar* pattern = ures_getStringByKey(lbundle.getAlias(),"UnassignedSet",&patternLen, &status);
b75a7d8f
A
81 unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
82
83 //create prohibited set
84 patternLen=0;
340931cb 85 pattern = ures_getStringByKey(lbundle.getAlias(),"ProhibitedSet",&patternLen, &status);
b75a7d8f
A
86 UnicodeString test(pattern,patternLen);
87 prohibited.applyPattern(test,status);
b331163b 88#ifdef NPTRANS_DEBUG
b75a7d8f
A
89 if(U_FAILURE(status)){
90 printf("Construction of Unicode set failed\n");
91 }
92
93 if(U_SUCCESS(status)){
94 if(prohibited.contains((UChar) 0x644)){
b331163b 95 printf("The string contains 0x644 ... !!\n");
b75a7d8f
A
96 }
97 UnicodeString temp;
98 prohibited.toPattern(temp,TRUE);
99
100 for(int32_t i=0;i<temp.length();i++){
101 printf("%c", (char)temp.charAt(i));
102 }
103 printf("\n");
104 }
105#endif
106
107 //create label separator set
108 patternLen=0;
340931cb 109 pattern = ures_getStringByKey(lbundle.getAlias(), "LabelSeparatorSet", &patternLen, &status);
b75a7d8f
A
110 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
111 }
112
340931cb 113 if(U_SUCCESS(status) && (lmapping.isNull())) {
b75a7d8f 114 status = U_MEMORY_ALLOCATION_ERROR;
b75a7d8f 115 }
340931cb
A
116 if (U_FAILURE(status)) {
117 return;
118 }
119 mapping = lmapping.orphan();
120 bundle = lbundle.orphan();
b75a7d8f
A
121}
122
123
124UBool NamePrepTransform::isProhibited(UChar32 ch){
125 return (UBool)(ch != ASCII_SPACE);
126}
127
128NamePrepTransform::~NamePrepTransform(){
129 delete mapping;
130 mapping = NULL;
131
132 //close the bundle
133 ures_close(bundle);
134 bundle = NULL;
135}
136
137
138int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
139 UChar* dest, int32_t destCapacity,
140 UBool allowUnassigned,
141 UParseError* /*parseError*/,
142 UErrorCode& status ){
143
144 if(U_FAILURE(status)){
145 return 0;
146 }
147 //check arguments
148 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
149 status=U_ILLEGAL_ARGUMENT_ERROR;
150 return 0;
151 }
152
153 UnicodeString rsource(src,srcLength);
154 // map the code points
155 // transliteration also performs NFKC
156 mapping->transliterate(rsource);
157
158 const UChar* buffer = rsource.getBuffer();
159 int32_t bufLen = rsource.length();
160 // check if unassigned
161 if(allowUnassigned == FALSE){
162 int32_t bufIndex=0;
163 UChar32 ch =0 ;
164 for(;bufIndex<bufLen;){
165 U16_NEXT(buffer, bufIndex, bufLen, ch);
166 if(unassigned.contains(ch)){
374ca955 167 status = U_IDNA_UNASSIGNED_ERROR;
b75a7d8f
A
168 return 0;
169 }
170 }
171 }
172 // check if there is enough room in the output
173 if(bufLen < destCapacity){
a62d09fc 174 u_memcpy(dest, buffer, bufLen);
b75a7d8f
A
175 }
176
177 return u_terminateUChars(dest, destCapacity, bufLen, &status);
178}
179
180
181#define MAX_BUFFER_SIZE 300
182
183int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
184 UChar* dest, int32_t destCapacity,
185 UBool allowUnassigned,
186 UParseError* parseError,
187 UErrorCode& status ){
188 // check error status
189 if(U_FAILURE(status)){
190 return 0;
191 }
192
193 //check arguments
194 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
195 status=U_ILLEGAL_ARGUMENT_ERROR;
196 return 0;
197 }
198
729e4ab9
A
199 UnicodeString b1String;
200 UChar *b1 = b1String.getBuffer(MAX_BUFFER_SIZE);
201 int32_t b1Len;
b75a7d8f
A
202
203 int32_t b1Index = 0;
204 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
205 UBool leftToRight=FALSE, rightToLeft=FALSE;
206
729e4ab9
A
207 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
208 b1String.releaseBuffer(b1Len);
b75a7d8f
A
209
210 if(status == U_BUFFER_OVERFLOW_ERROR){
211 // redo processing of string
212 /* we do not have enough room so grow the buffer*/
729e4ab9 213 b1 = b1String.getBuffer(b1Len);
b75a7d8f 214 status = U_ZERO_ERROR; // reset error
729e4ab9
A
215 b1Len = map(src, srcLength, b1, b1String.getCapacity(), allowUnassigned, parseError, status);
216 b1String.releaseBuffer(b1Len);
b75a7d8f
A
217 }
218
219 if(U_FAILURE(status)){
729e4ab9 220 b1Len = 0;
b75a7d8f
A
221 goto CLEANUP;
222 }
223
224
225 for(; b1Index<b1Len; ){
226
227 UChar32 ch = 0;
228
229 U16_NEXT(b1, b1Index, b1Len, ch);
230
231 if(prohibited.contains(ch) && ch!=0x0020){
374ca955 232 status = U_IDNA_PROHIBITED_ERROR;
729e4ab9 233 b1Len = 0;
b75a7d8f
A
234 goto CLEANUP;
235 }
236
237 direction = u_charDirection(ch);
238 if(firstCharDir==U_CHAR_DIRECTION_COUNT){
239 firstCharDir = direction;
240 }
241 if(direction == U_LEFT_TO_RIGHT){
242 leftToRight = TRUE;
243 }
244 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
245 rightToLeft = TRUE;
246 }
247 }
248
249 // satisfy 2
250 if( leftToRight == TRUE && rightToLeft == TRUE){
251 status = U_IDNA_CHECK_BIDI_ERROR;
729e4ab9 252 b1Len = 0;
b75a7d8f
A
253 goto CLEANUP;
254 }
255
256 //satisfy 3
257 if( rightToLeft == TRUE &&
258 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
259 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
260 ){
261 status = U_IDNA_CHECK_BIDI_ERROR;
262 return FALSE;
263 }
264
265 if(b1Len <= destCapacity){
a62d09fc 266 u_memmove(dest, b1, b1Len);
b75a7d8f
A
267 }
268
269CLEANUP:
b75a7d8f
A
270 return u_terminateUChars(dest, destCapacity, b1Len, &status);
271}
272
273UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
274 // check error status
275 if(U_FAILURE(status)){
276 return FALSE;
277 }
278
279 return labelSeparatorSet.contains(ch);
280}
281
282#endif /* #if !UCONFIG_NO_IDNA */
283#endif /* #if !UCONFIG_NO_TRANSLITERATION */