]> git.saurik.com Git - apple/icu.git/blame - icuSources/test/intltest/nptrans.cpp
ICU-3.13.tar.gz
[apple/icu.git] / icuSources / test / intltest / nptrans.cpp
CommitLineData
b75a7d8f
A
1/*
2 *******************************************************************************
3 *
4 * Copyright (C) 2003, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: nptrans.cpp
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2003feb1
14 * created by: Ram Viswanadha
15 */
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_TRANSLITERATION
20#if !UCONFIG_NO_IDNA
21
22#include "nptrans.h"
23#include "unicode/resbund.h"
24#include "unicode/uniset.h"
25#include "sprpimpl.h"
26#include "cmemory.h"
27#include "ustr_imp.h"
28#include "intltest.h"
29
30#ifdef DEBUG
31#include <stdio.h>
32#endif
33
34const char NamePrepTransform::fgClassID=0;
35
36//Factory method
37NamePrepTransform* NamePrepTransform::createInstance(UParseError& parseError, UErrorCode& status){
38 NamePrepTransform* transform = new NamePrepTransform(parseError, status);
39 if(U_FAILURE(status)){
40 delete transform;
41 return NULL;
42 }
43 return transform;
44}
45
46//constructor
47NamePrepTransform::NamePrepTransform(UParseError& parseError, UErrorCode& status)
48: unassigned(), prohibited(), labelSeparatorSet(){
49
50 mapping = NULL;
51 bundle = NULL;
52
53
54 const char* testDataName = IntlTest::loadTestData(status);
55
56 if(U_FAILURE(status)){
57 return;
58 }
59
60 bundle = ures_openDirect(testDataName,"idna_rules",&status);
61
62 if(bundle != NULL && U_SUCCESS(status)){
63 // create the mapping transliterator
64 int32_t ruleLen = 0;
65 const UChar* ruleUChar = ures_getStringByKey(bundle, "MapNFKC",&ruleLen, &status);
66 UnicodeString rule(ruleUChar, ruleLen);
67
68 mapping = Transliterator::createFromRules(UnicodeString("NamePrepTransform", ""), rule,
69 UTRANS_FORWARD, parseError,status);
70 if(U_FAILURE(status)) {
71 return;
72 }
73
74 //create the unassigned set
75 int32_t patternLen =0;
76 const UChar* pattern = ures_getStringByKey(bundle,"UnassignedSet",&patternLen, &status);
77 unassigned.applyPattern(UnicodeString(pattern, patternLen), status);
78
79 //create prohibited set
80 patternLen=0;
81 pattern = ures_getStringByKey(bundle,"ProhibitedSet",&patternLen, &status);
82 UnicodeString test(pattern,patternLen);
83 prohibited.applyPattern(test,status);
84#ifdef DEBUG
85 if(U_FAILURE(status)){
86 printf("Construction of Unicode set failed\n");
87 }
88
89 if(U_SUCCESS(status)){
90 if(prohibited.contains((UChar) 0x644)){
91 printf("The string contains 0x644 ... damn !!\n");
92 }
93 UnicodeString temp;
94 prohibited.toPattern(temp,TRUE);
95
96 for(int32_t i=0;i<temp.length();i++){
97 printf("%c", (char)temp.charAt(i));
98 }
99 printf("\n");
100 }
101#endif
102
103 //create label separator set
104 patternLen=0;
105 pattern = ures_getStringByKey(bundle,"LabelSeparatorSet",&patternLen, &status);
106 labelSeparatorSet.applyPattern(UnicodeString(pattern,patternLen),status);
107 }
108
109 if(U_SUCCESS(status) &&
110 (mapping == NULL)
111 ){
112 status = U_MEMORY_ALLOCATION_ERROR;
113 delete mapping;
114 ures_close(bundle);
115 mapping = NULL;
116 bundle = NULL;
117 }
118
119}
120
121
122UBool NamePrepTransform::isProhibited(UChar32 ch){
123 return (UBool)(ch != ASCII_SPACE);
124}
125
126NamePrepTransform::~NamePrepTransform(){
127 delete mapping;
128 mapping = NULL;
129
130 //close the bundle
131 ures_close(bundle);
132 bundle = NULL;
133}
134
135
136int32_t NamePrepTransform::map(const UChar* src, int32_t srcLength,
137 UChar* dest, int32_t destCapacity,
138 UBool allowUnassigned,
139 UParseError* /*parseError*/,
140 UErrorCode& status ){
141
142 if(U_FAILURE(status)){
143 return 0;
144 }
145 //check arguments
146 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
147 status=U_ILLEGAL_ARGUMENT_ERROR;
148 return 0;
149 }
150
151 UnicodeString rsource(src,srcLength);
152 // map the code points
153 // transliteration also performs NFKC
154 mapping->transliterate(rsource);
155
156 const UChar* buffer = rsource.getBuffer();
157 int32_t bufLen = rsource.length();
158 // check if unassigned
159 if(allowUnassigned == FALSE){
160 int32_t bufIndex=0;
161 UChar32 ch =0 ;
162 for(;bufIndex<bufLen;){
163 U16_NEXT(buffer, bufIndex, bufLen, ch);
164 if(unassigned.contains(ch)){
165 status = U_IDNA_UNASSIGNED_CODEPOINT_FOUND_ERROR;
166 rsource.releaseBuffer();
167 return 0;
168 }
169 }
170 }
171 // check if there is enough room in the output
172 if(bufLen < destCapacity){
173 uprv_memcpy(dest,buffer,bufLen*U_SIZEOF_UCHAR);
174 }
175
176 return u_terminateUChars(dest, destCapacity, bufLen, &status);
177}
178
179
180#define MAX_BUFFER_SIZE 300
181
182int32_t NamePrepTransform::process( const UChar* src, int32_t srcLength,
183 UChar* dest, int32_t destCapacity,
184 UBool allowUnassigned,
185 UParseError* parseError,
186 UErrorCode& status ){
187 // check error status
188 if(U_FAILURE(status)){
189 return 0;
190 }
191
192 //check arguments
193 if(src==NULL || srcLength<-1 || (dest==NULL && destCapacity!=0)) {
194 status=U_ILLEGAL_ARGUMENT_ERROR;
195 return 0;
196 }
197
198 UChar b1Stack[MAX_BUFFER_SIZE];
199 UChar *b1 = b1Stack;
200 int32_t b1Len,b1Capacity = MAX_BUFFER_SIZE;
201
202 int32_t b1Index = 0;
203 UCharDirection direction=U_CHAR_DIRECTION_COUNT, firstCharDir=U_CHAR_DIRECTION_COUNT;
204 UBool leftToRight=FALSE, rightToLeft=FALSE;
205
206 b1Len = map(src,srcLength, b1, b1Capacity,allowUnassigned,parseError, status);
207
208 if(status == U_BUFFER_OVERFLOW_ERROR){
209 // redo processing of string
210 /* we do not have enough room so grow the buffer*/
211 if(!u_growBufferFromStatic(b1Stack,&b1,&b1Capacity,b1Len,0)){
212 status = U_MEMORY_ALLOCATION_ERROR;
213 goto CLEANUP;
214 }
215
216 status = U_ZERO_ERROR; // reset error
217
218 b1Len = map(src,srcLength, b1, b1Len,allowUnassigned, parseError, status);
219
220 }
221
222 if(U_FAILURE(status)){
223 goto CLEANUP;
224 }
225
226
227 for(; b1Index<b1Len; ){
228
229 UChar32 ch = 0;
230
231 U16_NEXT(b1, b1Index, b1Len, ch);
232
233 if(prohibited.contains(ch) && ch!=0x0020){
234 status = U_IDNA_PROHIBITED_CODEPOINT_FOUND_ERROR;
235 goto CLEANUP;
236 }
237
238 direction = u_charDirection(ch);
239 if(firstCharDir==U_CHAR_DIRECTION_COUNT){
240 firstCharDir = direction;
241 }
242 if(direction == U_LEFT_TO_RIGHT){
243 leftToRight = TRUE;
244 }
245 if(direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC){
246 rightToLeft = TRUE;
247 }
248 }
249
250 // satisfy 2
251 if( leftToRight == TRUE && rightToLeft == TRUE){
252 status = U_IDNA_CHECK_BIDI_ERROR;
253 goto CLEANUP;
254 }
255
256 //satisfy 3
257 if( rightToLeft == TRUE &&
258 !((firstCharDir == U_RIGHT_TO_LEFT || firstCharDir == U_RIGHT_TO_LEFT_ARABIC) &&
259 (direction == U_RIGHT_TO_LEFT || direction == U_RIGHT_TO_LEFT_ARABIC))
260 ){
261 status = U_IDNA_CHECK_BIDI_ERROR;
262 return FALSE;
263 }
264
265 if(b1Len <= destCapacity){
266 uprv_memmove(dest,b1, b1Len*U_SIZEOF_UCHAR);
267 }
268
269CLEANUP:
270 if(b1!=b1Stack){
271 uprv_free(b1);
272 }
273
274 return u_terminateUChars(dest, destCapacity, b1Len, &status);
275}
276
277UBool NamePrepTransform::isLabelSeparator(UChar32 ch, UErrorCode& status){
278 // check error status
279 if(U_FAILURE(status)){
280 return FALSE;
281 }
282
283 return labelSeparatorSet.contains(ch);
284}
285
286#endif /* #if !UCONFIG_NO_IDNA */
287#endif /* #if !UCONFIG_NO_TRANSLITERATION */