]> git.saurik.com Git - apple/icu.git/blame - icuSources/common/ushape.c
ICU-400.37.tar.gz
[apple/icu.git] / icuSources / common / ushape.c
CommitLineData
b75a7d8f 1/*
46f4442e
A
2 ******************************************************************************
3 *
4 * Copyright (C) 2000-2008, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 ******************************************************************************
8 * file name: ushape.c
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2000jun29
14 * created by: Markus W. Scherer
15 *
16 * Arabic letter shaping implemented by Ayman Roshdy
17 */
b75a7d8f
A
18
19#include "unicode/utypes.h"
20#include "unicode/uchar.h"
21#include "unicode/ustring.h"
b75a7d8f 22#include "unicode/ushape.h"
374ca955
A
23#include "cmemory.h"
24#include "putilimp.h"
b75a7d8f 25#include "ustr_imp.h"
73c04bcf 26#include "ubidi_props.h"
b75a7d8f
A
27
28#if UTF_SIZE<16
29 /*
30 * This implementation assumes that the internal encoding is UTF-16
31 * or UTF-32, not UTF-8.
32 * The main assumption is that the Arabic characters and their
33 * presentation forms each fit into a single UChar.
34 * With UTF-8, they occupy 2 or 3 bytes, and more than the ASCII
35 * characters.
36 */
37# error This implementation assumes UTF-16 or UTF-32 (check UTF_SIZE)
38#endif
39
40/*
41 * ### TODO in general for letter shaping:
42 * - the letter shaping code is UTF-16-unaware; needs update
43 * + especially invertBuffer()?!
44 * - needs to handle the "Arabic Tail" that is used in some legacy codepages
45 * as a glyph fragment of wide-glyph letters
46 * + IBM Unicode conversion tables map it to U+200B (ZWSP)
47 * + IBM Egypt has proposed to encode the tail in Unicode among Arabic Presentation Forms
48 */
49
50/* definitions for Arabic letter shaping ------------------------------------ */
51
/*
 * Bit flags held in the low byte of a "link" value, i.e. of an entry of
 * araLink[], presALink[] or presBLink[] as returned by getLink().
 * The high byte of an araLink[] entry is an offset that shapeUnicode()
 * adds to a presentation-form base (0xFE70, or 0xFB50 with APRESENT).
 */
#define IRRELEVANT 4    /* skipped by shapeUnicode() when it looks for the neighbouring link; set on the marks U+064B..U+0656 and U+0670 */
#define LAMTYPE    16   /* set on Lam (U+0644) only */
#define ALEFTYPE   32   /* set on the Alef letters (U+0622, U+0623, U+0625, U+0627, U+0672, U+0673, U+0675) */
#define LINKR      1    /* LINKR and LINKL together index shapeTable[]; presumably "links to the right" - confirm */
#define LINKL      2    /* presumably "links to the left" - confirm */
#define APRESENT   8    /* the shaped form is taken relative to 0xFB50 instead of 0xFE70 */
#define SHADDA     64   /* set on Shadda (U+0651) */
#define CSHADDA    128  /* set on U+064C..U+0650 */
#define COMBINE    (SHADDA+CSHADDA)  /* both bits; tested on (prevLink|currLink) by the tashkeel aggregation in u_shapeArabic() */
61
b75a7d8f 62
/*
 * Offset from 0xFE70 of the presentation form of each tashkeel character,
 * indexed by (ch - 0x064B) for ch in U+064B..U+0652.
 * shapeUnicode() adds the Shape value (0 or 1) to this offset.
 */
static const uint8_t IrrelevantPos[] = {
    0x0, 0x2, 0x4, 0x6,
    0x8, 0xA, 0xC, 0xE,
};
67
/*
 * Maps a LamAlef presentation form U+FEF5..U+FEFC, indexed by
 * (ch - 0xFEF5), to the Alef letter it contains. expandLamAlef() writes
 * this Alef next to a Lam (U+0644) when it splits the ligature.
 */
static const UChar convertLamAlef[] =
{
/*FEF5*/    0x0622,
/*FEF6*/    0x0622,
/*FEF7*/    0x0623,
/*FEF8*/    0x0623,
/*FEF9*/    0x0625,
/*FEFA*/    0x0625,
/*FEFB*/    0x0627,
/*FEFC*/    0x0627
};
79
/*
 * Link values for U+0622..U+06D3, indexed by (ch - 0x0622); see getLink().
 * Low byte : flag bits (1=LINKR, 2=LINKL, 4=IRRELEVANT, 8=APRESENT,
 *            16=LAMTYPE, 32=ALEFTYPE, 64=SHADDA, 128=CSHADDA).
 * High byte: offset that shapeUnicode() adds, together with the Shape
 *            value, to 0xFE70 (or to 0xFB50 when APRESENT is set).
 * The table must keep exactly 178 entries because getLink() indexes it
 * with values up to 0x06D3-0x0622.
 */
static const UChar araLink[178]=
{
  1           + 32 + 256 * 0x11,/*0x0622*/
  1           + 32 + 256 * 0x13,/*0x0623*/
  1                + 256 * 0x15,/*0x0624*/
  1           + 32 + 256 * 0x17,/*0x0625*/
  1 + 2            + 256 * 0x19,/*0x0626*/
  1           + 32 + 256 * 0x1D,/*0x0627*/
  1 + 2            + 256 * 0x1F,/*0x0628*/
  1                + 256 * 0x23,/*0x0629*/
  1 + 2            + 256 * 0x25,/*0x062A*/
  1 + 2            + 256 * 0x29,/*0x062B*/
  1 + 2            + 256 * 0x2D,/*0x062C*/
  1 + 2            + 256 * 0x31,/*0x062D*/
  1 + 2            + 256 * 0x35,/*0x062E*/
  1                + 256 * 0x39,/*0x062F*/
  1                + 256 * 0x3B,/*0x0630*/
  1                + 256 * 0x3D,/*0x0631*/
  1                + 256 * 0x3F,/*0x0632*/
  1 + 2            + 256 * 0x41,/*0x0633*/
  1 + 2            + 256 * 0x45,/*0x0634*/
  1 + 2            + 256 * 0x49,/*0x0635*/
  1 + 2            + 256 * 0x4D,/*0x0636*/
  1 + 2            + 256 * 0x51,/*0x0637*/
  1 + 2            + 256 * 0x55,/*0x0638*/
  1 + 2            + 256 * 0x59,/*0x0639*/
  1 + 2            + 256 * 0x5D,/*0x063A*/
  0, 0, 0, 0, 0,                /*0x063B-0x063F*/
  1 + 2,                        /*0x0640*/
  1 + 2            + 256 * 0x61,/*0x0641*/
  1 + 2            + 256 * 0x65,/*0x0642*/
  1 + 2            + 256 * 0x69,/*0x0643*/
  1 + 2       + 16 + 256 * 0x6D,/*0x0644*/
  1 + 2            + 256 * 0x71,/*0x0645*/
  1 + 2            + 256 * 0x75,/*0x0646*/
  1 + 2            + 256 * 0x79,/*0x0647*/
  1                + 256 * 0x7D,/*0x0648*/
  1                + 256 * 0x7F,/*0x0649*/
  1 + 2            + 256 * 0x81,/*0x064A*/
  4                + 256 * 1,   /*0x064B*/
  4 + 128          + 256 * 1,   /*0x064C*/
  4 + 128          + 256 * 1,   /*0x064D*/
  4 + 128          + 256 * 1,   /*0x064E*/
  4 + 128          + 256 * 1,   /*0x064F*/
  4 + 128          + 256 * 1,   /*0x0650*/
  4 + 64           + 256 * 3,   /*0x0651*/
  4                + 256 * 1,   /*0x0652*/
  4                + 256 * 7,   /*0x0653*/
  4                + 256 * 8,   /*0x0654*/
  4                + 256 * 8,   /*0x0655*/
  4                + 256 * 1,   /*0x0656*/
  0, 0, 0, 0, 0,                /*0x0657-0x065B*/
  1                + 256 * 0x85,/*0x065C*/
  1                + 256 * 0x87,/*0x065D*/
  1                + 256 * 0x89,/*0x065E*/
  1                + 256 * 0x8B,/*0x065F*/
  0, 0, 0, 0, 0,                /*0x0660-0x0664*/
  0, 0, 0, 0, 0,                /*0x0665-0x0669*/
  0, 0, 0, 0, 0, 0,             /*0x066A-0x066F*/
  4                + 256 * 6,   /*0x0670*/
  1        + 8     + 256 * 0x00,/*0x0671*/
  1           + 32,             /*0x0672*/
  1           + 32,             /*0x0673*/
  0,                            /*0x0674*/
  1           + 32,             /*0x0675*/
  1, 1,                         /*0x0676-0x0677*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x0678-0x067D*/
  1+2+8+256 * 0x06, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x067E-0x0683*/
  1+2, 1+2, 1+2+8+256 * 0x2A, 1+2,           /*0x0684-0x0687*/
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x0688-0x0691*/
  1, 1, 1, 1, 1, 1, 1+8+256 * 0x3A, 1,       /*0x0692-0x0699*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x069A-0x06A3*/
  1+2, 1+2, 1+2, 1+2,           /*0x069A-0x06A3*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2+8+256 * 0x3E, /*0x06A4-0x06AD*/
  1+2, 1+2, 1+2, 1+2,           /*0x06A4-0x06AD*/
  1+2, 1+2+8+256 * 0x42, 1+2, 1+2, 1+2, 1+2, /*0x06AE-0x06B7*/
  1+2, 1+2, 1+2, 1+2,           /*0x06AE-0x06B7*/
  1+2, 1+2, 1+2, 1+2, 1+2, 1+2, /*0x06B8-0x06BF*/
  1+2, 1+2,                     /*0x06B8-0x06BF*/
  1,                            /*0x06C0*/
  1+2,                          /*0x06C1*/
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*0x06C2-0x06CB*/
  1+2+8+256 * 0xAC,             /*0x06CC*/
  1,                            /*0x06CD*/
  1+2, 1+2, 1+2, 1+2,           /*0x06CE-0x06D1*/
  1, 1                          /*0x06D2-0x06D3*/
};
167
46f4442e
A
/*
 * Link flags for U+FB50..U+FC62, indexed by (ch - 0xFB50); see getLink().
 * Each row covers 16 code points; the row label gives the upper three
 * hex digits. Values use the same bits as the low byte of araLink[]
 * (1=LINKR, 2=LINKL, 4=IRRELEVANT).
 */
static const uint8_t presALink[] = {
/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
/*FB5*/    0,    1,    0,    0,    0,    0,    0,    1,    2,1 + 2,    0,    0,    0,    0,    0,    0,
/*FB6*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FB7*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    2,1 + 2,    0,    0,
/*FB8*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    0,    0,    0,    1,
/*FB9*/    2,1 + 2,    0,    1,    2,1 + 2,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FBA*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FBB*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FBC*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FBD*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FBE*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FBF*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    1,    2,1 + 2,
/*FC0*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FC1*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FC2*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FC3*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FC4*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
/*FC5*/    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    4,    4,
/*FC6*/    4,    4,    4
};
189
/*
 * Link flags for U+FE70..U+FEFF, indexed by (ch - 0xFE70); getLink()
 * only consults it for U+FE70..U+FEFC. Each row covers 16 code points.
 * Values are combinations of 1 (LINKR) and 2 (LINKL).
 */
static const uint8_t presBLink[]=
{
/***********0*****1*****2*****3*****4*****5*****6*****7*****8*****9*****A*****B*****C*****D*****E*****F*/
/*FE7*/1 + 2,1 + 2,1 + 2,    0,1 + 2,    0,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,1 + 2,
/*FE8*/    0,    0,    1,    0,    1,    0,    1,    0,    1,    0,    1,    2,1 + 2,    0,    1,    0,
/*FE9*/    1,    2,1 + 2,    0,    1,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
/*FEA*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    0,    1,    0,    1,    0,
/*FEB*/    1,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
/*FEC*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
/*FED*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,
/*FEE*/1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    2,1 + 2,    0,    1,    0,
/*FEF*/    1,    0,    1,    2,1 + 2,    0,    1,    0,    1,    0,    1,    0,    1,    0,    0,    0
};
203
/*
 * Maps U+FB50..U+FBFF, indexed by (ch - 0xFB50), to a replacement
 * character. An entry of 0 means "no mapping": shapeUnicode() and
 * deShapeUnicode() then leave the input character unchanged.
 * NOTE(review): several entries are written as 0x07E although the
 * neighbouring entries are in the 0x06xx range - confirm whether 0x067E
 * was intended.
 */
static const UChar convertFBto06[] =
{
/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
/*FB5*/   0x671, 0x671,     0,     0,     0,     0, 0x07E, 0x07E, 0x07E, 0x07E,     0,     0,     0,     0,     0,     0,
/*FB6*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FB7*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0, 0x686, 0x686, 0x686, 0x686,     0,     0,
/*FB8*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0, 0x698, 0x698,     0,     0, 0x6A9, 0x6A9,
/*FB9*/   0x6A9, 0x6A9, 0x6AF, 0x6AF, 0x6AF, 0x6AF,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FBA*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FBB*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FBC*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FBD*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FBE*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
/*FBF*/       0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0,     0, 0x6CC, 0x6CC, 0x6CC, 0x6CC
};
219
/*
 * Maps U+FE70..U+FEFC, indexed by (ch - 0xFE70), to a character in the
 * 06xx range. The last four pairs (U+FEF5..U+FEFC, the LamAlef forms)
 * map to U+065C..U+065F, the same values changeLamAlef() returns.
 * deShapeUnicode() only uses the entries up to U+FEF4.
 */
static const UChar convertFEto06[] =
{
/***********0******1******2******3******4******5******6******7******8******9******A******B******C******D******E******F***/
/*FE7*/   0x64B, 0x64B, 0x64C, 0x64C, 0x64D, 0x64D, 0x64E, 0x64E, 0x64F, 0x64F, 0x650, 0x650, 0x651, 0x651, 0x652, 0x652,
/*FE8*/   0x621, 0x622, 0x622, 0x623, 0x623, 0x624, 0x624, 0x625, 0x625, 0x626, 0x626, 0x626, 0x626, 0x627, 0x627, 0x628,
/*FE9*/   0x628, 0x628, 0x628, 0x629, 0x629, 0x62A, 0x62A, 0x62A, 0x62A, 0x62B, 0x62B, 0x62B, 0x62B, 0x62C, 0x62C, 0x62C,
/*FEA*/   0x62C, 0x62D, 0x62D, 0x62D, 0x62D, 0x62E, 0x62E, 0x62E, 0x62E, 0x62F, 0x62F, 0x630, 0x630, 0x631, 0x631, 0x632,
/*FEB*/   0x632, 0x633, 0x633, 0x633, 0x633, 0x634, 0x634, 0x634, 0x634, 0x635, 0x635, 0x635, 0x635, 0x636, 0x636, 0x636,
/*FEC*/   0x636, 0x637, 0x637, 0x637, 0x637, 0x638, 0x638, 0x638, 0x638, 0x639, 0x639, 0x639, 0x639, 0x63A, 0x63A, 0x63A,
/*FED*/   0x63A, 0x641, 0x641, 0x641, 0x641, 0x642, 0x642, 0x642, 0x642, 0x643, 0x643, 0x643, 0x643, 0x644, 0x644, 0x644,
/*FEE*/   0x644, 0x645, 0x645, 0x645, 0x645, 0x646, 0x646, 0x646, 0x646, 0x647, 0x647, 0x647, 0x647, 0x648, 0x648, 0x649,
/*FEF*/   0x649, 0x64A, 0x64A, 0x64A, 0x64A, 0x65C, 0x65C, 0x65D, 0x65D, 0x65E, 0x65E, 0x65F, 0x65F
};
233
/*
 * Shape selector used by shapeUnicode():
 *     Shape = shapeTable[nextLink & 3][lastLink & 3][currLink & 3]
 * where 3 == LINKR+LINKL. The result (0..3) is added to the
 * presentation-form base of the current character; per the comment in
 * shapeUnicode() it relies on the order of the shapes (isolated, initial,
 * middle, final) in the compatibility area.
 */
static const uint8_t shapeTable[4][4][4]=
{
  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,1} },
  { {0,0,2,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} },
  { {0,0,0,0}, {0,0,0,0}, {0,1,0,3}, {0,1,0,3} },
  { {0,0,1,2}, {0,0,1,2}, {0,1,1,2}, {0,1,1,3} }
};
241
242/*
243 * This function shapes European digits to Arabic-Indic digits
244 * in-place, writing over the input characters.
245 * Since we know that we are only looking for BMP code points,
246 * we can safely just work with code units (again, at least UTF-16).
247 */
248static void
249_shapeToArabicDigitsWithContext(UChar *s, int32_t length,
250 UChar digitBase,
251 UBool isLogical, UBool lastStrongWasAL) {
73c04bcf
A
252 const UBiDiProps *bdp;
253 UErrorCode errorCode;
254
b75a7d8f
A
255 int32_t i;
256 UChar c;
257
73c04bcf
A
258 errorCode=U_ZERO_ERROR;
259 bdp=ubidi_getSingleton(&errorCode);
260 if(U_FAILURE(errorCode)) {
261 return;
262 }
263
b75a7d8f
A
264 digitBase-=0x30;
265
266 /* the iteration direction depends on the type of input */
267 if(isLogical) {
268 for(i=0; i<length; ++i) {
269 c=s[i];
73c04bcf 270 switch(ubidi_getClass(bdp, c)) {
b75a7d8f
A
271 case U_LEFT_TO_RIGHT: /* L */
272 case U_RIGHT_TO_LEFT: /* R */
273 lastStrongWasAL=FALSE;
274 break;
275 case U_RIGHT_TO_LEFT_ARABIC: /* AL */
276 lastStrongWasAL=TRUE;
277 break;
278 case U_EUROPEAN_NUMBER: /* EN */
279 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
280 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
281 }
282 break;
283 default :
284 break;
285 }
286 }
287 } else {
288 for(i=length; i>0; /* pre-decrement in the body */) {
289 c=s[--i];
73c04bcf 290 switch(ubidi_getClass(bdp, c)) {
b75a7d8f
A
291 case U_LEFT_TO_RIGHT: /* L */
292 case U_RIGHT_TO_LEFT: /* R */
293 lastStrongWasAL=FALSE;
294 break;
295 case U_RIGHT_TO_LEFT_ARABIC: /* AL */
296 lastStrongWasAL=TRUE;
297 break;
298 case U_EUROPEAN_NUMBER: /* EN */
299 if(lastStrongWasAL && (uint32_t)(c-0x30)<10) {
300 s[i]=(UChar)(digitBase+c); /* digitBase+(c-0x30) - digitBase was modified above */
301 }
302 break;
303 default :
304 break;
305 }
306 }
307 }
308}
309
310/*
311 *Name : invertBuffer
312 *Function : This function inverts the buffer, it's used
313 * in case the user specifies the buffer to be
314 * U_SHAPE_TEXT_DIRECTION_LOGICAL
315 */
316static void
46f4442e 317invertBuffer(UChar *buffer,int32_t size,uint32_t options,int32_t lowlimit,int32_t highlimit) {
b75a7d8f
A
318 UChar temp;
319 int32_t i=0,j=0;
b75a7d8f
A
320 for(i=lowlimit,j=size-highlimit-1;i<j;i++,j--) {
321 temp = buffer[i];
322 buffer[i] = buffer[j];
323 buffer[j] = temp;
324 }
325}
326
327/*
328 *Name : changeLamAlef
329 *Function : Converts the Alef characters into an equivalent
330 * LamAlef location in the 0x06xx Range, this is an
331 * intermediate stage in the operation of the program
332 * later it'll be converted into the 0xFExx LamAlefs
333 * in the shaping function.
334 */
73c04bcf 335static U_INLINE UChar
b75a7d8f 336changeLamAlef(UChar ch) {
b75a7d8f
A
337 switch(ch) {
338 case 0x0622 :
73c04bcf 339 return 0x065C;
b75a7d8f 340 case 0x0623 :
73c04bcf 341 return 0x065D;
b75a7d8f 342 case 0x0625 :
73c04bcf 343 return 0x065E;
b75a7d8f 344 case 0x0627 :
73c04bcf 345 return 0x065F;
b75a7d8f 346 }
73c04bcf 347 return 0;
b75a7d8f
A
348}
349
b75a7d8f
A
350/*
351 *Name : getLink
352 *Function : Resolves the link between the characters as
353 * Arabic characters have four forms :
354 * Isolated, Initial, Middle and Final Form
355 */
356static UChar
357getLink(UChar ch) {
b75a7d8f
A
358 if(ch >= 0x0622 && ch <= 0x06D3) {
359 return(araLink[ch-0x0622]);
360 } else if(ch == 0x200D) {
361 return(3);
362 } else if(ch >= 0x206D && ch <= 0x206F) {
363 return(4);
46f4442e
A
364 } else if(ch >= 0xFB50 && ch <= 0xFC62) {
365 return(presALink[ch-0xFB50]);
b75a7d8f 366 } else if(ch >= 0xFE70 && ch <= 0xFEFC) {
46f4442e 367 return(presBLink[ch-0xFE70]);
b75a7d8f
A
368 } else {
369 return(0);
370 }
371}
372
373/*
374 *Name : countSpaces
375 *Function : Counts the number of spaces
376 * at each end of the logical buffer
377 */
378static void
379countSpaces(UChar *dest,int32_t size,uint32_t options,int32_t *spacesCountl,int32_t *spacesCountr) {
b75a7d8f
A
380 int32_t i = 0;
381 int32_t countl = 0,countr = 0;
b75a7d8f
A
382 while(dest[i] == 0x0020) {
383 countl++;
384 i++;
385 }
386 while(dest[size-1] == 0x0020) {
387 countr++;
388 size--;
389 }
390 *spacesCountl = countl;
391 *spacesCountr = countr;
392}
393
394/*
395 *Name : isTashkeelChar
396 *Function : Returns 1 for Tashkeel characters else return 0
397 */
73c04bcf 398static U_INLINE int32_t
b75a7d8f 399isTashkeelChar(UChar ch) {
73c04bcf 400 return (int32_t)( ch>=0x064B && ch<= 0x0652 );
b75a7d8f
A
401}
402
403/*
404 *Name : isAlefChar
405 *Function : Returns 1 for Alef characters else return 0
406 */
73c04bcf 407static U_INLINE int32_t
b75a7d8f 408isAlefChar(UChar ch) {
73c04bcf 409 return (int32_t)( (ch==0x0622)||(ch==0x0623)||(ch==0x0625)||(ch==0x0627) );
b75a7d8f
A
410}
411
412/*
413 *Name : isLamAlefChar
414 *Function : Returns 1 for LamAlef characters else return 0
415 */
73c04bcf 416static U_INLINE int32_t
b75a7d8f 417isLamAlefChar(UChar ch) {
73c04bcf 418 return (int32_t)( (ch>=0xFEF5)&&(ch<=0xFEFC) );
b75a7d8f
A
419}
420
421/*
422 *Name : calculateSize
423 *Function : This function calculates the destSize to be used in preflighting
424 * when the destSize is equal to 0
425 */
426static int32_t
427calculateSize(const UChar *source, int32_t sourceLength,
428 int32_t destSize,uint32_t options) {
b75a7d8f
A
429 int32_t i = 0;
430 destSize = sourceLength;
b75a7d8f 431 switch(options&U_SHAPE_LETTERS_MASK) {
b75a7d8f 432 case U_SHAPE_LETTERS_SHAPE :
46f4442e 433 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED:
b75a7d8f
A
434 if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
435 for(i=0;i<sourceLength;i++) {
436 if( (isAlefChar(source[i]))&&(source[i+1]==0x0644) ) {
437 destSize--;
438 }
439 }
46f4442e 440 } else if((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL) {
b75a7d8f 441 for(i=0;i<sourceLength;i++) {
46f4442e
A
442 if( (isAlefChar(source[i+1]))&&(source[i]==0x0644) ) {
443 destSize--;
444 }
b75a7d8f
A
445 }
446 }
447 break;
448
449 case U_SHAPE_LETTERS_UNSHAPE :
450 for(i=0;i<sourceLength;i++) {
451 if( isLamAlefChar(source[i]) ) {
452 destSize++;
453 }
454 }
455 break;
456
457 default :
458 /* will never occur because of validity checks at the begin of u_shapeArabic */
459 break;
460 }
461
462 return destSize;
463}
464
465/*
466 *Name : removeLamAlefSpaces
467 *Function : The shapeUnicode function converts Lam + Alef into LamAlef + space,
468 * this function removes the spaces behind the LamAlefs according to
469 * the options the user specifies, the spaces are removed to the end
470 * of the buffer, or shrink the buffer ab=nd remove spaces for good
471 * or leave the buffer as it is LamAlef + space.
472 */
473static int32_t
474removeLamAlefSpaces(UChar *dest, int32_t sourceLength,
475 int32_t destSize,
476 uint32_t options,
477 UErrorCode *pErrorCode) {
478
479 int32_t i = 0, j = 0;
480 int32_t count = 0;
481 UChar *tempbuffer=NULL;
482
483 switch(options&U_SHAPE_LENGTH_MASK) {
484 case U_SHAPE_LENGTH_GROW_SHRINK :
485 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
486 /* Test for NULL */
487 if(tempbuffer == NULL) {
488 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
489 return 0;
490 }
491
492 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
493
494 i = j = 0;
495 while(i < sourceLength) {
496 if(dest[i] == 0xFFFF) {
497 j--;
498 count++;
499 }
500 else
501 tempbuffer[j] = dest[i];
502 i++;
503 j++;
504 }
505
506 while(count >= 0) {
507 tempbuffer[i] = 0x0000;
508 i--;
509 count--;
510 }
511
512 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);
513 destSize = u_strlen(dest);
514 break;
515
516 case U_SHAPE_LENGTH_FIXED_SPACES_NEAR :
517 /* Lam+Alef is already shaped into LamAlef + FFFF */
518 i = 0;
519 while(i < sourceLength) {
520 if(dest[i] == 0xFFFF)
521 dest[i] = 0x0020;
522 i++;
523 }
524 destSize = sourceLength;
525 break;
526
527 case U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING :
528 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
529
530 /* Test for NULL */
531 if(tempbuffer == NULL) {
532 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
533 return 0;
534 }
535
536 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
537
538 i = j = sourceLength;
539 while(i >= 0) {
540 if(dest[i] == 0xFFFF) {
541 j++;
542 count++;
543 }
544 else
545 tempbuffer[j] = dest[i];
546 i--;
547 j--;
548 }
549 for(i=0;i<count;i++)
550 tempbuffer[i] = 0x0020;
551
552 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);
553 destSize = sourceLength;
554 break;
555
556 case U_SHAPE_LENGTH_FIXED_SPACES_AT_END :
557 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
558
559 /* Test for NULL */
560 if(tempbuffer == NULL) {
561 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
562 return 0;
563 }
564
565 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
566
567 i = j = 0;
568 while(i < sourceLength) {
569 if(dest[i] == 0xFFFF) {
570 j--;
571 count++;
572 }
573 else
574 tempbuffer[j] = dest[i];
575 i++;
576 j++;
577 }
578
579 while(count >= 0) {
580 tempbuffer[i] = 0x0020;
581 i--;
582 count--;
583 }
584
585 uprv_memcpy(dest,tempbuffer, sourceLength*U_SIZEOF_UCHAR);
586 destSize = sourceLength;
587 break;
588
589 default :
590 /* will not occur */
591 break;
592 }
593
594 if(tempbuffer)
595 uprv_free(tempbuffer);
596
597 return destSize;
598}
599
600/*
601 *Name : expandLamAlef
602 *Function : LamAlef needs special handling as the LamAlef is
603 * one character while expanding it will give two
604 * characters Lam + Alef, so we need to expand the LamAlef
605 * in near or far spaces according to the options the user
606 * specifies or increase the buffer size.
607 * If there are no spaces to expand the LamAlef, an error
608 * will be set to U_NO_SPACE_AVAILABLE as defined in utypes.h
609 */
610static int32_t
611expandLamAlef(UChar *dest, int32_t sourceLength,
612 int32_t destSize,uint32_t options,
613 UErrorCode *pErrorCode) {
614
615 int32_t i = 0,j = 0;
616 int32_t countl = 0;
617 int32_t countr = 0;
618 int32_t inpsize = sourceLength;
619 UChar lamalefChar;
620 UChar *tempbuffer=NULL;
621
622 switch(options&U_SHAPE_LENGTH_MASK) {
623
624 case U_SHAPE_LENGTH_GROW_SHRINK :
625 destSize = calculateSize(dest,sourceLength,destSize,options);
626 tempbuffer = (UChar *)uprv_malloc((destSize+1)*U_SIZEOF_UCHAR);
627
628 /* Test for NULL */
629 if(tempbuffer == NULL) {
630 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
631 return 0;
632 }
633
634 uprv_memset(tempbuffer, 0, (destSize+1)*U_SIZEOF_UCHAR);
635
636 i = j = 0;
637 while(i < destSize && j < destSize) {
638 if( isLamAlefChar(dest[i]) ) {
639 tempbuffer[j] = convertLamAlef[ dest[i] - 0xFEF5 ];
640 tempbuffer[j+1] = 0x0644;
641 j++;
642 }
643 else
644 tempbuffer[j] = dest[i];
645 i++;
646 j++;
647 }
648
649 uprv_memcpy(dest, tempbuffer, destSize*U_SIZEOF_UCHAR);
650 break;
651
652 case U_SHAPE_LENGTH_FIXED_SPACES_NEAR :
653 for(i = 0;i<sourceLength;i++) {
654 if((dest[i] == 0x0020) && isLamAlefChar(dest[i+1])) {
655 lamalefChar = dest[i+1];
656 dest[i+1] = 0x0644;
657 dest[i] = convertLamAlef[ lamalefChar - 0xFEF5 ];
658 }
659 else
660 if((dest[i] != 0x0020) && isLamAlefChar(dest[i+1])) {
661 *pErrorCode=U_NO_SPACE_AVAILABLE;
662 }
663 }
664 destSize = sourceLength;
665 break;
666
667 case U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING :
668 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
669
670 /* Test for NULL */
671 if(tempbuffer == NULL) {
672 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
673 return 0;
674 }
675
676 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
677
678 i = 0;
679 while(dest[i] == 0x0020) {
680 countl++;
681 i++;
682 }
683
684 i = j = sourceLength-1;
685 while(i >= 0 && j >= 0) {
686 if( countl>0 && isLamAlefChar(dest[i]) ) {
687 tempbuffer[j] = 0x0644;
688 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
689 j--;
690 countl--;
691 }
692 else {
693 if( countl == 0 && isLamAlefChar(dest[i]) )
694 *pErrorCode=U_NO_SPACE_AVAILABLE;
695 tempbuffer[j] = dest[i];
696 }
697 i--;
698 j--;
699 }
700
701 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);
702 destSize = sourceLength;
703 break;
704
705 case U_SHAPE_LENGTH_FIXED_SPACES_AT_END :
706 /* LamAlef expansion below is done from right to left to make sure that we consume
707 * the spaces with the LamAlefs as they appear in the visual buffer from right to left
708 */
709 tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
710
711 /* Test for NULL */
712 if(tempbuffer == NULL) {
713 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
714 return 0;
715 }
716
717 uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
718
719 while(dest[inpsize-1] == 0x0020) {
720 countr++;
721 inpsize--;
722 }
723
724 i = sourceLength - countr - 1;
725 j = sourceLength - 1;
726
727 while(i >= 0 && j >= 0) {
728 if( countr>0 && isLamAlefChar(dest[i]) ) {
729 tempbuffer[j] = 0x0644;
730 tempbuffer[j-1] = convertLamAlef[ dest[i] - 0xFEF5 ];
731 j--;
732 countr--;
733 }
734 else {
735 if( countr == 0 && isLamAlefChar(dest[i]) )
736 *pErrorCode=U_NO_SPACE_AVAILABLE;
737 tempbuffer[j] = dest[i];
738 }
739 i--;
740 j--;
741 }
742
743 if(countr > 0) {
46f4442e 744 uprv_memmove(tempbuffer, tempbuffer+countr, sourceLength*U_SIZEOF_UCHAR);
b75a7d8f 745 if(u_strlen(tempbuffer) < sourceLength) {
46f4442e 746 for(i=sourceLength-1;i>=sourceLength-countr;i--) {
b75a7d8f 747 tempbuffer[i] = 0x0020;
46f4442e 748 }
b75a7d8f
A
749 }
750 }
751
752 uprv_memcpy(dest, tempbuffer, sourceLength*U_SIZEOF_UCHAR);
753
754 destSize = sourceLength;
755 break;
756
757 default :
758 /* will never occur because of validity checks */
759 break;
760 }
761
762 if(tempbuffer)
763 uprv_free(tempbuffer);
764
765 return destSize;
766}
767
/*
 *Name     : shapeUnicode
 *Function : Converts an Arabic Unicode buffer in 06xx Range into a shaped
 *           arabic Unicode buffer in FExx Range
 *
 *  dest          buffer holding the text; shaped in place
 *  sourceLength  number of UChars in dest. dest[sourceLength-1] is read
 *                unconditionally, so this must be at least 1
 *                (NOTE(review): callers are expected to guarantee this)
 *  destSize      only used as pass-through to removeLamAlefSpaces()
 *  options       u_shapeArabic() options
 *  pErrorCode    passed on to removeLamAlefSpaces()
 *  tashkeelFlag  when 1, a tashkeel character other than U+064C/U+064D
 *                whose neighbours link on both sides gets Shape 1
 *                (except between a Lam and an Alef); otherwise tashkeel
 *                always gets Shape 0
 *
 * Returns the value of removeLamAlefSpaces() if a Lam-Alef pair was seen,
 * otherwise sourceLength.
 */
static int32_t
shapeUnicode(UChar *dest, int32_t sourceLength,
             int32_t destSize,uint32_t options,
             UErrorCode *pErrorCode,
             int tashkeelFlag) {

    int32_t          i, iend;
    int32_t          step;
    int32_t          lastPos,Nx, Nw;     /* lastPos: index of the last non-IRRELEVANT char handled;
                                            Nx: index of the next non-IRRELEVANT char (-2 = not known yet,
                                            3000 = there is none); Nw: scan position used to find Nx */
    unsigned int     Shape;
    int32_t          lamalef_found = 0;
    UChar            prevLink = 0, lastLink = 0, currLink, nextLink = 0;
    UChar            wLamalef;

    /*
     * Converts the input buffer from FExx Range into 06xx Range
     * to make sure that all characters are in the 06xx range
     * even the lamalef is converted to the special region in
     * the 06xx range
     * (skipped unless the preserve-presentation option is NOOP)
     */
    if ((options & U_SHAPE_PRESERVE_PRESENTATION_MASK)  == U_SHAPE_PRESERVE_PRESENTATION_NOOP) {
        for (i = 0; i < sourceLength; i++) {
            UChar inputChar  = dest[i];
            if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) {
                /* a 0 entry means "no mapping": keep the character */
                UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
                if (c != 0)
                    dest[i] = c;
            } else if ( (inputChar >= 0xFE70) && (inputChar <= 0xFEFC)) {
                dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
            } else {
                dest[i] = inputChar ;
            }
        }
    }

    /* sets the index to the end of the buffer, together with the step point to -1 */
    i = sourceLength - 1;
    iend = -1;
    step = -1;

    /*
     * This function resolves the link between the characters .
     * Arabic characters have four forms :
     * Isolated Form, Initial Form, Middle Form and Final Form
     */
    currLink = getLink(dest[i]);

    lastPos = i;
    Nx = -2, Nw = 0;

    while (i != iend) {
        /* If high byte of currLink > 0 then more than one shape */
        if ((currLink & 0xFF00) > 0 || (getLink(dest[i]) & IRRELEVANT) != 0) {
            Nw = i + step;
            while (Nx < 0) {         /* we need to know about next char */
                if(Nw == iend) {
                    /* ran off the buffer: no next char, 3000 ends the search */
                    nextLink = 0;
                    Nx = 3000;
                } else {
                    nextLink = getLink(dest[Nw]);
                    if((nextLink & IRRELEVANT) == 0) {
                        Nx = Nw;
                    } else {
                        Nw = Nw + step;
                    }
                }
            }

            /*
             * Alef whose previously handled neighbour is a Lam:
             * The default case is to drop the Alef and replace it by
             * 0xFFFF, and to put the 0x065C-0x065F placeholder where the
             * Lam was. 0xFFFF is used so that removeLamAlefSpaces()
             * handles only the spaces generated during lamalef
             * generation; it replaces or removes them afterwards.
             */
            if ( ((currLink & ALEFTYPE) > 0) && ((lastLink & LAMTYPE) > 0) ) {
                lamalef_found = 1;
                wLamalef = changeLamAlef(dest[i]); /*get from 0x065C-0x065f */
                if ( wLamalef != 0) {
                    dest[i] = 0xFFFF;
                    dest[lastPos] =wLamalef;
                    i=lastPos;              /* continue with the position of the former Lam */
                }
                lastLink = prevLink;
                currLink = getLink(wLamalef);
            }
            /*
             * get the proper shape according to link ability of neighbors
             * and of character; depends on the order of the shapes
             * (isolated, initial, middle, final) in the compatibility area
             */
            Shape = shapeTable[nextLink & (LINKR + LINKL)]
                              [lastLink & (LINKR + LINKL)]
                              [currLink & (LINKR + LINKL)];

            if ((currLink & (LINKR+LINKL)) == 1) {
                /* LINKR only: just two shapes, so keep the low bit */
                Shape &= 1;
            } else if(isTashkeelChar(dest[i])) {
                if( (lastLink & LINKL) && (nextLink & LINKR) && (tashkeelFlag == 1) &&
                     dest[i] != 0x064C && dest[i] != 0x064D )
                {
                    Shape = 1;
                    if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
                        Shape = 0;
                    }
                }
                else {
                    Shape = 0;
                }
            }
            /* only characters in U+0600..U+06FF are replaced by a presentation form */
            if ((dest[i] ^ 0x0600) < 0x100) {
                if(isTashkeelChar(dest[i]))
                    dest[i] =  0xFE70 + IrrelevantPos[(dest[i] - 0x064B)] + Shape;
                else if ((currLink & APRESENT) > 0)
                    dest[i] = (UChar)(0xFB50 + (currLink >> 8) + Shape);
                else if ((currLink >> 8) > 0 && (currLink & IRRELEVANT) == 0)
                    dest[i] = (UChar)(0xFE70 + (currLink >> 8) + Shape);
            }
        }

        /* move one notch forward */
        if ((currLink & IRRELEVANT) == 0) {
            prevLink = lastLink;
            lastLink = currLink;
            lastPos = i;
        }

        i = i + step;
        if (i == Nx) {
            /* reached the char found by the look-ahead: reuse its link */
            currLink = nextLink;
            Nx = -2;
        } else if(i != iend) {
            currLink = getLink(dest[i]);
        }
    }

    if(lamalef_found != 0)
        destSize = removeLamAlefSpaces(dest,sourceLength,destSize,options,pErrorCode);
    else
        destSize = sourceLength;

    return destSize;
}
909
910/*
911 *Name : deShapeUnicode
912 *Function : Converts an Arabic Unicode buffer in FExx Range into unshaped
913 * arabic Unicode buffer in 06xx Range
914 */
915static int32_t
916deShapeUnicode(UChar *dest, int32_t sourceLength,
917 int32_t destSize,uint32_t options,
918 UErrorCode *pErrorCode) {
919 int32_t i = 0;
920 int32_t lamalef_found = 0;
921
922 /*
923 *This for loop changes the buffer from the Unicode FE range to
924 *the Unicode 06 range
925 */
926 for(i = 0; i < sourceLength; i++) {
927 UChar inputChar = dest[i];
46f4442e
A
928 if ( (inputChar >= 0xFB50) && (inputChar <= 0xFBFF)) { /* FBxx Arabic range */
929 UChar c = convertFBto06 [ (inputChar - 0xFB50) ];
930 if (c != 0)
931 dest[i] = c;
932 } else if (( inputChar >= 0xFE70) && (inputChar <= 0xFEF4 )) { /* FExx Arabic range */
b75a7d8f
A
933 dest[i] = convertFEto06 [ (inputChar - 0xFE70) ] ;
934 } else {
935 dest[i] = inputChar ;
936 }
937 if( isLamAlefChar(dest[i]) )
938 lamalef_found = 1;
939 }
940
941 /* If there is lamalef in the buffer call expandLamAlef */
942 if(lamalef_found != 0)
943 destSize = expandLamAlef(dest,sourceLength,destSize,options,pErrorCode);
944 else
945 destSize = sourceLength;
946
947 return destSize;
948}
949
950U_CAPI int32_t U_EXPORT2
951u_shapeArabic(const UChar *source, int32_t sourceLength,
952 UChar *dest, int32_t destCapacity,
953 uint32_t options,
954 UErrorCode *pErrorCode) {
955
956 int32_t destLength;
957
958 /* usual error checking */
959 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
960 return 0;
961 }
962
963 /* make sure that no reserved options values are used; allow dest==NULL only for preflighting */
46f4442e
A
964 if( source==NULL || sourceLength<-1 || (dest==NULL && destCapacity!=0) || destCapacity<0 ||
965 (options&U_SHAPE_DIGIT_TYPE_RESERVED)==U_SHAPE_DIGIT_TYPE_RESERVED ||
966 (options&U_SHAPE_DIGITS_MASK)==U_SHAPE_DIGITS_RESERVED ||
967 ((options&U_SHAPE_LENGTH_MASK) != U_SHAPE_LENGTH_GROW_SHRINK &&
968 (options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) != 0) ||
969 ((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK) == U_SHAPE_AGGREGATE_TASHKEEL &&
970 (options&U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED) != U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)
b75a7d8f
A
971 ) {
972 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
973 return 0;
974 }
46f4442e 975
b75a7d8f
A
976 /* determine the source length */
977 if(sourceLength==-1) {
978 sourceLength=u_strlen(source);
979 }
73c04bcf 980 if(sourceLength<=0) {
b75a7d8f
A
981 return u_terminateUChars(dest, destCapacity, 0, pErrorCode);
982 }
983
984 /* check that source and destination do not overlap */
985 if( dest!=NULL &&
986 ((source<=dest && dest<source+sourceLength) ||
46f4442e 987 (dest<=source && source<dest+destCapacity))) {
b75a7d8f
A
988 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
989 return 0;
990 }
991
992 if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
993 UChar buffer[300];
46f4442e 994 UChar *tempbuffer, *tempsource = NULL;
b75a7d8f
A
995 int32_t outputSize, spacesCountl=0, spacesCountr=0;
996
46f4442e
A
997 if((options&U_SHAPE_AGGREGATE_TASHKEEL_MASK)>0) {
998 int32_t logical_order = (options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL;
999 int32_t aggregate_tashkeel =
1000 (options&(U_SHAPE_AGGREGATE_TASHKEEL_MASK+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED)) ==
1001 (U_SHAPE_AGGREGATE_TASHKEEL+U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED);
1002 int step=logical_order?1:-1;
1003 int j=logical_order?-1:2*sourceLength;
1004 int i=logical_order?-1:sourceLength;
1005 int end=logical_order?sourceLength:-1;
1006 int aggregation_possible = 1;
1007 UChar prev = 0;
1008 UChar prevLink, currLink = 0;
1009 int newSourceLength = 0;
1010 tempsource = (UChar *)uprv_malloc(2*sourceLength*U_SIZEOF_UCHAR);
1011 if(tempsource == NULL) {
1012 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
1013 return 0;
1014 }
1015
1016 while ((i+=step) != end) {
1017 prevLink = currLink;
1018 currLink = getLink(source[i]);
1019 if (aggregate_tashkeel && ((prevLink|currLink)&COMBINE) == COMBINE && aggregation_possible) {
1020 aggregation_possible = 0;
1021 tempsource[j] = (prev<source[i]?prev:source[i])-0x064C+0xFC5E;
1022 currLink = getLink(tempsource[j]);
1023 } else {
1024 aggregation_possible = 1;
1025 tempsource[j+=step] = source[i];
1026 prev = source[i];
1027 newSourceLength++;
1028 }
1029 }
1030 source = tempsource+(logical_order?0:j);
1031 sourceLength = newSourceLength;
1032 }
1033
b75a7d8f
A
1034 /* calculate destination size */
1035 /* TODO: do we ever need to do this pure preflighting? */
1036 if((options&U_SHAPE_LENGTH_MASK)==U_SHAPE_LENGTH_GROW_SHRINK) {
1037 outputSize=calculateSize(source,sourceLength,destCapacity,options);
1038 } else {
1039 outputSize=sourceLength;
1040 }
1041 if(outputSize>destCapacity) {
1042 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
46f4442e 1043 if (tempsource != NULL) uprv_free(tempsource);
b75a7d8f
A
1044 return outputSize;
1045 }
1046
1047 /*
1048 * need a temporary buffer of size max(outputSize, sourceLength)
1049 * because at first we copy source->temp
1050 */
1051 if(sourceLength>outputSize) {
1052 outputSize=sourceLength;
1053 }
1054
1055 /* Start of Arabic letter shaping part */
1056 if(outputSize<=sizeof(buffer)/U_SIZEOF_UCHAR) {
1057 outputSize=sizeof(buffer)/U_SIZEOF_UCHAR;
1058 tempbuffer=buffer;
1059 } else {
1060 tempbuffer = (UChar *)uprv_malloc(outputSize*U_SIZEOF_UCHAR);
1061
1062 /*Test for NULL*/
1063 if(tempbuffer == NULL) {
1064 *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
46f4442e 1065 if (tempsource != NULL) uprv_free(tempsource);
b75a7d8f
A
1066 return 0;
1067 }
1068 }
1069 uprv_memcpy(tempbuffer, source, sourceLength*U_SIZEOF_UCHAR);
46f4442e 1070 if (tempsource != NULL) uprv_free(tempsource);
b75a7d8f
A
1071 if(sourceLength<outputSize) {
1072 uprv_memset(tempbuffer+sourceLength, 0, (outputSize-sourceLength)*U_SIZEOF_UCHAR);
1073 }
1074
1075 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
1076 countSpaces(tempbuffer,sourceLength,options,&spacesCountl,&spacesCountr);
46f4442e 1077 invertBuffer(tempbuffer,sourceLength,options,spacesCountl,spacesCountr);
b75a7d8f
A
1078 }
1079
1080 switch(options&U_SHAPE_LETTERS_MASK) {
1081 case U_SHAPE_LETTERS_SHAPE :
1082 /* Call the shaping function with tashkeel flag == 1 */
1083 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1);
1084 break;
1085 case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
1086 /* Call the shaping function with tashkeel flag == 0 */
1087 destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0);
1088 break;
1089 case U_SHAPE_LETTERS_UNSHAPE :
1090 /* Call the deshaping function */
1091 destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode);
1092 break;
1093 default :
1094 /* will never occur because of validity checks above */
1095 destLength = 0;
1096 break;
1097 }
1098
1099 /*
1100 * TODO: (markus 2002aug01)
1101 * For as long as we always preflight the outputSize above
1102 * we should U_ASSERT(outputSize==destLength)
1103 * except for the adjustment above before the tempbuffer allocation
1104 */
1105
1106 if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_LOGICAL) {
1107 countSpaces(tempbuffer,destLength,options,&spacesCountl,&spacesCountr);
46f4442e 1108 invertBuffer(tempbuffer,destLength,options,spacesCountl,spacesCountr);
b75a7d8f
A
1109 }
1110 uprv_memcpy(dest, tempbuffer, uprv_min(destLength, destCapacity)*U_SIZEOF_UCHAR);
1111
1112 if(tempbuffer!=buffer) {
1113 uprv_free(tempbuffer);
1114 }
1115
1116 if(destLength>destCapacity) {
1117 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1118 return destLength;
1119 }
1120
1121 /* End of Arabic letter shaping part */
1122 } else {
1123 /*
1124 * No letter shaping:
1125 * just make sure the destination is large enough and copy the string.
1126 */
1127 if(destCapacity<sourceLength) {
1128 /* this catches preflighting, too */
1129 *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
1130 return sourceLength;
1131 }
1132 uprv_memcpy(dest, source, sourceLength*U_SIZEOF_UCHAR);
1133 destLength=sourceLength;
1134 }
1135
1136 /*
1137 * Perform number shaping.
1138 * With UTF-16 or UTF-32, the length of the string is constant.
1139 * The easiest way to do this is to operate on the destination and
1140 * "shape" the digits in-place.
1141 */
1142 if((options&U_SHAPE_DIGITS_MASK)!=U_SHAPE_DIGITS_NOOP) {
1143 UChar digitBase;
1144 int32_t i;
1145
1146 /* select the requested digit group */
1147 switch(options&U_SHAPE_DIGIT_TYPE_MASK) {
1148 case U_SHAPE_DIGIT_TYPE_AN:
1149 digitBase=0x660; /* Unicode: "Arabic-Indic digits" */
1150 break;
1151 case U_SHAPE_DIGIT_TYPE_AN_EXTENDED:
1152 digitBase=0x6f0; /* Unicode: "Eastern Arabic-Indic digits (Persian and Urdu)" */
1153 break;
1154 default:
1155 /* will never occur because of validity checks above */
1156 digitBase=0;
1157 break;
1158 }
1159
1160 /* perform the requested operation */
1161 switch(options&U_SHAPE_DIGITS_MASK) {
1162 case U_SHAPE_DIGITS_EN2AN:
1163 /* add (digitBase-'0') to each European (ASCII) digit code point */
1164 digitBase-=0x30;
1165 for(i=0; i<destLength; ++i) {
1166 if(((uint32_t)dest[i]-0x30)<10) {
1167 dest[i]+=digitBase;
1168 }
1169 }
1170 break;
1171 case U_SHAPE_DIGITS_AN2EN:
1172 /* subtract (digitBase-'0') from each Arabic digit code point */
1173 for(i=0; i<destLength; ++i) {
1174 if(((uint32_t)dest[i]-(uint32_t)digitBase)<10) {
1175 dest[i]-=digitBase-0x30;
1176 }
1177 }
1178 break;
1179 case U_SHAPE_DIGITS_ALEN2AN_INIT_LR:
1180 _shapeToArabicDigitsWithContext(dest, destLength,
1181 digitBase,
1182 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
1183 FALSE);
1184 break;
1185 case U_SHAPE_DIGITS_ALEN2AN_INIT_AL:
1186 _shapeToArabicDigitsWithContext(dest, destLength,
1187 digitBase,
1188 (UBool)((options&U_SHAPE_TEXT_DIRECTION_MASK)==U_SHAPE_TEXT_DIRECTION_LOGICAL),
1189 TRUE);
1190 break;
1191 default:
1192 /* will never occur because of validity checks above */
1193 break;
1194 }
1195 }
1196
1197 return u_terminateUChars(dest, destCapacity, destLength, pErrorCode);
1198}