]> git.saurik.com Git - apple/icu.git/blobdiff - icuSources/common/ushape.cpp
ICU-511.25.tar.gz
[apple/icu.git] / icuSources / common / ushape.cpp
index c09c13228f4a9df6d0122af202d32dfe7e5361af..05bd5a4fa313c2e19f9cb806ae8c6fd5e2c7a59f 100644 (file)
 #define SHADDA_CHAR        0xFE7C
 #define TATWEEL_CHAR       0x0640
 #define SHADDA_TATWEEL_CHAR  0xFE7D
+#define SHADDA06_CHAR      0x0651
 
 #define SHAPE_MODE   0
 #define DESHAPE_MODE 1
 
-static UChar tailChar = OLD_TAIL_CHAR;
-static uint32_t uShapeLamalefBegin = U_SHAPE_LAMALEF_BEGIN;
-static uint32_t uShapeLamalefEnd    = U_SHAPE_LAMALEF_END;
-static uint32_t uShapeTashkeelBegin = U_SHAPE_TASHKEEL_BEGIN;
-static uint32_t uShapeTashkeelEnd = U_SHAPE_TASHKEEL_END;
-static int spacesRelativeToTextBeginEnd = 0;
+struct uShapeVariables {
+     UChar tailChar;
+     uint32_t uShapeLamalefBegin;
+     uint32_t uShapeLamalefEnd;
+     uint32_t uShapeTashkeelBegin;
+     uint32_t uShapeTashkeelEnd;
+     int spacesRelativeToTextBeginEnd;
+};
 
 static const uint8_t tailFamilyIsolatedFinal[] = {
     /* FEB1 */ 1,
@@ -702,7 +705,7 @@ static int32_t
 handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
                     int32_t destSize,
                     uint32_t options,
-                    UErrorCode *pErrorCode ) {
+                    UErrorCode *pErrorCode,struct uShapeVariables shapeVars ) {
 
     int32_t i = 0, j = 0;
     int32_t count = 0;
@@ -721,14 +724,15 @@ handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
         }
     }
 
-    if (lamAlefOption || tashkeelOption){
-        tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
-        /* Test for NULL */
-        if(tempbuffer == NULL) {
-            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
+    tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
+    /* Test for NULL */
+    if(tempbuffer == NULL) {
+        *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
+        return 0;
+    }
 
+
+    if (lamAlefOption || tashkeelOption){
         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
 
         i = j = 0; count = 0;
@@ -777,27 +781,21 @@ handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
     tashkeelOption = 0;
 
     if (shapingMode == 0) {
-        if ( ((options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefBegin) ||
+        if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin) ||
               (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
-              && (spacesRelativeToTextBeginEnd==1)) ) {
+              && (shapeVars.spacesRelativeToTextBeginEnd==1)) ) {
             lamAlefOption = 1;
         }
-        if ( (options&U_SHAPE_TASHKEEL_MASK) == uShapeTashkeelBegin ) {
+        if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelBegin ) {
             tashkeelOption = 1;
         }
     }
 
     if(lamAlefOption || tashkeelOption){
-        tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
-
-        /* Test for NULL */
-        if(tempbuffer == NULL) {
-            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-
         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
+        
         i = j = sourceLength; count = 0;
+        
         while(i >= 0) {
             if ( (lamAlefOption && dest[i] == LAMALEF_SPACE_SUB) ||
                  (tashkeelOption && dest[i] == TASHKEEL_SPACE_SUB) ){
@@ -824,24 +822,17 @@ handleGeneratedSpaces(UChar *dest, int32_t sourceLength,
     tashkeelOption = 0;
 
     if (shapingMode == 0) {
-        if ( ((options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefEnd) ||
+        if ( ((options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd) ||
               (((options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO )
-              && (spacesRelativeToTextBeginEnd==0)) ) {
+              && (shapeVars.spacesRelativeToTextBeginEnd==0)) ) {
             lamAlefOption = 1;
         }
-        if ( (options&U_SHAPE_TASHKEEL_MASK) == uShapeTashkeelEnd ){
+        if ( (options&U_SHAPE_TASHKEEL_MASK) == shapeVars.uShapeTashkeelEnd ){
             tashkeelOption = 1;
         }
     }
 
     if(lamAlefOption || tashkeelOption){
-        tempbuffer = (UChar *)uprv_malloc((sourceLength+1)*U_SIZEOF_UCHAR);
-       /* Test for NULL */
-        if(tempbuffer == NULL) {
-            *pErrorCode = U_MEMORY_ALLOCATION_ERROR;
-            return 0;
-        }
-
         uprv_memset(tempbuffer, 0, (sourceLength+1)*U_SIZEOF_UCHAR);
 
         i = j = 0; count = 0;
@@ -1014,7 +1005,7 @@ expandCompositCharAtEnd(UChar *dest, int32_t sourceLength, int32_t destSize,UErr
 
 static int32_t
 expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UErrorCode *pErrorCode,
-                         int yehHamzaOption, int seenTailOption, int lamAlefOption) {
+                         int yehHamzaOption, int seenTailOption, int lamAlefOption, struct uShapeVariables shapeVars) {
     int32_t      i = 0;
 
 
@@ -1023,7 +1014,7 @@ expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UEr
     for(i = 0 ;i<=sourceLength-1;i++) {
             if (seenTailOption && isSeenTailFamilyChar(dest[i])) {
                 if ((i>0) && (dest[i-1] == SPACE_CHAR) ) {
-                    dest[i-1] = tailChar;
+                    dest[i-1] = shapeVars.tailChar;
                 }else {
                     *pErrorCode=U_NO_SPACE_AVAILABLE;
                 }
@@ -1068,7 +1059,7 @@ expandCompositCharAtNear(UChar *dest, int32_t sourceLength, int32_t destSize,UEr
 static int32_t
 expandCompositChar(UChar *dest, int32_t sourceLength,
               int32_t destSize,uint32_t options,
-              UErrorCode *pErrorCode, int shapingMode) {
+              UErrorCode *pErrorCode, int shapingMode,struct uShapeVariables shapeVars) {
 
     int32_t      i = 0,j = 0;
 
@@ -1080,7 +1071,7 @@ expandCompositChar(UChar *dest, int32_t sourceLength,
     if (shapingMode == 1){
         if ( (options&U_SHAPE_LAMALEF_MASK) == U_SHAPE_LAMALEF_AUTO){
 
-            if(spacesRelativeToTextBeginEnd == 0) {
+            if(shapeVars.spacesRelativeToTextBeginEnd == 0) {
                 destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
 
                 if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
@@ -1099,19 +1090,19 @@ expandCompositChar(UChar *dest, int32_t sourceLength,
             if(*pErrorCode == U_NO_SPACE_AVAILABLE) {
                 *pErrorCode = U_ZERO_ERROR;
                 destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
-                                                seenTailOption, 1);
+                                                seenTailOption, 1,shapeVars);
             }
         }
     }
 
     if (shapingMode == 1){
-        if ( (options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefEnd){
+        if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefEnd){
             destSize = expandCompositCharAtEnd(dest, sourceLength, destSize, pErrorCode);
         }
     }
 
     if (shapingMode == 1){
-        if ( (options&U_SHAPE_LAMALEF_MASK) == uShapeLamalefBegin){
+        if ( (options&U_SHAPE_LAMALEF_MASK) == shapeVars.uShapeLamalefBegin){
             destSize = expandCompositCharAtBegin(dest, sourceLength, destSize, pErrorCode);
         }
     }
@@ -1133,7 +1124,7 @@ expandCompositChar(UChar *dest, int32_t sourceLength,
 
     if (yehHamzaOption || seenTailOption || lamAlefOption){
         destSize = expandCompositCharAtNear(dest, sourceLength, destSize, pErrorCode, yehHamzaOption,
-                                            seenTailOption,lamAlefOption);
+                                            seenTailOption,lamAlefOption,shapeVars);
     }
 
 
@@ -1182,7 +1173,7 @@ static int32_t
 shapeUnicode(UChar *dest, int32_t sourceLength,
              int32_t destSize,uint32_t options,
              UErrorCode *pErrorCode,
-             int tashkeelFlag) {
+             int tashkeelFlag, struct uShapeVariables shapeVars) {
 
     int32_t          i, iend;
     int32_t          step;
@@ -1260,20 +1251,20 @@ shapeUnicode(UChar *dest, int32_t sourceLength,
                 currLink = getLink(wLamalef);    /* LAMALEF_SPACE_SUB is added here and is replaced by spaces   */
             }                                    /* in removeLamAlefSpaces()                         */
 
-                       if ((i > 0) && (dest[i-1] == SPACE_CHAR)){
-             if ( isSeenFamilyChar(dest[i])){
+            if ((i > 0) && (dest[i-1] == SPACE_CHAR)){
+                if ( isSeenFamilyChar(dest[i])) {
                     seenfamFound = 1;
                 } else if (dest[i] == YEH_HAMZA_CHAR) {
                     yehhamzaFound = 1;
                 }
-                       }
-                       else if(i==0){
-                               if ( isSeenFamilyChar(dest[i])){
+            }
+            else if(i==0){
+                if ( isSeenFamilyChar(dest[i])){
                     seenfamFound = 1;
                 } else if (dest[i] == YEH_HAMZA_CHAR) {
                     yehhamzaFound = 1;
                 }
-                       }
+            }
 
             /*
              * get the proper shape according to link ability of neighbors
@@ -1294,14 +1285,15 @@ shapeUnicode(UChar *dest, int32_t sourceLength,
                     if( (nextLink&ALEFTYPE) == ALEFTYPE && (lastLink&LAMTYPE) == LAMTYPE ) {
                         Shape = 0;
                     }
-                }
-                else {
+                } else if(tashkeelFlag == 2 && dest[i] == SHADDA06_CHAR){
+                    Shape = 1;
+                } else {
                     Shape = 0;
                 }
             }
             if ((dest[i] ^ 0x0600) < 0x100) {
                 if ( isTashkeelChar(dest[i]) ){
-                    if (tashkeelFlag == 2){
+                    if (tashkeelFlag == 2  && dest[i] != SHADDA06_CHAR){
                         dest[i] = TASHKEEL_SPACE_SUB;
                         tashkeelFound  = 1;
                     } else {
@@ -1335,11 +1327,11 @@ shapeUnicode(UChar *dest, int32_t sourceLength,
     }
     destSize = sourceLength;
     if ( (lamalef_found != 0 ) || (tashkeelFound  != 0) ){
-        destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode);
+        destSize = handleGeneratedSpaces(dest,sourceLength,destSize,options,pErrorCode, shapeVars);
     }
 
     if ( (seenfamFound != 0) || (yehhamzaFound != 0) ) {
-        destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE);
+        destSize = expandCompositChar(dest, sourceLength,destSize,options,pErrorCode, SHAPE_MODE,shapeVars);
     }
     return destSize;
 }
@@ -1352,7 +1344,7 @@ shapeUnicode(UChar *dest, int32_t sourceLength,
 static int32_t
 deShapeUnicode(UChar *dest, int32_t sourceLength,
                int32_t destSize,uint32_t options,
-               UErrorCode *pErrorCode) {
+               UErrorCode *pErrorCode, struct uShapeVariables shapeVars) {
     int32_t i = 0;
     int32_t lamalef_found = 0;
     int32_t yehHamzaComposeEnabled = 0;
@@ -1391,7 +1383,7 @@ deShapeUnicode(UChar *dest, int32_t sourceLength,
 
    destSize = sourceLength;
    if (lamalef_found != 0){
-          destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE);
+          destSize = expandCompositChar(dest,sourceLength,destSize,options,pErrorCode,DESHAPE_MODE, shapeVars);
    }
    return destSize;
 }
@@ -1409,12 +1401,7 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
               UErrorCode *pErrorCode) {
 
     int32_t destLength;
-
-    spacesRelativeToTextBeginEnd = 0;
-    uShapeLamalefBegin = U_SHAPE_LAMALEF_BEGIN;
-    uShapeLamalefEnd = U_SHAPE_LAMALEF_END;
-    uShapeTashkeelBegin = U_SHAPE_TASHKEEL_BEGIN;
-    uShapeTashkeelEnd = U_SHAPE_TASHKEEL_END;
+    struct  uShapeVariables shapeVars = { OLD_TAIL_CHAR,U_SHAPE_LAMALEF_BEGIN,U_SHAPE_LAMALEF_END,U_SHAPE_TASHKEEL_BEGIN,U_SHAPE_TASHKEEL_END,0};
 
     /* usual error checking */
     if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
@@ -1438,27 +1425,27 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
     }
-       /* Validate  lamalef options */
-       if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
+    /* Validate  lamalef options */
+    if(((options&U_SHAPE_LAMALEF_MASK) > 0)&&
               !(((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_BEGIN) ||
                 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_END ) ||
                 ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_RESIZE )||
                  ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_AUTO) ||
                  ((options & U_SHAPE_LAMALEF_MASK)==U_SHAPE_LAMALEF_NEAR)))
-       {
-                *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+    {
+         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
-       }
-       /* Validate  Tashkeel options */
-       if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
+    }
+    /* Validate  Tashkeel options */
+    if(((options&U_SHAPE_TASHKEEL_MASK) > 0)&&
                    !(((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_BEGIN) ||
                      ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_END )
                     ||((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_RESIZE )||
-                                       ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
-       {
-                *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
+                    ((options & U_SHAPE_TASHKEEL_MASK)==U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)))
+    {
+         *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
         return 0;
-       }
+    }
     /* determine the source length */
     if(sourceLength==-1) {
         sourceLength=u_strlen(source);
@@ -1477,9 +1464,9 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
 
     /* Does Options contain the new Seen Tail Unicode code point option */
     if ( (options&U_SHAPE_TAIL_TYPE_MASK) == U_SHAPE_TAIL_NEW_UNICODE){
-        tailChar = NEW_TAIL_CHAR;
+        shapeVars.tailChar = NEW_TAIL_CHAR;
     }else {
-        tailChar = OLD_TAIL_CHAR;
+        shapeVars.tailChar = OLD_TAIL_CHAR;
     }
 
     if((options&U_SHAPE_LETTERS_MASK)!=U_SHAPE_LETTERS_NOOP) {
@@ -1577,12 +1564,11 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
 
         if((options&U_SHAPE_TEXT_DIRECTION_MASK) == U_SHAPE_TEXT_DIRECTION_VISUAL_LTR) {
             if((options&U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK) == U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END) {
-                spacesRelativeToTextBeginEnd = 1;
-                uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
-                uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
-
-                uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
-                uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
+                shapeVars.spacesRelativeToTextBeginEnd = 1;
+                shapeVars.uShapeLamalefBegin = U_SHAPE_LAMALEF_END;
+                shapeVars.uShapeLamalefEnd = U_SHAPE_LAMALEF_BEGIN;
+                shapeVars.uShapeTashkeelBegin = U_SHAPE_TASHKEEL_END;
+                shapeVars.uShapeTashkeelEnd = U_SHAPE_TASHKEEL_BEGIN;
             }
         }
 
@@ -1591,10 +1577,10 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
              if( (options&U_SHAPE_TASHKEEL_MASK)> 0
                  && ((options&U_SHAPE_TASHKEEL_MASK) !=U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL)) {
                 /* Call the shaping function with tashkeel flag == 2 for removal of tashkeel */
-                destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2);
+                destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,2,shapeVars);
              }else {
                 /* default Call the shaping function with tashkeel flag == 1 */
-                destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1);
+                destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,1,shapeVars);
 
                 /*After shaping text check if user wants to remove tashkeel and replace it with tatweel*/
                 if( (options&U_SHAPE_TASHKEEL_MASK) == U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL){
@@ -1604,12 +1590,12 @@ u_shapeArabic(const UChar *source, int32_t sourceLength,
             break;
         case U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED :
             /* Call the shaping function with tashkeel flag == 0 */
-            destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0);
+            destLength = shapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,0,shapeVars);
             break;
 
         case U_SHAPE_LETTERS_UNSHAPE :
             /* Call the deshaping function */
-            destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode);
+            destLength = deShapeUnicode(tempbuffer,sourceLength,destCapacity,options,pErrorCode,shapeVars);
             break;
         default :
             /* will never occur because of validity checks above */