X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/efa1e6592fb03ce23b15276b2b91d885a3ee7da5..57a6839dcb3bba09e8228b822b290604668416fe:/icuSources/common/ubidi.c diff --git a/icuSources/common/ubidi.c b/icuSources/common/ubidi.c index 4a2db2a9..295a1351 100644 --- a/icuSources/common/ubidi.c +++ b/icuSources/common/ubidi.c @@ -1,7 +1,7 @@ /* ****************************************************************************** * -* Copyright (C) 1999-2012, International Business Machines +* Copyright (C) 1999-2014, International Business Machines * Corporation and others. All Rights Reserved. * ****************************************************************************** @@ -12,6 +12,7 @@ * * created on: 1999jul27 * created by: Markus W. Scherer, updated by Matitiahu Allouche +* */ #include "cmemory.h" @@ -28,8 +29,8 @@ * General implementation notes: * * Throughout the implementation, there are comments like (W2) that refer to - * rules of the BiDi algorithm in its version 5, in this example to the second - * rule of the resolution of weak types. + * rules of the BiDi algorithm, in this example to the second rule of the + * resolution of weak types. * * For handling surrogate pairs, where two UChar's form one "abstract" (or UTF-32) * character according to UTF-16, the second UChar gets the directional property of @@ -59,12 +60,12 @@ * For the purpose of conformance, the levels of all these codes * do not matter. * - * Note that this implementation never modifies the dirProps - * after the initial setup. - * + * Note that this implementation modifies the dirProps + * after the initial setup, when applying X5c (replace FSI by LRI or RLI), + * X6, N0 (replace paired brackets by L or R). * - * In this implementation, the resolution of weak types (Wn), - * neutrals (Nn), and the assignment of the resolved level (In) + * In this implementation, the resolution of weak types (W1 to W6), + * neutrals (N1 and N2), and the assignment of the resolved level (In) * are all done in one single loop, in resolveImplicitLevels(). * Changes of dirProp values are done on the fly, without writing * them back to the dirProps array. @@ -111,8 +112,12 @@ static const Flags flagE[2]={ DIRPROP_FLAG(LRE), DIRPROP_FLAG(RLE) }; static const Flags flagO[2]={ DIRPROP_FLAG(LRO), DIRPROP_FLAG(RLO) }; #define DIRPROP_FLAG_LR(level) flagLR[(level)&1] -#define DIRPROP_FLAG_E(level) flagE[(level)&1] -#define DIRPROP_FLAG_O(level) flagO[(level)&1] +#define DIRPROP_FLAG_E(level) flagE[(level)&1] +#define DIRPROP_FLAG_O(level) flagO[(level)&1] + +#define DIR_FROM_STRONG(strong) ((strong)==L ? L : R) + +#define NO_OVERRIDE(level) ((level)&~UBIDI_LEVEL_OVERRIDE) /* UBiDi object management -------------------------------------------------- */ @@ -240,12 +245,18 @@ ubidi_close(UBiDi *pBiDi) { if(pBiDi->levelsMemory!=NULL) { uprv_free(pBiDi->levelsMemory); } - if(pBiDi->runsMemory!=NULL) { - uprv_free(pBiDi->runsMemory); + if(pBiDi->openingsMemory!=NULL) { + uprv_free(pBiDi->openingsMemory); } if(pBiDi->parasMemory!=NULL) { uprv_free(pBiDi->parasMemory); } + if(pBiDi->runsMemory!=NULL) { + uprv_free(pBiDi->runsMemory); + } + if(pBiDi->isolatesMemory!=NULL) { + uprv_free(pBiDi->isolatesMemory); + } if(pBiDi->insertPoints.points!=NULL) { uprv_free(pBiDi->insertPoints.points); } @@ -356,9 +367,13 @@ int32_t length){ /* perform (P2)..(P3) ------------------------------------------------------- */ +/** + * Returns the directionality of the first strong character + * after the last B in prologue, if any. + * Requires prologue!=null. + */ static DirProp firstL_R_AL(UBiDi *pBiDi) { - /* return first strong char after the last B in prologue if any */ const UChar *text=pBiDi->prologue; int32_t length=pBiDi->proLength; int32_t i; @@ -382,22 +397,46 @@ firstL_R_AL(UBiDi *pBiDi) { } /* - * Get the directional properties for the text, - * calculate the flags bit-set, and - * determine the paragraph level if necessary. + * Check that there are enough entries in the array pointed to by pBiDi->paras */ -static void +static UBool +checkParaCount(UBiDi *pBiDi) { + int32_t count=pBiDi->paraCount; + if(pBiDi->paras==pBiDi->simpleParas) { + if(count<=SIMPLE_PARAS_SIZE) + return TRUE; + if(!getInitialParasMemory(pBiDi, SIMPLE_PARAS_SIZE * 2)) + return FALSE; + pBiDi->paras=pBiDi->parasMemory; + uprv_memcpy(pBiDi->parasMemory, pBiDi->simpleParas, SIMPLE_PARAS_SIZE * sizeof(Para)); + return TRUE; + } + if(!getInitialParasMemory(pBiDi, count * 2)) + return FALSE; + pBiDi->paras=pBiDi->parasMemory; + return TRUE; +} + +/* + * Get the directional properties for the text, calculate the flags bit-set, and + * determine the paragraph level if necessary (in pBiDi->paras[i].level). + * FSI initiators are also resolved and their dirProp replaced with LRI or RLI. + * When encountering an FSI, it is initially replaced with an LRI, which is the + * default. Only if a strong R or AL is found within its scope will the LRI be + * replaced by an RLI. + */ +static UBool getDirProps(UBiDi *pBiDi) { const UChar *text=pBiDi->text; DirProp *dirProps=pBiDi->dirPropsMemory; /* pBiDi->dirProps is const */ - int32_t i=0, i1, length=pBiDi->originalLength; + int32_t i=0, originalLength=pBiDi->originalLength; Flags flags=0; /* collect all directionalities in the text */ UChar32 uchar; - DirProp dirProp=0, paraDirDefault=0;/* initialize to avoid compiler warnings */ + DirProp dirProp=0, defaultParaLevel=0; /* initialize to avoid compiler warnings */ UBool isDefaultLevel=IS_DEFAULT_LEVEL(pBiDi->paraLevel); /* for inverse BiDi, the default para level is set to RTL if there is a - strong R or AL character at either end of the text */ + strong R or AL character at either end of the text */ UBool isDefaultLevelInverse=isDefaultLevel && (UBool) (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL); @@ -407,138 +446,555 @@ getDirProps(UBiDi *pBiDi) { UBIDI_OPTION_REMOVE_CONTROLS); typedef enum { - NOT_CONTEXTUAL, /* 0: not contextual paraLevel */ - LOOKING_FOR_STRONG, /* 1: looking for first strong char */ - FOUND_STRONG_CHAR /* 2: found first strong char */ + NOT_SEEKING_STRONG, /* 0: not contextual paraLevel, not after FSI */ + SEEKING_STRONG_FOR_PARA, /* 1: looking for first strong char in para */ + SEEKING_STRONG_FOR_FSI, /* 2: looking for first strong after FSI */ + LOOKING_FOR_PDI /* 3: found strong after FSI, looking for PDI */ } State; State state; - int32_t paraStart=0; /* index of first char in paragraph */ - DirProp paraDir; /* == CONTEXT_RTL within paragraphs - starting with strong R char */ - DirProp lastStrongDir=0; /* for default level & inverse BiDi */ - int32_t lastStrongLTR=0; /* for STREAMING option */ - - if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { + DirProp lastStrong=ON; /* for default level & inverse BiDi */ + /* The following stacks are used to manage isolate sequences. Those + sequences may be nested, but obviously never more deeply than the + maximum explicit embedding level. + lastStack is the index of the last used entry in the stack. A value of -1 + means that there is no open isolate sequence. + lastStack is reset to -1 on paragraph boundaries. */ + /* The following stack contains the position of the initiator of + each open isolate sequence */ + int32_t isolateStartStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; + /* The following stack contains the last known state before + encountering the initiator of an isolate sequence */ + int8_t previousStateStack[UBIDI_MAX_EXPLICIT_LEVEL+1]; + int32_t stackLast=-1; + + if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) pBiDi->length=0; - lastStrongLTR=0; - } + defaultParaLevel=pBiDi->paraLevel&1; if(isDefaultLevel) { - DirProp lastStrong; - paraDirDefault=pBiDi->paraLevel&1 ? CONTEXT_RTL : 0; - if(pBiDi->proLength>0 && - (lastStrong=firstL_R_AL(pBiDi))!=ON) { - paraDir=(lastStrong==L) ? 0 : CONTEXT_RTL; - state=FOUND_STRONG_CHAR; + pBiDi->paras[0].level=defaultParaLevel; + lastStrong=defaultParaLevel; + if(pBiDi->proLength>0 && /* there is a prologue */ + (dirProp=firstL_R_AL(pBiDi))!=ON) { /* with a strong character */ + if(dirProp==L) + pBiDi->paras[0].level=0; /* set the default para level */ + else + pBiDi->paras[0].level=1; /* set the default para level */ + state=NOT_SEEKING_STRONG; } else { - paraDir=paraDirDefault; - state=LOOKING_FOR_STRONG; + state=SEEKING_STRONG_FOR_PARA; } - lastStrongDir=paraDir; } else { - state=NOT_CONTEXTUAL; - paraDir=0; + pBiDi->paras[0].level=pBiDi->paraLevel; + state=NOT_SEEKING_STRONG; } /* count paragraphs and determine the paragraph level (P2..P3) */ /* * see comment in ubidi.h: - * the DEFAULT_XXX values are designed so that + * the UBIDI_DEFAULT_XXX values are designed so that * their bit 0 alone yields the intended default */ - for( /* i=0 above */ ; i0xffff) { /* set the lead surrogate's property to BN */ flags|=DIRPROP_FLAG(BN); - dirProps[i-2]=(DirProp)(BN|paraDir); - } - if(state==LOOKING_FOR_STRONG) { - if(dirProp==L) { - state=FOUND_STRONG_CHAR; - if(paraDir) { - paraDir=0; - for(i1=paraStart; i1paras[pBiDi->paraCount-1].level=0; + state=NOT_SEEKING_STRONG; } - if(dirProp==R || dirProp==AL) { - state=FOUND_STRONG_CHAR; - if(paraDir==0) { - paraDir=CONTEXT_RTL; - for(i1=paraStart; i1paras[pBiDi->paraCount-1].level=1; + state=NOT_SEEKING_STRONG; + } + else if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + dirProps[isolateStartStack[stackLast]]=RLI; + flags|=DIRPROP_FLAG(RLI); + } + state=LOOKING_FOR_PDI; + } + lastStrong=R; + if(dirProp==AL) + lastArabicPos=i-1; + continue; } - else if(dirProp==R) { - lastStrongDir=CONTEXT_RTL; + if(dirProp>=FSI && dirProp<=RLI) { /* FSI, LRI or RLI */ + stackLast++; + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + isolateStartStack[stackLast]=i-1; + previousStateStack[stackLast]=state; + } + if(dirProp==FSI) { + dirProps[i-1]=LRI; /* default if no strong char */ + state=SEEKING_STRONG_FOR_FSI; + } + else + state=LOOKING_FOR_PDI; + continue; } - else if(dirProp==AL) { - lastStrongDir=CONTEXT_RTL; - lastArabicPos=i-1; + if(dirProp==PDI) { + if(state==SEEKING_STRONG_FOR_FSI) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + } + } + if(stackLast>=0) { + if(stackLast<=UBIDI_MAX_EXPLICIT_LEVEL) + state=previousStateStack[stackLast]; + stackLast--; + } + continue; } - else if(dirProp==B) { + if(dirProp==B) { + if(iparas[pBiDi->paraCount-1].limit=i; + if(isDefaultLevelInverse && lastStrong==R) + pBiDi->paras[pBiDi->paraCount-1].level=1; if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { + /* When streaming, we only process whole paragraphs + thus some updates are only done on paragraph boundaries */ pBiDi->length=i; /* i is index to next character */ + pBiDi->controlCount=controlCount; } - if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { - for( ; paraStartparaCount++; - } + if(iparaCount++; + if(checkParaCount(pBiDi)==FALSE) /* not enough memory for a new para entry */ + return FALSE; if(isDefaultLevel) { - state=LOOKING_FOR_STRONG; - paraStart=i; /* i is index to next character */ - paraDir=paraDirDefault; - lastStrongDir=paraDirDefault; + pBiDi->paras[pBiDi->paraCount-1].level=defaultParaLevel; + state=SEEKING_STRONG_FOR_PARA; + lastStrong=defaultParaLevel; + } else { + pBiDi->paras[pBiDi->paraCount-1].level=pBiDi->paraLevel; + state=NOT_SEEKING_STRONG; } + stackLast=-1; } - } - if(removeBiDiControls && IS_BIDI_CONTROL_CHAR(uchar)) { - controlCount++; + continue; } } - if(isDefaultLevelInverse && (lastStrongDir==CONTEXT_RTL) &&(paraDir!=lastStrongDir)) { - for(i1=paraStart; i1UBIDI_MAX_EXPLICIT_LEVEL) { + stackLast=UBIDI_MAX_EXPLICIT_LEVEL; + state=SEEKING_STRONG_FOR_FSI; /* to be on the safe side */ + } + /* Resolve direction of still unresolved open FSI sequences */ + while(stackLast>=0) { + if(state==SEEKING_STRONG_FOR_FSI) { + /* no need for next statement, already set by default */ + /* dirProps[isolateStartStack[stackLast]]=LRI; */ + flags|=DIRPROP_FLAG(LRI); + break; } + state=previousStateStack[stackLast]; + stackLast--; } - if(isDefaultLevel) { - pBiDi->paraLevel=GET_PARALEVEL(pBiDi, 0); - } + /* When streaming, ignore text after the last paragraph separator */ if(pBiDi->reorderingOptions & UBIDI_OPTION_STREAMING) { - if((lastStrongLTR>pBiDi->length) && - (GET_PARALEVEL(pBiDi, lastStrongLTR)==0)) { - pBiDi->length = lastStrongLTR; - } - if(pBiDi->lengthoriginalLength) { + if(pBiDi->lengthparaCount--; - } + } else { + pBiDi->paras[pBiDi->paraCount-1].limit=originalLength; + pBiDi->controlCount=controlCount; + } + /* For inverse bidi, default para direction is RTL if there is + a strong R or AL at either end of the paragraph */ + if(isDefaultLevelInverse && lastStrong==R) { + pBiDi->paras[pBiDi->paraCount-1].level=1; } - /* The following line does nothing new for contextual paraLevel, but is - needed for absolute paraLevel. */ - flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); + if(isDefaultLevel) { + pBiDi->paraLevel=pBiDi->paras[0].level; + } + /* The following is needed to resolve the text direction for default level + paragraphs containing no strong character */ + for(i=0; iparaCount; i++) + flags|=DIRPROP_FLAG_LR(pBiDi->paras[i].level); if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { flags|=DIRPROP_FLAG(L); } - - pBiDi->controlCount = controlCount; pBiDi->flags=flags; pBiDi->lastArabicPos=lastArabicPos; + return TRUE; +} + +/* determine the paragraph level at position index */ +U_CFUNC UBiDiLevel +ubidi_getParaLevelAtIndex(const UBiDi *pBiDi, int32_t pindex) { + int32_t i; + for(i=0; iparaCount; i++) + if(pindexparas[i].limit) + break; + if(i>=pBiDi->paraCount) + i=pBiDi->paraCount-1; + return (UBiDiLevel)(pBiDi->paras[i].level); +} + +/* Functions for handling paired brackets ----------------------------------- */ + +/* In the isoRuns array, the first entry is used for text outside of any + isolate sequence. Higher entries are used for each more deeply nested + isolate sequence. isoRunLast is the index of the last used entry. The + openings array is used to note the data of opening brackets not yet + matched by a closing bracket, or matched but still susceptible to change + level. + Each isoRun entry contains the index of the first and + one-after-last openings entries for pending opening brackets it + contains. The next openings entry to use is the one-after-last of the + most deeply nested isoRun entry. + isoRun entries also contain their current embedding level and the last + encountered strong character, since these will be needed to resolve + the level of paired brackets. */ + +static void +bracketInit(UBiDi *pBiDi, BracketData *bd) { + bd->pBiDi=pBiDi; + bd->isoRunLast=0; + bd->isoRuns[0].start=0; + bd->isoRuns[0].limit=0; + bd->isoRuns[0].level=GET_PARALEVEL(pBiDi, 0); + bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=GET_PARALEVEL(pBiDi, 0)&1; + bd->isoRuns[0].contextPos=0; + if(pBiDi->openingsMemory) { + bd->openings=pBiDi->openingsMemory; + bd->openingsSize=pBiDi->openingsSize; + } else { + bd->openings=bd->simpleOpenings; + bd->openingsSize=SIMPLE_OPENINGS_SIZE; + } + bd->isNumbersSpecial=bd->pBiDi->reorderingMode==UBIDI_REORDER_NUMBERS_SPECIAL || + bd->pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL; +} + +/* paragraph boundary */ +static void +bracketProcessB(BracketData *bd, UBiDiLevel level) { + bd->isoRunLast=0; + bd->isoRuns[0].limit=0; + bd->isoRuns[0].level=level; + bd->isoRuns[0].lastStrong=bd->isoRuns[0].lastBase=bd->isoRuns[0].contextDir=level&1; + bd->isoRuns[0].contextPos=0; +} + +/* LRE, LRO, RLE, RLO, PDF */ +static void +bracketProcessBoundary(BracketData *bd, int32_t lastCcPos, + UBiDiLevel contextLevel, UBiDiLevel embeddingLevel) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + DirProp *dirProps=bd->pBiDi->dirProps; + if(DIRPROP_FLAG(dirProps[lastCcPos])&MASK_ISO) /* after an isolate */ + return; + if(NO_OVERRIDE(embeddingLevel)>NO_OVERRIDE(contextLevel)) /* not a PDF */ + contextLevel=embeddingLevel; + pLastIsoRun->limit=pLastIsoRun->start; + pLastIsoRun->level=embeddingLevel; + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=contextLevel&1; + pLastIsoRun->contextPos=lastCcPos; +} + +/* LRI or RLI */ +static void +bracketProcessLRI_RLI(BracketData *bd, UBiDiLevel level) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + int16_t lastLimit; + pLastIsoRun->lastBase=ON; + lastLimit=pLastIsoRun->limit; + bd->isoRunLast++; + pLastIsoRun++; + pLastIsoRun->start=pLastIsoRun->limit=lastLimit; + pLastIsoRun->level=level; + pLastIsoRun->lastStrong=pLastIsoRun->lastBase=pLastIsoRun->contextDir=level&1; + pLastIsoRun->contextPos=0; +} + +/* PDI */ +static void +bracketProcessPDI(BracketData *bd) { + IsoRun *pLastIsoRun; + bd->isoRunLast--; + pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + pLastIsoRun->lastBase=ON; +} + +/* newly found opening bracket: create an openings entry */ +static UBool /* return TRUE if success */ +bracketAddOpening(BracketData *bd, UChar match, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening; + if(pLastIsoRun->limit>=bd->openingsSize) { /* no available new entry */ + UBiDi *pBiDi=bd->pBiDi; + if(!getInitialOpeningsMemory(pBiDi, pLastIsoRun->limit * 2)) + return FALSE; + if(bd->openings==bd->simpleOpenings) + uprv_memcpy(pBiDi->openingsMemory, bd->simpleOpenings, + SIMPLE_OPENINGS_SIZE * sizeof(Opening)); + bd->openings=pBiDi->openingsMemory; /* may have changed */ + bd->openingsSize=pBiDi->openingsSize; + } + pOpening=&bd->openings[pLastIsoRun->limit]; + pOpening->position=position; + pOpening->match=match; + pOpening->contextDir=pLastIsoRun->contextDir; + pOpening->contextPos=pLastIsoRun->contextPos; + pOpening->flags=0; + pLastIsoRun->limit++; + return TRUE; +} + +/* change N0c1 to N0c2 when a preceding bracket is assigned the embedding level */ +static void +fixN0c(BracketData *bd, int32_t openingIndex, int32_t newPropPosition, DirProp newProp) { + /* This function calls itself recursively */ + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *qOpening; + DirProp *dirProps=bd->pBiDi->dirProps; + int32_t k, openingPosition, closingPosition; + for(k=openingIndex+1, qOpening=&bd->openings[k]; klimit; k++, qOpening++) { + if(qOpening->match>=0) /* not an N0c match */ + continue; + if(newPropPositioncontextPos) + break; + if(newPropPosition>=qOpening->position) + continue; + if(newProp==qOpening->contextDir) + break; + openingPosition=qOpening->position; + dirProps[openingPosition]=newProp; + closingPosition=-(qOpening->match); + dirProps[closingPosition]=newProp; + qOpening->match=0; /* prevent further changes */ + fixN0c(bd, k, openingPosition, newProp); + fixN0c(bd, k, closingPosition, newProp); + } +} + +/* process closing bracket */ +static DirProp /* return L or R if N0b or N0c, ON if N0d */ +bracketProcessClosing(BracketData *bd, int32_t openIdx, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + Opening *pOpening, *qOpening; + UBiDiDirection direction; + UBool stable; + DirProp newProp; + pOpening=&bd->openings[openIdx]; + direction=pLastIsoRun->level&1; + stable=TRUE; /* assume stable until proved otherwise */ + + /* The stable flag is set when brackets are paired and their + level is resolved and cannot be changed by what will be + found later in the source string. + An unstable match can occur only when applying N0c, where + the resolved level depends on the preceding context, and + this context may be affected by text occurring later. + Example: RTL paragraph containing: abc[(latin) HEBREW] + When the closing parenthesis is encountered, it appears + that N0c1 must be applied since 'abc' sets an opposite + direction context and both parentheses receive level 2. + However, when the closing square bracket is processed, + N0b applies because of 'HEBREW' being included within the + brackets, thus the square brackets are treated like R and + receive level 1. However, this changes the preceding + context of the opening parenthesis, and it now appears + that N0c2 must be applied to the parentheses rather than + N0c1. */ + + if((direction==0 && pOpening->flags&FOUND_L) || + (direction==1 && pOpening->flags&FOUND_R)) { /* N0b */ + newProp=direction; + } + else if(pOpening->flags&(FOUND_L|FOUND_R)) { /* N0c */ + /* it is stable if there is no containing pair or in + conditions too complicated and not worth checking */ + stable=(openIdx==pLastIsoRun->start); + if(direction!=pOpening->contextDir) + newProp=pOpening->contextDir; /* N0c1 */ + else + newProp=direction; /* N0c2 */ + } else { + /* forget this and any brackets nested within this pair */ + pLastIsoRun->limit=openIdx; + return ON; /* N0d */ + } + bd->pBiDi->dirProps[pOpening->position]=newProp; + bd->pBiDi->dirProps[position]=newProp; + /* Update nested N0c pairs that may be affected */ + fixN0c(bd, openIdx, pOpening->position, newProp); + if(stable) { + pLastIsoRun->limit=openIdx; /* forget any brackets nested within this pair */ + /* remove lower located synonyms if any */ + while(pLastIsoRun->limit>pLastIsoRun->start && + bd->openings[pLastIsoRun->limit-1].position==pOpening->position) + pLastIsoRun->limit--; + } else { + int32_t k; + pOpening->match=-position; + /* neutralize lower located synonyms if any */ + k=openIdx-1; + while(k>=pLastIsoRun->start && + bd->openings[k].position==pOpening->position) + bd->openings[k--].match=0; + /* neutralize any unmatched opening between the current pair; + this will also neutralize higher located synonyms if any */ + for(k=openIdx+1; klimit; k++) { + qOpening=&bd->openings[k]; + if(qOpening->position>=position) + break; + if(qOpening->match>0) + qOpening->match=0; + } + } + return newProp; +} + +/* handle strong characters, digits and candidates for closing brackets */ +static UBool /* return TRUE if success */ +bracketProcessChar(BracketData *bd, int32_t position) { + IsoRun *pLastIsoRun=&bd->isoRuns[bd->isoRunLast]; + DirProp *dirProps, dirProp, newProp; + UBiDiLevel level; + dirProps=bd->pBiDi->dirProps; + dirProp=dirProps[position]; + if(dirProp==ON) { + UChar c, match; + int32_t idx; + /* First see if it is a matching closing bracket. Hopefully, this is + more efficient than checking if it is a closing bracket at all */ + c=bd->pBiDi->text[position]; + for(idx=pLastIsoRun->limit-1; idx>=pLastIsoRun->start; idx--) { + if(bd->openings[idx].match!=c) + continue; + /* We have a match */ + newProp=bracketProcessClosing(bd, idx, position); + if(newProp==ON) { /* N0d */ + c=0; /* prevent handling as an opening */ + break; + } + pLastIsoRun->lastBase=ON; + pLastIsoRun->contextDir=newProp; + pLastIsoRun->contextPos=position; + level=bd->pBiDi->levels[position]; + if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ + uint16_t flag; + int32_t i; + newProp=level&1; + pLastIsoRun->lastStrong=newProp; + flag=DIRPROP_FLAG(newProp); + for(i=pLastIsoRun->start; iopenings[i].flags|=flag; + /* matching brackets are not overridden by LRO/RLO */ + bd->pBiDi->levels[position]&=~UBIDI_LEVEL_OVERRIDE; + } + /* matching brackets are not overridden by LRO/RLO */ + bd->pBiDi->levels[bd->openings[idx].position]&=~UBIDI_LEVEL_OVERRIDE; + return TRUE; + } + /* We get here only if the ON character is not a matching closing + bracket or it is a case of N0d */ + /* Now see if it is an opening bracket */ + if(c) + match=u_getBidiPairedBracket(c); /* get the matching char */ + else + match=0; + if(match!=c && /* has a matching char */ + ubidi_getPairedBracketType(bd->pBiDi->bdp, c)==U_BPT_OPEN) { /* opening bracket */ + /* special case: process synonyms + create an opening entry for each synonym */ + if(match==0x232A) { /* RIGHT-POINTING ANGLE BRACKET */ + if(!bracketAddOpening(bd, 0x3009, position)) + return FALSE; + } + else if(match==0x3009) { /* RIGHT ANGLE BRACKET */ + if(!bracketAddOpening(bd, 0x232A, position)) + return FALSE; + } + if(!bracketAddOpening(bd, match, position)) + return FALSE; + } + } + level=bd->pBiDi->levels[position]; + if(level&UBIDI_LEVEL_OVERRIDE) { /* X4, X5 */ + newProp=level&1; + if(dirProp!=S && dirProp!=WS && dirProp!=ON) + dirProps[position]=newProp; + pLastIsoRun->lastBase=newProp; + pLastIsoRun->lastStrong=newProp; + pLastIsoRun->contextDir=newProp; + pLastIsoRun->contextPos=position; + } + else if(dirProp<=R || dirProp==AL) { + newProp=DIR_FROM_STRONG(dirProp); + pLastIsoRun->lastBase=dirProp; + pLastIsoRun->lastStrong=dirProp; + pLastIsoRun->contextDir=newProp; + pLastIsoRun->contextPos=position; + } + else if(dirProp==EN) { + pLastIsoRun->lastBase=EN; + if(pLastIsoRun->lastStrong==L) { + newProp=L; /* W7 */ + if(!bd->isNumbersSpecial) + dirProps[position]=ENL; + pLastIsoRun->contextDir=L; + pLastIsoRun->contextPos=position; + } + else { + newProp=R; /* N0 */ + if(pLastIsoRun->lastStrong==AL) + dirProps[position]=AN; /* W2 */ + else + dirProps[position]=ENR; + pLastIsoRun->contextDir=R; + pLastIsoRun->contextPos=position; + } + } + else if(dirProp==AN) { + newProp=R; /* N0 */ + pLastIsoRun->lastBase=AN; + pLastIsoRun->contextDir=R; + pLastIsoRun->contextPos=position; + } + else if(dirProp==NSM) { + /* if the last real char was ON, change NSM to ON so that it + will stay ON even if the last real char is a bracket which + may be changed to L or R */ + newProp=pLastIsoRun->lastBase; + if(newProp==ON) + dirProps[position]=newProp; + } + else { + newProp=dirProp; + pLastIsoRun->lastBase=dirProp; + } + if(newProp<=R || newProp==AL) { + int32_t i; + uint16_t flag=DIRPROP_FLAG(DIR_FROM_STRONG(newProp)); + for(i=pLastIsoRun->start; ilimit; i++) + if(position>bd->openings[i].position) + bd->openings[i].flags|=flag; + } + return TRUE; } /* perform (X1)..(X9) ------------------------------------------------------- */ @@ -564,14 +1020,14 @@ directionFromFlags(UBiDi *pBiDi) { * * The BiDi algorithm is designed to result in the same behavior whether embedding * levels are externally specified (from "styled text", supposedly the preferred - * method) or set by explicit embedding codes (LRx, RLx, PDF) in the plain text. - * That is why (X9) instructs to remove all explicit codes (and BN). - * However, in a real implementation, this removal of these codes and their index + * method) or set by explicit embedding codes (LRx, RLx, PDF, FSI, PDI) in the plain text. + * That is why (X9) instructs to remove all not-isolate explicit codes (and BN). + * However, in a real implementation, the removal of these codes and their index * positions in the plain text is undesirable since it would result in * reallocated, reindexed text. * Instead, this implementation leaves the codes in there and just ignores them * in the subsequent processing. - * In order to get the same reordering behavior, positions with a BN or an + * In order to get the same reordering behavior, positions with a BN or a not-isolate * explicit embedding code just get the same level assigned as the last "real" * character. * @@ -583,7 +1039,8 @@ directionFromFlags(UBiDi *pBiDi) { * This limits the scope of the implicit rules in effectively * the same way as the run limits. * - * Instead, this implementation does not modify these codes. + * Instead, this implementation does not modify these codes, except for + * paired brackets whose properties (ON) may be replaced by L or R. * On one hand, the paragraph has to be scanned for same-level-runs, but * on the other hand, this saves another loop to reset these codes, * or saves making and modifying a copy of dirProps[]. @@ -594,24 +1051,22 @@ directionFromFlags(UBiDi *pBiDi) { * * Handling the stack of explicit levels (Xn): * - * With the BiDi stack of explicit levels, - * as pushed with each LRE, RLE, LRO, and RLO and popped with each PDF, - * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL==61. + * With the BiDi stack of explicit levels, as pushed with each + * LRE, RLE, LRO, RLO, LRI, RLI and FSI and popped with each PDF and PDI, + * the explicit level must never exceed UBIDI_MAX_EXPLICIT_LEVEL. * * In order to have a correct push-pop semantics even in the case of overflows, - * there are two overflow counters: - * - countOver60 is incremented with each LRx at level 60 - * - from level 60, one RLx increases the level to 61 - * - countOver61 is incremented with each LRx and RLx at level 61 - * - * Popping levels with PDF must work in the opposite order so that level 61 - * is correct at the correct point. Underflows (too many PDFs) must be checked. + * overflow counters and a valid isolate counter are used as described in UAX#9 + * section 3.3.2 "Explicit Levels and Directions". * * This implementation assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd. + * + * Returns normally the direction; -1 if there was a memory shortage + * */ static UBiDiDirection -resolveExplicitLevels(UBiDi *pBiDi) { - const DirProp *dirProps=pBiDi->dirProps; +resolveExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { + DirProp *dirProps=pBiDi->dirProps; UBiDiLevel *levels=pBiDi->levels; const UChar *text=pBiDi->text; @@ -619,154 +1074,257 @@ resolveExplicitLevels(UBiDi *pBiDi) { Flags flags=pBiDi->flags; /* collect all directionalities in the text */ DirProp dirProp; UBiDiLevel level=GET_PARALEVEL(pBiDi, 0); - UBiDiDirection direction; - int32_t paraIndex=0; + pBiDi->isolateCount=0; + + if(U_FAILURE(*pErrorCode)) { return UBIDI_LTR; } /* determine if the text is mixed-directional or single-directional */ direction=directionFromFlags(pBiDi); - /* we may not need to resolve any explicit levels, but for multiple - paragraphs we want to loop on all chars to set the para boundaries */ - if((direction!=UBIDI_MIXED) && (pBiDi->paraCount==1)) { + /* we may not need to resolve any explicit levels */ + if((direction!=UBIDI_MIXED)) { /* not mixed directionality: levels don't matter - trailingWSStart will be 0 */ - } else if((pBiDi->paraCount==1) && - (!(flags&MASK_EXPLICIT) || - (pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL))) { - /* mixed, but all characters are at the same embedding level */ - /* or we are in "inverse BiDi" */ - /* and we don't have contextual multiple paragraphs with some B char */ + return direction; + } + if(pBiDi->reorderingMode > UBIDI_REORDER_LAST_LOGICAL_TO_VISUAL) { + /* inverse BiDi: mixed, but all characters are at the same embedding level */ /* set all levels to the paragraph level */ - for(i=0; iparaCount; paraIndex++) { + if(paraIndex==0) + start=0; + else + start=pBiDi->paras[paraIndex-1].limit; + limit=pBiDi->paras[paraIndex].limit; + level=pBiDi->paras[paraIndex].level; + for(i=start; iparaCount; paraIndex++) { + if(paraIndex==0) + start=0; + else + start=pBiDi->paras[paraIndex-1].limit; + limit=pBiDi->paras[paraIndex].limit; + level=pBiDi->paras[paraIndex].level; + for(i=start; i=UBIDI_MAX_EXPLICIT_LEVEL */ - uint32_t countOver60=0, countOver61=0; /* count overflows of explicit levels */ + UBiDiLevel embeddingLevel=level, newLevel; + UBiDiLevel previousLevel=level; /* previous level for regular (not CC) characters */ + int32_t lastCcPos=0; /* index of last effective LRx,RLx, PDx */ + + /* The following stack remembers the embedding level and the ISOLATE flag of level runs. + stackLast points to its current entry. */ + uint16_t stack[UBIDI_MAX_EXPLICIT_LEVEL+2]; /* we never push anything >=UBIDI_MAX_EXPLICIT_LEVEL + but we need one more entry as base */ + uint32_t stackLast=0; + int32_t overflowIsolateCount=0; + int32_t overflowEmbeddingCount=0; + int32_t validIsolateCount=0; + BracketData bracketData; + bracketInit(pBiDi, &bracketData); + stack[0]=level; /* initialize base entry to para level, no override, no isolate */ /* recalculate the flags */ flags=0; for(i=0; i0) { - --countOver61; - } else if(countOver60>0 && (embeddingLevel&~UBIDI_LEVEL_OVERRIDE)!=UBIDI_MAX_EXPLICIT_LEVEL) { - /* handle LRx overflows from level 60 */ - --countOver60; - } else if(stackTop>0) { - /* this is the pop operation; it also pops level 61 while countOver60>0 */ - --stackTop; - embeddingLevel=stack[stackTop]; - /* } else { (underflow) */ + if(overflowIsolateCount) { + break; } - flags|=DIRPROP_FLAG(BN); + if(overflowEmbeddingCount) { + overflowEmbeddingCount--; + break; + } + if(stackLast>0 && stack[stackLast]pBiDi->isolateCount) + pBiDi->isolateCount=validIsolateCount; + embeddingLevel=newLevel; + /* we can increment stackLast without checking because newLevel + will exceed UBIDI_MAX_EXPLICIT_LEVEL before stackLast overflows */ + stackLast++; + stack[stackLast]=embeddingLevel+ISOLATE; + bracketProcessLRI_RLI(&bracketData, embeddingLevel); + } else { + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + overflowIsolateCount++; + } + break; + case PDI: + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { + bracketProcessBoundary(&bracketData, lastCcPos, + previousLevel, embeddingLevel); + flags|=DIRPROP_FLAG_MULTI_RUNS; + } + /* (X6a) */ + if(overflowIsolateCount) { + overflowIsolateCount--; + /* make it WS so that it is handled by adjustWSLevels() */ + dirProps[i]=WS; + } + else if(validIsolateCount) { + flags|=DIRPROP_FLAG(PDI); + lastCcPos=i; + overflowEmbeddingCount=0; + while(stack[stackLast]paras[paraIndex++]=i+1; - } + if(text[i]==CR && text[i+1]==LF) + break; /* skip CR when followed by LF */ + overflowEmbeddingCount=overflowIsolateCount=0; + validIsolateCount=0; + stackLast=0; + previousLevel=embeddingLevel=GET_PARALEVEL(pBiDi, i+1); + stack[0]=embeddingLevel; /* initialize base entry to para level, no override, no isolate */ + bracketProcessB(&bracketData, embeddingLevel); } - flags|=DIRPROP_FLAG(B); break; case BN: /* BN, LRE, RLE, and PDF are supposed to be removed (X9) */ /* they will get their levels set correctly in adjustWSLevels() */ + levels[i]=previousLevel; flags|=DIRPROP_FLAG(BN); break; default: - /* all other types get the "real" level */ - if(level!=embeddingLevel) { - level=embeddingLevel; - if(level&UBIDI_LEVEL_OVERRIDE) { - flags|=DIRPROP_FLAG_O(level)|DIRPROP_FLAG_MULTI_RUNS; - } else { - flags|=DIRPROP_FLAG_E(level)|DIRPROP_FLAG_MULTI_RUNS; - } - } - if(!(level&UBIDI_LEVEL_OVERRIDE)) { - flags|=DIRPROP_FLAG(dirProp); + /* all other types are normal characters and get the "real" level */ + if(NO_OVERRIDE(embeddingLevel)!=NO_OVERRIDE(previousLevel)) { + bracketProcessBoundary(&bracketData, lastCcPos, + previousLevel, embeddingLevel); + flags|=DIRPROP_FLAG_MULTI_RUNS; + if(embeddingLevel&UBIDI_LEVEL_OVERRIDE) + flags|=DIRPROP_FLAG_O(embeddingLevel); + else + flags|=DIRPROP_FLAG_E(embeddingLevel); } + previousLevel=embeddingLevel; + levels[i]=embeddingLevel; + if(!bracketProcessChar(&bracketData, i)) + return -1; + /* the dirProp may have been changed in bracketProcessChar() */ + flags|=DIRPROP_FLAG(dirProps[i]); break; } - - /* - * We need to set reasonable levels even on BN codes and - * explicit codes because we will later look at same-level runs (X10). - */ - levels[i]=level; } - if(flags&MASK_EMBEDDING) { + if(flags&MASK_EMBEDDING) flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); - } - if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) { + if(pBiDi->orderParagraphsLTR && (flags&DIRPROP_FLAG(B))) flags|=DIRPROP_FLAG(L); - } - - /* subsequently, ignore the explicit codes and BN (X9) */ - /* again, determine if the text is mixed-directional or single-directional */ pBiDi->flags=flags; direction=directionFromFlags(pBiDi); } - return direction; } @@ -782,19 +1340,28 @@ resolveExplicitLevels(UBiDi *pBiDi) { */ static UBiDiDirection checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { - const DirProp *dirProps=pBiDi->dirProps; + DirProp *dirProps=pBiDi->dirProps; DirProp dirProp; UBiDiLevel *levels=pBiDi->levels; - const UChar *text=pBiDi->text; + int32_t isolateCount=0; int32_t i, length=pBiDi->length; Flags flags=0; /* collect all directionalities in the text */ UBiDiLevel level; - uint32_t paraIndex=0; + pBiDi->isolateCount=0; for(i=0; ipBiDi->isolateCount) + pBiDi->isolateCount=isolateCount; + } + else if(dirProp==PDI) + isolateCount--; + else if(dirProp==B) + isolateCount=0; if(level&UBIDI_LEVEL_OVERRIDE) { /* keep the override flag in levels[i] but adjust the flags */ level&=~UBIDI_LEVEL_OVERRIDE; /* make the range check below simpler */ @@ -810,16 +1377,9 @@ checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; return UBIDI_LTR; } - if((dirProp==B) && ((i+1)paras[paraIndex++]=i+1; - } - } } - if(flags&MASK_EMBEDDING) { + if(flags&MASK_EMBEDDING) flags|=DIRPROP_FLAG_LR(pBiDi->paraLevel); - } - /* determine if the text is mixed-directional or single-directional */ pBiDi->flags=flags; return directionFromFlags(pBiDi); @@ -842,7 +1402,7 @@ checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { Definitions and type for properties state table ******************************************************************* */ -#define IMPTABPROPS_COLUMNS 14 +#define IMPTABPROPS_COLUMNS 16 #define IMPTABPROPS_RES (IMPTABPROPS_COLUMNS - 1) #define GET_STATEPROPS(cell) ((cell)&0x1f) #define GET_ACTIONPROPS(cell) ((cell)>>5) @@ -850,8 +1410,8 @@ checkExplicitLevels(UBiDi *pBiDi, UErrorCode *pErrorCode) { static const uint8_t groupProp[] = /* dirProp regrouped */ { -/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN */ - 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10 +/* L R EN ES ET AN CS B S WS ON LRE LRO AL RLE RLO PDF NSM BN FSI LRI RLI PDI ENL ENR */ + 0, 1, 2, 7, 8, 3, 9, 6, 5, 4, 4, 10, 10, 12, 10, 10, 10, 11, 10, 4, 4, 4, 4, 13, 14 }; enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirProp_S=5, DirProp_B=6 }; /* reduced dirProp */ @@ -860,7 +1420,7 @@ enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirPr PROPERTIES STATE TABLE In table impTabProps, - - the ON column regroups ON and WS + - the ON column regroups ON and WS, FSI, RLI, LRI and PDI - the BN column regroups BN, LRE, RLE, LRO, RLO, PDF - the Res column is the reduced property assigned to a run @@ -892,28 +1452,34 @@ enum { DirProp_L=0, DirProp_R=1, DirProp_EN=2, DirProp_AN=3, DirProp_ON=4, DirPr */ static const uint8_t impTabProps[][IMPTABPROPS_COLUMNS] = { -/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , Res */ -/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , DirProp_ON }, -/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3), DirProp_L }, -/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3), DirProp_R }, -/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 , DirProp_R }, -/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), DirProp_EN }, -/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3), DirProp_AN }, -/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), DirProp_AN }, -/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3), DirProp_ON }, -/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3), DirProp_ON }, -/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), DirProp_ON }, -/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), DirProp_EN }, -/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), DirProp_EN }, -/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3), DirProp_AN }, -/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), DirProp_AN }, -/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3), DirProp_ON }, -/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3), DirProp_S }, -/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3), DirProp_S }, -/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3), DirProp_B } +/* L , R , EN , AN , ON , S , B , ES , ET , CS , BN , NSM , AL , ENL , ENR , Res */ +/* 0 Init */ { 1 , 2 , 4 , 5 , 7 , 15 , 17 , 7 , 9 , 7 , 0 , 7 , 3 , 18 , 21 , DirProp_ON }, +/* 1 L */ { 1 , s(1,2), s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 1 , 1 , s(1,3),s(1,18),s(1,21), DirProp_L }, +/* 2 R */ { s(1,1), 2 , s(1,4), s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), s(1,9), s(1,7), 2 , 2 , s(1,3),s(1,18),s(1,21), DirProp_R }, +/* 3 AL */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8),s(1,16),s(1,17), s(1,8), s(1,8), s(1,8), 3 , 3 , 3 ,s(1,18),s(1,21), DirProp_R }, +/* 4 EN */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,10), 11 ,s(2,10), 4 , 4 , s(1,3), 18 , 21 , DirProp_EN }, +/* 5 AN */ { s(1,1), s(1,2), s(1,4), 5 , s(1,7),s(1,15),s(1,17), s(1,7), s(1,9),s(2,12), 5 , 5 , s(1,3),s(1,18),s(1,21), DirProp_AN }, +/* 6 AL:EN/AN */ { s(1,1), s(1,2), 6 , 6 , s(1,8),s(1,16),s(1,17), s(1,8), s(1,8),s(2,13), 6 , 6 , s(1,3), 18 , 21 , DirProp_AN }, +/* 7 ON */ { s(1,1), s(1,2), s(1,4), s(1,5), 7 ,s(1,15),s(1,17), 7 ,s(2,14), 7 , 7 , 7 , s(1,3),s(1,18),s(1,21), DirProp_ON }, +/* 8 AL:ON */ { s(1,1), s(1,2), s(1,6), s(1,6), 8 ,s(1,16),s(1,17), 8 , 8 , 8 , 8 , 8 , s(1,3),s(1,18),s(1,21), DirProp_ON }, +/* 9 ET */ { s(1,1), s(1,2), 4 , s(1,5), 7 ,s(1,15),s(1,17), 7 , 9 , 7 , 9 , 9 , s(1,3), 18 , 21 , DirProp_ON }, +/*10 EN+ES/CS */ { s(3,1), s(3,2), 4 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 10 , s(4,7), s(3,3), 18 , 21 , DirProp_EN }, +/*11 EN+ET */ { s(1,1), s(1,2), 4 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 11 , s(1,7), 11 , 11 , s(1,3), 18 , 21 , DirProp_EN }, +/*12 AN+CS */ { s(3,1), s(3,2), s(3,4), 5 , s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 12 , s(4,7), s(3,3),s(3,18),s(3,21), DirProp_AN }, +/*13 AL:EN/AN+CS */ { s(3,1), s(3,2), 6 , 6 , s(4,8),s(3,16),s(3,17), s(4,8), s(4,8), s(4,8), 13 , s(4,8), s(3,3), 18 , 21 , DirProp_AN }, +/*14 ON+ET */ { s(1,1), s(1,2), s(4,4), s(1,5), 7 ,s(1,15),s(1,17), 7 , 14 , 7 , 14 , 14 , s(1,3),s(4,18),s(4,21), DirProp_ON }, +/*15 S */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7), 15 ,s(1,17), s(1,7), s(1,9), s(1,7), 15 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_S }, +/*16 AL:S */ { s(1,1), s(1,2), s(1,6), s(1,6), s(1,8), 16 ,s(1,17), s(1,8), s(1,8), s(1,8), 16 , s(1,8), s(1,3),s(1,18),s(1,21), DirProp_S }, +/*17 B */ { s(1,1), s(1,2), s(1,4), s(1,5), s(1,7),s(1,15), 17 , s(1,7), s(1,9), s(1,7), 17 , s(1,7), s(1,3),s(1,18),s(1,21), DirProp_B }, +/*18 ENL */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,19), 20 ,s(2,19), 18 , 18 , s(1,3), 18 , 21 , DirProp_L }, +/*19 ENL+ES/CS */ { s(3,1), s(3,2), 18 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 19 , s(4,7), s(3,3), 18 , 21 , DirProp_L }, +/*20 ENL+ET */ { s(1,1), s(1,2), 18 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 20 , s(1,7), 20 , 20 , s(1,3), 18 , 21 , DirProp_L }, +/*21 ENR */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17),s(2,22), 23 ,s(2,22), 21 , 21 , s(1,3), 18 , 21 , DirProp_AN }, +/*22 ENR+ES/CS */ { s(3,1), s(3,2), 21 , s(3,5), s(4,7),s(3,15),s(3,17), s(4,7),s(4,14), s(4,7), 22 , s(4,7), s(3,3), 18 , 21 , DirProp_AN }, +/*23 ENR+ET */ { s(1,1), s(1,2), 21 , s(1,5), s(1,7),s(1,15),s(1,17), s(1,7), 23 , s(1,7), 23 , 23 , s(1,3), 18 , 21 , DirProp_AN } }; -/* we must undef macro s because the levels table have a different +/* we must undef macro s because the levels tables have a different * structure (4 bits for action and 4 bits for next state. */ #undef s @@ -992,7 +1558,7 @@ typedef struct ImpTabPair { */ static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ -/* In this table, conditional sequences receive the higher possible level +/* In this table, conditional sequences receive the lower possible level until proven otherwise. */ { @@ -1001,8 +1567,8 @@ static const ImpTab impTabL_DEFAULT = /* Even paragraph level */ /* 1 : R */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 1 }, /* 2 : AN */ { 0 , 1 , 0 , 2 , s(1,5), s(1,5), 0 , 2 }, /* 3 : R+EN/AN */ { 0 , 1 , 3 , 3 , s(1,4), s(1,4), 0 , 2 }, -/* 4 : R+ON */ { s(2,0), 1 , 3 , 3 , 4 , 4 , s(2,0), 1 }, -/* 5 : AN+ON */ { s(2,0), 1 , s(2,0), 2 , 5 , 5 , s(2,0), 1 } +/* 4 : R+ON */ { 0 , s(2,1), s(3,3), s(3,3), 4 , 4 , 0 , 0 }, +/* 5 : AN+ON */ { 0 , s(2,1), 0 , s(3,2), 5 , 5 , 0 , 0 } }; static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ /* In this table, conditional sequences receive the lower possible level @@ -1017,23 +1583,23 @@ static const ImpTab impTabR_DEFAULT = /* Odd paragraph level */ /* 4 : L+ON */ { s(2,1), 0 , s(2,1), 3 , 4 , 4 , 0 , 0 }, /* 5 : L+AN+ON */ { 1 , 0 , 1 , 3 , 5 , 5 , 0 , 0 } }; -static const ImpAct impAct0 = {0,1,2,3,4,5,6}; +static const ImpAct impAct0 = {0,1,2,3,4}; static const ImpTabPair impTab_DEFAULT = {{&impTabL_DEFAULT, &impTabR_DEFAULT}, {&impAct0, &impAct0}}; static const ImpTab impTabL_NUMBERS_SPECIAL = /* Even paragraph level */ -/* In this table, conditional sequences receive the higher possible level +/* In this table, conditional sequences receive the lower possible level until proven otherwise. */ { /* L , R , EN , AN , ON , S , B , Res */ -/* 0 : init */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 0 }, -/* 1 : L+EN/AN */ { 0 , 2 , 1 , 1 , 0 , 0 , 0 , 2 }, -/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), 0 , 0 , 1 }, -/* 3 : R+ON */ { s(2,0), 2 , 4 , 4 , 3 , 3 , s(2,0), 1 }, -/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } - }; +/* 0 : init */ { 0 , 2 , s(1,1), s(1,1), 0 , 0 , 0 , 0 }, +/* 1 : L+EN/AN */ { 0 , s(4,2), 1 , 1 , 0 , 0 , 0 , 0 }, +/* 2 : R */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 1 }, +/* 3 : R+ON */ { 0 , s(2,2), s(3,4), s(3,4), 3 , 3 , 0 , 0 }, +/* 4 : R+EN/AN */ { 0 , 2 , 4 , 4 , s(1,3), s(1,3), 0 , 2 } +}; static const ImpTabPair impTab_NUMBERS_SPECIAL = {{&impTabL_NUMBERS_SPECIAL, &impTabR_DEFAULT}, {&impAct0, &impAct0}}; @@ -1114,7 +1680,7 @@ static const ImpTab impTabR_INVERSE_LIKE_DIRECT = /* Odd paragraph level */ /* 5 : L+AN+ON */ { s(2,1), s(3,0), 6 , 4 , 5 , 5 , s(3,0), 2 }, /* 6 : L+ON+EN */ { s(2,1), s(3,0), 6 , 4 , 3 , 3 , s(3,0), 1 } }; -static const ImpAct impAct1 = {0,1,11,12}; +static const ImpAct impAct1 = {0,1,13,14}; /* FOOD FOR THOUGHT: in LTR table below, check case "JKL 123abc" */ static const ImpTabPair impTab_INVERSE_LIKE_DIRECT = { @@ -1149,11 +1715,12 @@ static const ImpTab impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS = /* 5 : L+ON+EN */ { s(5,3), s(4,0), 5 , s(3,6), 4 , s(4,0), s(4,0), 1 }, /* 6 : L+AN */ { s(5,3), s(4,0), 6 , 6 , 4 , s(4,0), s(4,0), 3 } }; -static const ImpAct impAct2 = {0,1,7,8,9,10}; +static const ImpAct impAct2 = {0,1,2,5,6,7,8}; +static const ImpAct impAct3 = {0,1,9,10,11,12}; static const ImpTabPair impTab_INVERSE_LIKE_DIRECT_WITH_MARKS = { {&impTabL_INVERSE_LIKE_DIRECT_WITH_MARKS, &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, - {&impAct0, &impAct2}}; + {&impAct2, &impAct3}}; static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL = { {&impTabL_NUMBERS_SPECIAL, @@ -1174,7 +1741,7 @@ static const ImpTab impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = static const ImpTabPair impTab_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS = { {&impTabL_INVERSE_FOR_NUMBERS_SPECIAL_WITH_MARKS, &impTabR_INVERSE_LIKE_DIRECT_WITH_MARKS}, - {&impAct0, &impAct2}}; + {&impAct2, &impAct3}}; #undef s @@ -1185,6 +1752,7 @@ typedef struct { int32_t startL2EN; /* start of level 2 sequence */ int32_t lastStrongRTL; /* index of last found R or AL */ int32_t state; /* current state */ + int32_t runStart; /* start position of the run */ UBiDiLevel runLevel; /* run level before implicit solving */ } LevState; @@ -1230,6 +1798,23 @@ addPoint(UBiDi *pBiDi, int32_t pos, int32_t flag) #undef FIRSTALLOC } +static void +setLevelsOutsideIsolates(UBiDi *pBiDi, int32_t start, int32_t limit, UBiDiLevel level) +{ + DirProp *dirProps=pBiDi->dirProps, dirProp; + UBiDiLevel *levels=pBiDi->levels; + int32_t isolateCount=0, k; + for(k=start; kstartON; break; - case 3: /* L or S after possible relevant EN/AN */ + case 3: /* EN/AN after R+ON */ + level=pLevState->runLevel+1; + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); + break; + + case 4: /* EN/AN before R for NUMBERS_SPECIAL */ + level=pLevState->runLevel+2; + setLevelsOutsideIsolates(pBiDi, pLevState->startON, start0, level); + break; + + case 5: /* L or S after possible relevant EN/AN */ /* check if we had EN after R/AL */ if (pLevState->startL2EN >= 0) { addPoint(pBiDi, pLevState->startL2EN, LRM_BEFORE); @@ -1314,7 +1909,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, } break; - case 4: /* R/AL after possible relevant EN/AN */ + case 6: /* R/AL after possible relevant EN/AN */ /* just clean up */ pInsertPoints=&(pBiDi->insertPoints); if (pInsertPoints->capacity > 0) @@ -1325,9 +1920,9 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, pLevState->lastStrongRTL=limit - 1; break; - case 5: /* EN/AN after R/AL + possible cont */ + case 7: /* EN/AN after R/AL + possible cont */ /* check for real AN */ - if ((_prop == DirProp_AN) && (NO_CONTEXT_RTL(pBiDi->dirProps[start0]) == AN) && + if ((_prop == DirProp_AN) && (pBiDi->dirProps[start0] == AN) && (pBiDi->reorderingMode!=UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)) { /* real AN */ @@ -1352,12 +1947,12 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, } break; - case 6: /* note location of latest R/AL */ + case 8: /* note location of latest R/AL */ pLevState->lastStrongRTL=limit - 1; pLevState->startON=-1; break; - case 7: /* L after R+ON/EN/AN */ + case 9: /* L after R+ON/EN/AN */ /* include possible adjacent number on the left */ for (k=start0-1; k>=0 && !(levels[k]&1); k--); if(k>=0) { @@ -1368,14 +1963,14 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, pLevState->startON=start0; break; - case 8: /* AN after L */ + case 10: /* AN after L */ /* AN numbers between L text on both sides may be trouble. */ /* tentatively bracket with LRMs; will be confirmed if followed by L */ addPoint(pBiDi, start0, LRM_BEFORE); /* add LRM before */ addPoint(pBiDi, start0, LRM_AFTER); /* add LRM after */ break; - case 9: /* R after L+ON/EN/AN */ + case 11: /* R after L+ON/EN/AN */ /* false alert, infirm LRMs around previous AN */ pInsertPoints=&(pBiDi->insertPoints); pInsertPoints->size=pInsertPoints->confirmed; @@ -1386,7 +1981,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, } break; - case 10: /* L after L+ON/AN */ + case 12: /* L after L+ON/AN */ level=pLevState->runLevel + addLevel; for(k=pLevState->startON; kstartON=start0; break; - case 11: /* L after L+ON+EN/AN/ON */ + case 13: /* L after L+ON+EN/AN/ON */ level=pLevState->runLevel; for(k=start0-1; k>=pLevState->startON; k--) { if(levels[k]==level+3) { @@ -1416,7 +2011,7 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, } break; - case 12: /* R after L+ON+EN/AN/ON */ + case 14: /* R after L+ON+EN/AN/ON */ level=pLevState->runLevel+1; for(k=start0-1; k>=pLevState->startON; k--) { if(levels[k]>level) { @@ -1432,15 +2027,22 @@ processPropertySeq(UBiDi *pBiDi, LevState *pLevState, uint8_t _prop, } if((addLevel) || (start < start0)) { level=pLevState->runLevel + addLevel; - for(k=start; k=pLevState->runStart) { + for(k=start; kprologue; int32_t length=pBiDi->proLength; int32_t i; @@ -1463,9 +2065,12 @@ lastL_R_AL(UBiDi *pBiDi) { return DirProp_ON; } +/** + * Returns the directionality of the first strong character, or digit, in the epilogue, if any. + * Requires epilogue!=null. + */ static DirProp firstL_R_AL_EN_AN(UBiDi *pBiDi) { - /* return first strong char or digit in epilogue */ const UChar *text=pBiDi->epilogue; int32_t length=pBiDi->epiLength; int32_t i; @@ -1496,17 +2101,15 @@ resolveImplicitLevels(UBiDi *pBiDi, int32_t start, int32_t limit, DirProp sor, DirProp eor) { const DirProp *dirProps=pBiDi->dirProps; - + DirProp dirProp; LevState levState; int32_t i, start1, start2; - uint8_t oldStateImp, stateImp, actionImp; + uint16_t oldStateImp, stateImp, actionImp; uint8_t gprop, resProp, cell; UBool inverseRTL; DirProp nextStrongProp=R; int32_t nextStrongPos=-1; - levState.startON = -1; /* silence gcc flow analysis */ - /* check for RTL inverse BiDi mode */ /* FOOD FOR THOUGHT: in case of RTL inverse BiDi, it would make sense to * loop on the text characters from end to start. @@ -1518,10 +2121,11 @@ resolveImplicitLevels(UBiDi *pBiDi, ((startlastArabicPos) && (GET_PARALEVEL(pBiDi, start) & 1) && (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT || pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL)); - /* initialize for levels state table */ + + /* initialize for property and levels state tables */ levState.startL2EN=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ levState.lastStrongRTL=-1; /* used for INVERSE_LIKE_DIRECT_WITH_MARKS */ - levState.state=0; + levState.runStart=start; levState.runLevel=pBiDi->levels[start]; levState.pImpTab=(const ImpTab*)((pBiDi->pImpTabPair)->pImpTab)[levState.runLevel&1]; levState.pImpAct=(const ImpAct*)((pBiDi->pImpTabPair)->pImpAct)[levState.runLevel&1]; @@ -1531,22 +2135,38 @@ resolveImplicitLevels(UBiDi *pBiDi, sor=lastStrong; } } - processPropertySeq(pBiDi, &levState, sor, start, start); - /* initialize for property state table */ - if(NO_CONTEXT_RTL(dirProps[start])==NSM) { - stateImp = 1 + sor; + /* The isolates[] entries contain enough information to + resume the bidi algorithm in the same state as it was + when it was interrupted by an isolate sequence. */ + if(dirProps[start]==PDI) { + levState.startON=pBiDi->isolates[pBiDi->isolateCount].startON; + start1=pBiDi->isolates[pBiDi->isolateCount].start1; + stateImp=pBiDi->isolates[pBiDi->isolateCount].stateImp; + levState.state=pBiDi->isolates[pBiDi->isolateCount].state; + pBiDi->isolateCount--; } else { - stateImp=0; + levState.startON=-1; + start1=start; + if(dirProps[start]==NSM) + stateImp = 1 + sor; + else + stateImp=0; + levState.state=0; + processPropertySeq(pBiDi, &levState, sor, start, start); } - start1=start; - start2=start; + start2=start; /* to make Java compiler happy */ for(i=start; i<=limit; i++) { if(i>=limit) { + int32_t k; + for(k=limit-1; k>start&&(DIRPROP_FLAG(dirProps[k])&MASK_BN_EXPLICIT); k--); + dirProp=dirProps[k]; + if(dirProp==LRI || dirProp==RLI) + break; /* no forced closing for sequence ending with LRI/RLI */ gprop=eor; } else { DirProp prop, prop1; - prop=NO_CONTEXT_RTL(dirProps[i]); + prop=dirProps[i]; if(inverseRTL) { if(prop==AL) { /* AL before EN does not make it AN */ @@ -1558,7 +2178,7 @@ resolveImplicitLevels(UBiDi *pBiDi, nextStrongProp=R; /* set default */ nextStrongPos=limit; for(j=i+1; jlength && pBiDi->epiLength>0) { DirProp firstStrong=firstL_R_AL_EN_AN(pBiDi); @@ -1614,7 +2235,19 @@ resolveImplicitLevels(UBiDi *pBiDi, eor=firstStrong; } } - processPropertySeq(pBiDi, &levState, eor, limit, limit); + + /* look for the last char not a BN or LRE/RLE/LRO/RLO/PDF */ + for(i=limit-1; i>start&&(DIRPROP_FLAG(dirProps[i])&MASK_BN_EXPLICIT); i--); + dirProp=dirProps[i]; + if((dirProp==LRI || dirProp==RLI) && limitlength) { + pBiDi->isolateCount++; + pBiDi->isolates[pBiDi->isolateCount].stateImp=stateImp; + pBiDi->isolates[pBiDi->isolateCount].state=levState.state; + pBiDi->isolates[pBiDi->isolateCount].start1=start1; + pBiDi->isolates[pBiDi->isolateCount].startON=levState.startON; + } + else + processPropertySeq(pBiDi, &levState, eor, limit, limit); } /* perform (L1) and (X9) ---------------------------------------------------- */ @@ -1638,7 +2271,7 @@ adjustWSLevels(UBiDi *pBiDi) { i=pBiDi->trailingWSStart; while(i>0) { /* reset a sequence of WS/BN before eop and B/S to the paragraph paraLevel */ - while(i>0 && (flag=DIRPROP_FLAG_NC(dirProps[--i]))&MASK_WS) { + while(i>0 && (flag=DIRPROP_FLAG(dirProps[--i]))&MASK_WS) { if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { levels[i]=0; } else { @@ -1649,7 +2282,7 @@ adjustWSLevels(UBiDi *pBiDi) { /* reset BN to the next character's paraLevel until B/S, which restarts above loop */ /* here, i+1 is guaranteed to be 0) { - flag=DIRPROP_FLAG_NC(dirProps[--i]); + flag=DIRPROP_FLAG(dirProps[--i]); if(flag&MASK_BN_EXPLICIT) { levels[i]=levels[i+1]; } else if(orderParagraphsLTR&&(flag&DIRPROP_FLAG(B))) { @@ -1700,6 +2333,7 @@ setParaSuccess(UBiDi *pBiDi) { #define BIDI_MIN(x, y) ((x)<(y) ? (x) : (y)) #define BIDI_ABS(x) ((x)>=0 ? (x) : (-(x))) + static void setParaRunsOnly(UBiDi *pBiDi, const UChar *text, int32_t length, UBiDiLevel paraLevel, UErrorCode *pErrorCode) { @@ -1893,6 +2527,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels, UErrorCode *pErrorCode) { UBiDiDirection direction; + DirProp *dirProps; /* check the argument values */ RETURN_VOID_IF_NULL_OR_FAILING_ERRCODE(pErrorCode); @@ -1917,7 +2552,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, pBiDi->text=text; pBiDi->length=pBiDi->originalLength=pBiDi->resultLength=length; pBiDi->paraLevel=paraLevel; - pBiDi->direction=UBIDI_LTR; + pBiDi->direction=paraLevel&1; pBiDi->paraCount=1; pBiDi->dirProps=NULL; @@ -1929,11 +2564,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, /* * Save the original paraLevel if contextual; otherwise, set to 0. */ - if(IS_DEFAULT_LEVEL(paraLevel)) { - pBiDi->defaultParaLevel=paraLevel; - } else { - pBiDi->defaultParaLevel=0; - } + pBiDi->defaultParaLevel=IS_DEFAULT_LEVEL(paraLevel); if(length==0) { /* @@ -1945,14 +2576,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, pBiDi->paraLevel&=1; pBiDi->defaultParaLevel=0; } - if(paraLevel&1) { - pBiDi->flags=DIRPROP_FLAG(R); - pBiDi->direction=UBIDI_RTL; - } else { - pBiDi->flags=DIRPROP_FLAG(L); - pBiDi->direction=UBIDI_LTR; - } - + pBiDi->flags=DIRPROP_FLAG_LR(paraLevel); pBiDi->runCount=0; pBiDi->paraCount=0; setParaSuccess(pBiDi); /* mark successful setPara */ @@ -1961,6 +2585,12 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, pBiDi->runCount=-1; + /* allocate paras memory */ + if(pBiDi->parasMemory) + pBiDi->paras=pBiDi->parasMemory; + else + pBiDi->paras=pBiDi->simpleParas; + /* * Get the directional properties, * the flags bit-set, and @@ -1968,35 +2598,28 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, */ if(getDirPropsMemory(pBiDi, length)) { pBiDi->dirProps=pBiDi->dirPropsMemory; - getDirProps(pBiDi); + if(!getDirProps(pBiDi)) { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } } else { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return; } + dirProps=pBiDi->dirProps; /* the processed length may have changed if UBIDI_OPTION_STREAMING */ length= pBiDi->length; pBiDi->trailingWSStart=length; /* the levels[] will reflect the WS run */ - /* allocate paras memory */ - if(pBiDi->paraCount>1) { - if(getInitialParasMemory(pBiDi, pBiDi->paraCount)) { - pBiDi->paras=pBiDi->parasMemory; - pBiDi->paras[pBiDi->paraCount-1]=length; - } else { - *pErrorCode=U_MEMORY_ALLOCATION_ERROR; - return; - } - } else { - /* initialize paras for single paragraph */ - pBiDi->paras=pBiDi->simpleParas; - pBiDi->simpleParas[0]=length; - } /* are explicit levels specified? */ if(embeddingLevels==NULL) { /* no: determine explicit levels according to the (Xn) rules */\ if(getLevelsMemory(pBiDi, length)) { pBiDi->levels=pBiDi->levelsMemory; - direction=resolveExplicitLevels(pBiDi); + direction=resolveExplicitLevels(pBiDi, pErrorCode); + if(U_FAILURE(*pErrorCode)) { + return; + } } else { *pErrorCode=U_MEMORY_ALLOCATION_ERROR; return; @@ -2010,6 +2633,22 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, } } + /* allocate isolate memory */ + if(pBiDi->isolateCount<=SIMPLE_ISOLATES_SIZE) + pBiDi->isolates=pBiDi->simpleIsolates; + else + if((int32_t)(pBiDi->isolateCount*sizeof(Isolate))<=pBiDi->isolatesSize) + pBiDi->isolates=pBiDi->isolatesMemory; + else { + if(getInitialIsolatesMemory(pBiDi, pBiDi->isolateCount)) { + pBiDi->isolates=pBiDi->isolatesMemory; + } else { + *pErrorCode=U_MEMORY_ALLOCATION_ERROR; + return; + } + } + pBiDi->isolateCount=-1; /* current isolates stack entry == none */ + /* * The steps after (X9) in the UBiDi algorithm are performed only if * the paragraph text has mixed directionality! @@ -2104,7 +2743,7 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, /* the values for this run's start are the same as for the previous run's end */ start=limit; level=nextLevel; - if((start>0) && (NO_CONTEXT_RTL(pBiDi->dirProps[start-1])==B)) { + if((start>0) && (dirProps[start-1]==B)) { /* except if this is a new paragraph, then set sor = para level */ sor=GET_LR_FROM_LEVEL(GET_PARALEVEL(pBiDi, start)); } else { @@ -2112,7 +2751,9 @@ ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length, } /* search for the limit of this run */ - while(++limitreorderingMode==UBIDI_REORDER_INVERSE_LIKE_DIRECT) || (pBiDi->reorderingMode==UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL))) { int32_t i, j, start, last; + UBiDiLevel level; DirProp dirProp; for(i=0; iparaCount; i++) { - last=pBiDi->paras[i]-1; - if((pBiDi->dirProps[last] & CONTEXT_RTL)==0) { + last=(pBiDi->paras[i].limit)-1; + level=pBiDi->paras[i].level; + if(level==0) continue; /* LTR paragraph */ - } - start= i==0 ? 0 : pBiDi->paras[i - 1]; + start= i==0 ? 0 : pBiDi->paras[i-1].limit; for(j=last; j>=start; j--) { - dirProp=NO_CONTEXT_RTL(pBiDi->dirProps[j]); + dirProp=dirProps[j]; if(dirProp==L) { if(jdirProps[last])==B) { + while(dirProps[last]==B) { last--; } } @@ -2285,7 +2927,7 @@ ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ if(paraIndex) { - paraStart=pBiDi->paras[paraIndex-1]; + paraStart=pBiDi->paras[paraIndex-1].limit; } else { paraStart=0; } @@ -2293,7 +2935,7 @@ ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex, *pParaStart=paraStart; } if(pParaLimit!=NULL) { - *pParaLimit=pBiDi->paras[paraIndex]; + *pParaLimit=pBiDi->paras[paraIndex].limit; } if(pParaLevel!=NULL) { *pParaLevel=GET_PARALEVEL(pBiDi, paraStart); @@ -2304,7 +2946,7 @@ U_CAPI int32_t U_EXPORT2 ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart, int32_t *pParaLimit, UBiDiLevel *pParaLevel, UErrorCode *pErrorCode) { - uint32_t paraIndex; + int32_t paraIndex; /* check the argument values */ /* pErrorCode will be checked by the call to ubidi_getParagraphByIndex */ @@ -2313,7 +2955,7 @@ ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, pBiDi=pBiDi->pParaBiDi; /* get Para object if Line object */ RETURN_IF_BAD_RANGE(charIndex, 0, pBiDi->length, *pErrorCode, -1); - for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex]; paraIndex++); + for(paraIndex=0; charIndex>=pBiDi->paras[paraIndex].limit; paraIndex++); ubidi_getParagraphByIndex(pBiDi, paraIndex, pParaStart, pParaLimit, pParaLevel, pErrorCode); return paraIndex; } @@ -2364,9 +3006,10 @@ ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c) if( pBiDi->fnClassCallback == NULL || (dir = (*pBiDi->fnClassCallback)(pBiDi->coClassCallback, c)) == U_BIDI_CLASS_DEFAULT ) { - return ubidi_getClass(pBiDi->bdp, c); - } else { - return dir; + dir = ubidi_getClass(pBiDi->bdp, c); + } + if(dir >= U_CHAR_DIRECTION_COUNT) { + dir = ON; } + return dir; } -