[apple/icu.git] / icuSources / layout / TibetanReordering.cpp

/*
 *
 * (C) Copyright IBM Corp. 1998-2006 - All Rights Reserved 
 *
 * Developed at DIT - Government of Bhutan
 *
 * Contact person: Pema Geyleg - <pema_geyleg@druknet.bt> 
 *
 * This file is a modification of the ICU file KhmerReordering.cpp
 * by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
 * A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
 * Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
 *
 */

//#include <stdio.h>
#include "LETypes.h"
#include "OpenTypeTables.h"
#include "TibetanReordering.h"
#include "LEGlyphStorage.h"


U_NAMESPACE_BEGIN

// Characters that are referred to by name...
enum
{
    // U+25CC: written by reorder() in front of a syllable whose first character
    // carries the CF_DOTTED_CIRCLE flag.
    C_DOTTED_CIRCLE = 0x25CC,
    // U+0F3F: the only code point classified as _pd (pre-digit mark) in tibetanCharClasses;
    // reorder() writes it in front of the digit that precedes it in the input.
    C_PRE_NUMBER_MARK = 0x0F3F
 };


enum
{
    // Simple classes. They are used in the state table (in this file) to control the length of a syllable.
    // They are also used to know where a character should be placed (location in reference to the base character)
    // and to know whether a character, when displayed independently, should be displayed with a dotted circle to
    // indicate an error in syllable construction.
    //
    // Each value is one CC_* class code combined with zero or more CF_* flags. findSyllable() strips the
    // flags with CF_CLASS_MASK to obtain the state table column; reorder() tests CF_DOTTED_CIRCLE, CF_DIGIT,
    // CF_PREDIGIT and the CF_POS_* position bits.
    _xx = TibetanClassTable::CC_RESERVED,
    _ba = TibetanClassTable::CC_BASE,
    _sj = TibetanClassTable::CC_SUBJOINED | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW, 
    _tp = TibetanClassTable::CC_TSA_PHRU  | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
    _ac = TibetanClassTable::CC_A_CHUNG |  TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
    _cs = TibetanClassTable::CC_COMP_SANSKRIT | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
    _ha = TibetanClassTable::CC_HALANTA | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW, 
    _bv = TibetanClassTable::CC_BELOW_VOWEL | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
    _av = TibetanClassTable::CC_ABOVE_VOWEL | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
    _an = TibetanClassTable::CC_ANUSVARA | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
    _cb = TibetanClassTable::CC_CANDRABINDU | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
    _vs = TibetanClassTable::CC_VISARGA | TibetanClassTable::CF_DOTTED_CIRCLE| TibetanClassTable::CF_POS_AFTER,
    _as = TibetanClassTable::CC_ABOVE_S_MARK | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_ABOVE,
    _bs = TibetanClassTable::CC_BELOW_S_MARK | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_BELOW,
    // Digits carry CF_DIGIT and no dotted-circle flag, so a digit can start a syllable on its own.
    _di = TibetanClassTable::CC_DIGIT | TibetanClassTable::CF_DIGIT,
    // The pre-digit mark carries CF_PREDIGIT, which is what reorder() looks for after a digit.
    _pd = TibetanClassTable::CC_PRE_DIGIT_MARK | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_PREDIGIT | TibetanClassTable::CF_POS_BEFORE ,
    _bd = TibetanClassTable::CC_POST_BELOW_DIGIT_M | TibetanClassTable::CF_DOTTED_CIRCLE | TibetanClassTable::CF_POS_AFTER
};


// Character class tables 
//_xx Non Combining characters
//_ba Base Consonants
//_sj Subjoined consonants
//_tp Tsa - phru
//_ac A-chung, Vowel Lengthening mark
//_cs Precomposed Sanskrit vowel + subjoined consonants
//_ha Halanta/Virama
//_bv Below vowel
//_av above vowel
//_an Anusvara
//_cb Candrabindu
//_vs Visarga/Post mark
//_as Upper Stress marks
//_bs Lower Stress marks
//_di Digit
//_pd Number pre combining, Needs reordering
//_bd Other number combining marks

// Class of every code point from U+0F00 to U+0FFF, one row per 16 code points.
// getCharClass() indexes this table with (ch - firstChar), so the entry order must
// follow the code point order exactly.
static const TibetanClassTable::CharClass tibetanCharClasses[] =
{
   // 0    1    2    3    4    5    6    7    8    9    a    b    c    d    e    f
    _xx, _ba, _xx, _xx, _ba, _ba, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0F00 - 0F0F 0
    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _bd, _bd, _xx, _xx, _xx, _xx, _xx, _xx, // 0F10 - 0F1F 1
    _di, _di, _di, _di, _di, _di, _di, _di, _di, _di, _xx, _xx, _xx, _xx, _xx, _xx, // 0F20 - 0F2F 2
    _xx, _xx, _xx, _xx, _xx, _bs, _xx, _bs, _xx, _tp, _xx, _xx, _xx, _xx, _bd, _pd, // 0F30 - 0F3F 3
    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F40 - 0F4F 4
    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F50 - 0F5F 5
    _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, _xx, // 0F60 - 0F6F 6
    _xx, _ac, _av, _cs, _bv, _bv, _cs, _cs, _cs, _cs, _av, _av, _av, _av, _an, _vs, // 0F70 - 0F7F 7
    _av, _cs, _cb, _cb, _ha, _xx, _as, _as, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, // 0F80 - 0F8F 8
    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0F90 - 0F9F 9
    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0FA0 - 0FAF a
    _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, // 0FB0 - 0FBF b
    _xx, _xx, _xx, _xx, _xx, _xx, _bs, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FC0 - 0FCF c
    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FD0 - 0FDF d
    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FE0 - 0FEF e
    _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FF0 - 0FFF f
};


//
// Tibetan Class Tables
//                   

//
// The range of characters covered by the above table is defined here: for Tibetan, 0F00 to 0FFF,
// i.e. one table entry per code point. Many characters in that range are not combinable and are
// therefore classified as _xx; characters outside the range are reported as CC_RESERVED by getCharClass().
static const TibetanClassTable tibetanClassTable = {0x0F00, 0x0FFF, tibetanCharClasses};


// Returns the class of a single input character. A character inside the
// [firstChar, lastChar] range is looked up in classTable (which yields its class
// code together with its flags); any other character is reported as CC_RESERVED (_xx).
TibetanClassTable::CharClass TibetanClassTable::getCharClass(LEUnicode ch) const
{
    if (ch >= firstChar && ch <= lastChar) {
        // The table is indexed relative to the first covered code point.
        return classTable[ch - firstChar];
    }

    return CC_RESERVED;
}

// Gives access to the single, file-level Tibetan class table.
const TibetanClassTable *TibetanClassTable::getTibetanClassTable()
{
    const TibetanClassTable *const table = &tibetanClassTable;

    return table;
}


class TibetanReorderingOutput : public UMemory {
private:
    le_int32 fOutIndex;
    LEUnicode *fOutChars;

    LEGlyphStorage &fGlyphStorage;


public:
    TibetanReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage)
        : fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage)
    {
        // nothing else to do...
    }

    ~TibetanReorderingOutput()
    {
        // nothing to do here...
    }

    void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask featureMask)
    {
        LEErrorCode success = LE_NO_ERROR;

        fOutChars[fOutIndex] = ch;

        fGlyphStorage.setCharIndex(fOutIndex, charIndex, success);
        fGlyphStorage.setAuxData(fOutIndex, featureMask, success);

        fOutIndex += 1;
    }

    le_int32 getOutputIndex()
    {
        return fOutIndex;
    }
};


//TODO remove unused flags
// Local aliases for the OpenType feature tags that appear in featureMap.
#define ccmpFeatureTag LE_CCMP_FEATURE_TAG
#define blwfFeatureTag LE_BLWF_FEATURE_TAG
#define pstfFeatureTag LE_PSTF_FEATURE_TAG
#define presFeatureTag LE_PRES_FEATURE_TAG
#define blwsFeatureTag LE_BLWS_FEATURE_TAG
#define abvsFeatureTag LE_ABVS_FEATURE_TAG
#define pstsFeatureTag LE_PSTS_FEATURE_TAG

#define blwmFeatureTag LE_BLWM_FEATURE_TAG
#define abvmFeatureTag LE_ABVM_FEATURE_TAG
#define distFeatureTag LE_DIST_FEATURE_TAG

#define prefFeatureTag LE_PREF_FEATURE_TAG
#define abvfFeatureTag LE_ABVF_FEATURE_TAG
#define cligFeatureTag LE_CLIG_FEATURE_TAG
#define mkmkFeatureTag LE_MKMK_FEATURE_TAG

// One distinct bit per feature. The bits are OR-ed together into the per-character
// masks (tagPref, tagAbvf, ...) that reorder() stores for every output character.

// Shaping features
#define prefFeatureMask 0x80000000UL
#define blwfFeatureMask 0x40000000UL
#define abvfFeatureMask 0x20000000UL
#define pstfFeatureMask 0x10000000UL
#define presFeatureMask 0x08000000UL
#define blwsFeatureMask 0x04000000UL
#define abvsFeatureMask 0x02000000UL
#define pstsFeatureMask 0x01000000UL
#define cligFeatureMask 0x00800000UL
#define ccmpFeatureMask 0x00040000UL

// Positioning features
#define distFeatureMask 0x00400000UL
#define blwmFeatureMask 0x00200000UL
#define abvmFeatureMask 0x00100000UL
#define mkmkFeatureMask 0x00080000UL

// Feature sets assigned by reorder():
//   tagPref    - a digit and the pre-number mark that was moved in front of it
//   tagAbvf    - characters whose position flag is CF_POS_ABOVE
//   tagPstf    - characters whose position flag is CF_POS_AFTER
//   tagBlwf    - characters whose position flag is CF_POS_BELOW
//   tagDefault - every other character, and an inserted dotted circle
#define tagPref    (ccmpFeatureMask | prefFeatureMask | presFeatureMask | cligFeatureMask | distFeatureMask)
#define tagAbvf    (ccmpFeatureMask | abvfFeatureMask | abvsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | mkmkFeatureMask)
#define tagPstf    (ccmpFeatureMask | blwfFeatureMask | blwsFeatureMask | prefFeatureMask | presFeatureMask | pstfFeatureMask | pstsFeatureMask | cligFeatureMask | distFeatureMask | blwmFeatureMask)
#define tagBlwf    (ccmpFeatureMask | blwfFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | blwmFeatureMask | mkmkFeatureMask)
#define tagDefault (ccmpFeatureMask | prefFeatureMask | blwfFeatureMask | presFeatureMask | blwsFeatureMask | cligFeatureMask | distFeatureMask | abvmFeatureMask | blwmFeatureMask | mkmkFeatureMask)


// These are in the order in which the features need to be applied
// for correct processing. Each entry pairs an OpenType feature tag with the
// mask bit that selects it in the per-character feature masks written by reorder().
static const FeatureMap featureMap[] =
{
    // Shaping features
    {ccmpFeatureTag, ccmpFeatureMask},
    {prefFeatureTag, prefFeatureMask},
    {blwfFeatureTag, blwfFeatureMask},
    {abvfFeatureTag, abvfFeatureMask},
    {pstfFeatureTag, pstfFeatureMask}, 
    {presFeatureTag, presFeatureMask},
    {blwsFeatureTag, blwsFeatureMask},
    {abvsFeatureTag, abvsFeatureMask},
    {pstsFeatureTag, pstsFeatureMask},
    {cligFeatureTag, cligFeatureMask},
    
    // Positioning features
    {distFeatureTag, distFeatureMask},
    {blwmFeatureTag, blwmFeatureMask},
    {abvmFeatureTag, abvmFeatureMask},
    {mkmkFeatureTag, mkmkFeatureMask},
};

// Number of entries in featureMap; reported through TibetanReordering::getFeatureMap().
static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap);

// The stateTable is used to calculate the end (the length) of a well
// formed Tibetan Syllable. 
//
// Each horizontal line is ordered exactly the same way as the values in TibetanClassTable
// CharClassValues in TibetanReordering.h This coincidence of values allows the
// follow up of the table.
//
// Each line corresponds to a state, which does not necessarily need to be a type
// of component... for example, state 2 is a base, which is always the first character
// in the syllable, but the state could be produced by a consonant of any type when
// it is the first character that is analysed (in ground state).
//
// Transition table used by findSyllable(): tibetanStateTable[state][class] is the
// next state. A negative entry (-1) means the character cannot continue the current
// syllable, so the syllable ends just before it. Row 0 (ground state) has no negative
// entry, so the first character examined is always accepted.
static const le_int8 tibetanStateTable[][TibetanClassTable::CC_COUNT] =
{


    //Dzongkha state table
    //xx  ba  sj  tp  ac  cs  ha  bv  av  an  cb  vs  as  bs  di  pd  bd
    { 1,  2,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, 20, 21, 21,}, //  0 - ground state
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, //  1 - exit state (or sign to the right of the syllable)
    {-1, -1,  4,  3,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  2 - Base consonant
    {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  3 - Tsa phru after base
    {-1, -1,  4,  6,  8,  7,  9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  4 - Subjoined consonant after base
    {-1, -1,  5, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  5 - Subjoined consonant after tsa phru
    {-1, -1, -1, -1,  8,  7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  6 - Tsa phru after subjoined consonant
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, //  7 - Pre Composed Sanskrit
    {-1, -1, -1, -1, -1, -1, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, //  8 - A-chung
    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, -1, 19, 19, -1, -1, -1,}, //  9 - Halanta
    {-1, -1, -1, -1, -1, -1, -1, 11, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 10 - below vowel 1
    {-1, -1, -1, -1, -1, -1, -1, 12, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 11 - below vowel 2
    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 12 - below vowel 3
    {-1, -1, -1, -1, -1, -1, -1, -1, 14, 17, 17, 18, 19, 19, -1, -1, -1,}, // 13 - Anusvara before vowel
    {-1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 17, 18, 19, 19, -1, -1, -1,}, // 14 - above vowel 1
    {-1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 17, 18, 19, 19, -1, -1, -1,}, // 15 - above vowel 2
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 17, 18, 19, 19, -1, -1, -1,}, // 16 - above vowel 3
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 19, 19, -1, -1, -1,}, // 17 - Anusvara or Candrabindu after vowel
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 18 - Visarga
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 19 - stress mark
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, 21,}, // 20 - digit
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 21 - digit mark


};


// Returns the ordered feature map for Tibetan and stores its number of
// entries in count.
const FeatureMap *TibetanReordering::getFeatureMap(le_int32 &count)
{
    const FeatureMap *const map = featureMap;

    count = featureMapCount;

    return map;
}


// Finds where the syllable that starts at position prev ends.
//
// Starting in the ground state, each character's class code (its class with the
// flag bits masked off) drives one transition of tibetanStateTable. Scanning stops
// at the first character for which the table yields a negative state, or at
// charCount, whichever comes first.
//
// Returns the index one past the last character of the syllable.
le_int32 TibetanReordering::findSyllable(const TibetanClassTable *classTable, const LEUnicode *chars, le_int32 prev, le_int32 charCount)
{
    le_int8 state = 0;
    le_int32 end = prev;

    for (; end < charCount; end += 1) {
        TibetanClassTable::CharClass classCode = (classTable->getCharClass(chars[end]) & TibetanClassTable::CF_CLASS_MASK);

        state = tibetanStateTable[state][classCode];

        if (state < 0) {
            // chars[end] does not belong to this syllable.
            return end;
        }
    }

    return end;
}


// This is the real reordering function as applied to the Tibetan language.
//
// The run is processed syllable by syllable (syllable boundaries come from
// findSyllable()). For each syllable:
//   - if its first character carries CF_DOTTED_CIRCLE, a dotted circle is written first;
//   - a digit that is followed, inside the same syllable, by a pre-digit mark is
//     written as C_PRE_NUMBER_MARK followed by the digit (the two are swapped);
//   - every other character is copied unchanged, with a feature mask chosen from its
//     position flag (above / after / below / default).
//
// chars        - input characters of the run
// charCount    - number of characters in chars
// (unnamed)    - unused
// outChars     - receives the reordered characters; the output can be longer than the
//                input because of inserted dotted circles.
//                NOTE(review): capacity is not checked here - confirm the caller's sizing.
// glyphStorage - receives, per output character, the source index and feature mask
//
// Returns the number of characters written to outChars.
le_int32 TibetanReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32,
                                  LEUnicode *outChars, LEGlyphStorage &glyphStorage)
{
    const TibetanClassTable *classTable = TibetanClassTable::getTibetanClassTable();

    TibetanReorderingOutput output(outChars, glyphStorage);
    TibetanClassTable::CharClass charClass;
    le_int32 i, prev = 0;

    // This loop only exits when we reach the end of a run, which may contain
    // several syllables.
    while (prev < charCount) {
        le_int32 syllable = findSyllable(classTable, chars, prev, charCount);

        // shall we add a dotted circle?
        // If in the position in which the base should be (first char in the string) there is
        // a character that has the Dotted circle flag (a character that cannot be a base)
        // then write a dotted circle
        if (classTable->getCharClass(chars[prev]) & TibetanClassTable::CF_DOTTED_CIRCLE) {
            output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault);
        }

        // copy the rest to output, inverting the pre-number mark if present after a digit.
        for (i = prev; i < syllable; i += 1) {
            charClass = classTable->getCharClass(chars[i]);

            // If the present character is a number, and the next character is a pre-number
            // combining mark, then the two characters are reordered.
            // The (i + 1 < syllable) test must come before chars[i + 1] is read: without it a
            // digit in the last position of the run caused a read past the end of chars.
            if ((TibetanClassTable::CF_DIGIT & charClass)
                && (i + 1 < syllable)
                && (classTable->getCharClass(chars[i + 1]) & TibetanClassTable::CF_PREDIGIT))
            {
                // NOTE(review): the mark is tagged with index i and the digit with i + 1,
                // i.e. the indices follow output order rather than source order - confirm intended.
                output.writeChar(C_PRE_NUMBER_MARK, i, tagPref);
                output.writeChar(chars[i], i + 1, tagPref);
                i += 1; // the mark at i + 1 has been consumed as well
            } else {
                switch (charClass & TibetanClassTable::CF_POS_MASK) {
                    case TibetanClassTable::CF_POS_ABOVE :
                        output.writeChar(chars[i], i, tagAbvf);
                        break;

                    case TibetanClassTable::CF_POS_AFTER :
                        output.writeChar(chars[i], i, tagPstf);
                        break;

                    case TibetanClassTable::CF_POS_BELOW :
                        output.writeChar(chars[i], i, tagBlwf);
                        break;

                    default:
                        // default - any other characters
                        output.writeChar(chars[i], i, tagDefault);
                        break;
                } // switch
            } // if
        } // for

        prev = syllable; // move the pointer to the start of next syllable
    }

    return output.getOutputIndex();
}


U_NAMESPACE_END
Commit	Line	Data
73c04bcf A	1	/*
	2	*
	3	* (C) Copyright IBM Corp. 1998-2006 - All Rights Reserved
	4	*
	5	* Developed at DIT - Government of Bhutan
	6	*
	7	* Contact person: Pema Geyleg - <pema_geyleg@druknet.bt>
	8	*
	9	* This file is a modification of the ICU file KhmerReordering.cpp
	10	* by Jens Herden and Javier Sola who have given all their possible rights to IBM and the Governement of Bhutan
	11	* A first module for Dzongkha was developed by Karunakar under Panlocalisation funding.
	12	* Assistance for this module has been received from Namgay Thinley, Christopher Fynn and Javier Sola
	13	*
	14	*/
	15
	16	//#include <stdio.h>
	17	#include "LETypes.h"
	18	#include "OpenTypeTables.h"
	19	#include "TibetanReordering.h"
	20	#include "LEGlyphStorage.h"
	21
	22
	23	U_NAMESPACE_BEGIN
	24
	25	// Characters that get refered to by name...
	26	enum
	27	{
	28	C_DOTTED_CIRCLE = 0x25CC,
	29	C_PRE_NUMBER_MARK = 0x0F3F
	30	};
	31
	32
	33	enum
	34	{
	35	// simple classes, they are used in the statetable (in this file) to control the length of a syllable
	36	// they are also used to know where a character should be placed (location in reference to the base character)
	37	// and also to know if a character, when independtly displayed, should be displayed with a dotted-circle to
	38	// indicate error in syllable construction
	39	_xx = TibetanClassTable::CC_RESERVED,
	40	_ba = TibetanClassTable::CC_BASE,
	41	_sj = TibetanClassTable::CC_SUBJOINED \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_BELOW,
	42	_tp = TibetanClassTable::CC_TSA_PHRU \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_ABOVE,
	43	_ac = TibetanClassTable::CC_A_CHUNG \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_BELOW,
	44	_cs = TibetanClassTable::CC_COMP_SANSKRIT \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_BELOW,
	45	_ha = TibetanClassTable::CC_HALANTA \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_BELOW,
	46	_bv = TibetanClassTable::CC_BELOW_VOWEL \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_BELOW,
	47	_av = TibetanClassTable::CC_ABOVE_VOWEL \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_ABOVE,
	48	_an = TibetanClassTable::CC_ANUSVARA \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_ABOVE,
	49	_cb = TibetanClassTable::CC_CANDRABINDU \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_ABOVE,
	50	_vs = TibetanClassTable::CC_VISARGA \| TibetanClassTable::CF_DOTTED_CIRCLE\| TibetanClassTable::CF_POS_AFTER,
	51	_as = TibetanClassTable::CC_ABOVE_S_MARK \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_ABOVE,
	52	_bs = TibetanClassTable::CC_BELOW_S_MARK \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_BELOW,
	53	_di = TibetanClassTable::CC_DIGIT \| TibetanClassTable::CF_DIGIT,
	54	_pd = TibetanClassTable::CC_PRE_DIGIT_MARK \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_PREDIGIT \| TibetanClassTable::CF_POS_BEFORE ,
	55	_bd = TibetanClassTable::CC_POST_BELOW_DIGIT_M \| TibetanClassTable::CF_DOTTED_CIRCLE \| TibetanClassTable::CF_POS_AFTER
	56	};
	57
	58
	59	// Character class tables
	60	//_xx Non Combining characters
	61	//_ba Base Consonants
	62	//_sj Subjoined consonants
	63	//_tp Tsa - phru
	64	//_ac A-chung, Vowel Lengthening mark
65	//_cs Precomposed Sanskrit vowel + subjoined consonants
66	//_ha Halanta/Virama
67	//_bv Below vowel
68	//_av above vowel
69	//_an Anusvara
70	//_cb Candrabindu
71	//_vs Visaraga/Post mark
72	//_as Upper Stress marks
73	//_bs Lower Stress marks
74	//_di Digit
75	//_pd Number pre combining, Needs reordering
76	//_bd Other number combining marks
77
78	static const TibetanClassTable::CharClass tibetanCharClasses[] =
79	{
80	// 0 1 2 3 4 5 6 7 8 9 a b c d e f
81	_xx, _ba, _xx, _xx, _ba, _ba, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0F00 - 0F0F 0
82	_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _bd, _bd, _xx, _xx, _xx, _xx, _xx, _xx, // 0F10 - 0F1F 1
83	_di, _di, _di, _di, _di, _di, _di, _di, _di, _di, _xx, _xx, _xx, _xx, _xx, _xx, // 0F20 - 0F2F 2
84	_xx, _xx, _xx, _xx, _xx, _bs, _xx, _bs, _xx, _tp, _xx, _xx, _xx, _xx, _bd, _pd, // 0F30 - 0F3F 3
85	_ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F40 - 0F4F 4
86	_ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, // 0F50 - 0F5F 5
87	_ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, _xx, // 0F60 - 0F6F 6
88	_xx, _ac, _av, _cs, _bv, _bv, _cs, _cs, _cs, _cs, _av, _av, _av, _av, _an, _vs, // 0F70 - 0F7F 7
89	_av, _cs, _cb, _cb, _ha, _xx, _as, _as, _ba, _ba, _ba, _ba, _xx, _xx, _xx, _xx, // 0F80 - 0F8F 8
90	_sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0F90 - 0F9F 9
91	_sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, // 0FA0 - 0FAF a
92	_sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _sj, _xx, _sj, _sj, // 0FB0 - 0FBF b
93	_xx, _xx, _xx, _xx, _xx, _xx, _bs, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FC0 - 0FCF c
94	_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx,// 0FD0 - 0FDF d
95	_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FE0 - 0FEF e
96	_xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, _xx, // 0FF0 - 0FFF f
97	};
98
99
100	//
101	// Tibetan Class Tables
102	//
103
104	//
105	// The range of characters defined in the above table is defined here. For Tibetan 0F00 to 0FFF
106	// Even if the Tibetan range is bigger, most of the characters are not combinable, and therefore treated
107	// as _xx
108	static const TibetanClassTable tibetanClassTable = {0x0F00, 0x0FFF, tibetanCharClasses};
109
110
111	// Below we define how a character in the input string is either in the tibetanCharClasses table
112	// (in which case we get its type back), or an unknown object in which case we get _xx (CC_RESERVED) back
113	TibetanClassTable::CharClass TibetanClassTable::getCharClass(LEUnicode ch) const
114	{
115	if (ch < firstChar \|\| ch > lastChar) {
116	return CC_RESERVED;
117	}
118
119	return classTable[ch - firstChar];
120	}
121
122	const TibetanClassTable *TibetanClassTable::getTibetanClassTable()
123	{
124	return &tibetanClassTable;
125	}
126
127
128
129	class TibetanReorderingOutput : public UMemory {
130	private:
131	le_int32 fOutIndex;
132	LEUnicode *fOutChars;
133
134	LEGlyphStorage &fGlyphStorage;
135
136
137	public:
138	TibetanReorderingOutput(LEUnicode *outChars, LEGlyphStorage &glyphStorage)
139	: fOutIndex(0), fOutChars(outChars), fGlyphStorage(glyphStorage)
140	{
141	// nothing else to do...
142	}
143
144	~TibetanReorderingOutput()
145	{
146	// nothing to do here...
147	}
148
149	void writeChar(LEUnicode ch, le_uint32 charIndex, FeatureMask featureMask)
150	{
151	LEErrorCode success = LE_NO_ERROR;
152
153	fOutChars[fOutIndex] = ch;
154
155	fGlyphStorage.setCharIndex(fOutIndex, charIndex, success);
156	fGlyphStorage.setAuxData(fOutIndex, featureMask, success);
157
158	fOutIndex += 1;
159	}
160
161	le_int32 getOutputIndex()
162	{
163	return fOutIndex;
164	}
165	};
166
167
168	//TODO remove unused flags
169	#define ccmpFeatureTag LE_CCMP_FEATURE_TAG
170	#define blwfFeatureTag LE_BLWF_FEATURE_TAG
171	#define pstfFeatureTag LE_PSTF_FEATURE_TAG
172	#define presFeatureTag LE_PRES_FEATURE_TAG
173	#define blwsFeatureTag LE_BLWS_FEATURE_TAG
174	#define abvsFeatureTag LE_ABVS_FEATURE_TAG
175	#define pstsFeatureTag LE_PSTS_FEATURE_TAG
176
177	#define blwmFeatureTag LE_BLWM_FEATURE_TAG
178	#define abvmFeatureTag LE_ABVM_FEATURE_TAG
179	#define distFeatureTag LE_DIST_FEATURE_TAG
180
181	#define prefFeatureTag LE_PREF_FEATURE_TAG
182	#define abvfFeatureTag LE_ABVF_FEATURE_TAG
183	#define cligFeatureTag LE_CLIG_FEATURE_TAG
184	#define mkmkFeatureTag LE_MKMK_FEATURE_TAG
185
186	// Shaping features
187	#define prefFeatureMask 0x80000000UL
188	#define blwfFeatureMask 0x40000000UL
189	#define abvfFeatureMask 0x20000000UL
190	#define pstfFeatureMask 0x10000000UL
191	#define presFeatureMask 0x08000000UL
192	#define blwsFeatureMask 0x04000000UL
193	#define abvsFeatureMask 0x02000000UL
194	#define pstsFeatureMask 0x01000000UL
195	#define cligFeatureMask 0x00800000UL
196	#define ccmpFeatureMask 0x00040000UL
197
198	// Positioning features
199	#define distFeatureMask 0x00400000UL
200	#define blwmFeatureMask 0x00200000UL
201	#define abvmFeatureMask 0x00100000UL
202	#define mkmkFeatureMask 0x00080000UL
203
204	#define tagPref (ccmpFeatureMask \| prefFeatureMask \| presFeatureMask \| cligFeatureMask \| distFeatureMask)
205	#define tagAbvf (ccmpFeatureMask \| abvfFeatureMask \| abvsFeatureMask \| cligFeatureMask \| distFeatureMask \| abvmFeatureMask \| mkmkFeatureMask)
206	#define tagPstf (ccmpFeatureMask \| blwfFeatureMask \| blwsFeatureMask \| prefFeatureMask \| presFeatureMask \| pstfFeatureMask \| pstsFeatureMask \| cligFeatureMask \| distFeatureMask \| blwmFeatureMask)
207	#define tagBlwf (ccmpFeatureMask \| blwfFeatureMask \| blwsFeatureMask \| cligFeatureMask \| distFeatureMask \| blwmFeatureMask \| mkmkFeatureMask)
208	#define tagDefault (ccmpFeatureMask \| prefFeatureMask \| blwfFeatureMask \| presFeatureMask \| blwsFeatureMask \| cligFeatureMask \| distFeatureMask \| abvmFeatureMask \| blwmFeatureMask \| mkmkFeatureMask)
209
210
211
212	// These are in the order in which the features need to be applied
213	// for correct processing
214	static const FeatureMap featureMap[] =
215	{
216	// Shaping features
217	{ccmpFeatureTag, ccmpFeatureMask},
218	{prefFeatureTag, prefFeatureMask},
219	{blwfFeatureTag, blwfFeatureMask},
220	{abvfFeatureTag, abvfFeatureMask},
221	{pstfFeatureTag, pstfFeatureMask},
222	{presFeatureTag, presFeatureMask},
223	{blwsFeatureTag, blwsFeatureMask},
224	{abvsFeatureTag, abvsFeatureMask},
225	{pstsFeatureTag, pstsFeatureMask},
226	{cligFeatureTag, cligFeatureMask},
227
228	// Positioning features
229	{distFeatureTag, distFeatureMask},
230	{blwmFeatureTag, blwmFeatureMask},
231	{abvmFeatureTag, abvmFeatureMask},
232	{mkmkFeatureTag, mkmkFeatureMask},
233	};
234
235	static const le_int32 featureMapCount = LE_ARRAY_SIZE(featureMap);
236
237	// The stateTable is used to calculate the end (the length) of a well
238	// formed Tibetan Syllable.
239	//
240	// Each horizontal line is ordered exactly the same way as the values in TibetanClassTable
241	// CharClassValues in TibetanReordering.h This coincidence of values allows the
242	// follow up of the table.
243	//
244	// Each line corresponds to a state, which does not necessarily need to be a type
245	// of component... for example, state 2 is a base, with is always a first character
246	// in the syllable, but the state could be produced a consonant of any type when
247	// it is the first character that is analysed (in ground state).
248	//
249	static const le_int8 tibetanStateTable[][TibetanClassTable::CC_COUNT] =
250	{
251
252
253	//Dzongkha state table
254	//xx ba sj tp ac cs ha bv av an cb vs as bs di pd bd
255	{ 1, 2, 4, 3, 8, 7, 9, 10, 14, 13, 17, 18, 19, 19, 20, 21, 21,}, // 0 - ground state
256	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 1 - exit state (or sign to the right of the syllable)
257	{-1, -1, 4, 3, 8, 7, 9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 2 - Base consonant
258	{-1, -1, 5, -1, 8, 7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 3 - Tsa phru after base
259	{-1, -1, 4, 6, 8, 7, 9, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 4 - Subjoined consonant after base
260	{-1, -1, 5, -1, 8, 7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 5 - Subjoined consonant after tsa phru
261	{-1, -1, -1, -1, 8, 7, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 6 - Tsa phru after subjoined consonant
262	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 7 - Pre Composed Sanskrit
263	{-1, -1, -1, -1, -1, -1, -1, 10, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 8 - A-chung
264	{-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, -1, 19, 19, -1, -1, -1,}, // 9 - Halanta
265	{-1, -1, -1, -1, -1, -1, -1, 11, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 10 - below vowel 1
266	{-1, -1, -1, -1, -1, -1, -1, 12, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 11 - below vowel 2
267	{-1, -1, -1, -1, -1, -1, -1, -1, 14, 13, 17, 18, 19, 19, -1, -1, -1,}, // 12 - below vowel 3
268	{-1, -1, -1, -1, -1, -1, -1, -1, 14, 17, 17, 18, 19, 19, -1, -1, -1,}, // 13 - Anusvara before vowel
269	{-1, -1, -1, -1, -1, -1, -1, -1, 15, 17, 17, 18, 19, 19, -1, -1, -1,}, // 14 - above vowel 1
270	{-1, -1, -1, -1, -1, -1, -1, -1, 16, 17, 17, 18, 19, 19, -1, -1, -1,}, // 15 - above vowel 2
271	{-1, -1, -1, -1, -1, -1, -1, -1, -1, 17, 17, 18, 19, 19, -1, -1, -1,}, // 16 - above vowel 3
272	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 18, 19, 19, -1, -1, -1,}, // 17 - Anusvara or Candrabindu after vowel
273	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 19, 19, -1, -1, -1,}, // 18 - Visarga
274	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 19 - strss mark
275	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 21, 21,}, // 20 - digit
276	{-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,}, // 21 - digit mark
277
278
279	};
280
281
282	const FeatureMap *TibetanReordering::getFeatureMap(le_int32 &count)
283	{
284	count = featureMapCount;
285
286	return featureMap;
287	}
288
289
290	// Given an input string of characters and a location in which to start looking
291	// calculate, using the state table, which one is the last character of the syllable
292	// that starts in the starting position.
293	le_int32 TibetanReordering::findSyllable(const TibetanClassTable classTable, const LEUnicode chars, le_int32 prev, le_int32 charCount)
294	{
295	le_int32 cursor = prev;
296	le_int8 state = 0;
297
298	while (cursor < charCount) {
299	TibetanClassTable::CharClass charClass = (classTable->getCharClass(chars[cursor]) & TibetanClassTable::CF_CLASS_MASK);
300
301	state = tibetanStateTable[state][charClass];
302
303	if (state < 0) {
304	break;
305	}
306
307	cursor += 1;
308	}
309
310	return cursor;
311	}
312
313
314	// This is the real reordering function as applied to the Tibetan language
315
316	le_int32 TibetanReordering::reorder(const LEUnicode *chars, le_int32 charCount, le_int32,
317	LEUnicode *outChars, LEGlyphStorage &glyphStorage)
318	{
319	const TibetanClassTable *classTable = TibetanClassTable::getTibetanClassTable();
320
321	TibetanReorderingOutput output(outChars, glyphStorage);
322	TibetanClassTable::CharClass charClass;
323	le_int32 i, prev = 0;
324
325	// This loop only exits when we reach the end of a run, which may contain
326	// several syllables.
327	while (prev < charCount) {
328	le_int32 syllable = findSyllable(classTable, chars, prev, charCount);
329
330	// shall we add a dotted circle?
331	// If in the position in which the base should be (first char in the string) there is
332	// a character that has the Dotted circle flag (a character that cannot be a base)
333	// then write a dotted circle
334	if (classTable->getCharClass(chars[prev]) & TibetanClassTable::CF_DOTTED_CIRCLE) {
335	output.writeChar(C_DOTTED_CIRCLE, prev, tagDefault);
336	}
337
338	// copy the rest to output, inverting the pre-number mark if present after a digit.
339	for (i = prev; i < syllable; i += 1) {
340	charClass = classTable->getCharClass(chars[i]);
341
342	if ((TibetanClassTable::CF_DIGIT & charClass)
343	&& ( classTable->getCharClass(chars[i+1]) & TibetanClassTable::CF_PREDIGIT))
344	{
345	output.writeChar(C_PRE_NUMBER_MARK, i, tagPref);
346	output.writeChar(chars[i], i+1 , tagPref);
347	i += 1;
348	} else {
349	switch (charClass & TibetanClassTable::CF_POS_MASK) {
350
351	// If the present character is a number, and the next character is a pre-number combining mark
352	// then the two characters are reordered
353
354	case TibetanClassTable::CF_POS_ABOVE :
355	output.writeChar(chars[i], i, tagAbvf);
356	break;
357
358	case TibetanClassTable::CF_POS_AFTER :
359	output.writeChar(chars[i], i, tagPstf);
360	break;
361
362	case TibetanClassTable::CF_POS_BELOW :
363	output.writeChar(chars[i], i, tagBlwf);
364	break;
365
366	default:
367	// default - any other characters
368	output.writeChar(chars[i], i, tagDefault);
369	break;
370	} // switch
371	} // if
372	} // for
373
374	prev = syllable; // move the pointer to the start of next syllable
375	}
376
377	return output.getOutputIndex();
378	}
379
380
381	U_NAMESPACE_END