/*
**********************************************************************
-* Copyright (C) 1999-2003, International Business Machines
+* Copyright (C) 1999-2008, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
#if !UCONFIG_NO_TRANSLITERATION
#include "unicode/unifilt.h"
-#include "unicode/unifltlg.h"
#include "unicode/uniset.h"
#include "cpdtrans.h"
#include "uvector.h"
#include "cmemory.h"
// keep in sync with Transliterator
-static const UChar ID_SEP = 0x002D; /*-*/
+//static const UChar ID_SEP = 0x002D; /*-*/
static const UChar ID_DELIM = 0x003B; /*;*/
static const UChar NEWLINE = 10;
// Empty string
static const UChar EMPTY[] = {0}; //""
+static const UChar COLON_COLON[] = {0x3A, 0x3A, 0}; //"::"
U_NAMESPACE_BEGIN
-const char CompoundTransliterator::fgClassID=0;
+const UChar CompoundTransliterator::PASS_STRING[] = { 0x0025, 0x0050, 0x0061, 0x0073, 0x0073, 0 }; // "%Pass"
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompoundTransliterator)
/**
* Constructs a new compound transliterator given an array of
int32_t transliteratorCount,
UnicodeFilter* adoptedFilter) :
Transliterator(joinIDs(transliterators, transliteratorCount), adoptedFilter),
- trans(0), count(0), compoundRBTIndex(-1) {
+ trans(0), count(0), numAnonymousRBTs(0) {
setTransliterators(transliterators, transliteratorCount);
}
UParseError& /*parseError*/,
UErrorCode& status) :
Transliterator(id, adoptedFilter),
- trans(0), compoundRBTIndex(-1) {
+ trans(0), numAnonymousRBTs(0) {
// TODO add code for parseError...currently unused, but
// later may be used by parsing code...
- init(id, direction, -1, 0, TRUE, status);
+ init(id, direction, TRUE, status);
}
CompoundTransliterator::CompoundTransliterator(const UnicodeString& id,
UParseError& /*parseError*/,
UErrorCode& status) :
Transliterator(id, 0), // set filter to 0 here!
- trans(0), compoundRBTIndex(-1) {
+ trans(0), numAnonymousRBTs(0) {
// TODO add code for parseError...currently unused, but
// later may be used by parsing code...
- init(id, UTRANS_FORWARD, -1, 0, TRUE, status);
+ init(id, UTRANS_FORWARD, TRUE, status);
+}
+
+
+/**
+ * Private constructor for use of TransliteratorAlias.
+ *
+ * Passes newID and adoptedFilter to the Transliterator base class, stores
+ * anonymousRBTs in numAnonymousRBTs, and then calls
+ * init(list, UTRANS_FORWARD, FALSE, status): forward direction, with the
+ * fixReverseID step disabled.
+ *
+ * Note: count is not set in the initializer list here; presumably
+ * init(list, ...) assigns it -- TODO confirm.
+ *
+ * @param newID          ID handed to the Transliterator base constructor
+ * @param list           vector forwarded to init(); presumably holds the
+ *                       component transliterators -- TODO confirm
+ * @param adoptedFilter  filter handed to the base constructor; judging by
+ *                       its name ownership is transferred -- TODO confirm
+ * @param anonymousRBTs  initial value of numAnonymousRBTs
+ * @param parseError     currently unused (parameter name is commented out)
+ * @param status         error code forwarded to init()
+ */
+CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
+ UVector& list,
+ UnicodeFilter* adoptedFilter,
+ int32_t anonymousRBTs,
+ UParseError& /*parseError*/,
+ UErrorCode& status) :
+ Transliterator(newID, adoptedFilter),
+ trans(0), numAnonymousRBTs(anonymousRBTs)
+{
+ init(list, UTRANS_FORWARD, FALSE, status);
}
/**
UParseError& /*parseError*/,
UErrorCode& status) :
Transliterator(EMPTY, NULL),
- trans(0), compoundRBTIndex(-1)
+ trans(0), numAnonymousRBTs(0)
{
// TODO add code for parseError...currently unused, but
// later may be used by parsing code...
// assume caller will fixup ID
}
-/**
- * Private constructor for compound RBTs. Construct a compound
- * transliterator using the given idBlock, with the adoptedTrans
- * inserted at the idSplitPoint.
- */
-CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID,
- const UnicodeString& idBlock,
- int32_t idSplitPoint,
- Transliterator *adoptedTrans,
+CompoundTransliterator::CompoundTransliterator(UVector& list,
+ int32_t anonymousRBTs,
+ UParseError& /*parseError*/,
UErrorCode& status) :
- Transliterator(newID, 0),
- trans(0), compoundRBTIndex(-1)
+ Transliterator(EMPTY, NULL),
+ trans(0), numAnonymousRBTs(anonymousRBTs)
{
- init(idBlock, UTRANS_FORWARD, idSplitPoint, adoptedTrans, FALSE, status);
+ init(list, UTRANS_FORWARD, FALSE, status);
}
/**
*/
void CompoundTransliterator::init(const UnicodeString& id,
UTransDirection direction,
- int32_t idSplitPoint,
- Transliterator *adoptedSplitTrans,
UBool fixReverseID,
UErrorCode& status) {
// assert(trans == 0);
if (U_FAILURE(status)) {
- delete adoptedSplitTrans;
return;
}
if (!TransliteratorIDParser::parseCompoundID(id, direction,
regenID, list, compoundFilter)) {
status = U_INVALID_ID;
- delete adoptedSplitTrans;
delete compoundFilter;
return;
}
- compoundRBTIndex = TransliteratorIDParser::instantiateList(list, adoptedSplitTrans, idSplitPoint, status);
+ TransliteratorIDParser::instantiateList(list, status);
init(list, direction, fixReverseID, status);
trans[i] = (Transliterator*) list.elementAt(j);
}
- // Fix compoundRBTIndex for REVERSE transliterators
- if (compoundRBTIndex >= 0 && direction == UTRANS_REVERSE) {
- compoundRBTIndex = count - 1 - compoundRBTIndex;
- }
-
// If the direction is UTRANS_REVERSE then we may need to fix the
// ID.
if (direction == UTRANS_REVERSE && fixReverseID) {
* Copy constructor.
*/
CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) :
- Transliterator(t), trans(0), count(0), compoundRBTIndex(-1) {
+ Transliterator(t), trans(0), count(0), numAnonymousRBTs(0) {
+ // Members start out empty (0 anonymous RBTs, matching the other
+ // constructors; -1 was the old compoundRBTIndex sentinel and is not a
+ // meaningful count). The assignment below performs the actual copy.
*this = t;
}
* Assignment operator.
*/
CompoundTransliterator& CompoundTransliterator::operator=(
- const CompoundTransliterator& t) {
+ const CompoundTransliterator& t)
+{
+ // Self-assignment must be a no-op: the code below deletes this object's
+ // children before cloning t's, which would clone already-deleted objects.
+ if (this == &t) {
+ return *this;
+ }
Transliterator::operator=(t);
- int32_t i;
- for (i=0; i<count; ++i) {
- delete trans[i];
- trans[i] = 0;
+ int32_t i = 0;
+ UBool failed = FALSE;
+ // Destroy the transliterators currently owned by this object.
+ if (trans != NULL) {
+ for (i=0; i<count; ++i) {
+ delete trans[i];
+ trans[i] = 0;
+ }
}
if (t.count > count) {
- uprv_free(trans);
+ // The existing array cannot hold t.count entries; replace it.
+ if (trans != NULL) {
+ uprv_free(trans);
+ }
trans = (Transliterator **)uprv_malloc(t.count * sizeof(Transliterator *));
}
count = t.count;
- for (i=0; i<count; ++i) {
- trans[i] = t.trans[i]->clone();
+ if (trans != NULL) {
+ // Deep-copy each component; stop at the first clone() that fails.
+ for (i=0; i<count; ++i) {
+ trans[i] = t.trans[i]->clone();
+ if (trans[i] == NULL) {
+ failed = TRUE;
+ break;
+ }
+ }
+ } else {
+ // There is no array (allocation failed, or none ever existed), so
+ // count must not claim any elements.
+ count = 0;
+ }
+
+ // If a clone failed, destroy the clones made so far. They are C++
+ // objects, so they are released with delete (uprv_free would skip their
+ // destructors). The object is left empty so that no NULL or
+ // uninitialized slot is reachable through count.
+ if (failed) {
+ int32_t n;
+ for (n = i-1; n >= 0; n--) {
+ delete trans[n];
+ trans[n] = NULL;
+ }
+ count = 0;
}
- compoundRBTIndex = t.compoundRBTIndex;
+ numAnonymousRBTs = t.numAnonymousRBTs;
return *this;
}
void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[],
int32_t transCount) {
Transliterator** a = (Transliterator **)uprv_malloc(transCount * sizeof(Transliterator *));
- for (int32_t i=0; i<transCount; ++i) {
+ // Allocation failure: leave the current transliterators untouched.
+ if (a == NULL) {
+ return;
+ }
+ int32_t i = 0;
+ UBool failed = FALSE;
+ // Clone every input transliterator; stop at the first clone() that fails.
+ for (i=0; i<transCount; ++i) {
a[i] = transliterators[i]->clone();
+ if (a[i] == NULL) {
+ failed = TRUE;
+ break;
+ }
+ }
+ // On failure (including a failure at index 0) destroy the clones made so
+ // far with delete -- they are C++ objects, so uprv_free would skip their
+ // destructors -- release the array itself, and keep the current state.
+ if (failed) {
+ int32_t n;
+ for (n = i-1; n >= 0; n--) {
+ delete a[n];
+ a[n] = NULL;
+ }
+ uprv_free(a);
+ return;
}
// Hand the fully populated array over; judging by its name,
// adoptTransliterators takes ownership of a -- TODO confirm.
adoptTransliterators(a, transCount);
}
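-// compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex,
-// we do call toRules() recursively.
+// numAnonymousRBTs >= 1. For anonymous RBTs (IDs starting with "%Pass")
+// and nested compound transliterators, we do call toRules() recursively.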
rulesSource.truncate(0);
- if (compoundRBTIndex >= 0 && getFilter() != NULL) {
+ if (numAnonymousRBTs >= 1 && getFilter() != NULL) {
// If we are a compound RBT and if we have a global
// filter, then emit it at the top.
UnicodeString pat;
- rulesSource.append("::").append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM);
+ rulesSource.append(COLON_COLON).append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM);
}
for (int32_t i=0; i<count; ++i) {
UnicodeString rule;
- if (i == compoundRBTIndex) {
+
+ // Anonymous RuleBasedTransliterators (inline rules and
+ // ::BEGIN/::END blocks) are given IDs that begin with
+ // "%Pass": use toRules() to write all the rules to the output
+ // (and insert "::Null;" if we have two in a row)
+ if (trans[i]->getID().startsWith(PASS_STRING)) {
trans[i]->toRules(rule, escapeUnprintable);
+ if (numAnonymousRBTs > 1 && i > 0 && trans[i - 1]->getID().startsWith(PASS_STRING))
+ rule = UNICODE_STRING_SIMPLE("::Null;") + rule;
+
+ // we also use toRules() on CompoundTransliterators (which we
+ // check for by looking for a semicolon in the ID)-- this gets
+ // the list of their child transliterators output in the right
+ // format
+ } else if (trans[i]->getID().indexOf(ID_DELIM) >= 0) {
+ trans[i]->toRules(rule, escapeUnprintable);
+
+ // for everything else, use Transliterator::toRules()
} else {
trans[i]->Transliterator::toRules(rule, escapeUnprintable);
}