X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/b75a7d8f3b4adbae880cab104ce2c6a50eee4db2..249c4c5ea9376c24572daf9c2effa7484a282f14:/icuSources/i18n/rbt_pars.cpp diff --git a/icuSources/i18n/rbt_pars.cpp b/icuSources/i18n/rbt_pars.cpp index 8f599198..6ed89b1f 100644 --- a/icuSources/i18n/rbt_pars.cpp +++ b/icuSources/i18n/rbt_pars.cpp @@ -1,12 +1,14 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html /* -********************************************************************** -* Copyright (C) 1999-2003, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 11/17/99 aliu Creation. -********************************************************************** -*/ + ********************************************************************** + * Copyright (C) 1999-2016, International Business Machines + * Corporation and others. All Rights Reserved. + ********************************************************************** + * Date Name Description + * 11/17/99 aliu Creation. + ********************************************************************** + */ #include "unicode/utypes.h" @@ -19,6 +21,7 @@ #include "unicode/uchar.h" #include "unicode/ustring.h" #include "unicode/uniset.h" +#include "unicode/utf16.h" #include "cstring.h" #include "funcrepl.h" #include "hash.h" @@ -29,12 +32,15 @@ #include "rbt_rule.h" #include "strmatch.h" #include "strrepl.h" -#include "symtable.h" +#include "unicode/symtable.h" #include "tridpars.h" #include "uvector.h" +#include "hash.h" +#include "patternprops.h" #include "util.h" #include "cmemory.h" #include "uprops.h" +#include "putilimp.h" // Operators #define VARIABLE_DEF_OP ((UChar)0x003D) /*=*/ @@ -107,6 +113,15 @@ static const UChar HALF_ENDERS[] = { // "=><;" static const int32_t ID_TOKEN_LEN = 2; static const UChar ID_TOKEN[] = { 0x3A, 0x3A }; // ':', ':' +/* +commented out until we do real ::BEGIN/::END functionality +static const int32_t BEGIN_TOKEN_LEN = 5; +static const UChar BEGIN_TOKEN[] = { 0x42, 0x45, 0x47, 0x49, 0x4e }; // 'BEGIN' + +static const int32_t END_TOKEN_LEN = 3; +static const UChar END_TOKEN[] = { 0x45, 0x4e, 0x44 }; // 'END' +*/ + U_NAMESPACE_BEGIN //---------------------------------------------------------------------- @@ -125,8 +140,13 @@ public: const UVector* variablesVector; // alias + const Hashtable* variableNames; // alias + ParseData(const TransliterationRuleData* data = 0, - const UVector* variablesVector = 0); + const UVector* variablesVector = 0, + const Hashtable* variableNames = 0); + + virtual ~ParseData(); virtual const UnicodeString* lookup(const UnicodeString& s) const; @@ -152,14 +172,17 @@ private: }; ParseData::ParseData(const TransliterationRuleData* d, - const UVector* sets) : - data(d), variablesVector(sets) {} + const UVector* sets, + const Hashtable* vNames) : + data(d), variablesVector(sets), variableNames(vNames) {} + +ParseData::~ParseData() {} /** * Implement SymbolTable API. */ const UnicodeString* ParseData::lookup(const UnicodeString& name) const { - return (const UnicodeString*) data->variableNames->get(name); + return (const UnicodeString*) variableNames->get(name); } /** @@ -264,8 +287,6 @@ public: UBool anchorStart; UBool anchorEnd; - - UErrorCode ec; /** * The segment number from 1..n of the next '(' we see @@ -281,12 +302,13 @@ public: RuleHalf(TransliteratorParser& parser); ~RuleHalf(); - int32_t parse(const UnicodeString& rule, int32_t pos, int32_t limit); + int32_t parse(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status); int32_t parseSection(const UnicodeString& rule, int32_t pos, int32_t limit, UnicodeString& buf, const UnicodeString& illegal, - UBool isSegment); + UBool isSegment, + UErrorCode& status); /** * Remove context. @@ -307,8 +329,9 @@ public: int syntaxError(UErrorCode code, const UnicodeString& rule, - int32_t start) { - return parser.syntaxError(code, rule, start); + int32_t start, + UErrorCode& status) { + return parser.syntaxError(code, rule, start, status); } private: @@ -318,7 +341,6 @@ private: }; RuleHalf::RuleHalf(TransliteratorParser& p) : - ec(U_ZERO_ERROR), parser(p) { cursor = -1; @@ -339,13 +361,13 @@ RuleHalf::~RuleHalf() { * @return the index after the terminating character, or * if limit was reached, limit */ -int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit) { +int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status) { int32_t start = pos; text.truncate(0); - pos = parseSection(rule, pos, limit, text, ILLEGAL_TOP, FALSE); + pos = parseSection(rule, pos, limit, text, UnicodeString(TRUE, ILLEGAL_TOP, -1), FALSE, status); if (cursorOffset > 0 && cursor != cursorOffsetPos) { - return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start); + return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start, status); } return pos; @@ -377,7 +399,7 @@ int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit) { int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t limit, UnicodeString& buf, const UnicodeString& illegal, - UBool isSegment) { + UBool isSegment, UErrorCode& status) { int32_t start = pos; ParsePosition pp; UnicodeString scratch; @@ -392,7 +414,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // Since all syntax characters are in the BMP, fetching // 16-bit code units suffices here. UChar c = rule.charAt(pos++); - if (uprv_isRuleWhiteSpace(c)) { + if (PatternProps::isWhiteSpace(c)) { // Ignore whitespace. Note that this is not Unicode // spaces, but Java spaces -- a subset, representing // whitespace likely to be seen in code. @@ -401,19 +423,19 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l if (u_strchr(HALF_ENDERS, c) != NULL) { if (isSegment) { // Unclosed segment - return syntaxError(U_UNCLOSED_SEGMENT, rule, start); + return syntaxError(U_UNCLOSED_SEGMENT, rule, start, status); } break; } if (anchorEnd) { // Text after a presumed end anchor is a syntax err - return syntaxError(U_MALFORMED_VARIABLE_REFERENCE, rule, start); + return syntaxError(U_MALFORMED_VARIABLE_REFERENCE, rule, start, status); } if (UnicodeSet::resemblesPattern(rule, pos-1)) { pp.setIndex(pos-1); // Backup to opening '[' - buf.append(parser.parseSet(rule, pp)); - if (U_FAILURE(parser.status)) { - return syntaxError(U_MALFORMED_SET, rule, start); + buf.append(parser.parseSet(rule, pp, status)); + if (U_FAILURE(status)) { + return syntaxError(U_MALFORMED_SET, rule, start, status); } pos = pp.getIndex(); continue; @@ -421,14 +443,14 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // Handle escapes if (c == ESCAPE) { if (pos == limit) { - return syntaxError(U_TRAILING_BACKSLASH, rule, start); + return syntaxError(U_TRAILING_BACKSLASH, rule, start, status); } UChar32 escaped = rule.unescapeAt(pos); // pos is already past '\\' if (escaped == (UChar32) -1) { - return syntaxError(U_MALFORMED_UNICODE_ESCAPE, rule, start); + return syntaxError(U_MALFORMED_UNICODE_ESCAPE, rule, start, status); } if (!parser.checkVariableRange(escaped)) { - return syntaxError(U_VARIABLE_RANGE_OVERLAP, rule, start); + return syntaxError(U_VARIABLE_RANGE_OVERLAP, rule, start, status); } buf.append(escaped); continue; @@ -449,7 +471,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l quoteStart = buf.length(); for (;;) { if (iq < 0) { - return syntaxError(U_UNTERMINATED_QUOTE, rule, start); + return syntaxError(U_UNTERMINATED_QUOTE, rule, start, status); } scratch.truncate(0); rule.extractBetween(pos, iq, scratch); @@ -467,7 +489,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l for (iq=quoteStart; iq= 0) { - syntaxError(U_ILLEGAL_CHARACTER, rule, start); + syntaxError(U_ILLEGAL_CHARACTER, rule, start, status); } switch (c) { @@ -492,7 +514,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l anchorStart = TRUE; } else { return syntaxError(U_MISPLACED_ANCHOR_START, - rule, start); + rule, start, status); } break; case SEGMENT_OPEN: @@ -507,7 +529,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l int32_t segmentNumber = nextSegmentNumber++; // 1-based // Parse the segment - pos = parseSection(rule, pos, limit, buf, ILLEGAL_SEG, TRUE); + pos = parseSection(rule, pos, limit, buf, UnicodeString(TRUE, ILLEGAL_SEG, -1), TRUE, status); // After parsing a segment, the relevant characters are // in buf, starting at offset bufSegStart. Extract them @@ -515,12 +537,15 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // standin for that matcher. StringMatcher* m = new StringMatcher(buf, bufSegStart, buf.length(), - segmentNumber, *parser.data); + segmentNumber, *parser.curData); + if (m == NULL) { + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); + } // Record and associate object and segment number - parser.setSegmentObject(segmentNumber, m); + parser.setSegmentObject(segmentNumber, m, status); buf.truncate(bufSegStart); - buf.append(parser.getSegmentStandin(segmentNumber)); + buf.append(parser.getSegmentStandin(segmentNumber, status)); } break; case FUNCTION: @@ -532,13 +557,13 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // The next character MUST be a segment open if (single == NULL || !ICU_Utility::parseChar(rule, iref, SEGMENT_OPEN)) { - return syntaxError(U_INVALID_FUNCTION, rule, start); + return syntaxError(U_INVALID_FUNCTION, rule, start, status); } Transliterator *t = single->createInstance(); delete single; if (t == NULL) { - return syntaxError(U_INVALID_FUNCTION, rule, start); + return syntaxError(U_INVALID_FUNCTION, rule, start, status); } // bufSegStart is the offset in buf to the first @@ -546,18 +571,21 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l int32_t bufSegStart = buf.length(); // Parse the segment - pos = parseSection(rule, iref, limit, buf, ILLEGAL_FUNC, TRUE); + pos = parseSection(rule, iref, limit, buf, UnicodeString(TRUE, ILLEGAL_FUNC, -1), TRUE, status); // After parsing a segment, the relevant characters are // in buf, starting at offset bufSegStart. UnicodeString output; buf.extractBetween(bufSegStart, buf.length(), output); FunctionReplacer *r = - new FunctionReplacer(t, new StringReplacer(output, parser.data)); + new FunctionReplacer(t, new StringReplacer(output, parser.curData)); + if (r == NULL) { + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); + } // Replace the buffer contents with a stand-in buf.truncate(bufSegStart); - buf.append(parser.generateStandInFor(r)); + buf.append(parser.generateStandInFor(r, status)); } break; case SymbolTable::SYMBOL_REF: @@ -580,9 +608,9 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l r = ICU_Utility::parseNumber(rule, pos, 10); if (r < 0) { return syntaxError(U_UNDEFINED_SEGMENT_REFERENCE, - rule, start); + rule, start, status); } - buf.append(parser.getSegmentStandin(r)); + buf.append(parser.getSegmentStandin(r, status)); } else { pp.setIndex(pos); UnicodeString name = parser.parseData-> @@ -602,13 +630,13 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // that case appendVariableDef() will append the // special placeholder char variableLimit-1. varStart = buf.length(); - parser.appendVariableDef(name, buf); + parser.appendVariableDef(name, buf, status); varLimit = buf.length(); } } break; case DOT: - buf.append(parser.getDotStandIn()); + buf.append(parser.getDotStandIn(status)); break; case KLEENE_STAR: case ONE_OR_MORE: @@ -622,7 +650,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l { if (isSegment && buf.length() == bufStart) { // The */+ immediately follows '(' - return syntaxError(U_MISPLACED_QUANTIFIER, rule, start); + return syntaxError(U_MISPLACED_QUANTIFIER, rule, start, status); } int32_t qstart, qlimit; @@ -644,7 +672,10 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l } UnicodeFunctor *m = - new StringMatcher(buf, qstart, qlimit, 0, *parser.data); + new StringMatcher(buf, qstart, qlimit, 0, *parser.curData); + if (m == NULL) { + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); + } int32_t min = 0; int32_t max = Quantifier::MAX; switch (c) { @@ -659,8 +690,11 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l // do nothing -- min, max already set } m = new Quantifier(m, min, max); + if (m == NULL) { + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); + } buf.truncate(qstart); - buf.append(parser.generateStandInFor(m)); + buf.append(parser.generateStandInFor(m, status)); } break; @@ -678,31 +712,31 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l //------------------------------------------------------ case CONTEXT_ANTE: if (ante >= 0) { - return syntaxError(U_MULTIPLE_ANTE_CONTEXTS, rule, start); + return syntaxError(U_MULTIPLE_ANTE_CONTEXTS, rule, start, status); } ante = buf.length(); break; case CONTEXT_POST: if (post >= 0) { - return syntaxError(U_MULTIPLE_POST_CONTEXTS, rule, start); + return syntaxError(U_MULTIPLE_POST_CONTEXTS, rule, start, status); } post = buf.length(); break; case CURSOR_POS: if (cursor >= 0) { - return syntaxError(U_MULTIPLE_CURSORS, rule, start); + return syntaxError(U_MULTIPLE_CURSORS, rule, start, status); } cursor = buf.length(); break; case CURSOR_OFFSET: if (cursorOffset < 0) { if (buf.length() > 0) { - return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start); + return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start, status); } --cursorOffset; } else if (cursorOffset > 0) { if (buf.length() != cursorOffsetPos || cursor >= 0) { - return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start); + return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start, status); } ++cursorOffset; } else { @@ -712,7 +746,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l cursorOffsetPos = buf.length(); cursorOffset = 1; } else { - return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start); + return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start, status); } } break; @@ -729,7 +763,7 @@ int32_t RuleHalf::parseSection(const UnicodeString& rule, int32_t pos, int32_t l !((c >= 0x0030/*'0'*/ && c <= 0x0039/*'9'*/) || (c >= 0x0041/*'A'*/ && c <= 0x005A/*'Z'*/) || (c >= 0x0061/*'a'*/ && c <= 0x007A/*'z'*/))) { - return syntaxError(U_UNQUOTED_SPECIAL, rule, start); + return syntaxError(U_UNQUOTED_SPECIAL, rule, start, status); } buf.append(c); break; @@ -762,7 +796,7 @@ void RuleHalf::removeContext() { UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) { for (int32_t i=0; iisReplacer(c)) { return FALSE; } @@ -777,7 +811,7 @@ UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) { UBool RuleHalf::isValidInput(TransliteratorParser& transParser) { for (int32_t i=0; iisMatcher(c)) { return FALSE; } @@ -792,23 +826,29 @@ UBool RuleHalf::isValidInput(TransliteratorParser& transParser) { /** * Constructor. */ -TransliteratorParser::TransliteratorParser() { - data = NULL; +TransliteratorParser::TransliteratorParser(UErrorCode &statusReturn) : +dataVector(statusReturn), +idBlockVector(statusReturn), +variablesVector(statusReturn), +segmentObjects(statusReturn) +{ + idBlockVector.setDeleter(uprv_deleteUObject); + curData = NULL; compoundFilter = NULL; parseData = NULL; - variablesVector = NULL; - segmentObjects = NULL; + variableNames.setValueDeleter(uprv_deleteUObject); } /** * Destructor. */ TransliteratorParser::~TransliteratorParser() { - delete data; + while (!dataVector.isEmpty()) + delete (TransliterationRuleData*)(dataVector.orphanElementAt(0)); delete compoundFilter; delete parseData; - delete variablesVector; - delete segmentObjects; + while (!variablesVector.isEmpty()) + delete (UnicodeFunctor*)variablesVector.orphanElementAt(0); } void @@ -817,9 +857,8 @@ TransliteratorParser::parse(const UnicodeString& rules, UParseError& pe, UErrorCode& ec) { if (U_SUCCESS(ec)) { - parseRules(rules, transDirection); + parseRules(rules, transDirection, ec); pe = parseError; - ec = status; } } @@ -832,15 +871,6 @@ UnicodeSet* TransliteratorParser::orphanCompoundFilter() { return f; } -/** - * Return the data object parsed by parse(). Caller owns result. - */ -TransliterationRuleData* TransliteratorParser::orphanData() { - TransliterationRuleData* d = data; - data = NULL; - return d; -} - //---------------------------------------------------------------------- // Private implementation //---------------------------------------------------------------------- @@ -854,55 +884,48 @@ TransliterationRuleData* TransliteratorParser::orphanData() { * rules */ void TransliteratorParser::parseRules(const UnicodeString& rule, - UTransDirection theDirection) { + UTransDirection theDirection, + UErrorCode& status) +{ // Clear error struct + uprv_memset(&parseError, 0, sizeof(parseError)); parseError.line = parseError.offset = -1; - parseError.preContext[0] = parseError.postContext[0] = (UChar)0; - status = U_ZERO_ERROR; - delete data; - data = new TransliterationRuleData(status); + UBool parsingIDs = TRUE; + int32_t ruleCount = 0; + + while (!dataVector.isEmpty()) { + delete (TransliterationRuleData*)(dataVector.orphanElementAt(0)); + } if (U_FAILURE(status)) { return; } + idBlockVector.removeAllElements(); + curData = NULL; direction = theDirection; ruleCount = 0; delete compoundFilter; compoundFilter = NULL; - if (variablesVector == NULL) { - variablesVector = new UVector(status); - } else { - variablesVector->removeAllElements(); + while (!variablesVector.isEmpty()) { + delete (UnicodeFunctor*)variablesVector.orphanElementAt(0); } - parseData = new ParseData(0, variablesVector); + variableNames.removeAll(); + parseData = new ParseData(0, &variablesVector, &variableNames); if (parseData == NULL) { status = U_MEMORY_ALLOCATION_ERROR; return; } - parseData->data = data; - // By default, rules use part of the private use area - // E000..F8FF for variables and other stand-ins. Currently - // the range F000..F8FF is typically sufficient. The 'use - // variable range' pragma allows rule sets to modify this. - setVariableRange(0xF000, 0xF8FF); - dotStandIn = (UChar) -1; + UnicodeString *tempstr = NULL; // used for memory allocation error checking UnicodeString str; // scratch - idBlock.truncate(0); - idSplitPoint = -1; + UnicodeString idBlockResult; int32_t pos = 0; int32_t limit = rule.length(); - // The mode marks whether we are in the header ::id block, the - // rule block, or the footer ::id block. - // mode == 0: start: rule->1, ::id->0 - // mode == 1: in rules: rule->1, ::id->2 - // mode == 2: in footer rule block: rule->ERROR, ::id->2 - int32_t mode = 0; // The compound filter offset is an index into idBlockResult. // If it is 0, then the compound filter occurred at the start, @@ -912,12 +935,9 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, compoundFilter = NULL; int32_t compoundFilterOffset = -1; - // The number of ::ID block entries we have parsed - int32_t idBlockCount = 0; - while (pos < limit && U_SUCCESS(status)) { UChar c = rule.charAt(pos++); - if (uprv_isRuleWhiteSpace(c)) { + if (PatternProps::isWhiteSpace(c)) { // Ignore leading whitespace. continue; } @@ -929,58 +949,68 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, } continue; // Either fall out or restart with next line } + + // skip empty rules + if (c == END_OF_RULE) + continue; + + // keep track of how many rules we've seen + ++ruleCount; + // We've found the start of a rule or ID. c is its first // character, and pos points past c. --pos; // Look for an ID token. Must have at least ID_TOKEN_LEN + 1 // chars left. if ((pos + ID_TOKEN_LEN + 1) <= limit && - rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) { + rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) { pos += ID_TOKEN_LEN; c = rule.charAt(pos); - while (uprv_isRuleWhiteSpace(c) && pos < limit) { + while (PatternProps::isWhiteSpace(c) && pos < limit) { ++pos; c = rule.charAt(pos); } - if (mode == 1) { - // We have just entered the footer ::ID block - mode = 2; - // In the forward direction add elements at the end. - // In the reverse direction add elements at the start. - idSplitPoint = idBlockCount; - } int32_t p = pos; + if (!parsingIDs) { + if (curData != NULL) { + if (direction == UTRANS_FORWARD) + dataVector.addElement(curData, status); + else + dataVector.insertElementAt(curData, 0, status); + curData = NULL; + } + parsingIDs = TRUE; + } + TransliteratorIDParser::SingleID* id = - TransliteratorIDParser::parseSingleID(rule, p, direction); + TransliteratorIDParser::parseSingleID(rule, p, direction, status); if (p != pos && ICU_Utility::parseChar(rule, p, END_OF_RULE)) { // Successful ::ID parse. - + if (direction == UTRANS_FORWARD) { - idBlock.append(id->canonID).append(END_OF_RULE); + idBlockResult.append(id->canonID).append(END_OF_RULE); } else { - idBlock.insert(0, END_OF_RULE); - idBlock.insert(0, id->canonID); + idBlockResult.insert(0, END_OF_RULE); + idBlockResult.insert(0, id->canonID); } - - ++idBlockCount; - + } else { // Couldn't parse an ID. Try to parse a global filter int32_t withParens = -1; - UnicodeSet* f = TransliteratorIDParser::parseGlobalFilter(rule, p, direction, withParens, &idBlock); + UnicodeSet* f = TransliteratorIDParser::parseGlobalFilter(rule, p, direction, withParens, NULL); if (f != NULL) { if (ICU_Utility::parseChar(rule, p, END_OF_RULE) && (direction == UTRANS_FORWARD) == (withParens == 0)) { if (compoundFilter != NULL) { // Multiple compound filters - syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rule, pos); + syntaxError(U_MULTIPLE_COMPOUND_FILTERS, rule, pos, status); delete f; } else { compoundFilter = f; - compoundFilterOffset = idBlockCount; + compoundFilterOffset = ruleCount; } } else { delete f; @@ -988,81 +1018,125 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, } else { // Invalid ::id // Can be parsed as neither an ID nor a global filter - syntaxError(U_INVALID_ID, rule, pos); + syntaxError(U_INVALID_ID, rule, pos, status); } } delete id; - pos = p; - } else if (resemblesPragma(rule, pos, limit)) { - int32_t ppp = parsePragma(rule, pos, limit); - if (ppp < 0) { - syntaxError(U_MALFORMED_PRAGMA, rule, pos); - } - pos = ppp; } else { - // Parse a rule - pos = parseRule(rule, pos, limit); - if (U_SUCCESS(status)) { - ++ruleCount; - if (mode == 2) { - // ::id in illegal position (because a rule - // occurred after the ::id footer block) - syntaxError(U_ILLEGAL_ARGUMENT_ERROR,rule,pos); + if (parsingIDs) { + tempstr = new UnicodeString(idBlockResult); + // NULL pointer check + if (tempstr == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (direction == UTRANS_FORWARD) + idBlockVector.addElement(tempstr, status); + else + idBlockVector.insertElementAt(tempstr, 0, status); + idBlockResult.remove(); + parsingIDs = FALSE; + curData = new TransliterationRuleData(status); + // NULL pointer check + if (curData == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; } - }else{ - syntaxError(status,rule,pos); + parseData->data = curData; + + // By default, rules use part of the private use area + // E000..F8FF for variables and other stand-ins. Currently + // the range F000..F8FF is typically sufficient. The 'use + // variable range' pragma allows rule sets to modify this. + setVariableRange(0xF000, 0xF8FF, status); + } + + if (resemblesPragma(rule, pos, limit)) { + int32_t ppp = parsePragma(rule, pos, limit, status); + if (ppp < 0) { + syntaxError(U_MALFORMED_PRAGMA, rule, pos, status); + } + pos = ppp; + // Parse a rule + } else { + pos = parseRule(rule, pos, limit, status); } - mode = 1; } } - - if (idSplitPoint < 0) { - idSplitPoint = idBlockCount; - } - - if (direction == UTRANS_REVERSE) { - idSplitPoint = idBlockCount - idSplitPoint; - } - // Convert the set vector to an array - data->variablesLength = variablesVector->size(); - if(data->variablesLength == 0) { - data->variables = 0; - } else { - data->variables = (UnicodeFunctor **)uprv_malloc(data->variablesLength * sizeof(UnicodeFunctor *)); + if (parsingIDs && idBlockResult.length() > 0) { + tempstr = new UnicodeString(idBlockResult); + // NULL pointer check + if (tempstr == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + if (direction == UTRANS_FORWARD) + idBlockVector.addElement(tempstr, status); + else + idBlockVector.insertElementAt(tempstr, 0, status); } - - // orphanElement removes the given element and shifts all other - // elements down. For performance (and code clarity) we work from - // the end back to index 0. - int32_t i; - for (i=data->variablesLength; i>0; ) { - --i; - data->variables[i] = - (UnicodeSet*) variablesVector->orphanElementAt(i); + else if (!parsingIDs && curData != NULL) { + if (direction == UTRANS_FORWARD) + dataVector.addElement(curData, status); + else + dataVector.insertElementAt(curData, 0, status); } - - // Index the rules + if (U_SUCCESS(status)) { + // Convert the set vector to an array + int32_t i, dataVectorSize = dataVector.size(); + for (i = 0; i < dataVectorSize; i++) { + TransliterationRuleData* data = (TransliterationRuleData*)dataVector.elementAt(i); + data->variablesLength = variablesVector.size(); + if (data->variablesLength == 0) { + data->variables = 0; + } else { + data->variables = (UnicodeFunctor**)uprv_malloc(data->variablesLength * sizeof(UnicodeFunctor*)); + // NULL pointer check + if (data->variables == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + data->variablesAreOwned = (i == 0); + } + + for (int32_t j = 0; j < data->variablesLength; j++) { + data->variables[j] = + static_cast(variablesVector.elementAt(j)); + } + + data->variableNames.removeAll(); + int32_t pos = UHASH_FIRST; + const UHashElement* he = variableNames.nextElement(pos); + while (he != NULL) { + UnicodeString* tempus = (UnicodeString*)(((UnicodeString*)(he->value.pointer))->clone()); + if (tempus == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return; + } + data->variableNames.put(*((UnicodeString*)(he->key.pointer)), + tempus, status); + he = variableNames.nextElement(pos); + } + } + variablesVector.removeAllElements(); // keeps them from getting deleted when we succeed + + // Index the rules if (compoundFilter != NULL) { - if ((direction == UTRANS_FORWARD && - compoundFilterOffset != 0) || - (direction == UTRANS_REVERSE && - compoundFilterOffset != idBlockCount)) { + if ((direction == UTRANS_FORWARD && compoundFilterOffset != 1) || + (direction == UTRANS_REVERSE && compoundFilterOffset != ruleCount)) { status = U_MISPLACED_COMPOUND_FILTER; } } - data->ruleSet.freeze(parseError,status); - - if (idSplitPoint < 0) { - idSplitPoint = idBlock.length(); + for (i = 0; i < dataVectorSize; i++) { + TransliterationRuleData* data = (TransliterationRuleData*)dataVector.elementAt(i); + data->ruleSet.freeze(parseError, status); } - - if (ruleCount == 0) { - delete data; - data = NULL; + if (idBlockVector.size() == 1 && ((UnicodeString*)idBlockVector.elementAt(0))->isEmpty()) { + idBlockVector.removeElementAt(0); } } } @@ -1070,14 +1144,17 @@ void TransliteratorParser::parseRules(const UnicodeString& rule, /** * Set the variable range to [start, end] (inclusive). */ -void TransliteratorParser::setVariableRange(int32_t start, int32_t end) { +void TransliteratorParser::setVariableRange(int32_t start, int32_t end, UErrorCode& status) { if (start > end || start < 0 || end > 0xFFFF) { status = U_MALFORMED_PRAGMA; return; } - data->variablesBase = variableNext = (UChar) start; // first private use - variableLimit = (UChar) (end + 1); + curData->variablesBase = (UChar) start; + if (dataVector.size() == 0) { + variableNext = (UChar) start; + variableLimit = (UChar) (end + 1); + } } /** @@ -1086,14 +1163,14 @@ void TransliteratorParser::setVariableRange(int32_t start, int32_t end) { * variable range does not overlap characters used in a rule. */ UBool TransliteratorParser::checkVariableRange(UChar32 ch) const { - return !(ch >= data->variablesBase && ch < variableLimit); + return !(ch >= curData->variablesBase && ch < variableLimit); } /** * Set the maximum backup to 'backup', in response to a pragma * statement. */ -void TransliteratorParser::pragmaMaximumBackup(int32_t backup) { +void TransliteratorParser::pragmaMaximumBackup(int32_t /*backup*/) { //TODO Finish } @@ -1101,7 +1178,7 @@ void TransliteratorParser::pragmaMaximumBackup(int32_t backup) { * Begin normalizing all rules using the given mode, in response * to a pragma statement. */ -void TransliteratorParser::pragmaNormalizeRules(UNormalizationMode mode) { +void TransliteratorParser::pragmaNormalizeRules(UNormalizationMode /*mode*/) { //TODO Finish } @@ -1123,7 +1200,7 @@ static const UChar PRAGMA_NFC_RULES[] = {0x7E,0x6E,0x66,0x63,0x20,0x72,0x75,0x6C */ UBool TransliteratorParser::resemblesPragma(const UnicodeString& rule, int32_t pos, int32_t limit) { // Must start with /use\s/i - return ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_USE, NULL) >= 0; + return ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_USE, 4), NULL) >= 0; } /** @@ -1135,7 +1212,7 @@ UBool TransliteratorParser::resemblesPragma(const UnicodeString& rule, int32_t p * @return the position index after the final ';' of the pragma, * or -1 on failure. */ -int32_t TransliteratorParser::parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit) { +int32_t TransliteratorParser::parsePragma(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status) { int32_t array[2]; // resemblesPragma() has already returned true, so we @@ -1148,25 +1225,25 @@ int32_t TransliteratorParser::parsePragma(const UnicodeString& rule, int32_t pos // use maximum backup 16; // use nfd rules; // use nfc rules; - int p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_VARIABLE_RANGE, array); + int p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_VARIABLE_RANGE, -1), array); if (p >= 0) { - setVariableRange(array[0], array[1]); + setVariableRange(array[0], array[1], status); return p; } - p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_MAXIMUM_BACKUP, array); + p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_MAXIMUM_BACKUP, -1), array); if (p >= 0) { pragmaMaximumBackup(array[0]); return p; } - p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_NFD_RULES, NULL); + p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_NFD_RULES, -1), NULL); if (p >= 0) { pragmaNormalizeRules(UNORM_NFD); return p; } - p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_NFC_RULES, NULL); + p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_NFC_RULES, -1), NULL); if (p >= 0) { pragmaNormalizeRules(UNORM_NFC); return p; @@ -1189,7 +1266,7 @@ int32_t TransliteratorParser::parsePragma(const UnicodeString& rule, int32_t pos * indicators. Once it does a lexical breakdown of the rule at pos, it * creates a rule object and adds it to our rule list. */ -int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, int32_t limit) { +int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status) { // Locate the left side, operator, and right side int32_t start = pos; UChar op = 0; @@ -1197,11 +1274,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, // Set up segments data segmentStandins.truncate(0); - if (segmentObjects == NULL) { - segmentObjects = new UVector(status); - } else { - segmentObjects->removeAllElements(); - } + segmentObjects.removeAllElements(); // Use pointers to automatics to make swapping possible. RuleHalf _left(*this), _right(*this); @@ -1209,13 +1282,13 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, RuleHalf* right = &_right; undefinedVariableName.remove(); - pos = left->parse(rule, pos, limit); + pos = left->parse(rule, pos, limit, status); if (U_FAILURE(status)) { return start; } if (pos == limit || u_strchr(gOPERATORS, (op = rule.charAt(--pos))) == NULL) { - return syntaxError(U_MISSING_OPERATOR, rule, start); + return syntaxError(U_MISSING_OPERATOR, rule, start, status); } ++pos; @@ -1239,7 +1312,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, break; } - pos = right->parse(rule, pos, limit); + pos = right->parse(rule, pos, limit, status); if (U_FAILURE(status)) { return start; } @@ -1249,7 +1322,7 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, ++pos; } else { // RuleHalf parser must have terminated at an operator - return syntaxError(U_UNQUOTED_SPECIAL, rule, start); + return syntaxError(U_UNQUOTED_SPECIAL, rule, start, status); } } @@ -1263,19 +1336,23 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, // defined). if (undefinedVariableName.length() == 0) { // "Missing '$' or duplicate definition" - return syntaxError(U_BAD_VARIABLE_DEFINITION, rule, start); + return syntaxError(U_BAD_VARIABLE_DEFINITION, rule, start, status); } if (left->text.length() != 1 || left->text.charAt(0) != variableLimit) { // "Malformed LHS" - return syntaxError(U_MALFORMED_VARIABLE_DEFINITION, rule, start); + return syntaxError(U_MALFORMED_VARIABLE_DEFINITION, rule, start, status); } if (left->anchorStart || left->anchorEnd || right->anchorStart || right->anchorEnd) { - return syntaxError(U_MALFORMED_VARIABLE_DEFINITION, rule, start); + return syntaxError(U_MALFORMED_VARIABLE_DEFINITION, rule, start, status); } // We allow anything on the right, including an empty string. UnicodeString* value = new UnicodeString(right->text); - data->variableNames->put(undefinedVariableName, value, status); + // NULL pointer check + if (value == NULL) { + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); + } + variableNames.put(undefinedVariableName, value, status); ++variableLimit; return pos; } @@ -1285,21 +1362,21 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, if (undefinedVariableName.length() != 0) { return syntaxError(// "Undefined variable $" + undefinedVariableName, U_UNDEFINED_VARIABLE, - rule, start); + rule, start, status); } // Verify segments - if (segmentStandins.length() > segmentObjects->size()) { - syntaxError(U_UNDEFINED_SEGMENT_REFERENCE, rule, start); + if (segmentStandins.length() > segmentObjects.size()) { + syntaxError(U_UNDEFINED_SEGMENT_REFERENCE, rule, start, status); } for (i=0; isize(); ++i) { - if (segmentObjects->elementAt(i) == NULL) { - syntaxError(U_INTERNAL_TRANSLITERATOR_ERROR, rule, start); // will never happen + for (i=0; iisValidInput(*this) || !right->isValidOutput(*this) || left->ante > left->post) { - return syntaxError(U_MALFORMED_RULE, rule, start); + return syntaxError(U_MALFORMED_RULE, rule, start, status); } // Flatten segment objects vector to an array UnicodeFunctor** segmentsArray = NULL; - if (segmentObjects->size() > 0) { - segmentsArray = (UnicodeFunctor **)uprv_malloc(segmentObjects->size() * sizeof(UnicodeFunctor *)); - segmentObjects->toArray((void**) segmentsArray); + if (segmentObjects.size() > 0) { + segmentsArray = (UnicodeFunctor **)uprv_malloc(segmentObjects.size() * sizeof(UnicodeFunctor *)); + // Null pointer check + if (segmentsArray == NULL) { + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); + } + segmentObjects.toArray((void**) segmentsArray); + } + TransliterationRule* temptr = new TransliterationRule( + left->text, left->ante, left->post, + right->text, right->cursor, right->cursorOffset, + segmentsArray, + segmentObjects.size(), + left->anchorStart, left->anchorEnd, + curData, + status); + //Null pointer check + if (temptr == NULL) { + uprv_free(segmentsArray); + return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status); } - data->ruleSet.addRule(new TransliterationRule( - left->text, left->ante, left->post, - right->text, right->cursor, right->cursorOffset, - segmentsArray, - segmentObjects->size(), - left->anchorStart, left->anchorEnd, - data, - status), status); + curData->ruleSet.addRule(temptr, status); return pos; } @@ -1384,8 +1471,10 @@ int32_t TransliteratorParser::parseRule(const UnicodeString& rule, int32_t pos, * @param start position of first character of current rule */ int32_t TransliteratorParser::syntaxError(UErrorCode parseErrorCode, - const UnicodeString& rule, - int32_t pos) { + const UnicodeString& rule, + int32_t pos, + UErrorCode& status) +{ parseError.offset = pos; parseError.line = 0 ; /* we are not using line numbers */ @@ -1416,24 +1505,30 @@ int32_t TransliteratorParser::syntaxError(UErrorCode parseErrorCode, * used to represent it. */ UChar TransliteratorParser::parseSet(const UnicodeString& rule, - ParsePosition& pos) { - UnicodeSet* set = new UnicodeSet(rule, pos, *parseData, status); + ParsePosition& pos, + UErrorCode& status) { + UnicodeSet* set = new UnicodeSet(rule, pos, USET_IGNORE_SPACE, parseData, status); + // Null pointer check + if (set == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return (UChar)0x0000; // Return empty character with error. + } set->compact(); - return generateStandInFor(set); + return generateStandInFor(set, status); } /** * Generate and return a stand-in for a new UnicodeFunctor. Store * the matcher (adopt it). */ -UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted) { +UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted, UErrorCode& status) { // assert(obj != null); // Look up previous stand-in, if any. This is a short list // (typical n is 0, 1, or 2); linear search is optimal. - for (int32_t i=0; isize(); ++i) { - if (variablesVector->elementAt(i) == adopted) { // [sic] pointer comparison - return (UChar) (data->variablesBase + i); + for (int32_t i=0; ivariablesBase + i); } } @@ -1442,16 +1537,16 @@ UChar TransliteratorParser::generateStandInFor(UnicodeFunctor* adopted) { status = U_VARIABLE_RANGE_EXHAUSTED; return 0; } - variablesVector->addElement(adopted, status); + variablesVector.addElement(adopted, status); return variableNext++; } /** * Return the standin for segment seg (1-based). */ -UChar TransliteratorParser::getSegmentStandin(int32_t seg) { +UChar TransliteratorParser::getSegmentStandin(int32_t seg, UErrorCode& status) { // Special character used to indicate an empty spot - UChar empty = data->variablesBase - 1; + UChar empty = curData->variablesBase - 1; while (segmentStandins.length() < seg) { segmentStandins.append(empty); } @@ -1465,7 +1560,7 @@ UChar TransliteratorParser::getSegmentStandin(int32_t seg) { // Set a placeholder in the master variables vector that will be // filled in later by setSegmentObject(). We know that we will get // called first because setSegmentObject() will call us. - variablesVector->addElement((void*) NULL, status); + variablesVector.addElement((void*) NULL, status); segmentStandins.setCharAt(seg-1, c); } return c; @@ -1474,32 +1569,38 @@ UChar TransliteratorParser::getSegmentStandin(int32_t seg) { /** * Set the object for segment seg (1-based). */ -void TransliteratorParser::setSegmentObject(int32_t seg, StringMatcher* adopted) { +void TransliteratorParser::setSegmentObject(int32_t seg, StringMatcher* adopted, UErrorCode& status) { // Since we call parseSection() recursively, nested // segments will result in segment i+1 getting parsed // and stored before segment i; be careful with the // vector handling here. - if (segmentObjects->size() < seg) { - segmentObjects->setSize(seg); + if (segmentObjects.size() < seg) { + segmentObjects.setSize(seg, status); } - int32_t index = getSegmentStandin(seg) - data->variablesBase; - if (segmentObjects->elementAt(seg-1) != NULL || - variablesVector->elementAt(index) != NULL) { + int32_t index = getSegmentStandin(seg, status) - curData->variablesBase; + if (segmentObjects.elementAt(seg-1) != NULL || + variablesVector.elementAt(index) != NULL) { // should never happen status = U_INTERNAL_TRANSLITERATOR_ERROR; return; } - segmentObjects->setElementAt(adopted, seg-1); - variablesVector->setElementAt(adopted, index); + segmentObjects.setElementAt(adopted, seg-1); + variablesVector.setElementAt(adopted, index); } /** * Return the stand-in for the dot set. It is allocated the first * time and reused thereafter. */ -UChar TransliteratorParser::getDotStandIn() { +UChar TransliteratorParser::getDotStandIn(UErrorCode& status) { if (dotStandIn == (UChar) -1) { - dotStandIn = generateStandInFor(new UnicodeSet(DOT_SET, status)); + UnicodeSet* tempus = new UnicodeSet(UnicodeString(TRUE, DOT_SET, -1), status); + // Null pointer check. + if (tempus == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + return (UChar)0x0000; + } + dotStandIn = generateStandInFor(tempus, status); } return dotStandIn; } @@ -1509,8 +1610,9 @@ UChar TransliteratorParser::getDotStandIn() { * UnicodeString. */ void TransliteratorParser::appendVariableDef(const UnicodeString& name, - UnicodeString& buf) { - const UnicodeString* s = (const UnicodeString*) data->variableNames->get(name); + UnicodeString& buf, + UErrorCode& status) { + const UnicodeString* s = (const UnicodeString*) variableNames.get(name); if (s == NULL) { // We allow one undefined variable so that variable definition // statements work. For the first undefined variable we return @@ -1538,10 +1640,108 @@ void TransliteratorParser::appendVariableDef(const UnicodeString& name, /** * Glue method to get around access restrictions in C++. */ -Transliterator* TransliteratorParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) { +/*Transliterator* TransliteratorParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) { return Transliterator::createBasicInstance(id, canonID); -} +}*/ U_NAMESPACE_END +U_CAPI int32_t +utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status) { + U_NAMESPACE_USE + + //const UChar *sourceStart = source; + const UChar *targetStart = target; + const UChar *sourceLimit = source+sourceLen; + UChar *targetLimit = target+sourceLen; + UChar32 c = 0; + UBool quoted = FALSE; + int32_t index; + + uprv_memset(target, 0, sourceLen*U_SIZEOF_UCHAR); + + /* read the rules into the buffer */ + while (source < sourceLimit) + { + index=0; + U16_NEXT_UNSAFE(source, index, c); + source+=index; + if(c == QUOTE) { + quoted = (UBool)!quoted; + } + else if (!quoted) { + if (c == RULE_COMMENT_CHAR) { + /* skip comments and all preceding spaces */ + while (targetStart < target && *(target - 1) == 0x0020) { + target--; + } + do { + if (source == sourceLimit) { + c = U_SENTINEL; + break; + } + c = *(source++); + } + while (c != CR && c != LF); + if (c < 0) { + break; + } + } + else if (c == ESCAPE && source < sourceLimit) { + UChar32 c2 = *source; + if (c2 == CR || c2 == LF) { + /* A backslash at the end of a line. */ + /* Since we're stripping lines, ignore the backslash. */ + source++; + continue; + } + if (c2 == 0x0075 && source+5 < sourceLimit) { /* \u seen. \U isn't unescaped. */ + int32_t escapeOffset = 0; + UnicodeString escapedStr(source, 5); + c2 = escapedStr.unescapeAt(escapeOffset); + + if (c2 == (UChar32)0xFFFFFFFF || escapeOffset == 0) + { + *status = U_PARSE_ERROR; + return 0; + } + if (!PatternProps::isWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) { + /* It was escaped for a reason. Write what it was suppose to be. */ + source+=5; + c = c2; + } + } + else if (c2 == QUOTE) { + /* \' seen. Make sure we don't do anything when we see it again. */ + quoted = (UBool)!quoted; + } + } + } + if (c == CR || c == LF) + { + /* ignore spaces carriage returns, and all leading spaces on the next line. + * and line feed unless in the form \uXXXX + */ + quoted = FALSE; + while (source < sourceLimit) { + c = *(source); + if (c != CR && c != LF && c != 0x0020) { + break; + } + source++; + } + continue; + } + + /* Append UChar * after dissembling if c > 0xffff*/ + index=0; + U16_APPEND_UNSAFE(target, index, c); + target+=index; + } + if (target < targetLimit) { + *target = 0; + } + return (int32_t)(target-targetStart); +} + #endif /* #if !UCONFIG_NO_TRANSLITERATION */