+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
**********************************************************************
- * Copyright (C) 1999-2006, International Business Machines
+ * Copyright (C) 1999-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
#include "unicode/uchar.h"
#include "unicode/ustring.h"
#include "unicode/uniset.h"
+#include "unicode/utf16.h"
#include "cstring.h"
#include "funcrepl.h"
#include "hash.h"
#include "tridpars.h"
#include "uvector.h"
#include "hash.h"
+#include "patternprops.h"
#include "util.h"
#include "cmemory.h"
#include "uprops.h"
const UVector* variablesVector = 0,
const Hashtable* variableNames = 0);
+ virtual ~ParseData();
+
virtual const UnicodeString* lookup(const UnicodeString& s) const;
virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
const Hashtable* vNames) :
data(d), variablesVector(sets), variableNames(vNames) {}
+ParseData::~ParseData() {} // Empty out-of-line definition of the virtual destructor declared in the class.
+
/**
* Implement SymbolTable API.
*/
int32_t RuleHalf::parse(const UnicodeString& rule, int32_t pos, int32_t limit, UErrorCode& status) {
int32_t start = pos;
text.truncate(0);
- pos = parseSection(rule, pos, limit, text, ILLEGAL_TOP, FALSE, status);
+ pos = parseSection(rule, pos, limit, text, UnicodeString(TRUE, ILLEGAL_TOP, -1), FALSE, status);
if (cursorOffset > 0 && cursor != cursorOffsetPos) {
return syntaxError(U_MISPLACED_CURSOR_OFFSET, rule, start, status);
// Since all syntax characters are in the BMP, fetching
// 16-bit code units suffices here.
UChar c = rule.charAt(pos++);
- if (uprv_isRuleWhiteSpace(c)) {
+ if (PatternProps::isWhiteSpace(c)) {
// Ignore whitespace. Note that this is not Unicode
// spaces, but the characters PatternProps::isWhiteSpace()
// accepts -- a subset, representing whitespace likely to
// be seen in code.
int32_t segmentNumber = nextSegmentNumber++; // 1-based
// Parse the segment
- pos = parseSection(rule, pos, limit, buf, ILLEGAL_SEG, TRUE, status);
+ pos = parseSection(rule, pos, limit, buf, UnicodeString(TRUE, ILLEGAL_SEG, -1), TRUE, status);
// After parsing a segment, the relevant characters are
// in buf, starting at offset bufSegStart. Extract them
StringMatcher* m =
new StringMatcher(buf, bufSegStart, buf.length(),
segmentNumber, *parser.curData);
+ if (m == NULL) {
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
// Record and associate object and segment number
parser.setSegmentObject(segmentNumber, m, status);
int32_t bufSegStart = buf.length();
// Parse the segment
- pos = parseSection(rule, iref, limit, buf, ILLEGAL_FUNC, TRUE, status);
+ pos = parseSection(rule, iref, limit, buf, UnicodeString(TRUE, ILLEGAL_FUNC, -1), TRUE, status);
// After parsing a segment, the relevant characters are
// in buf, starting at offset bufSegStart.
buf.extractBetween(bufSegStart, buf.length(), output);
FunctionReplacer *r =
new FunctionReplacer(t, new StringReplacer(output, parser.curData));
+ if (r == NULL) {
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
// Replace the buffer contents with a stand-in
buf.truncate(bufSegStart);
UnicodeFunctor *m =
new StringMatcher(buf, qstart, qlimit, 0, *parser.curData);
+ if (m == NULL) {
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
int32_t min = 0;
int32_t max = Quantifier::MAX;
switch (c) {
// do nothing -- min, max already set
}
m = new Quantifier(m, min, max);
+ if (m == NULL) {
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
buf.truncate(qstart);
buf.append(parser.generateStandInFor(m, status));
}
UBool RuleHalf::isValidOutput(TransliteratorParser& transParser) {
for (int32_t i=0; i<text.length(); ) {
UChar32 c = text.char32At(i);
- i += UTF_CHAR_LENGTH(c);
+ i += U16_LENGTH(c);
if (!transParser.parseData->isReplacer(c)) {
return FALSE;
}
UBool RuleHalf::isValidInput(TransliteratorParser& transParser) {
for (int32_t i=0; i<text.length(); ) {
UChar32 c = text.char32At(i);
- i += UTF_CHAR_LENGTH(c);
+ i += U16_LENGTH(c);
if (!transParser.parseData->isMatcher(c)) {
return FALSE;
}
variablesVector(statusReturn),
segmentObjects(statusReturn)
{
- idBlockVector.setDeleter(uhash_deleteUnicodeString);
+ idBlockVector.setDeleter(uprv_deleteUObject);
curData = NULL;
compoundFilter = NULL;
parseData = NULL;
- variableNames.setValueDeleter(uhash_deleteUnicodeString);
+ variableNames.setValueDeleter(uprv_deleteUObject);
}
/**
*/
void TransliteratorParser::parseRules(const UnicodeString& rule,
UTransDirection theDirection,
- UErrorCode& status) {
+ UErrorCode& status)
+{
// Clear error struct
+ uprv_memset(&parseError, 0, sizeof(parseError));
parseError.line = parseError.offset = -1;
- parseError.preContext[0] = parseError.postContext[0] = (UChar)0;
UBool parsingIDs = TRUE;
int32_t ruleCount = 0;
dotStandIn = (UChar) -1;
+ UnicodeString *tempstr = NULL; // used for memory allocation error checking
UnicodeString str; // scratch
UnicodeString idBlockResult;
int32_t pos = 0;
while (pos < limit && U_SUCCESS(status)) {
UChar c = rule.charAt(pos++);
- if (uprv_isRuleWhiteSpace(c)) {
+ if (PatternProps::isWhiteSpace(c)) {
// Ignore leading whitespace.
continue;
}
rule.compare(pos, ID_TOKEN_LEN, ID_TOKEN) == 0) {
pos += ID_TOKEN_LEN;
c = rule.charAt(pos);
- while (uprv_isRuleWhiteSpace(c) && pos < limit) {
+ while (PatternProps::isWhiteSpace(c) && pos < limit) {
++pos;
c = rule.charAt(pos);
}
pos = p;
} else {
if (parsingIDs) {
+ tempstr = new UnicodeString(idBlockResult);
+ // NULL pointer check
+ if (tempstr == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
if (direction == UTRANS_FORWARD)
- idBlockVector.addElement(new UnicodeString(idBlockResult), status);
+ idBlockVector.addElement(tempstr, status);
else
- idBlockVector.insertElementAt(new UnicodeString(idBlockResult), 0, status);
+ idBlockVector.insertElementAt(tempstr, 0, status);
idBlockResult.remove();
parsingIDs = FALSE;
curData = new TransliterationRuleData(status);
+ // NULL pointer check
+ if (curData == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
parseData->data = curData;
// By default, rules use part of the private use area
}
if (parsingIDs && idBlockResult.length() > 0) {
+ tempstr = new UnicodeString(idBlockResult);
+ // NULL pointer check
+ if (tempstr == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
if (direction == UTRANS_FORWARD)
- idBlockVector.addElement(new UnicodeString(idBlockResult), status);
+ idBlockVector.addElement(tempstr, status);
else
- idBlockVector.insertElementAt(new UnicodeString(idBlockResult), 0, status);
+ idBlockVector.insertElementAt(tempstr, 0, status);
}
else if (!parsingIDs && curData != NULL) {
if (direction == UTRANS_FORWARD)
data->variables = 0;
} else {
data->variables = (UnicodeFunctor**)uprv_malloc(data->variablesLength * sizeof(UnicodeFunctor*));
+ // NULL pointer check
+ if (data->variables == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
data->variablesAreOwned = (i == 0);
}
for (int32_t j = 0; j < data->variablesLength; j++) {
data->variables[j] =
- ((UnicodeSet*)variablesVector.elementAt(j));
+ static_cast<UnicodeFunctor *>(variablesVector.elementAt(j));
}
data->variableNames.removeAll();
- int32_t pos = -1;
+ int32_t pos = UHASH_FIRST;
const UHashElement* he = variableNames.nextElement(pos);
while (he != NULL) {
+ UnicodeString* tempus = (UnicodeString*)(((UnicodeString*)(he->value.pointer))->clone());
+ if (tempus == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
data->variableNames.put(*((UnicodeString*)(he->key.pointer)),
- ((UnicodeString*)(he->value.pointer))->clone(), status);
+ tempus, status);
he = variableNames.nextElement(pos);
}
}
*/
UBool TransliteratorParser::resemblesPragma(const UnicodeString& rule, int32_t pos, int32_t limit) {
// Reports whether ICU_Utility::parsePattern() finds the PRAGMA_USE pattern
// in rule for the given pos/limit; a non-negative result counts as a match.
// Must start with /use\s/i
// NOTE(review): the replacement line passes UnicodeString(TRUE, PRAGMA_USE, 4)
// instead of the raw PRAGMA_USE constant -- presumably ICU's read-only
// aliasing constructor over 4 code units; confirm against unistr.h and the
// definition of PRAGMA_USE.
- return ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_USE, NULL) >= 0;
+ return ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_USE, 4), NULL) >= 0;
}
/**
// use maximum backup 16;
// use nfd rules;
// use nfc rules;
- int p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_VARIABLE_RANGE, array);
+ int p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_VARIABLE_RANGE, -1), array);
if (p >= 0) {
setVariableRange(array[0], array[1], status);
return p;
}
- p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_MAXIMUM_BACKUP, array);
+ p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_MAXIMUM_BACKUP, -1), array);
if (p >= 0) {
pragmaMaximumBackup(array[0]);
return p;
}
- p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_NFD_RULES, NULL);
+ p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_NFD_RULES, -1), NULL);
if (p >= 0) {
pragmaNormalizeRules(UNORM_NFD);
return p;
}
- p = ICU_Utility::parsePattern(rule, pos, limit, PRAGMA_NFC_RULES, NULL);
+ p = ICU_Utility::parsePattern(rule, pos, limit, UnicodeString(TRUE, PRAGMA_NFC_RULES, -1), NULL);
if (p >= 0) {
pragmaNormalizeRules(UNORM_NFC);
return p;
}
// We allow anything on the right, including an empty string.
UnicodeString* value = new UnicodeString(right->text);
+ // NULL pointer check
+ if (value == NULL) {
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
variableNames.put(undefinedVariableName, value, status);
++variableLimit;
return pos;
UnicodeFunctor** segmentsArray = NULL;
if (segmentObjects.size() > 0) {
segmentsArray = (UnicodeFunctor **)uprv_malloc(segmentObjects.size() * sizeof(UnicodeFunctor *));
+ // Null pointer check
+ if (segmentsArray == NULL) {
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
segmentObjects.toArray((void**) segmentsArray);
}
+ TransliterationRule* temptr = new TransliterationRule(
+ left->text, left->ante, left->post,
+ right->text, right->cursor, right->cursorOffset,
+ segmentsArray,
+ segmentObjects.size(),
+ left->anchorStart, left->anchorEnd,
+ curData,
+ status);
+ //Null pointer check
+ if (temptr == NULL) {
+ uprv_free(segmentsArray);
+ return syntaxError(U_MEMORY_ALLOCATION_ERROR, rule, start, status);
+ }
- curData->ruleSet.addRule(new TransliterationRule(
- left->text, left->ante, left->post,
- right->text, right->cursor, right->cursorOffset,
- segmentsArray,
- segmentObjects.size(),
- left->anchorStart, left->anchorEnd,
- curData,
- status), status);
+ curData->ruleSet.addRule(temptr, status);
return pos;
}
ParsePosition& pos,
UErrorCode& status) {
UnicodeSet* set = new UnicodeSet(rule, pos, USET_IGNORE_SPACE, parseData, status);
+ // Null pointer check
+ if (set == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return (UChar)0x0000; // Return empty character with error.
+ }
set->compact();
return generateStandInFor(set, status);
}
// and stored before segment i; be careful with the
// vector handling here.
if (segmentObjects.size() < seg) {
- segmentObjects.setSize(seg);
+ segmentObjects.setSize(seg, status);
}
int32_t index = getSegmentStandin(seg, status) - curData->variablesBase;
if (segmentObjects.elementAt(seg-1) != NULL ||
*/
UChar TransliteratorParser::getDotStandIn(UErrorCode& status) {
// Returns dotStandIn, generating it first when it still holds (UChar) -1
// (the value parseRules assigns before parsing starts).
// If the UnicodeSet allocation fails, status is set to
// U_MEMORY_ALLOCATION_ERROR, 0 is returned and dotStandIn is left unchanged.
if (dotStandIn == (UChar) -1) {
- dotStandIn = generateStandInFor(new UnicodeSet(DOT_SET, status), status);
// Replacement: the DOT_SET UnicodeSet is allocated into a local first so a
// NULL result can be detected before generateStandInFor() is called.
+ UnicodeSet* tempus = new UnicodeSet(UnicodeString(TRUE, DOT_SET, -1), status);
+ // Null pointer check.
+ if (tempus == NULL) {
+ status = U_MEMORY_ALLOCATION_ERROR;
+ return (UChar)0x0000;
+ }
// NOTE(review): who owns tempus after generateStandInFor() returns is not
// visible in this hunk -- confirm before touching the error paths.
+ dotStandIn = generateStandInFor(tempus, status);
}
return dotStandIn;
}
/**
* Glue method to get around access restrictions in C++.
*/
// NOTE(review): the '+' lines below wrap this definition in a block comment,
// so the patch disables the method rather than deleting it. Its body only
// forwarded id and canonID to Transliterator::createBasicInstance().
-Transliterator* TransliteratorParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) {
+/*Transliterator* TransliteratorParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) {
return Transliterator::createBasicInstance(id, canonID);
-}
+}*/
U_NAMESPACE_END
U_CAPI int32_t
utrans_stripRules(const UChar *source, int32_t sourceLen, UChar *target, UErrorCode *status) {
+ U_NAMESPACE_USE
+
//const UChar *sourceStart = source;
const UChar *targetStart = target;
const UChar *sourceLimit = source+sourceLen;
target--;
}
do {
+ if (source == sourceLimit) {
+ c = U_SENTINEL;
+ break;
+ }
c = *(source++);
}
while (c != CR && c != LF);
+ if (c < 0) {
+ break;
+ }
}
- else if (c == ESCAPE) {
+ else if (c == ESCAPE && source < sourceLimit) {
UChar32 c2 = *source;
if (c2 == CR || c2 == LF) {
/* A backslash at the end of a line. */
*status = U_PARSE_ERROR;
return 0;
}
- if (!uprv_isRuleWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) {
+ if (!PatternProps::isWhiteSpace(c2) && !u_iscntrl(c2) && !u_ispunct(c2)) {
/* It was escaped for a reason. Write what it was supposed to be. */
source+=5;
c = c2;