X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/73c04bcfe1096173b00431f0cdc742894b15eef0..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/tools/toolutil/xmlparser.cpp
diff --git a/icuSources/tools/toolutil/xmlparser.cpp b/icuSources/tools/toolutil/xmlparser.cpp
index 195fa303..a9650cc5 100644
--- a/icuSources/tools/toolutil/xmlparser.cpp
+++ b/icuSources/tools/toolutil/xmlparser.cpp
@@ -1,12 +1,14 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
-* Copyright (C) 2004-2006, International Business Machines
+* Copyright (C) 2004-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: xmlparser.cpp
-* encoding: US-ASCII
+* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
@@ -61,27 +63,32 @@ UXMLParser::UXMLParser(UErrorCode &status) :
// example: "<?xml version=1.0 encoding="utf-8" ?>
// This is a sloppy implementation - just look for the leading <?xml and the closing ?>
// allow for a possible leading BOM.
- mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>"), 0, status),
+ mXMLDecl(UnicodeString("(?s)\\uFEFF?<\\?xml.+?\\?>", -1, US_INV), 0, status),
// XML Comment production #15
// example: "
// note, does not detect an illegal "--" within comments
- mXMLComment(UnicodeString("(?s)"), 0, status),
+ mXMLComment(UnicodeString("(?s)", -1, US_INV), 0, status),
// XML Spaces
// production [3]
- mXMLSP(UnicodeString(XML_SPACES "+"), 0, status),
+ mXMLSP(UnicodeString(XML_SPACES "+", -1, US_INV), 0, status),
// XML Doctype decl production #28
// example "
+ // or "
// TODO: we don't actually parse the DOCTYPE or internal subsets.
// Some internal dtd subsets could confuse this simple-minded
- // attempt at skipping over them.
- mXMLDoctype(UnicodeString("(?s)"), 0, status),
+ // attempt at skipping over them, specifically, occurrences
+ // of closing square brackets. These could appear in comments,
+ // or in parameter entity declarations, for example.
+ mXMLDoctype(UnicodeString(
+ "(?s)|\\[.*?\\].*?>)", -1, US_INV
+ ), 0, status),
// XML PI production #16
// example "
- mXMLPI(UnicodeString("(?s)<\\?.+?\\?>"), 0, status),
+ mXMLPI(UnicodeString("(?s)<\\?.+?\\?>", -1, US_INV), 0, status),
// XML Element Start Productions #40, #41
// example
@@ -92,11 +99,11 @@ UXMLParser::UXMLParser(UErrorCode &status) :
XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = "
"(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"'
")*" // * for zero or more attributes.
- XML_SPACES "*?>"), 0, status), // match " >"
+ XML_SPACES "*?>", -1, US_INV), 0, status), // match " >"
// XML Element End production #42
// example
- mXMLElemEnd (UnicodeString("(" XML_NAME ")" XML_SPACES "*>"), 0, status),
+ mXMLElemEnd (UnicodeString("(" XML_NAME ")" XML_SPACES "*>", -1, US_INV), 0, status),
// XML Element Empty production #44
// example
@@ -105,11 +112,11 @@ UXMLParser::UXMLParser(UErrorCode &status) :
XML_SPACES "+" XML_NAME XML_SPACES "*=" XML_SPACES "*" // match "ATTR_NAME = "
"(?:(?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))" // match '"attribute value"'
")*" // * for zero or more attributes.
- XML_SPACES "*?/>"), 0, status), // match " />"
+ XML_SPACES "*?/>", -1, US_INV), 0, status), // match " />"
// XMLCharData. Everything but '<'. Note that & will be dealt with later.
- mXMLCharData(UnicodeString("(?s)[^<]*"), 0, status),
+ mXMLCharData(UnicodeString("(?s)[^<]*", -1, US_INV), 0, status),
// Attribute name = "value". XML Productions 10, 40/41
// Capture group 1 is name,
@@ -121,14 +128,14 @@ UXMLParser::UXMLParser(UErrorCode &status) :
// Here, we match a single attribute, and make its name and
// attribute value available to the parser code.
mAttrValue(UnicodeString(XML_SPACES "+(" XML_NAME ")" XML_SPACES "*=" XML_SPACES "*"
- "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))"), 0, status),
+ "((?:\\\'[^<\\\']*?\\\')|(?:\\\"[^<\\\"]*?\\\"))", -1, US_INV), 0, status),
- mAttrNormalizer(UnicodeString(XML_SPACES), 0, status),
+ mAttrNormalizer(UnicodeString(XML_SPACES, -1, US_INV), 0, status),
// Match any of the new-line sequences in content.
// All are changed to \u000a.
- mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028"), 0, status),
+ mNewLineNormalizer(UnicodeString("\\u000d\\u000a|\\u000d\\u0085|\\u000a|\\u000d|\\u0085|\\u2028", -1, US_INV), 0, status),
// & char references
// We will figure out what we've got based on which capture group has content.
@@ -202,7 +209,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
goto exit;
}
- buffer=src.getBuffer(bytesLength);
+ buffer=toUCharPtr(src.getBuffer(bytesLength));
if(buffer==NULL) {
// unexpected failure to reserve some string capacity
errorCode=U_MEMORY_ALLOCATION_ERROR;
@@ -271,7 +278,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
pb=bytes;
for(;;) {
length=src.length();
- buffer=src.getBuffer(capacity);
+ buffer=toUCharPtr(src.getBuffer(capacity));
if(buffer==NULL) {
// unexpected failure to reserve some string capacity
errorCode=U_MEMORY_ALLOCATION_ERROR;
@@ -306,7 +313,7 @@ UXMLParser::parseFile(const char *filename, UErrorCode &errorCode) {
// reached end of file, convert once more to flush the converter
flush=TRUE;
}
- };
+ }
exit:
ucnv_close(cnv);
@@ -558,7 +565,7 @@ UnicodeString
UXMLParser::scanContent(UErrorCode &status) {
UnicodeString result;
if (mXMLCharData.lookingAt(fPos, status)) {
- result = mXMLCharData.group(0, status);
+ result = mXMLCharData.group((int32_t)0, status);
// Normalize the new-lines. (Before char ref substitution)
mNewLineNormalizer.reset(result);
result = mNewLineNormalizer.replaceAll(fOneLF, status);
@@ -617,7 +624,7 @@ UXMLParser::replaceCharRefs(UnicodeString &s, UErrorCode &status) {
// An unrecognized &entity; Leave it alone.
// TODO: check that it really looks like an entity, and is not some
// random & in the text.
- replacement = mAmps.group(0, status);
+ replacement = mAmps.group((int32_t)0, status);
}
mAmps.appendReplacement(result, replacement, status);
}
@@ -710,8 +717,9 @@ UXMLElement::appendText(UnicodeString &text, UBool recurse) const {
int32_t i, count=fChildren.size();
for(i=0; igetDynamicClassID()==UnicodeString::getStaticClassID()) {
- text.append(*(const UnicodeString *)node);
+ const UnicodeString *s=dynamic_cast(node);
+ if(s!=NULL) {
+ text.append(*s);
} else if(recurse) /* must be a UXMLElement */ {
((const UXMLElement *)node)->appendText(text, recurse);
}
@@ -761,7 +769,7 @@ const UObject *
UXMLElement::getChild(int32_t i, UXMLNodeType &type) const {
if(0<=i && igetDynamicClassID()==UXMLElement::getStaticClassID()) {
+ if(dynamic_cast(node)!=NULL) {
type=UXML_NODE_TYPE_ELEMENT;
} else {
type=UXML_NODE_TYPE_STRING;
@@ -782,10 +790,9 @@ UXMLElement::nextChildElement(int32_t &i) const {
int32_t count=fChildren.size();
while(igetDynamicClassID()==UXMLElement::getStaticClassID()) {
- return (const UXMLElement *)node;
+ const UXMLElement *elem=dynamic_cast(node);
+ if(elem!=NULL) {
+ return elem;
}
}
return NULL;
@@ -804,8 +811,8 @@ UXMLElement::getChildElement(const UnicodeString &name) const {
int32_t i, count=fChildren.size();
for(i=0; igetDynamicClassID()==UXMLElement::getStaticClassID()) {
- const UXMLElement *elem=(const UXMLElement *)node;
+ const UXMLElement *elem=dynamic_cast(node);
+ if(elem!=NULL) {
if(p==elem->fName) {
return elem;
}