X-Git-Url: https://git.saurik.com/wxWidgets.git/blobdiff_plain/b2247ee93bd093d52417ea063720f4165d8a0da8..4b263e5ef358411a345dee9ffef3ba69c19be40e:/src/expat/lib/xmlparse.c diff --git a/src/expat/lib/xmlparse.c b/src/expat/lib/xmlparse.c index c55c768002..b14700eb75 100644 --- a/src/expat/lib/xmlparse.c +++ b/src/expat/lib/xmlparse.c @@ -4,39 +4,45 @@ #include #include /* memset(), memcpy() */ +#include +#include /* UINT_MAX */ +#include /* time() */ -#ifdef COMPILED_FROM_DSP +#define XML_BUILDING_EXPAT 1 +#ifdef COMPILED_FROM_DSP #include "winconfig.h" -#include "expat.h" - -#elif defined(MACOS_CLASSIC) +#elif defined(OS2_32) -#include "macconfig.h" +#include "os2config.h" #include "expat.h" -#else - -#include - -#ifdef __declspec -#define XMLPARSEAPI(type) type __cdecl -#endif +#elif defined(__MSDOS__) +#include "dosconfig.h" #include "expat.h" -#ifdef __declspec -#undef XMLPARSEAPI -#endif +#elif defined(MACOS_CLASSIC) +#include "macconfig.h" +#elif defined(__amigaos__) +#include "amigaconfig.h" +#elif defined(__WATCOMC__) +#include "watcomconfig.h" +#elif defined(HAVE_EXPAT_CONFIG_H) +#include "expat_config.h" #endif /* ndef COMPILED_FROM_DSP */ +#include "ascii.h" +#include "expat.h" + #ifdef XML_UNICODE #define XML_ENCODE_MAX XML_UTF16_ENCODE_MAX #define XmlConvert XmlUtf16Convert #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS #define XmlEncode XmlUtf16Encode -#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1)) +/* Using pointer subtraction to convert to integer type. */ +#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1)) typedef unsigned short ICHAR; #else #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX @@ -100,12 +106,37 @@ typedef struct { typedef struct { NAMED **v; + unsigned char power; size_t size; size_t used; - size_t usedLim; const XML_Memory_Handling_Suite *mem; } HASH_TABLE; +/* Basic character hash algorithm, taken from Python's string hash: + h = h * 1000003 ^ character, the constant being a prime number. + +*/ +#ifdef XML_UNICODE +#define CHAR_HASH(h, c) \ + (((h) * 0xF4243) ^ (unsigned short)(c)) +#else +#define CHAR_HASH(h, c) \ + (((h) * 0xF4243) ^ (unsigned char)(c)) +#endif + +/* For probing (after a collision) we need a step size relative prime + to the hash table size, which is a power of 2. We use double-hashing, + since we can calculate a second hash value cheaply by taking those bits + of the first hash value that were discarded (masked out) when the table + index was calculated: index = hash & mask, where mask = table->size - 1. + We limit the maximum step size to table->size / 4 (mask >> 2) and make + it odd, since odd numbers are always relative prime to a power of 2. +*/ +#define SECOND_HASH(hash, mask, power) \ + ((((hash) & ~(mask)) >> ((power) - 1)) & ((mask) >> 2)) +#define PROBE_STEP(hash, mask, power) \ + ((unsigned char)((SECOND_HASH(hash, mask, power)) | 1)) + typedef struct { NAMED **p; NAMED **end; @@ -114,6 +145,7 @@ typedef struct { #define INIT_TAG_BUF_SIZE 32 /* must be a multiple of sizeof(XML_Char) */ #define INIT_DATA_BUF_SIZE 1024 #define INIT_ATTS_SIZE 16 +#define INIT_ATTS_VERSION 0xFFFFFFFF #define INIT_BLOCK_SIZE 1024 #define INIT_BUFFER_SIZE 1024 @@ -169,7 +201,8 @@ typedef struct tag { typedef struct { const XML_Char *name; const XML_Char *textPtr; - int textLen; + int textLen; /* length in XML_Chars */ + int processed; /* # of processed bytes - when suspended */ const XML_Char *systemId; const XML_Char *base; const XML_Char *publicId; @@ -221,6 +254,12 @@ typedef struct { const XML_Char *value; } DEFAULT_ATTRIBUTE; +typedef struct { + unsigned long version; + unsigned long hash; + const XML_Char *uriName; +} NS_ATT; + typedef struct { const XML_Char *name; PREFIX *prefix; @@ -264,6 +303,8 @@ typedef struct open_internal_entity { const char *internalEventEndPtr; struct open_internal_entity *next; ENTITY *entity; + int startTagLevel; + XML_Bool betweenDecl; /* WFC: PE Between Declarations */ } OPEN_INTERNAL_ENTITY; typedef enum XML_Error PTRCALL Processor(XML_Parser parser, @@ -288,29 +329,33 @@ static Processor externalEntityInitProcessor; static Processor externalEntityInitProcessor2; static Processor externalEntityInitProcessor3; static Processor externalEntityContentProcessor; +static Processor internalEntityProcessor; static enum XML_Error handleUnknownEncoding(XML_Parser parser, const XML_Char *encodingName); static enum XML_Error processXmlDecl(XML_Parser parser, int isGeneralTextEntity, - const char *, const char *); + const char *s, const char *next); static enum XML_Error initializeEncoding(XML_Parser parser); static enum XML_Error doProlog(XML_Parser parser, const ENCODING *enc, const char *s, - const char *end, int tok, const char *next, const char **nextPtr); + const char *end, int tok, const char *next, const char **nextPtr, + XML_Bool haveMore); static enum XML_Error -processInternalParamEntity(XML_Parser parser, ENTITY *entity); +processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl); static enum XML_Error doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc, - const char *start, const char *end, const char **endPtr); + const char *start, const char *end, const char **endPtr, + XML_Bool haveMore); static enum XML_Error doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr, - const char *end, const char **nextPtr); + const char *end, const char **nextPtr, XML_Bool haveMore); #ifdef XML_DTD static enum XML_Error doIgnoreSection(XML_Parser parser, const ENCODING *, const char **startPtr, - const char *end, const char **nextPtr); + const char *end, const char **nextPtr, XML_Bool haveMore); #endif /* XML_DTD */ static enum XML_Error @@ -320,9 +365,8 @@ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr); static int -defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, - XML_Bool isCdata, XML_Bool isId, const XML_Char *dfltValue, - XML_Parser parser); +defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, + XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser); static enum XML_Error storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata, const char *, const char *, STRING_POOL *); @@ -359,12 +403,13 @@ static void dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms); static void dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms); static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); +dtdCopy(XML_Parser oldParser, + DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms); static int -copyEntityTable(HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); - +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *, STRING_POOL *, const HASH_TABLE *); static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize); +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize); static void FASTCALL hashTableInit(HASH_TABLE *, const XML_Memory_Handling_Suite *ms); static void FASTCALL hashTableClear(HASH_TABLE *); @@ -397,11 +442,15 @@ static ELEMENT_TYPE * getElementType(XML_Parser parser, const ENCODING *enc, const char *ptr, const char *end); +static unsigned long generate_hash_secret_salt(void); +static XML_Bool startParsing(XML_Parser parser); + static XML_Parser parserCreate(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep, DTD *dtd); + static void parserInit(XML_Parser parser, const XML_Char *encodingName); @@ -430,7 +479,7 @@ struct XML_ParserStruct { char *m_bufferEnd; /* allocated end of buffer */ const char *m_bufferLim; - long m_parseEndByteIndex; + XML_Index m_parseEndByteIndex; const char *m_parseEndPtr; XML_Char *m_dataBuf; XML_Char *m_dataBufEnd; @@ -466,7 +515,7 @@ struct XML_ParserStruct { void *m_unknownEncodingMem; void *m_unknownEncodingData; void *m_unknownEncodingHandlerData; - void (*m_unknownEncodingRelease)(void *); + void (XMLCALL *m_unknownEncodingRelease)(void *); PROLOG_STATE m_prologState; Processor *m_processor; enum XML_Error m_errorCode; @@ -474,6 +523,7 @@ struct XML_ParserStruct { const char *m_eventEndPtr; const char *m_positionPtr; OPEN_INTERNAL_ENTITY *m_openInternalEntities; + OPEN_INTERNAL_ENTITY *m_freeInternalEntities; XML_Bool m_defaultExpandInternalEntities; int m_tagLevel; ENTITY *m_declEntity; @@ -497,18 +547,26 @@ struct XML_ParserStruct { int m_nSpecifiedAtts; int m_idAttIndex; ATTRIBUTE *m_atts; + NS_ATT *m_nsAtts; + unsigned long m_nsAttsVersion; + unsigned char m_nsAttsPower; +#ifdef XML_ATTR_INFO + XML_AttrInfo *m_attInfo; +#endif POSITION m_position; STRING_POOL m_tempPool; STRING_POOL m_temp2Pool; char *m_groupConnector; - unsigned m_groupSize; + unsigned int m_groupSize; XML_Char m_namespaceSeparator; XML_Parser m_parentParser; + XML_ParsingStatus m_parsingStatus; #ifdef XML_DTD XML_Bool m_isParamEntity; XML_Bool m_useForeignDTD; enum XML_ParamEntityParsing m_paramEntityParsing; #endif + unsigned long m_hash_secret_salt; }; #define MALLOC(s) (parser->m_mem.malloc_fcn((s))) @@ -567,6 +625,7 @@ struct XML_ParserStruct { #define positionPtr (parser->m_positionPtr) #define position (parser->m_position) #define openInternalEntities (parser->m_openInternalEntities) +#define freeInternalEntities (parser->m_freeInternalEntities) #define defaultExpandInternalEntities \ (parser->m_defaultExpandInternalEntities) #define tagLevel (parser->m_tagLevel) @@ -599,36 +658,32 @@ struct XML_ParserStruct { #define attsSize (parser->m_attsSize) #define nSpecifiedAtts (parser->m_nSpecifiedAtts) #define idAttIndex (parser->m_idAttIndex) +#define nsAtts (parser->m_nsAtts) +#define nsAttsVersion (parser->m_nsAttsVersion) +#define nsAttsPower (parser->m_nsAttsPower) +#define attInfo (parser->m_attInfo) #define tempPool (parser->m_tempPool) #define temp2Pool (parser->m_temp2Pool) #define groupConnector (parser->m_groupConnector) #define groupSize (parser->m_groupSize) #define namespaceSeparator (parser->m_namespaceSeparator) #define parentParser (parser->m_parentParser) +#define ps_parsing (parser->m_parsingStatus.parsing) +#define ps_finalBuffer (parser->m_parsingStatus.finalBuffer) #ifdef XML_DTD #define isParamEntity (parser->m_isParamEntity) #define useForeignDTD (parser->m_useForeignDTD) #define paramEntityParsing (parser->m_paramEntityParsing) #endif /* XML_DTD */ +#define hash_secret_salt (parser->m_hash_secret_salt) -#define parsing \ - (parentParser \ - ? \ - (isParamEntity \ - ? \ - (processor != externalParEntInitProcessor) \ - : \ - (processor != externalEntityInitProcessor)) \ - : \ - (processor != prologInitProcessor)) - -XML_Parser +XML_Parser XMLCALL XML_ParserCreate(const XML_Char *encodingName) { return XML_ParserCreate_MM(encodingName, NULL, NULL); } -XML_Parser +XML_Parser XMLCALL XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) { XML_Char tmp[2]; @@ -637,28 +692,43 @@ XML_ParserCreateNS(const XML_Char *encodingName, XML_Char nsSep) } static const XML_Char implicitContext[] = { - 'x', 'm', 'l', '=', 'h', 't', 't', 'p', ':', '/', '/', - 'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/', - 'X', 'M', 'L', '/', '1', '9', '9', '8', '/', - 'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0' + ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p, + ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w, + ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g, + ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9, + ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e, + ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0' }; -XML_Parser +static unsigned long +generate_hash_secret_salt(void) +{ + unsigned int seed = time(NULL) % UINT_MAX; + srand(seed); + return rand(); +} + +static XML_Bool /* only valid for root parser */ +startParsing(XML_Parser parser) +{ + /* hash functions must be initialized before setContext() is called */ + if (hash_secret_salt == 0) + hash_secret_salt = generate_hash_secret_salt(); + if (ns) { + /* implicit context only set for root parser, since child + parsers (i.e. external entity parsers) will inherit it + */ + return setContext(parser, implicitContext); + } + return XML_TRUE; +} + +XML_Parser XMLCALL XML_ParserCreate_MM(const XML_Char *encodingName, const XML_Memory_Handling_Suite *memsuite, const XML_Char *nameSep) { - XML_Parser parser = parserCreate(encodingName, memsuite, nameSep, NULL); - if (parser != NULL && ns) { - /* implicit context only set for root parser, since child - parsers (i.e. external entity parsers) will inherit it - */ - if (!setContext(parser, implicitContext)) { - XML_ParserFree(parser); - return NULL; - } - } - return parser; + return parserCreate(encodingName, memsuite, nameSep, NULL); } static XML_Parser @@ -703,9 +773,20 @@ parserCreate(const XML_Char *encodingName, FREE(parser); return NULL; } +#ifdef XML_ATTR_INFO + attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo)); + if (attInfo == NULL) { + FREE(atts); + FREE(parser); + return NULL; + } +#endif dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char)); if (dataBuf == NULL) { FREE(atts); +#ifdef XML_ATTR_INFO + FREE(attInfo); +#endif FREE(parser); return NULL; } @@ -718,6 +799,9 @@ parserCreate(const XML_Char *encodingName, if (_dtd == NULL) { FREE(dataBuf); FREE(atts); +#ifdef XML_ATTR_INFO + FREE(attInfo); +#endif FREE(parser); return NULL; } @@ -725,6 +809,7 @@ parserCreate(const XML_Char *encodingName, freeBindingList = NULL; freeTagList = NULL; + freeInternalEntities = NULL; groupSize = 0; groupConnector = NULL; @@ -732,10 +817,14 @@ parserCreate(const XML_Char *encodingName, unknownEncodingHandler = NULL; unknownEncodingHandlerData = NULL; - namespaceSeparator = '!'; + namespaceSeparator = ASCII_EXCL; ns = XML_FALSE; ns_triplets = XML_FALSE; + nsAtts = NULL; + nsAttsVersion = 0; + nsAttsPower = 0; + poolInit(&tempPool, &(parser->m_mem)); poolInit(&temp2Pool, &(parser->m_mem)); parserInit(parser, encodingName); @@ -811,7 +900,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) eventPtr = NULL; eventEndPtr = NULL; positionPtr = NULL; - openInternalEntities = 0; + openInternalEntities = NULL; defaultExpandInternalEntities = XML_TRUE; tagLevel = 0; tagStack = NULL; @@ -821,11 +910,13 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) unknownEncodingRelease = NULL; unknownEncodingData = NULL; parentParser = NULL; + ps_parsing = XML_INITIALIZED; #ifdef XML_DTD isParamEntity = XML_FALSE; useForeignDTD = XML_FALSE; paramEntityParsing = XML_PARAM_ENTITY_PARSING_NEVER; #endif + hash_secret_salt = 0; } /* moves list of bindings to freeBindingList */ @@ -840,10 +931,11 @@ moveToFreeBindingList(XML_Parser parser, BINDING *bindings) } } -XML_Bool +XML_Bool XMLCALL XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) { TAG *tStk; + OPEN_INTERNAL_ENTITY *openEntityList; if (parentParser) return XML_FALSE; /* move tagStack to freeTagList */ @@ -856,26 +948,33 @@ XML_ParserReset(XML_Parser parser, const XML_Char *encodingName) tag->bindings = NULL; freeTagList = tag; } + /* move openInternalEntities to freeInternalEntities */ + openEntityList = openInternalEntities; + while (openEntityList) { + OPEN_INTERNAL_ENTITY *openEntity = openEntityList; + openEntityList = openEntity->next; + openEntity->next = freeInternalEntities; + freeInternalEntities = openEntity; + } moveToFreeBindingList(parser, inheritedBindings); - if (unknownEncodingMem) - FREE(unknownEncodingMem); + FREE(unknownEncodingMem); if (unknownEncodingRelease) unknownEncodingRelease(unknownEncodingData); poolClear(&tempPool); poolClear(&temp2Pool); parserInit(parser, encodingName); dtdReset(_dtd, &parser->m_mem); - return setContext(parser, implicitContext); + return XML_TRUE; } -enum XML_Status +enum XML_Status XMLCALL XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) { /* Block after XML_Parse()/XML_ParseBuffer() has been called. XXX There's no way for the caller to determine which of the XXX possible error cases caused the XML_STATUS_ERROR return. */ - if (parsing) + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return XML_STATUS_ERROR; if (encodingName == NULL) protocolEncodingName = NULL; @@ -887,7 +986,7 @@ XML_SetEncoding(XML_Parser parser, const XML_Char *encodingName) return XML_STATUS_OK; } -XML_Parser +XML_Parser XMLCALL XML_ExternalEntityParserCreate(XML_Parser oldParser, const XML_Char *context, const XML_Char *encodingName) @@ -934,6 +1033,12 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, int oldInEntityValue = prologState.inEntityValue; #endif XML_Bool oldns_triplets = ns_triplets; + /* Note that the new parser shares the same hash secret as the old + parser, so that dtdCopy and copyEntityTable can lookup values + from hash tables associated with either parser without us having + to worry which hash secrets each table has. + */ + unsigned long oldhash_secret_salt = hash_secret_salt; #ifdef XML_DTD if (!context) @@ -987,13 +1092,14 @@ XML_ExternalEntityParserCreate(XML_Parser oldParser, externalEntityRefHandlerArg = oldExternalEntityRefHandlerArg; defaultExpandInternalEntities = oldDefaultExpandInternalEntities; ns_triplets = oldns_triplets; + hash_secret_salt = oldhash_secret_salt; parentParser = oldParser; #ifdef XML_DTD paramEntityParsing = oldParamEntityParsing; prologState.inEntityValue = oldInEntityValue; if (context) { #endif /* XML_DTD */ - if (!dtdCopy(_dtd, oldDtd, &parser->m_mem) + if (!dtdCopy(oldParser, _dtd, oldDtd, &parser->m_mem) || !setContext(parser, context)) { XML_ParserFree(parser); return NULL; @@ -1030,23 +1136,44 @@ destroyBindings(BINDING *bindings, XML_Parser parser) } } -void +void XMLCALL XML_ParserFree(XML_Parser parser) { + TAG *tagList; + OPEN_INTERNAL_ENTITY *entityList; + if (parser == NULL) + return; + /* free tagStack and freeTagList */ + tagList = tagStack; for (;;) { TAG *p; - if (tagStack == NULL) { + if (tagList == NULL) { if (freeTagList == NULL) break; - tagStack = freeTagList; + tagList = freeTagList; freeTagList = NULL; } - p = tagStack; - tagStack = tagStack->parent; + p = tagList; + tagList = tagList->parent; FREE(p->buf); destroyBindings(p->bindings, parser); FREE(p); } + /* free openInternalEntities and freeInternalEntities */ + entityList = openInternalEntities; + for (;;) { + OPEN_INTERNAL_ENTITY *openEntity; + if (entityList == NULL) { + if (freeInternalEntities == NULL) + break; + entityList = freeInternalEntities; + freeInternalEntities = NULL; + } + openEntity = entityList; + entityList = entityList->next; + FREE(openEntity); + } + destroyBindings(freeBindingList, parser); destroyBindings(inheritedBindings, parser); poolDestroy(&tempPool); @@ -1061,30 +1188,31 @@ XML_ParserFree(XML_Parser parser) #endif /* XML_DTD */ dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem); FREE((void *)atts); - if (groupConnector) - FREE(groupConnector); - if (buffer) - FREE(buffer); +#ifdef XML_ATTR_INFO + FREE((void *)attInfo); +#endif + FREE(groupConnector); + FREE(buffer); FREE(dataBuf); - if (unknownEncodingMem) - FREE(unknownEncodingMem); + FREE(nsAtts); + FREE(unknownEncodingMem); if (unknownEncodingRelease) unknownEncodingRelease(unknownEncodingData); FREE(parser); } -void +void XMLCALL XML_UseParserAsHandlerArg(XML_Parser parser) { handlerArg = parser; } -enum XML_Error +enum XML_Error XMLCALL XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) { #ifdef XML_DTD /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parsing) + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return XML_ERROR_CANT_CHANGE_FEATURE_ONCE_PARSING; useForeignDTD = useDTD; return XML_ERROR_NONE; @@ -1093,16 +1221,16 @@ XML_UseForeignDTD(XML_Parser parser, XML_Bool useDTD) #endif } -void +void XMLCALL XML_SetReturnNSTriplet(XML_Parser parser, int do_nst) { /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parsing) + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return; ns_triplets = do_nst ? XML_TRUE : XML_FALSE; } -void +void XMLCALL XML_SetUserData(XML_Parser parser, void *p) { if (handlerArg == userData) @@ -1111,7 +1239,7 @@ XML_SetUserData(XML_Parser parser, void *p) userData = p; } -enum XML_Status +enum XML_Status XMLCALL XML_SetBase(XML_Parser parser, const XML_Char *p) { if (p) { @@ -1125,25 +1253,33 @@ XML_SetBase(XML_Parser parser, const XML_Char *p) return XML_STATUS_OK; } -const XML_Char * +const XML_Char * XMLCALL XML_GetBase(XML_Parser parser) { return curBase; } -int +int XMLCALL XML_GetSpecifiedAttributeCount(XML_Parser parser) { return nSpecifiedAtts; } -int +int XMLCALL XML_GetIdAttributeIndex(XML_Parser parser) { return idAttIndex; } -void +#ifdef XML_ATTR_INFO +const XML_AttrInfo * XMLCALL +XML_GetAttributeInfo(XML_Parser parser) +{ + return attInfo; +} +#endif + +void XMLCALL XML_SetElementHandler(XML_Parser parser, XML_StartElementHandler start, XML_EndElementHandler end) @@ -1152,40 +1288,40 @@ XML_SetElementHandler(XML_Parser parser, endElementHandler = end; } -void +void XMLCALL XML_SetStartElementHandler(XML_Parser parser, XML_StartElementHandler start) { startElementHandler = start; } -void +void XMLCALL XML_SetEndElementHandler(XML_Parser parser, XML_EndElementHandler end) { endElementHandler = end; } -void +void XMLCALL XML_SetCharacterDataHandler(XML_Parser parser, XML_CharacterDataHandler handler) { characterDataHandler = handler; } -void +void XMLCALL XML_SetProcessingInstructionHandler(XML_Parser parser, XML_ProcessingInstructionHandler handler) { processingInstructionHandler = handler; } -void +void XMLCALL XML_SetCommentHandler(XML_Parser parser, XML_CommentHandler handler) { commentHandler = handler; } -void +void XMLCALL XML_SetCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start, XML_EndCdataSectionHandler end) @@ -1194,19 +1330,19 @@ XML_SetCdataSectionHandler(XML_Parser parser, endCdataSectionHandler = end; } -void +void XMLCALL XML_SetStartCdataSectionHandler(XML_Parser parser, XML_StartCdataSectionHandler start) { startCdataSectionHandler = start; } -void +void XMLCALL XML_SetEndCdataSectionHandler(XML_Parser parser, XML_EndCdataSectionHandler end) { endCdataSectionHandler = end; } -void +void XMLCALL XML_SetDefaultHandler(XML_Parser parser, XML_DefaultHandler handler) { @@ -1214,7 +1350,7 @@ XML_SetDefaultHandler(XML_Parser parser, defaultExpandInternalEntities = XML_FALSE; } -void +void XMLCALL XML_SetDefaultHandlerExpand(XML_Parser parser, XML_DefaultHandler handler) { @@ -1222,7 +1358,7 @@ XML_SetDefaultHandlerExpand(XML_Parser parser, defaultExpandInternalEntities = XML_TRUE; } -void +void XMLCALL XML_SetDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start, XML_EndDoctypeDeclHandler end) @@ -1231,33 +1367,33 @@ XML_SetDoctypeDeclHandler(XML_Parser parser, endDoctypeDeclHandler = end; } -void +void XMLCALL XML_SetStartDoctypeDeclHandler(XML_Parser parser, XML_StartDoctypeDeclHandler start) { startDoctypeDeclHandler = start; } -void +void XMLCALL XML_SetEndDoctypeDeclHandler(XML_Parser parser, XML_EndDoctypeDeclHandler end) { endDoctypeDeclHandler = end; } -void +void XMLCALL XML_SetUnparsedEntityDeclHandler(XML_Parser parser, XML_UnparsedEntityDeclHandler handler) { unparsedEntityDeclHandler = handler; } -void +void XMLCALL XML_SetNotationDeclHandler(XML_Parser parser, XML_NotationDeclHandler handler) { notationDeclHandler = handler; } -void +void XMLCALL XML_SetNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start, XML_EndNamespaceDeclHandler end) @@ -1266,33 +1402,33 @@ XML_SetNamespaceDeclHandler(XML_Parser parser, endNamespaceDeclHandler = end; } -void +void XMLCALL XML_SetStartNamespaceDeclHandler(XML_Parser parser, XML_StartNamespaceDeclHandler start) { startNamespaceDeclHandler = start; } -void +void XMLCALL XML_SetEndNamespaceDeclHandler(XML_Parser parser, XML_EndNamespaceDeclHandler end) { endNamespaceDeclHandler = end; } -void +void XMLCALL XML_SetNotStandaloneHandler(XML_Parser parser, XML_NotStandaloneHandler handler) { notStandaloneHandler = handler; } -void +void XMLCALL XML_SetExternalEntityRefHandler(XML_Parser parser, XML_ExternalEntityRefHandler handler) { externalEntityRefHandler = handler; } -void +void XMLCALL XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) { if (arg) @@ -1301,14 +1437,14 @@ XML_SetExternalEntityRefHandlerArg(XML_Parser parser, void *arg) externalEntityRefHandlerArg = parser; } -void +void XMLCALL XML_SetSkippedEntityHandler(XML_Parser parser, XML_SkippedEntityHandler handler) { skippedEntityHandler = handler; } -void +void XMLCALL XML_SetUnknownEncodingHandler(XML_Parser parser, XML_UnknownEncodingHandler handler, void *data) @@ -1317,39 +1453,39 @@ XML_SetUnknownEncodingHandler(XML_Parser parser, unknownEncodingHandlerData = data; } -void +void XMLCALL XML_SetElementDeclHandler(XML_Parser parser, XML_ElementDeclHandler eldecl) { elementDeclHandler = eldecl; } -void +void XMLCALL XML_SetAttlistDeclHandler(XML_Parser parser, XML_AttlistDeclHandler attdecl) { attlistDeclHandler = attdecl; } -void +void XMLCALL XML_SetEntityDeclHandler(XML_Parser parser, XML_EntityDeclHandler handler) { entityDeclHandler = handler; } -void +void XMLCALL XML_SetXmlDeclHandler(XML_Parser parser, XML_XmlDeclHandler handler) { xmlDeclHandler = handler; } -int +int XMLCALL XML_SetParamEntityParsing(XML_Parser parser, enum XML_ParamEntityParsing peParsing) { /* block after XML_Parse()/XML_ParseBuffer() has been called */ - if (parsing) + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) return 0; #ifdef XML_DTD paramEntityParsing = peParsing; @@ -1359,16 +1495,63 @@ XML_SetParamEntityParsing(XML_Parser parser, #endif } -enum XML_Status +int XMLCALL +XML_SetHashSalt(XML_Parser parser, + unsigned long hash_salt) +{ + /* block after XML_Parse()/XML_ParseBuffer() has been called */ + if (ps_parsing == XML_PARSING || ps_parsing == XML_SUSPENDED) + return 0; + hash_secret_salt = hash_salt; + return 1; +} + +enum XML_Status XMLCALL XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + switch (ps_parsing) { + case XML_SUSPENDED: + errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parentParser == NULL && !startParsing(parser)) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + default: + ps_parsing = XML_PARSING; + } + if (len == 0) { + ps_finalBuffer = (XML_Bool)isFinal; if (!isFinal) return XML_STATUS_OK; positionPtr = bufferPtr; - errorCode = processor(parser, bufferPtr, parseEndPtr = bufferEnd, 0); - if (errorCode == XML_ERROR_NONE) - return XML_STATUS_OK; + parseEndPtr = bufferEnd; + + /* If data are left over from last buffer, and we now know that these + data are the final chunk of input, then we have to check them again + to detect errors based on that fact. + */ + errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + + if (errorCode == XML_ERROR_NONE) { + switch (ps_parsing) { + case XML_SUSPENDED: + XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); + positionPtr = bufferPtr; + return XML_STATUS_SUSPENDED; + case XML_INITIALIZED: + case XML_PARSING: + ps_parsing = XML_FINISHED; + /* fall through */ + default: + return XML_STATUS_OK; + } + } eventEndPtr = eventPtr; processor = errorProcessor; return XML_STATUS_ERROR; @@ -1377,24 +1560,36 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) else if (bufferPtr == bufferEnd) { const char *end; int nLeftOver; + enum XML_Error result; parseEndByteIndex += len; positionPtr = s; - if (isFinal) { - errorCode = processor(parser, s, parseEndPtr = s + len, 0); - if (errorCode == XML_ERROR_NONE) - return XML_STATUS_OK; - eventEndPtr = eventPtr; - processor = errorProcessor; - return XML_STATUS_ERROR; - } + ps_finalBuffer = (XML_Bool)isFinal; + errorCode = processor(parser, s, parseEndPtr = s + len, &end); + if (errorCode != XML_ERROR_NONE) { eventEndPtr = eventPtr; processor = errorProcessor; return XML_STATUS_ERROR; } + else { + switch (ps_parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (isFinal) { + ps_parsing = XML_FINISHED; + return XML_STATUS_OK; + } + /* fall through */ + default: + result = XML_STATUS_OK; + } + } + XmlUpdatePosition(encoding, positionPtr, end, &position); - positionPtr = end; nLeftOver = s + len - end; if (nLeftOver) { if (buffer == NULL || nLeftOver > bufferLim - buffer) { @@ -1404,23 +1599,23 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) ? (char *)MALLOC(len * 2) : (char *)REALLOC(buffer, len * 2)); if (temp == NULL) { - errorCode = XML_ERROR_NO_MEMORY; - return XML_STATUS_ERROR; - } - buffer = temp; - if (!buffer) { errorCode = XML_ERROR_NO_MEMORY; eventPtr = eventEndPtr = NULL; processor = errorProcessor; return XML_STATUS_ERROR; } + buffer = temp; bufferLim = buffer + len * 2; } memcpy(buffer, end, nLeftOver); - bufferPtr = buffer; - bufferEnd = buffer + nLeftOver; } - return XML_STATUS_OK; + bufferPtr = buffer; + bufferEnd = buffer + nLeftOver; + positionPtr = bufferPtr; + parseEndPtr = bufferEnd; + eventPtr = bufferPtr; + eventEndPtr = bufferPtr; + return result; } #endif /* not defined XML_CONTEXT_BYTES */ else { @@ -1434,37 +1629,80 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) } } -enum XML_Status +enum XML_Status XMLCALL XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { - const char *start = bufferPtr; + const char *start; + enum XML_Status result = XML_STATUS_OK; + + switch (ps_parsing) { + case XML_SUSPENDED: + errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + case XML_INITIALIZED: + if (parentParser == NULL && !startParsing(parser)) { + errorCode = XML_ERROR_NO_MEMORY; + return XML_STATUS_ERROR; + } + default: + ps_parsing = XML_PARSING; + } + + start = bufferPtr; positionPtr = start; bufferEnd += len; + parseEndPtr = bufferEnd; parseEndByteIndex += len; - errorCode = processor(parser, start, parseEndPtr = bufferEnd, - isFinal ? (const char **)NULL : &bufferPtr); - if (errorCode == XML_ERROR_NONE) { - if (!isFinal) { - XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); - positionPtr = bufferPtr; - } - return XML_STATUS_OK; - } - else { + ps_finalBuffer = (XML_Bool)isFinal; + + errorCode = processor(parser, start, parseEndPtr, &bufferPtr); + + if (errorCode != XML_ERROR_NONE) { eventEndPtr = eventPtr; processor = errorProcessor; return XML_STATUS_ERROR; } + else { + switch (ps_parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (isFinal) { + ps_parsing = XML_FINISHED; + return result; + } + default: ; /* should not happen */ + } + } + + XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); + positionPtr = bufferPtr; + return result; } -void * +void * XMLCALL XML_GetBuffer(XML_Parser parser, int len) { + switch (ps_parsing) { + case XML_SUSPENDED: + errorCode = XML_ERROR_SUSPENDED; + return NULL; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return NULL; + default: ; + } + if (len > bufferLim - bufferEnd) { /* FIXME avoid integer overflow */ - int neededSize = len + (bufferEnd - bufferPtr); + int neededSize = len + (int)(bufferEnd - bufferPtr); #ifdef XML_CONTEXT_BYTES - int keep = bufferPtr - buffer; + int keep = (int)(bufferPtr - buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; @@ -1473,7 +1711,7 @@ XML_GetBuffer(XML_Parser parser, int len) if (neededSize <= bufferLim - buffer) { #ifdef XML_CONTEXT_BYTES if (keep < bufferPtr - buffer) { - int offset = (bufferPtr - buffer) - keep; + int offset = (int)(bufferPtr - buffer) - keep; memmove(buffer, &buffer[offset], bufferEnd - bufferPtr + keep); bufferEnd -= offset; bufferPtr -= offset; @@ -1486,7 +1724,7 @@ XML_GetBuffer(XML_Parser parser, int len) } else { char *newBuf; - int bufferSize = bufferLim - bufferPtr; + int bufferSize = (int)(bufferLim - bufferPtr); if (bufferSize == 0) bufferSize = INIT_BUFFER_SIZE; do { @@ -1500,7 +1738,7 @@ XML_GetBuffer(XML_Parser parser, int len) bufferLim = newBuf + bufferSize; #ifdef XML_CONTEXT_BYTES if (bufferPtr) { - int keep = bufferPtr - buffer; + int keep = (int)(bufferPtr - buffer); if (keep > XML_CONTEXT_BYTES) keep = XML_CONTEXT_BYTES; memcpy(newBuf, &bufferPtr[-keep], bufferEnd - bufferPtr + keep); @@ -1522,17 +1760,94 @@ XML_GetBuffer(XML_Parser parser, int len) bufferPtr = buffer = newBuf; #endif /* not defined XML_CONTEXT_BYTES */ } + eventPtr = eventEndPtr = NULL; + positionPtr = NULL; } return bufferEnd; } -enum XML_Error +enum XML_Status XMLCALL +XML_StopParser(XML_Parser parser, XML_Bool resumable) +{ + switch (ps_parsing) { + case XML_SUSPENDED: + if (resumable) { + errorCode = XML_ERROR_SUSPENDED; + return XML_STATUS_ERROR; + } + ps_parsing = XML_FINISHED; + break; + case XML_FINISHED: + errorCode = XML_ERROR_FINISHED; + return XML_STATUS_ERROR; + default: + if (resumable) { +#ifdef XML_DTD + if (isParamEntity) { + errorCode = XML_ERROR_SUSPEND_PE; + return XML_STATUS_ERROR; + } +#endif + ps_parsing = XML_SUSPENDED; + } + else + ps_parsing = XML_FINISHED; + } + return XML_STATUS_OK; +} + +enum XML_Status XMLCALL +XML_ResumeParser(XML_Parser parser) +{ + enum XML_Status result = XML_STATUS_OK; + + if (ps_parsing != XML_SUSPENDED) { + errorCode = XML_ERROR_NOT_SUSPENDED; + return XML_STATUS_ERROR; + } + ps_parsing = XML_PARSING; + + errorCode = processor(parser, bufferPtr, parseEndPtr, &bufferPtr); + + if (errorCode != XML_ERROR_NONE) { + eventEndPtr = eventPtr; + processor = errorProcessor; + return XML_STATUS_ERROR; + } + else { + switch (ps_parsing) { + case XML_SUSPENDED: + result = XML_STATUS_SUSPENDED; + break; + case XML_INITIALIZED: + case XML_PARSING: + if (ps_finalBuffer) { + ps_parsing = XML_FINISHED; + return result; + } + default: ; + } + } + + XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position); + positionPtr = bufferPtr; + return result; +} + +void XMLCALL +XML_GetParsingStatus(XML_Parser parser, XML_ParsingStatus *status) +{ + assert(status != NULL); + *status = parser->m_parsingStatus; +} + +enum XML_Error XMLCALL XML_GetErrorCode(XML_Parser parser) { return errorCode; } -long +XML_Index XMLCALL XML_GetCurrentByteIndex(XML_Parser parser) { if (eventPtr) @@ -1540,72 +1855,72 @@ XML_GetCurrentByteIndex(XML_Parser parser) return -1; } -int +int XMLCALL XML_GetCurrentByteCount(XML_Parser parser) { if (eventEndPtr && eventPtr) - return eventEndPtr - eventPtr; + return (int)(eventEndPtr - eventPtr); return 0; } -const char * +const char * XMLCALL XML_GetInputContext(XML_Parser parser, int *offset, int *size) { #ifdef XML_CONTEXT_BYTES if (eventPtr && buffer) { - *offset = eventPtr - buffer; - *size = bufferEnd - buffer; + *offset = (int)(eventPtr - buffer); + *size = (int)(bufferEnd - buffer); return buffer; } #endif /* defined XML_CONTEXT_BYTES */ return (char *) 0; } -int +XML_Size XMLCALL XML_GetCurrentLineNumber(XML_Parser parser) { - if (eventPtr) { + if (eventPtr && eventPtr >= positionPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); positionPtr = eventPtr; } return position.lineNumber + 1; } -int +XML_Size XMLCALL XML_GetCurrentColumnNumber(XML_Parser parser) { - if (eventPtr) { + if (eventPtr && eventPtr >= positionPtr) { XmlUpdatePosition(encoding, positionPtr, eventPtr, &position); positionPtr = eventPtr; } return position.columnNumber; } -void +void XMLCALL XML_FreeContentModel(XML_Parser parser, XML_Content *model) { FREE(model); } -void * +void * XMLCALL XML_MemMalloc(XML_Parser parser, size_t size) { return MALLOC(size); } -void * +void * XMLCALL XML_MemRealloc(XML_Parser parser, void *ptr, size_t size) { return REALLOC(ptr, size); } -void +void XMLCALL XML_MemFree(XML_Parser parser, void *ptr) { FREE(ptr); } -void +void XMLCALL XML_DefaultCurrent(XML_Parser parser) { if (defaultHandler) { @@ -1619,10 +1934,10 @@ XML_DefaultCurrent(XML_Parser parser) } } -const XML_LChar * +const XML_LChar * XMLCALL XML_ErrorString(enum XML_Error code) { - static const XML_LChar *message[] = { + static const XML_LChar* const message[] = { 0, XML_L("out of memory"), XML_L("syntax error"), @@ -1640,7 +1955,7 @@ XML_ErrorString(enum XML_Error code) XML_L("reference to invalid character number"), XML_L("reference to binary entity"), XML_L("reference to external entity in attribute"), - XML_L("xml declaration not at start of external entity"), + XML_L("XML or text declaration not at start of entity"), XML_L("unknown encoding"), XML_L("encoding specified in XML declaration is incorrect"), XML_L("unclosed CDATA section"), @@ -1649,14 +1964,28 @@ XML_ErrorString(enum XML_Error code) XML_L("unexpected parser state - please send a bug report"), XML_L("entity declared in parameter entity"), XML_L("requested feature requires XML_DTD support in Expat"), - XML_L("cannot change setting once parsing has begun") + XML_L("cannot change setting once parsing has begun"), + XML_L("unbound prefix"), + XML_L("must not undeclare prefix"), + XML_L("incomplete markup in parameter entity"), + XML_L("XML declaration not well-formed"), + XML_L("text declaration not well-formed"), + XML_L("illegal character(s) in public id"), + XML_L("parser suspended"), + XML_L("parser not suspended"), + XML_L("parsing aborted"), + XML_L("parsing finished"), + XML_L("cannot suspend in external parameter entity"), + XML_L("reserved prefix (xml) must not be undeclared or bound to another namespace name"), + XML_L("reserved prefix (xmlns) must not be declared or undeclared"), + XML_L("prefix must not be bound to one of the reserved namespace names") }; if (code > 0 && code < sizeof(message)/sizeof(message[0])) return message[code]; return NULL; } -const XML_LChar * +const XML_LChar * XMLCALL XML_ExpatVersion(void) { /* V1 is used to string-ize the version number. However, it would @@ -1676,7 +2005,7 @@ XML_ExpatVersion(void) { #undef V2 } -XML_Expat_Version +XML_Expat_Version XMLCALL XML_ExpatVersionInfo(void) { XML_Expat_Version version; @@ -1688,33 +2017,42 @@ XML_ExpatVersionInfo(void) return version; } -const XML_Feature * +const XML_Feature * XMLCALL XML_GetFeatureList(void) { - static XML_Feature features[] = { - {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)")}, - {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)")}, + static const XML_Feature features[] = { + {XML_FEATURE_SIZEOF_XML_CHAR, XML_L("sizeof(XML_Char)"), + sizeof(XML_Char)}, + {XML_FEATURE_SIZEOF_XML_LCHAR, XML_L("sizeof(XML_LChar)"), + sizeof(XML_LChar)}, #ifdef XML_UNICODE - {XML_FEATURE_UNICODE, XML_L("XML_UNICODE")}, + {XML_FEATURE_UNICODE, XML_L("XML_UNICODE"), 0}, #endif #ifdef XML_UNICODE_WCHAR_T - {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T")}, + {XML_FEATURE_UNICODE_WCHAR_T, XML_L("XML_UNICODE_WCHAR_T"), 0}, #endif #ifdef XML_DTD - {XML_FEATURE_DTD, XML_L("XML_DTD")}, + {XML_FEATURE_DTD, XML_L("XML_DTD"), 0}, #endif #ifdef XML_CONTEXT_BYTES {XML_FEATURE_CONTEXT_BYTES, XML_L("XML_CONTEXT_BYTES"), XML_CONTEXT_BYTES}, #endif #ifdef XML_MIN_SIZE - {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE")}, + {XML_FEATURE_MIN_SIZE, XML_L("XML_MIN_SIZE"), 0}, +#endif +#ifdef XML_NS + {XML_FEATURE_NS, XML_L("XML_NS"), 0}, #endif - {XML_FEATURE_END, NULL} +#ifdef XML_LARGE_SIZE + {XML_FEATURE_LARGE_SIZE, XML_L("XML_LARGE_SIZE"), 0}, +#endif +#ifdef XML_ATTR_INFO + {XML_FEATURE_ATTR_INFO, XML_L("XML_ATTR_INFO"), 0}, +#endif + {XML_FEATURE_END, NULL, 0} }; - features[0].value = sizeof(XML_Char); - features[1].value = sizeof(XML_LChar); return features; } @@ -1774,12 +2112,12 @@ contentProcessor(XML_Parser parser, const char *end, const char **endPtr) { - enum XML_Error result = - doContent(parser, 0, encoding, start, end, endPtr); - if (result != XML_ERROR_NONE) - return result; - if (!storeRawNames(parser)) - return XML_ERROR_NO_MEMORY; + enum XML_Error result = doContent(parser, 0, encoding, start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result == XML_ERROR_NONE) { + if (!storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } return result; } @@ -1811,21 +2149,21 @@ externalEntityInitProcessor2(XML_Parser parser, doContent (by detecting XML_TOK_NONE) without processing any xml text declaration - causing the error XML_ERROR_MISPLACED_XML_PI in doContent. */ - if (next == end && endPtr) { + if (next == end && !ps_finalBuffer) { *endPtr = next; return XML_ERROR_NONE; } start = next; break; case XML_TOK_PARTIAL: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } @@ -1842,30 +2180,41 @@ externalEntityInitProcessor3(XML_Parser parser, const char *end, const char **endPtr) { + int tok; const char *next = start; /* XmlContentTok doesn't always set the last arg */ - int tok = XmlContentTok(encoding, start, end, &next); + eventPtr = start; + tok = XmlContentTok(encoding, start, end, &next); + eventEndPtr = next; + switch (tok) { case XML_TOK_XML_DECL: { - enum XML_Error result = processXmlDecl(parser, 1, start, next); + enum XML_Error result; + result = processXmlDecl(parser, 1, start, next); if (result != XML_ERROR_NONE) return result; - start = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *endPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + start = next; + } } break; case XML_TOK_PARTIAL: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (endPtr) { + if (!ps_finalBuffer) { *endPtr = start; return XML_ERROR_NONE; } - eventPtr = start; return XML_ERROR_PARTIAL_CHAR; } processor = externalEntityContentProcessor; @@ -1879,12 +2228,12 @@ externalEntityContentProcessor(XML_Parser parser, const char *end, const char **endPtr) { - enum XML_Error result = - doContent(parser, 1, encoding, start, end, endPtr); - if (result != XML_ERROR_NONE) - return result; - if (!storeRawNames(parser)) - return XML_ERROR_NO_MEMORY; + enum XML_Error result = doContent(parser, 1, encoding, start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result == XML_ERROR_NONE) { + if (!storeRawNames(parser)) + return XML_ERROR_NO_MEMORY; + } return result; } @@ -1894,9 +2243,12 @@ doContent(XML_Parser parser, const ENCODING *enc, const char *s, const char *end, - const char **nextPtr) + const char **nextPtr, + XML_Bool haveMore) { - DTD * const dtd = _dtd; /* save one level of indirection */ + /* save one level of indirection */ + DTD * const dtd = _dtd; + const char **eventPP; const char **eventEndPP; if (enc == encoding) { @@ -1908,13 +2260,14 @@ doContent(XML_Parser parser, eventEndPP = &(openInternalEntities->internalEventEndPtr); } *eventPP = s; + for (;;) { const char *next = s; /* XmlContentTok doesn't always set the last arg */ int tok = XmlContentTok(enc, s, end, &next); *eventEndPP = next; switch (tok) { case XML_TOK_TRAILING_CR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } @@ -1925,19 +2278,24 @@ doContent(XML_Parser parser, } else if (defaultHandler) reportDefault(parser, enc, s, end); + /* We are at the end of the final buffer, should we check for + XML_SUSPENDED, XML_FINISHED? + */ if (startTagLevel == 0) return XML_ERROR_NO_ELEMENTS; if (tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; + *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_NONE: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } if (startTagLevel > 0) { if (tagLevel != startTagLevel) return XML_ERROR_ASYNC_ENTITY; + *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_NO_ELEMENTS; @@ -1945,13 +2303,13 @@ doContent(XML_Parser parser, *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } @@ -1975,7 +2333,7 @@ doContent(XML_Parser parser, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&dtd->pool); /* First, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, @@ -2000,7 +2358,6 @@ doContent(XML_Parser parser, return XML_ERROR_BINARY_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; - OPEN_INTERNAL_ENTITY openEntity; if (!defaultExpandInternalEntities) { if (skippedEntityHandler) skippedEntityHandler(handlerArg, entity->name, 0); @@ -2008,21 +2365,8 @@ doContent(XML_Parser parser, reportDefault(parser, enc, s, next); break; } - entity->open = XML_TRUE; - openEntity.next = openInternalEntities; - openInternalEntities = &openEntity; - openEntity.entity = entity; - openEntity.internalEventPtr = NULL; - openEntity.internalEventEndPtr = NULL; - result = doContent(parser, - tagLevel, - internalEncoding, - (char *)entity->textPtr, - (char *)(entity->textPtr + entity->textLen), - 0); - entity->open = XML_FALSE; - openInternalEntities = openEntity.next; - if (result) + result = processInternalEntity(parser, entity, XML_FALSE); + if (result != XML_ERROR_NONE) return result; } else if (externalEntityRefHandler) { @@ -2032,7 +2376,7 @@ doContent(XML_Parser parser, entity->open = XML_FALSE; if (!context) return XML_ERROR_NO_MEMORY; - if (!externalEntityRefHandler((XML_Parser)externalEntityRefHandlerArg, + if (!externalEntityRefHandler(externalEntityRefHandlerArg, context, entity->base, entity->systemId, @@ -2084,12 +2428,12 @@ doContent(XML_Parser parser, XmlConvert(enc, &fromPtr, rawNameEnd, (ICHAR **)&toPtr, (ICHAR *)tag->bufEnd - 1); - convLen = toPtr - (XML_Char *)tag->buf; + convLen = (int)(toPtr - (XML_Char *)tag->buf); if (fromPtr == rawNameEnd) { tag->name.strLen = convLen; break; } - bufSize = (tag->bufEnd - tag->buf) << 1; + bufSize = (int)(tag->bufEnd - tag->buf) << 1; { char *temp = (char *)REALLOC(tag->buf, bufSize); if (temp == NULL) @@ -2258,15 +2602,17 @@ doContent(XML_Parser parser, #endif else if (defaultHandler) reportDefault(parser, enc, s, next); - result = doCdataSection(parser, enc, &next, end, nextPtr); - if (!next) { + result = doCdataSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (!next) { processor = cdataSectionProcessor; return result; } } break; case XML_TOK_TRAILING_RSQB: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } @@ -2275,15 +2621,18 @@ doContent(XML_Parser parser, ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); characterDataHandler(handlerArg, dataBuf, - dataPtr - (ICHAR *)dataBuf); + (int)(dataPtr - (ICHAR *)dataBuf)); } else characterDataHandler(handlerArg, (XML_Char *)s, - (XML_Char *)end - (XML_Char *)s); + (int)((XML_Char *)end - (XML_Char *)s)); } else if (defaultHandler) reportDefault(parser, enc, s, end); + /* We are at the end of the final buffer, should we check for + XML_SUSPENDED, XML_FINISHED? + */ if (startTagLevel == 0) { *eventPP = end; return XML_ERROR_NO_ELEMENTS; @@ -2292,28 +2641,32 @@ doContent(XML_Parser parser, *eventPP = end; return XML_ERROR_ASYNC_ENTITY; } + *nextPtr = end; return XML_ERROR_NONE; case XML_TOK_DATA_CHARS: - if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = s; - characterDataHandler(handlerArg, dataBuf, - dataPtr - (ICHAR *)dataBuf); - if (s == next) - break; - *eventPP = s; + { + XML_CharacterDataHandler charDataHandler = characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = s; + charDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + if (s == next) + break; + *eventPP = s; + } } + else + charDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); + else if (defaultHandler) + reportDefault(parser, enc, s, next); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); break; case XML_TOK_PI: if (!reportProcessingInstruction(parser, enc, s, next)) @@ -2329,6 +2682,14 @@ doContent(XML_Parser parser, break; } *eventPP = s = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: ; + } } /* not reached */ } @@ -2349,8 +2710,8 @@ storeAtts(XML_Parser parser, const ENCODING *enc, BINDING **bindingsPtr) { DTD * const dtd = _dtd; /* save one level of indirection */ - ELEMENT_TYPE *elementType = NULL; - int nDefaultAtts = 0; + ELEMENT_TYPE *elementType; + int nDefaultAtts; const XML_Char **appAtts; /* the attribute list for the application */ int attIndex = 0; int prefixLen; @@ -2362,12 +2723,12 @@ storeAtts(XML_Parser parser, const ENCODING *enc, const XML_Char *localPart; /* lookup the element type name */ - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, tagNamePtr->str,0); + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, tagNamePtr->str,0); if (!elementType) { const XML_Char *name = poolCopyString(&dtd->pool, tagNamePtr->str); if (!name) return XML_ERROR_NO_MEMORY; - elementType = (ELEMENT_TYPE *)lookup(&dtd->elementTypes, name, + elementType = (ELEMENT_TYPE *)lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (!elementType) return XML_ERROR_NO_MEMORY; @@ -2381,24 +2742,48 @@ storeAtts(XML_Parser parser, const ENCODING *enc, if (n + nDefaultAtts > attsSize) { int oldAttsSize = attsSize; ATTRIBUTE *temp; +#ifdef XML_ATTR_INFO + XML_AttrInfo *temp2; +#endif attsSize = n + nDefaultAtts + INIT_ATTS_SIZE; temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE)); if (temp == NULL) return XML_ERROR_NO_MEMORY; atts = temp; +#ifdef XML_ATTR_INFO + temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo)); + if (temp2 == NULL) + return XML_ERROR_NO_MEMORY; + attInfo = temp2; +#endif if (n > oldAttsSize) XmlGetAttributes(enc, attStr, n, atts); } appAtts = (const XML_Char **)atts; for (i = 0; i < n; i++) { + ATTRIBUTE *currAtt = &atts[i]; +#ifdef XML_ATTR_INFO + XML_AttrInfo *currAttInfo = &attInfo[i]; +#endif /* add the name and value to the attribute list */ - ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name, - atts[i].name - + XmlNameLength(enc, atts[i].name)); + ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name, + currAtt->name + + XmlNameLength(enc, currAtt->name)); if (!attId) return XML_ERROR_NO_MEMORY; - /* detect duplicate attributes */ +#ifdef XML_ATTR_INFO + currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name); + currAttInfo->nameEnd = currAttInfo->nameStart + + XmlNameLength(enc, currAtt->name); + currAttInfo->valueStart = parseEndByteIndex - + (parseEndPtr - currAtt->valuePtr); + currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd); +#endif + /* Detect duplicate attributes by their QNames. This does not work when + namespace processing is turned on and different prefixes for the same + namespace are used. For this case we have a check further down. + */ if ((attId->name)[-1]) { if (enc == encoding) eventPtr = atts[i].name; @@ -2498,60 +2883,130 @@ storeAtts(XML_Parser parser, const ENCODING *enc, } appAtts[attIndex] = 0; + /* expand prefixed attribute names, check for duplicates, + and clear flags that say whether attributes were specified */ i = 0; if (nPrefixes) { - /* expand prefixed attribute names */ + int j; /* hash table index */ + unsigned long version = nsAttsVersion; + int nsAttsSize = (int)1 << nsAttsPower; + /* size of hash table must be at least 2 * (# of prefixed attributes) */ + if ((nPrefixes << 1) >> nsAttsPower) { /* true for nsAttsPower = 0 */ + NS_ATT *temp; + /* hash table size must also be a power of 2 and >= 8 */ + while (nPrefixes >> nsAttsPower++); + if (nsAttsPower < 3) + nsAttsPower = 3; + nsAttsSize = (int)1 << nsAttsPower; + temp = (NS_ATT *)REALLOC(nsAtts, nsAttsSize * sizeof(NS_ATT)); + if (!temp) + return XML_ERROR_NO_MEMORY; + nsAtts = temp; + version = 0; /* force re-initialization of nsAtts hash table */ + } + /* using a version flag saves us from initializing nsAtts every time */ + if (!version) { /* initialize version flags when version wraps around */ + version = INIT_ATTS_VERSION; + for (j = nsAttsSize; j != 0; ) + nsAtts[--j].version = version; + } + nsAttsVersion = --version; + + /* expand prefixed names and check for duplicates */ for (; i < attIndex; i += 2) { - if (appAtts[i][-1] == 2) { + const XML_Char *s = appAtts[i]; + if (s[-1] == 2) { /* prefixed */ ATTRIBUTE_ID *id; - ((XML_Char *)(appAtts[i]))[-1] = 0; - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, appAtts[i], 0); - if (id->prefix->binding) { - int j; - const BINDING *b = id->prefix->binding; - const XML_Char *s = appAtts[i]; - for (j = 0; j < b->uriLen; j++) { - if (!poolAppendChar(&tempPool, b->uri[j])) - return XML_ERROR_NO_MEMORY; + const BINDING *b; + unsigned long uriHash = hash_secret_salt; + ((XML_Char *)s)[-1] = 0; /* clear flag */ + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0); + b = id->prefix->binding; + if (!b) + return XML_ERROR_UNBOUND_PREFIX; + + /* as we expand the name we also calculate its hash value */ + for (j = 0; j < b->uriLen; j++) { + const XML_Char c = b->uri[j]; + if (!poolAppendChar(&tempPool, c)) + return XML_ERROR_NO_MEMORY; + uriHash = CHAR_HASH(uriHash, c); + } + while (*s++ != XML_T(ASCII_COLON)) + ; + do { /* copies null terminator */ + const XML_Char c = *s; + if (!poolAppendChar(&tempPool, *s)) + return XML_ERROR_NO_MEMORY; + uriHash = CHAR_HASH(uriHash, c); + } while (*s++); + + { /* Check hash table for duplicate of expanded name (uriName). + Derived from code in lookup(parser, HASH_TABLE *table, ...). + */ + unsigned char step = 0; + unsigned long mask = nsAttsSize - 1; + j = uriHash & mask; /* index into hash table */ + while (nsAtts[j].version == version) { + /* for speed we compare stored hash values first */ + if (uriHash == nsAtts[j].hash) { + const XML_Char *s1 = poolStart(&tempPool); + const XML_Char *s2 = nsAtts[j].uriName; + /* s1 is null terminated, but not s2 */ + for (; *s1 == *s2 && *s1 != 0; s1++, s2++); + if (*s1 == 0) + return XML_ERROR_DUPLICATE_ATTRIBUTE; + } + if (!step) + step = PROBE_STEP(uriHash, mask, nsAttsPower); + j < step ? (j += nsAttsSize - step) : (j -= step); } - while (*s++ != XML_T(':')) - ; + } + + if (ns_triplets) { /* append namespace separator and prefix */ + tempPool.ptr[-1] = namespaceSeparator; + s = b->prefix->name; do { if (!poolAppendChar(&tempPool, *s)) return XML_ERROR_NO_MEMORY; } while (*s++); - if (ns_triplets) { - tempPool.ptr[-1] = namespaceSeparator; - s = b->prefix->name; - do { - if (!poolAppendChar(&tempPool, *s)) - return XML_ERROR_NO_MEMORY; - } while (*s++); - } - - appAtts[i] = poolStart(&tempPool); - poolFinish(&tempPool); } - if (!--nPrefixes) + + /* store expanded name in attribute list */ + s = poolStart(&tempPool); + poolFinish(&tempPool); + appAtts[i] = s; + + /* fill empty slot with new version, uriName and hash value */ + nsAtts[j].version = version; + nsAtts[j].hash = uriHash; + nsAtts[j].uriName = s; + + if (!--nPrefixes) { + i += 2; break; + } } - else - ((XML_Char *)(appAtts[i]))[-1] = 0; + else /* not prefixed */ + ((XML_Char *)s)[-1] = 0; /* clear flag */ } } - /* clear the flags that say whether attributes were specified */ + /* clear flags for the remaining attributes */ for (; i < attIndex; i += 2) ((XML_Char *)(appAtts[i]))[-1] = 0; for (binding = *bindingsPtr; binding; binding = binding->nextTagBinding) binding->attId->name[-1] = 0; + if (!ns) + return XML_ERROR_NONE; + /* expand the element type name */ if (elementType->prefix) { binding = elementType->prefix->binding; if (!binding) - return XML_ERROR_NONE; + return XML_ERROR_UNBOUND_PREFIX; localPart = tagNamePtr->str; - while (*localPart++ != XML_T(':')) + while (*localPart++ != XML_T(ASCII_COLON)) ; } else if (dtd->defaultPrefix.binding) { @@ -2561,16 +3016,16 @@ storeAtts(XML_Parser parser, const ENCODING *enc, else return XML_ERROR_NONE; prefixLen = 0; - if (ns && ns_triplets && binding->prefix->name) { + if (ns_triplets && binding->prefix->name) { for (; binding->prefix->name[prefixLen++];) - ; + ; /* prefixLen includes null terminator */ } tagNamePtr->localPart = localPart; tagNamePtr->uriLen = binding->uriLen; tagNamePtr->prefix = binding->prefix->name; tagNamePtr->prefixLen = prefixLen; for (i = 0; localPart[i++];) - ; + ; /* i includes null terminator */ n = i + binding->uriLen + prefixLen; if (n > binding->uriAlloc) { TAG *p; @@ -2585,11 +3040,13 @@ storeAtts(XML_Parser parser, const ENCODING *enc, FREE(binding->uri); binding->uri = uri; } + /* if namespaceSeparator != '\0' then uri includes it already */ uri = binding->uri + binding->uriLen; memcpy(uri, localPart, i * sizeof(XML_Char)); + /* we always have a namespace separator between localPart and prefix */ if (prefixLen) { - uri = uri + (i - 1); - if (namespaceSeparator) { *(uri) = namespaceSeparator; } + uri += i - 1; + *uri = namespaceSeparator; /* replace null terminator */ memcpy(uri + 1, binding->prefix->name, prefixLen * sizeof(XML_Char)); } tagNamePtr->str = binding->uri; @@ -2603,15 +3060,70 @@ static enum XML_Error addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, const XML_Char *uri, BINDING **bindingsPtr) { + static const XML_Char xmlNamespace[] = { + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, + ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, + ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH, + ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c, + ASCII_e, '\0' + }; + static const int xmlLen = + (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1; + static const XML_Char xmlnsNamespace[] = { + ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH, + ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, + ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0, + ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s, + ASCII_SLASH, '\0' + }; + static const int xmlnsLen = + (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1; + + XML_Bool mustBeXML = XML_FALSE; + XML_Bool isXML = XML_TRUE; + XML_Bool isXMLNS = XML_TRUE; + BINDING *b; int len; - /* empty string is only valid when there is no prefix per XML NS 1.0 */ + /* empty URI is only valid for default namespace per XML NS 1.0 (not 1.1) */ if (*uri == XML_T('\0') && prefix->name) - return XML_ERROR_SYNTAX; + return XML_ERROR_UNDECLARING_PREFIX; + + if (prefix->name + && prefix->name[0] == XML_T(ASCII_x) + && prefix->name[1] == XML_T(ASCII_m) + && prefix->name[2] == XML_T(ASCII_l)) { + + /* Not allowed to bind xmlns */ + if (prefix->name[3] == XML_T(ASCII_n) + && prefix->name[4] == XML_T(ASCII_s) + && prefix->name[5] == XML_T('\0')) + return XML_ERROR_RESERVED_PREFIX_XMLNS; + + if (prefix->name[3] == XML_T('\0')) + mustBeXML = XML_TRUE; + } + + for (len = 0; uri[len]; len++) { + if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len])) + isXML = XML_FALSE; + + if (!mustBeXML && isXMLNS + && (len > xmlnsLen || uri[len] != xmlnsNamespace[len])) + isXMLNS = XML_FALSE; + } + isXML = isXML && len == xmlLen; + isXMLNS = isXMLNS && len == xmlnsLen; + + if (mustBeXML != isXML) + return mustBeXML ? XML_ERROR_RESERVED_PREFIX_XML + : XML_ERROR_RESERVED_NAMESPACE_URI; + + if (isXMLNS) + return XML_ERROR_RESERVED_NAMESPACE_URI; - for (len = 0; uri[len]; len++) - ; if (namespaceSeparator) len++; if (freeBindingList) { @@ -2644,13 +3156,15 @@ addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId, b->prefix = prefix; b->attId = attId; b->prevPrefixBinding = prefix->binding; + /* NULL binding when default namespace undeclared */ if (*uri == XML_T('\0') && prefix == &_dtd->defaultPrefix) prefix->binding = NULL; else prefix->binding = b; b->nextTagBinding = *bindingsPtr; *bindingsPtr = b; - if (startNamespaceDeclHandler) + /* if attId == NULL then we are not starting a namespace scope */ + if (attId && startNamespaceDeclHandler) startNamespaceDeclHandler(handlerArg, prefix->name, prefix->binding ? uri : 0); return XML_ERROR_NONE; @@ -2665,8 +3179,10 @@ cdataSectionProcessor(XML_Parser parser, const char *end, const char **endPtr) { - enum XML_Error result = doCdataSection(parser, encoding, &start, - end, endPtr); + enum XML_Error result = doCdataSection(parser, encoding, &start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result != XML_ERROR_NONE) + return result; if (start) { if (parentParser) { /* we are parsing an external entity */ processor = externalEntityContentProcessor; @@ -2680,7 +3196,7 @@ cdataSectionProcessor(XML_Parser parser, return result; } -/* startPtr gets set to non-null is the section is closed, and to null if +/* startPtr gets set to non-null if the section is closed, and to null if the section is not yet closed. */ static enum XML_Error @@ -2688,7 +3204,8 @@ doCdataSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, - const char **nextPtr) + const char **nextPtr, + XML_Bool haveMore) { const char *s = *startPtr; const char **eventPP; @@ -2704,6 +3221,7 @@ doCdataSection(XML_Parser parser, } *eventPP = s; *startPtr = NULL; + for (;;) { const char *next; int tok = XmlCdataSectionTok(enc, s, end, &next); @@ -2720,7 +3238,11 @@ doCdataSection(XML_Parser parser, else if (defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; - return XML_ERROR_NONE; + *nextPtr = next; + if (ps_parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + else + return XML_ERROR_NONE; case XML_TOK_DATA_NEWLINE: if (characterDataHandler) { XML_Char c = 0xA; @@ -2730,39 +3252,42 @@ doCdataSection(XML_Parser parser, reportDefault(parser, enc, s, next); break; case XML_TOK_DATA_CHARS: - if (characterDataHandler) { - if (MUST_CONVERT(enc, s)) { - for (;;) { - ICHAR *dataPtr = (ICHAR *)dataBuf; - XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); - *eventEndPP = next; - characterDataHandler(handlerArg, dataBuf, - dataPtr - (ICHAR *)dataBuf); - if (s == next) - break; - *eventPP = s; + { + XML_CharacterDataHandler charDataHandler = characterDataHandler; + if (charDataHandler) { + if (MUST_CONVERT(enc, s)) { + for (;;) { + ICHAR *dataPtr = (ICHAR *)dataBuf; + XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd); + *eventEndPP = next; + charDataHandler(handlerArg, dataBuf, + (int)(dataPtr - (ICHAR *)dataBuf)); + if (s == next) + break; + *eventPP = s; + } } + else + charDataHandler(handlerArg, + (XML_Char *)s, + (int)((XML_Char *)next - (XML_Char *)s)); } - else - characterDataHandler(handlerArg, - (XML_Char *)s, - (XML_Char *)next - (XML_Char *)s); + else if (defaultHandler) + reportDefault(parser, enc, s, next); } - else if (defaultHandler) - reportDefault(parser, enc, s, next); break; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } @@ -2771,7 +3296,16 @@ doCdataSection(XML_Parser parser, *eventPP = next; return XML_ERROR_UNEXPECTED_STATE; } + *eventPP = s = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: ; + } } /* not reached */ } @@ -2787,8 +3321,10 @@ ignoreSectionProcessor(XML_Parser parser, const char *end, const char **endPtr) { - enum XML_Error result = doIgnoreSection(parser, encoding, &start, - end, endPtr); + enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, + endPtr, (XML_Bool)!ps_finalBuffer); + if (result != XML_ERROR_NONE) + return result; if (start) { processor = prologProcessor; return prologProcessor(parser, start, end, endPtr); @@ -2804,7 +3340,8 @@ doIgnoreSection(XML_Parser parser, const ENCODING *enc, const char **startPtr, const char *end, - const char **nextPtr) + const char **nextPtr, + XML_Bool haveMore) { const char *next; int tok; @@ -2829,19 +3366,23 @@ doIgnoreSection(XML_Parser parser, if (defaultHandler) reportDefault(parser, enc, s, next); *startPtr = next; - return XML_ERROR_NONE; + *nextPtr = next; + if (ps_parsing == XML_FINISHED) + return XML_ERROR_ABORTED; + else + return XML_ERROR_NONE; case XML_TOK_INVALID: *eventPP = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_PARTIAL_CHAR; case XML_TOK_PARTIAL: case XML_TOK_NONE: - if (nextPtr) { + if (haveMore) { *nextPtr = s; return XML_ERROR_NONE; } @@ -2906,8 +3447,12 @@ processXmlDecl(XML_Parser parser, int isGeneralTextEntity, &versionend, &encodingName, &newEncoding, - &standalone)) - return XML_ERROR_SYNTAX; + &standalone)) { + if (isGeneralTextEntity) + return XML_ERROR_TEXT_DECL; + else + return XML_ERROR_XML_DECL; + } if (!isGeneralTextEntity && standalone == 1) { _dtd->standalone = XML_TRUE; #ifdef XML_DTD @@ -3053,35 +3598,47 @@ entityValueInitProcessor(XML_Parser parser, const char *end, const char **nextPtr) { - const char *start = s; - const char *next = s; int tok; + const char *start = s; + const char *next = start; + eventPtr = start; for (;;) { tok = XmlPrologTok(encoding, start, end, &next); + eventEndPtr = next; if (tok <= 0) { - if (nextPtr != 0 && tok != XML_TOK_INVALID) { - *nextPtr = s; - return XML_ERROR_NONE; + if (!ps_finalBuffer && tok != XML_TOK_INVALID) { + *nextPtr = s; + return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: - return XML_ERROR_INVALID_TOKEN; + return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - return XML_ERROR_UNCLOSED_TOKEN; + return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - return XML_ERROR_PARTIAL_CHAR; + return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: /* start == end */ default: break; } + /* found end of entity value - can store it now */ return storeEntityValue(parser, encoding, s, end); } else if (tok == XML_TOK_XML_DECL) { - enum XML_Error result = processXmlDecl(parser, 0, start, next); - if (result != XML_ERROR_NONE) - return result; - if (nextPtr) *nextPtr = next; + enum XML_Error result; + result = processXmlDecl(parser, 0, start, next); + if (result != XML_ERROR_NONE) + return result; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + *nextPtr = next; + } /* stop scanning for text declaration - we found one */ processor = entityValueProcessor; return entityValueProcessor(parser, next, end, nextPtr); @@ -3093,11 +3650,12 @@ entityValueInitProcessor(XML_Parser parser, then, when this routine is entered the next time, XmlPrologTok will return XML_TOK_INVALID, since the BOM is still in the buffer */ - else if (tok == XML_TOK_BOM && next == end && nextPtr) { + else if (tok == XML_TOK_BOM && next == end && !ps_finalBuffer) { *nextPtr = next; return XML_ERROR_NONE; } start = next; + eventPtr = start; } } @@ -3107,13 +3665,12 @@ externalParEntProcessor(XML_Parser parser, const char *end, const char **nextPtr) { - const char *start = s; const char *next = s; int tok; - tok = XmlPrologTok(encoding, start, end, &next); + tok = XmlPrologTok(encoding, s, end, &next); if (tok <= 0) { - if (nextPtr != 0 && tok != XML_TOK_INVALID) { + if (!ps_finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3139,7 +3696,8 @@ externalParEntProcessor(XML_Parser parser, } processor = prologProcessor; - return doProlog(parser, encoding, s, end, tok, next, nextPtr); + return doProlog(parser, encoding, s, end, tok, next, + nextPtr, (XML_Bool)!ps_finalBuffer); } static enum XML_Error PTRCALL @@ -3156,21 +3714,22 @@ entityValueProcessor(XML_Parser parser, for (;;) { tok = XmlPrologTok(enc, start, end, &next); if (tok <= 0) { - if (nextPtr != 0 && tok != XML_TOK_INVALID) { + if (!ps_finalBuffer && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } switch (tok) { case XML_TOK_INVALID: - return XML_ERROR_INVALID_TOKEN; + return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - return XML_ERROR_UNCLOSED_TOKEN; + return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - return XML_ERROR_PARTIAL_CHAR; + return XML_ERROR_PARTIAL_CHAR; case XML_TOK_NONE: /* start == end */ default: break; } + /* found end of entity value - can store it now */ return storeEntityValue(parser, enc, s, end); } start = next; @@ -3187,7 +3746,8 @@ prologProcessor(XML_Parser parser, { const char *next = s; int tok = XmlPrologTok(encoding, s, end, &next); - return doProlog(parser, encoding, s, end, tok, next, nextPtr); + return doProlog(parser, encoding, s, end, tok, next, + nextPtr, (XML_Bool)!ps_finalBuffer); } static enum XML_Error @@ -3197,28 +3757,34 @@ doProlog(XML_Parser parser, const char *end, int tok, const char *next, - const char **nextPtr) + const char **nextPtr, + XML_Bool haveMore) { #ifdef XML_DTD - static const XML_Char externalSubsetName[] = { '#' , '\0' }; + static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' }; #endif /* XML_DTD */ - static const XML_Char atypeCDATA[] = { 'C', 'D', 'A', 'T', 'A', '\0' }; - static const XML_Char atypeID[] = { 'I', 'D', '\0' }; - static const XML_Char atypeIDREF[] = { 'I', 'D', 'R', 'E', 'F', '\0' }; - static const XML_Char atypeIDREFS[] = { 'I', 'D', 'R', 'E', 'F', 'S', '\0' }; - static const XML_Char atypeENTITY[] = { 'E', 'N', 'T', 'I', 'T', 'Y', '\0' }; - static const XML_Char atypeENTITIES[] = - { 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S', '\0' }; + static const XML_Char atypeCDATA[] = + { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' }; + static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' }; + static const XML_Char atypeIDREF[] = + { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' }; + static const XML_Char atypeIDREFS[] = + { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' }; + static const XML_Char atypeENTITY[] = + { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' }; + static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N, + ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' }; static const XML_Char atypeNMTOKEN[] = { - 'N', 'M', 'T', 'O', 'K', 'E', 'N', '\0' }; - static const XML_Char atypeNMTOKENS[] = { - 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S', '\0' }; - static const XML_Char notationPrefix[] = { - 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N', '(', '\0' }; - static const XML_Char enumValueSep[] = { '|', '\0' }; - static const XML_Char enumValueStart[] = { '(', '\0' }; + ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' }; + static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, + ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' }; + static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T, + ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' }; + static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' }; + static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' }; - DTD * const dtd = _dtd; /* save one level of indirection */ + /* save one level of indirection */ + DTD * const dtd = _dtd; const char **eventPP; const char **eventEndPP; @@ -3232,13 +3798,14 @@ doProlog(XML_Parser parser, eventPP = &(openInternalEntities->internalEventPtr); eventEndPP = &(openInternalEntities->internalEventEndPtr); } + for (;;) { int role; XML_Bool handleDefault = XML_TRUE; *eventPP = s; *eventEndPP = next; if (tok <= 0) { - if (nextPtr != 0 && tok != XML_TOK_INVALID) { + if (haveMore && tok != XML_TOK_INVALID) { *nextPtr = s; return XML_ERROR_NONE; } @@ -3250,14 +3817,25 @@ doProlog(XML_Parser parser, return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: return XML_ERROR_PARTIAL_CHAR; + case -XML_TOK_PROLOG_S: + tok = -tok; + break; case XML_TOK_NONE: #ifdef XML_DTD - if (enc != encoding) + /* for internal PE NOT referenced between declarations */ + if (enc != encoding && !openInternalEntities->betweenDecl) { + *nextPtr = s; return XML_ERROR_NONE; - if (isParamEntity) { + } + /* WFC: PE Between Declarations - must check that PE contains + complete markup, not only for external PEs, but also for + internal PEs if the reference occurs between declarations. + */ + if (isParamEntity || enc != encoding) { if (XmlTokenRole(&prologState, XML_TOK_NONE, end, end, enc) == XML_ROLE_ERROR) - return XML_ERROR_SYNTAX; + return XML_ERROR_INCOMPLETE_PE; + *nextPtr = s; return XML_ERROR_NONE; } #endif /* XML_DTD */ @@ -3313,28 +3891,34 @@ doProlog(XML_Parser parser, case XML_ROLE_DOCTYPE_PUBLIC_ID: #ifdef XML_DTD useForeignDTD = XML_FALSE; + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, + externalSubsetName, + sizeof(ENTITY)); + if (!declEntity) + return XML_ERROR_NO_MEMORY; #endif /* XML_DTD */ dtd->hasParamEntityRefs = XML_TRUE; if (startDoctypeDeclHandler) { - doctypePubid = poolStoreString(&tempPool, enc, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar); - if (!doctypePubid) + XML_Char *pubId; + if (!XmlIsPublicId(enc, s, next, eventPP)) + return XML_ERROR_PUBLICID; + pubId = poolStoreString(&tempPool, enc, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar); + if (!pubId) return XML_ERROR_NO_MEMORY; + normalizePublicId(pubId); poolFinish(&tempPool); + doctypePubid = pubId; handleDefault = XML_FALSE; + goto alreadyChecked; } -#ifdef XML_DTD - declEntity = (ENTITY *)lookup(&dtd->paramEntities, - externalSubsetName, - sizeof(ENTITY)); - if (!declEntity) - return XML_ERROR_NO_MEMORY; -#endif /* XML_DTD */ /* fall through */ case XML_ROLE_ENTITY_PUBLIC_ID: if (!XmlIsPublicId(enc, s, next, eventPP)) - return XML_ERROR_SYNTAX; + return XML_ERROR_PUBLICID; + alreadyChecked: if (dtd->keepProcessing && declEntity) { XML_Char *tem = poolStoreString(&dtd->pool, enc, @@ -3362,9 +3946,11 @@ doProlog(XML_Parser parser, */ #ifdef XML_DTD if (doctypeSysid || useForeignDTD) { - dtd->hasParamEntityRefs = XML_TRUE; /* when docTypeSysid == NULL */ + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; + dtd->hasParamEntityRefs = XML_TRUE; if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + ENTITY *entity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!entity) @@ -3378,11 +3964,17 @@ doProlog(XML_Parser parser, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - if (dtd->paramEntityRead && - !dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (dtd->paramEntityRead) { + if (!dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else if (!doctypeSysid) + dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } useForeignDTD = XML_FALSE; @@ -3399,9 +3991,10 @@ doProlog(XML_Parser parser, last chance to read the foreign DTD */ if (useForeignDTD) { + XML_Bool hadParamEntityRefs = dtd->hasParamEntityRefs; dtd->hasParamEntityRefs = XML_TRUE; if (paramEntityParsing && externalEntityRefHandler) { - ENTITY *entity = (ENTITY *)lookup(&dtd->paramEntities, + ENTITY *entity = (ENTITY *)lookup(parser, &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!entity) @@ -3414,11 +4007,17 @@ doProlog(XML_Parser parser, entity->systemId, entity->publicId)) return XML_ERROR_EXTERNAL_ENTITY_HANDLING; - if (dtd->paramEntityRead && - !dtd->standalone && - notStandaloneHandler && - !notStandaloneHandler(handlerArg)) - return XML_ERROR_NOT_STANDALONE; + if (dtd->paramEntityRead) { + if (!dtd->standalone && + notStandaloneHandler && + !notStandaloneHandler(handlerArg)) + return XML_ERROR_NOT_STANDALONE; + } + /* if we didn't read the foreign DTD then this means that there + is no external subset and we must reset dtd->hasParamEntityRefs + */ + else + dtd->hasParamEntityRefs = hadParamEntityRefs; /* end of DTD - no need to update dtd->keepProcessing */ } } @@ -3491,15 +4090,15 @@ doProlog(XML_Parser parser, case XML_ROLE_REQUIRED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { if (!defineAttribute(declElementType, declAttributeId, - declAttributeIsCdata, declAttributeIsId, 0, - parser)) + declAttributeIsCdata, declAttributeIsId, + 0, parser)) return XML_ERROR_NO_MEMORY; if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T('(') - || (*declAttributeType == XML_T('N') - && declAttributeType[1] == XML_T('O'))) { + if (*declAttributeType == XML_T(ASCII_LPAREN) + || (*declAttributeType == XML_T(ASCII_N) + && declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(')')) + if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) || !poolAppendChar(&tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; declAttributeType = tempPool.start; @@ -3518,11 +4117,11 @@ doProlog(XML_Parser parser, case XML_ROLE_FIXED_ATTRIBUTE_VALUE: if (dtd->keepProcessing) { const XML_Char *attVal; - enum XML_Error result - = storeAttributeValue(parser, enc, declAttributeIsCdata, - s + enc->minBytesPerChar, - next - enc->minBytesPerChar, - &dtd->pool); + enum XML_Error result = + storeAttributeValue(parser, enc, declAttributeIsCdata, + s + enc->minBytesPerChar, + next - enc->minBytesPerChar, + &dtd->pool); if (result) return result; attVal = poolStart(&dtd->pool); @@ -3532,11 +4131,11 @@ doProlog(XML_Parser parser, declAttributeIsCdata, XML_FALSE, attVal, parser)) return XML_ERROR_NO_MEMORY; if (attlistDeclHandler && declAttributeType) { - if (*declAttributeType == XML_T('(') - || (*declAttributeType == XML_T('N') - && declAttributeType[1] == XML_T('O'))) { + if (*declAttributeType == XML_T(ASCII_LPAREN) + || (*declAttributeType == XML_T(ASCII_N) + && declAttributeType[1] == XML_T(ASCII_O))) { /* Enumerated or Notation type */ - if (!poolAppendChar(&tempPool, XML_T(')')) + if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN)) || !poolAppendChar(&tempPool, XML_T('\0'))) return XML_ERROR_NO_MEMORY; declAttributeType = tempPool.start; @@ -3559,7 +4158,7 @@ doProlog(XML_Parser parser, next - enc->minBytesPerChar); if (declEntity) { declEntity->textPtr = poolStart(&dtd->entityValuePool); - declEntity->textLen = poolLength(&dtd->entityValuePool); + declEntity->textLen = (int)(poolLength(&dtd->entityValuePool)); poolFinish(&dtd->entityValuePool); if (entityDeclHandler) { *eventEndPP = s; @@ -3609,7 +4208,8 @@ doProlog(XML_Parser parser, break; #else /* XML_DTD */ if (!declEntity) { - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, + &dtd->paramEntities, externalSubsetName, sizeof(ENTITY)); if (!declEntity) @@ -3684,7 +4284,7 @@ doProlog(XML_Parser parser, const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->generalEntities, name, + declEntity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; @@ -3716,7 +4316,7 @@ doProlog(XML_Parser parser, const XML_Char *name = poolStoreString(&dtd->pool, enc, s, next); if (!name) return XML_ERROR_NO_MEMORY; - declEntity = (ENTITY *)lookup(&dtd->paramEntities, + declEntity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, sizeof(ENTITY)); if (!declEntity) return XML_ERROR_NO_MEMORY; @@ -3757,7 +4357,7 @@ doProlog(XML_Parser parser, break; case XML_ROLE_NOTATION_PUBLIC_ID: if (!XmlIsPublicId(enc, s, next, eventPP)) - return XML_ERROR_SYNTAX; + return XML_ERROR_PUBLICID; if (declNotationName) { /* means notationDeclHandler != NULL */ XML_Char *tem = poolStoreString(&tempPool, enc, @@ -3804,6 +4404,8 @@ doProlog(XML_Parser parser, case XML_ROLE_ERROR: switch (tok) { case XML_TOK_PARAM_ENTITY_REF: + /* PE references in internal subset are + not allowed within declarations. */ return XML_ERROR_PARAM_ENTITY_REF; case XML_TOK_XML_DECL: return XML_ERROR_MISPLACED_XML_PI; @@ -3817,8 +4419,10 @@ doProlog(XML_Parser parser, if (defaultHandler) reportDefault(parser, enc, s, next); handleDefault = XML_FALSE; - result = doIgnoreSection(parser, enc, &next, end, nextPtr); - if (!next) { + result = doIgnoreSection(parser, enc, &next, end, nextPtr, haveMore); + if (result != XML_ERROR_NONE) + return result; + else if (!next) { processor = ignoreSectionProcessor; return result; } @@ -3859,14 +4463,14 @@ doProlog(XML_Parser parser, } break; case XML_ROLE_GROUP_SEQUENCE: - if (groupConnector[prologState.level] == '|') + if (groupConnector[prologState.level] == ASCII_PIPE) return XML_ERROR_SYNTAX; - groupConnector[prologState.level] = ','; + groupConnector[prologState.level] = ASCII_COMMA; if (dtd->in_eldecl && elementDeclHandler) handleDefault = XML_FALSE; break; case XML_ROLE_GROUP_CHOICE: - if (groupConnector[prologState.level] == ',') + if (groupConnector[prologState.level] == ASCII_COMMA) return XML_ERROR_SYNTAX; if (dtd->in_eldecl && !groupConnector[prologState.level] @@ -3878,16 +4482,11 @@ doProlog(XML_Parser parser, if (elementDeclHandler) handleDefault = XML_FALSE; } - groupConnector[prologState.level] = '|'; + groupConnector[prologState.level] = ASCII_PIPE; break; case XML_ROLE_PARAM_ENTITY_REF: #ifdef XML_DTD case XML_ROLE_INNER_PARAM_ENTITY_REF: - /* PE references in internal subset are - not allowed within declarations */ - if (prologState.documentEntity && - role == XML_ROLE_INNER_PARAM_ENTITY_REF) - return XML_ERROR_PARAM_ENTITY_REF; dtd->hasParamEntityRefs = XML_TRUE; if (!paramEntityParsing) dtd->keepProcessing = dtd->standalone; @@ -3899,7 +4498,7 @@ doProlog(XML_Parser parser, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&dtd->pool); /* first, determine if a check for an existing declaration is needed; if yes, check that the entity exists, and that it is internal, @@ -3927,7 +4526,9 @@ doProlog(XML_Parser parser, return XML_ERROR_RECURSIVE_ENTITY_REF; if (entity->textPtr) { enum XML_Error result; - result = processInternalParamEntity(parser, entity); + XML_Bool betweenDecl = + (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE); + result = processInternalEntity(parser, entity, betweenDecl); if (result != XML_ERROR_NONE) return result; handleDefault = XML_FALSE; @@ -4120,8 +4721,16 @@ doProlog(XML_Parser parser, if (handleDefault && defaultHandler) reportDefault(parser, enc, s, next); - s = next; - tok = XmlPrologTok(enc, s, end, &next); + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: + s = next; + tok = XmlPrologTok(enc, s, end, &next); + } } /* not reached */ } @@ -4142,15 +4751,14 @@ epilogProcessor(XML_Parser parser, /* report partial linebreak - it might be the last token */ case -XML_TOK_PROLOG_S: if (defaultHandler) { - eventEndPtr = next; reportDefault(parser, encoding, s, next); + if (ps_parsing == XML_FINISHED) + return XML_ERROR_ABORTED; } - if (nextPtr) - *nextPtr = next; + *nextPtr = next; return XML_ERROR_NONE; case XML_TOK_NONE: - if (nextPtr) - *nextPtr = s; + *nextPtr = s; return XML_ERROR_NONE; case XML_TOK_PROLOG_S: if (defaultHandler) @@ -4168,13 +4776,13 @@ epilogProcessor(XML_Parser parser, eventPtr = next; return XML_ERROR_INVALID_TOKEN; case XML_TOK_PARTIAL: - if (nextPtr) { + if (!ps_finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } return XML_ERROR_UNCLOSED_TOKEN; case XML_TOK_PARTIAL_CHAR: - if (nextPtr) { + if (!ps_finalBuffer) { *nextPtr = s; return XML_ERROR_NONE; } @@ -4183,34 +4791,134 @@ epilogProcessor(XML_Parser parser, return XML_ERROR_JUNK_AFTER_DOC_ELEMENT; } eventPtr = s = next; + switch (ps_parsing) { + case XML_SUSPENDED: + *nextPtr = next; + return XML_ERROR_NONE; + case XML_FINISHED: + return XML_ERROR_ABORTED; + default: ; + } } } -#ifdef XML_DTD - static enum XML_Error -processInternalParamEntity(XML_Parser parser, ENTITY *entity) +processInternalEntity(XML_Parser parser, ENTITY *entity, + XML_Bool betweenDecl) { - const char *s, *end, *next; - int tok; + const char *textStart, *textEnd; + const char *next; enum XML_Error result; - OPEN_INTERNAL_ENTITY openEntity; + OPEN_INTERNAL_ENTITY *openEntity; + + if (freeInternalEntities) { + openEntity = freeInternalEntities; + freeInternalEntities = openEntity->next; + } + else { + openEntity = (OPEN_INTERNAL_ENTITY *)MALLOC(sizeof(OPEN_INTERNAL_ENTITY)); + if (!openEntity) + return XML_ERROR_NO_MEMORY; + } entity->open = XML_TRUE; - openEntity.next = openInternalEntities; - openInternalEntities = &openEntity; - openEntity.entity = entity; - openEntity.internalEventPtr = NULL; - openEntity.internalEventEndPtr = NULL; - s = (char *)entity->textPtr; - end = (char *)(entity->textPtr + entity->textLen); - tok = XmlPrologTok(internalEncoding, s, end, &next); - result = doProlog(parser, internalEncoding, s, end, tok, next, 0); - entity->open = XML_FALSE; - openInternalEntities = openEntity.next; + entity->processed = 0; + openEntity->next = openInternalEntities; + openInternalEntities = openEntity; + openEntity->entity = entity; + openEntity->startTagLevel = tagLevel; + openEntity->betweenDecl = betweenDecl; + openEntity->internalEventPtr = NULL; + openEntity->internalEventEndPtr = NULL; + textStart = (char *)entity->textPtr; + textEnd = (char *)(entity->textPtr + entity->textLen); + +#ifdef XML_DTD + if (entity->is_param) { + int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, internalEncoding, textStart, textEnd, tok, + next, &next, XML_FALSE); + } + else +#endif /* XML_DTD */ + result = doContent(parser, tagLevel, internalEncoding, textStart, + textEnd, &next, XML_FALSE); + + if (result == XML_ERROR_NONE) { + if (textEnd != next && ps_parsing == XML_SUSPENDED) { + entity->processed = (int)(next - textStart); + processor = internalEntityProcessor; + } + else { + entity->open = XML_FALSE; + openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = freeInternalEntities; + freeInternalEntities = openEntity; + } + } return result; } +static enum XML_Error PTRCALL +internalEntityProcessor(XML_Parser parser, + const char *s, + const char *end, + const char **nextPtr) +{ + ENTITY *entity; + const char *textStart, *textEnd; + const char *next; + enum XML_Error result; + OPEN_INTERNAL_ENTITY *openEntity = openInternalEntities; + if (!openEntity) + return XML_ERROR_UNEXPECTED_STATE; + + entity = openEntity->entity; + textStart = ((char *)entity->textPtr) + entity->processed; + textEnd = (char *)(entity->textPtr + entity->textLen); + +#ifdef XML_DTD + if (entity->is_param) { + int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next); + result = doProlog(parser, internalEncoding, textStart, textEnd, tok, + next, &next, XML_FALSE); + } + else #endif /* XML_DTD */ + result = doContent(parser, openEntity->startTagLevel, internalEncoding, + textStart, textEnd, &next, XML_FALSE); + + if (result != XML_ERROR_NONE) + return result; + else if (textEnd != next && ps_parsing == XML_SUSPENDED) { + entity->processed = (int)(next - (char *)entity->textPtr); + return result; + } + else { + entity->open = XML_FALSE; + openInternalEntities = openEntity->next; + /* put openEntity back in list of free instances */ + openEntity->next = freeInternalEntities; + freeInternalEntities = openEntity; + } + +#ifdef XML_DTD + if (entity->is_param) { + int tok; + processor = prologProcessor; + tok = XmlPrologTok(encoding, s, end, &next); + return doProlog(parser, encoding, s, end, tok, next, nextPtr, + (XML_Bool)!ps_finalBuffer); + } + else +#endif /* XML_DTD */ + { + processor = contentProcessor; + /* see externalEntityContentProcessor vs contentProcessor */ + return doContent(parser, parentParser ? 1 : 0, encoding, s, end, + nextPtr, (XML_Bool)!ps_finalBuffer); + } +} static enum XML_Error PTRCALL errorProcessor(XML_Parser parser, @@ -4315,11 +5023,10 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, next - enc->minBytesPerChar); if (!name) return XML_ERROR_NO_MEMORY; - entity = (ENTITY *)lookup(&dtd->generalEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->generalEntities, name, 0); poolDiscard(&temp2Pool); - /* first, determine if a check for an existing declaration is needed; - if yes, check that the entity exists, and that it is internal, - otherwise call the default handler (if called from content) + /* First, determine if a check for an existing declaration is needed; + if yes, check that the entity exists, and that it is internal. */ if (pool == &dtd->pool) /* are we called from prolog? */ checkEntityDecl = @@ -4338,13 +5045,16 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, return XML_ERROR_ENTITY_DECLARED_IN_PE; } else if (!entity) { - /* cannot report skipped entity here - see comments on - skippedEntityHandler + /* Cannot report skipped entity here - see comments on + skippedEntityHandler. if (skippedEntityHandler) skippedEntityHandler(handlerArg, name, 0); */ + /* Cannot call the default handler because this would be + out of sync with the call to the startElementHandler. if ((pool == &tempPool) && defaultHandler) reportDefault(parser, enc, ptr, next); + */ break; } if (entity->open) { @@ -4360,7 +5070,7 @@ appendAttributeValue(XML_Parser parser, const ENCODING *enc, XML_Bool isCdata, if (!entity->textPtr) { if (enc == encoding) eventPtr = ptr; - return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; + return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF; } else { enum XML_Error result; @@ -4422,7 +5132,7 @@ storeEntityValue(XML_Parser parser, result = XML_ERROR_NO_MEMORY; goto endEntityValue; } - entity = (ENTITY *)lookup(&dtd->paramEntities, name, 0); + entity = (ENTITY *)lookup(parser, &dtd->paramEntities, name, 0); poolDiscard(&tempPool); if (!entity) { /* not a well-formedness error - see XML 1.0: WFC Entity Declared */ @@ -4474,8 +5184,8 @@ storeEntityValue(XML_Parser parser, break; } #endif /* XML_DTD */ - /* in the internal subset, PE references are not legal - within markup declarations, e.g entity values in this case */ + /* In the internal subset, PE references are not legal + within markup declarations, e.g entity values in this case. */ eventPtr = entityTextPtr; result = XML_ERROR_PARAM_ENTITY_REF; goto endEntityValue; @@ -4644,12 +5354,12 @@ reportDefault(XML_Parser parser, const ENCODING *enc, ICHAR *dataPtr = (ICHAR *)dataBuf; XmlConvert(enc, &s, end, &dataPtr, (ICHAR *)dataBufEnd); *eventEndPP = s; - defaultHandler(handlerArg, dataBuf, dataPtr - (ICHAR *)dataBuf); + defaultHandler(handlerArg, dataBuf, (int)(dataPtr - (ICHAR *)dataBuf)); *eventPP = s; } while (s != end); } else - defaultHandler(handlerArg, (XML_Char *)s, (XML_Char *)end - (XML_Char *)s); + defaultHandler(handlerArg, (XML_Char *)s, (int)((XML_Char *)end - (XML_Char *)s)); } @@ -4671,7 +5381,7 @@ defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *attId, XML_Bool isCdata, if (type->nDefaultAtts == type->allocDefaultAtts) { if (type->allocDefaultAtts == 0) { type->allocDefaultAtts = 8; - type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts + type->defaultAtts = (DEFAULT_ATTRIBUTE *)MALLOC(type->allocDefaultAtts * sizeof(DEFAULT_ATTRIBUTE)); if (!type->defaultAtts) return 0; @@ -4703,7 +5413,7 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) DTD * const dtd = _dtd; /* save one level of indirection */ const XML_Char *name; for (name = elementType->name; *name; name++) { - if (*name == XML_T(':')) { + if (*name == XML_T(ASCII_COLON)) { PREFIX *prefix; const XML_Char *s; for (s = elementType->name; s != name; s++) { @@ -4712,7 +5422,7 @@ setElementTypePrefix(XML_Parser parser, ELEMENT_TYPE *elementType) } if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return 0; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (!prefix) return 0; @@ -4739,8 +5449,9 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, name = poolStoreString(&dtd->pool, enc, start, end); if (!name) return NULL; + /* skip quotation mark - its storage will be re-used (like in name[-1]) */ ++name; - id = (ATTRIBUTE_ID *)lookup(&dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); + id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, name, sizeof(ATTRIBUTE_ID)); if (!id) return NULL; if (id->name != name) @@ -4749,22 +5460,23 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, poolFinish(&dtd->pool); if (!ns) ; - else if (name[0] == XML_T('x') - && name[1] == XML_T('m') - && name[2] == XML_T('l') - && name[3] == XML_T('n') - && name[4] == XML_T('s') - && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) { + else if (name[0] == XML_T(ASCII_x) + && name[1] == XML_T(ASCII_m) + && name[2] == XML_T(ASCII_l) + && name[3] == XML_T(ASCII_n) + && name[4] == XML_T(ASCII_s) + && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) { if (name[5] == XML_T('\0')) id->prefix = &dtd->defaultPrefix; else - id->prefix = (PREFIX *)lookup(&dtd->prefixes, name + 6, sizeof(PREFIX)); + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, name + 6, sizeof(PREFIX)); id->xmlns = XML_TRUE; } else { int i; for (i = 0; name[i]; i++) { - if (name[i] == XML_T(':')) { + /* attributes without prefix are *not* in the default namespace */ + if (name[i] == XML_T(ASCII_COLON)) { int j; for (j = 0; j < i; j++) { if (!poolAppendChar(&dtd->pool, name[j])) @@ -4772,7 +5484,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, } if (!poolAppendChar(&dtd->pool, XML_T('\0'))) return NULL; - id->prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&dtd->pool), + id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool), sizeof(PREFIX)); if (id->prefix->name == poolStart(&dtd->pool)) poolFinish(&dtd->pool); @@ -4786,7 +5498,7 @@ getAttributeId(XML_Parser parser, const ENCODING *enc, return id; } -#define CONTEXT_SEP XML_T('\f') +#define CONTEXT_SEP XML_T(ASCII_FF) static const XML_Char * getContext(XML_Parser parser) @@ -4798,10 +5510,10 @@ getContext(XML_Parser parser) if (dtd->defaultPrefix.binding) { int i; int len; - if (!poolAppendChar(&tempPool, XML_T('='))) + if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) return NULL; len = dtd->defaultPrefix.binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (namespaceSeparator) len--; for (i = 0; i < len; i++) if (!poolAppendChar(&tempPool, dtd->defaultPrefix.binding->uri[i])) @@ -4824,10 +5536,10 @@ getContext(XML_Parser parser) for (s = prefix->name; *s; s++) if (!poolAppendChar(&tempPool, *s)) return NULL; - if (!poolAppendChar(&tempPool, XML_T('='))) + if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS))) return NULL; len = prefix->binding->uriLen; - if (namespaceSeparator != XML_T('\0')) + if (namespaceSeparator) len--; for (i = 0; i < len; i++) if (!poolAppendChar(&tempPool, prefix->binding->uri[i])) @@ -4868,7 +5580,7 @@ setContext(XML_Parser parser, const XML_Char *context) ENTITY *e; if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - e = (ENTITY *)lookup(&dtd->generalEntities, poolStart(&tempPool), 0); + e = (ENTITY *)lookup(parser, &dtd->generalEntities, poolStart(&tempPool), 0); if (e) e->open = XML_TRUE; if (*s != XML_T('\0')) @@ -4876,14 +5588,14 @@ setContext(XML_Parser parser, const XML_Char *context) context = s; poolDiscard(&tempPool); } - else if (*s == XML_T('=')) { + else if (*s == XML_T(ASCII_EQUALS)) { PREFIX *prefix; if (poolLength(&tempPool) == 0) prefix = &dtd->defaultPrefix; else { if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - prefix = (PREFIX *)lookup(&dtd->prefixes, poolStart(&tempPool), + prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&tempPool), sizeof(PREFIX)); if (!prefix) return XML_FALSE; @@ -4901,7 +5613,7 @@ setContext(XML_Parser parser, const XML_Char *context) return XML_FALSE; if (!poolAppendChar(&tempPool, XML_T('\0'))) return XML_FALSE; - if (addBinding(parser, prefix, 0, poolStart(&tempPool), + if (addBinding(parser, prefix, NULL, poolStart(&tempPool), &inheritedBindings) != XML_ERROR_NONE) return XML_FALSE; poolDiscard(&tempPool); @@ -4947,9 +5659,7 @@ dtdCreate(const XML_Memory_Handling_Suite *ms) if (p == NULL) return p; poolInit(&(p->pool), ms); -#ifdef XML_DTD poolInit(&(p->entityValuePool), ms); -#endif /* XML_DTD */ hashTableInit(&(p->generalEntities), ms); hashTableInit(&(p->elementTypes), ms); hashTableInit(&(p->attributeIds), ms); @@ -4996,21 +5706,17 @@ dtdReset(DTD *p, const XML_Memory_Handling_Suite *ms) hashTableClear(&(p->attributeIds)); hashTableClear(&(p->prefixes)); poolClear(&(p->pool)); -#ifdef XML_DTD poolClear(&(p->entityValuePool)); -#endif /* XML_DTD */ p->defaultPrefix.name = NULL; p->defaultPrefix.binding = NULL; p->in_eldecl = XML_FALSE; - if (p->scaffIndex) { - ms->free_fcn(p->scaffIndex); - p->scaffIndex = NULL; - } - if (p->scaffold) { - ms->free_fcn(p->scaffold); - p->scaffold = NULL; - } + + ms->free_fcn(p->scaffIndex); + p->scaffIndex = NULL; + ms->free_fcn(p->scaffold); + p->scaffold = NULL; + p->scaffLevel = 0; p->scaffSize = 0; p->scaffCount = 0; @@ -5041,14 +5747,10 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) hashTableDestroy(&(p->attributeIds)); hashTableDestroy(&(p->prefixes)); poolDestroy(&(p->pool)); -#ifdef XML_DTD poolDestroy(&(p->entityValuePool)); -#endif /* XML_DTD */ if (isDocEntity) { - if (p->scaffIndex) - ms->free_fcn(p->scaffIndex); - if (p->scaffold) - ms->free_fcn(p->scaffold); + ms->free_fcn(p->scaffIndex); + ms->free_fcn(p->scaffold); } ms->free_fcn(p); } @@ -5057,7 +5759,7 @@ dtdDestroy(DTD *p, XML_Bool isDocEntity, const XML_Memory_Handling_Suite *ms) The new DTD has already been initialized. */ static int -dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) +dtdCopy(XML_Parser oldParser, DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) { HASH_TABLE_ITER iter; @@ -5072,7 +5774,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) name = poolCopyString(&(newDtd->pool), oldP->name); if (!name) return 0; - if (!lookup(&(newDtd->prefixes), name, sizeof(PREFIX))) + if (!lookup(oldParser, &(newDtd->prefixes), name, sizeof(PREFIX))) return 0; } @@ -5094,7 +5796,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) if (!name) return 0; ++name; - newA = (ATTRIBUTE_ID *)lookup(&(newDtd->attributeIds), name, + newA = (ATTRIBUTE_ID *)lookup(oldParser, &(newDtd->attributeIds), name, sizeof(ATTRIBUTE_ID)); if (!newA) return 0; @@ -5104,7 +5806,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) if (oldA->prefix == &oldDtd->defaultPrefix) newA->prefix = &newDtd->defaultPrefix; else - newA->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newA->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldA->prefix->name, 0); } } @@ -5123,7 +5825,7 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) name = poolCopyString(&(newDtd->pool), oldE->name); if (!name) return 0; - newE = (ELEMENT_TYPE *)lookup(&(newDtd->elementTypes), name, + newE = (ELEMENT_TYPE *)lookup(oldParser, &(newDtd->elementTypes), name, sizeof(ELEMENT_TYPE)); if (!newE) return 0; @@ -5137,14 +5839,14 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) } if (oldE->idAtt) newE->idAtt = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->idAtt->name, 0); + lookup(oldParser, &(newDtd->attributeIds), oldE->idAtt->name, 0); newE->allocDefaultAtts = newE->nDefaultAtts = oldE->nDefaultAtts; if (oldE->prefix) - newE->prefix = (PREFIX *)lookup(&(newDtd->prefixes), + newE->prefix = (PREFIX *)lookup(oldParser, &(newDtd->prefixes), oldE->prefix->name, 0); for (i = 0; i < newE->nDefaultAtts; i++) { newE->defaultAtts[i].id = (ATTRIBUTE_ID *) - lookup(&(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); + lookup(oldParser, &(newDtd->attributeIds), oldE->defaultAtts[i].id->name, 0); newE->defaultAtts[i].isCdata = oldE->defaultAtts[i].isCdata; if (oldE->defaultAtts[i].value) { newE->defaultAtts[i].value @@ -5158,13 +5860,15 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) } /* Copy the entity tables. */ - if (!copyEntityTable(&(newDtd->generalEntities), + if (!copyEntityTable(oldParser, + &(newDtd->generalEntities), &(newDtd->pool), &(oldDtd->generalEntities))) return 0; #ifdef XML_DTD - if (!copyEntityTable(&(newDtd->paramEntities), + if (!copyEntityTable(oldParser, + &(newDtd->paramEntities), &(newDtd->pool), &(oldDtd->paramEntities))) return 0; @@ -5187,7 +5891,8 @@ dtdCopy(DTD *newDtd, const DTD *oldDtd, const XML_Memory_Handling_Suite *ms) } /* End dtdCopy */ static int -copyEntityTable(HASH_TABLE *newTable, +copyEntityTable(XML_Parser oldParser, + HASH_TABLE *newTable, STRING_POOL *newPool, const HASH_TABLE *oldTable) { @@ -5206,7 +5911,7 @@ copyEntityTable(HASH_TABLE *newTable, name = poolCopyString(newPool, oldE->name); if (!name) return 0; - newE = (ENTITY *)lookup(newTable, name, sizeof(ENTITY)); + newE = (ENTITY *)lookup(oldParser, newTable, name, sizeof(ENTITY)); if (!newE) return 0; if (oldE->systemId) { @@ -5252,57 +5957,66 @@ copyEntityTable(HASH_TABLE *newTable, return 1; } -#define INIT_SIZE 64 +#define INIT_POWER 6 -static int FASTCALL +static XML_Bool FASTCALL keyeq(KEY s1, KEY s2) { for (; *s1 == *s2; s1++, s2++) if (*s1 == 0) - return 1; - return 0; + return XML_TRUE; + return XML_FALSE; } static unsigned long FASTCALL -hash(KEY s) +hash(XML_Parser parser, KEY s) { - unsigned long h = 0; + unsigned long h = hash_secret_salt; while (*s) - h = (h << 5) + h + (unsigned char)*s++; + h = CHAR_HASH(h, *s++); return h; } static NAMED * -lookup(HASH_TABLE *table, KEY name, size_t createSize) +lookup(XML_Parser parser, HASH_TABLE *table, KEY name, size_t createSize) { size_t i; if (table->size == 0) { size_t tsize; - if (!createSize) return NULL; - tsize = INIT_SIZE * sizeof(NAMED *); + table->power = INIT_POWER; + /* table->size is a power of 2 */ + table->size = (size_t)1 << INIT_POWER; + tsize = table->size * sizeof(NAMED *); table->v = (NAMED **)table->mem->malloc_fcn(tsize); - if (!table->v) + if (!table->v) { + table->size = 0; return NULL; + } memset(table->v, 0, tsize); - table->size = INIT_SIZE; - table->usedLim = INIT_SIZE / 2; - i = hash(name) & (table->size - 1); + i = hash(parser, name) & ((unsigned long)table->size - 1); } else { - unsigned long h = hash(name); - for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? i = table->size - 1 : --i) { + unsigned long h = hash(parser, name); + unsigned long mask = (unsigned long)table->size - 1; + unsigned char step = 0; + i = h & mask; + while (table->v[i]) { if (keyeq(name, table->v[i]->name)) return table->v[i]; + if (!step) + step = PROBE_STEP(h, mask, table->power); + i < step ? (i += table->size - step) : (i -= step); } if (!createSize) return NULL; - if (table->used == table->usedLim) { - /* check for overflow */ - size_t newSize = table->size * 2; + + /* check for overflow (table is half full) */ + if (table->used >> (table->power - 1)) { + unsigned char newPower = table->power + 1; + size_t newSize = (size_t)1 << newPower; + unsigned long newMask = (unsigned long)newSize - 1; size_t tsize = newSize * sizeof(NAMED *); NAMED **newV = (NAMED **)table->mem->malloc_fcn(tsize); if (!newV) @@ -5310,21 +6024,27 @@ lookup(HASH_TABLE *table, KEY name, size_t createSize) memset(newV, 0, tsize); for (i = 0; i < table->size; i++) if (table->v[i]) { - size_t j; - for (j = hash(table->v[i]->name) & (newSize - 1); - newV[j]; - j == 0 ? j = newSize - 1 : --j) - ; + unsigned long newHash = hash(parser, table->v[i]->name); + size_t j = newHash & newMask; + step = 0; + while (newV[j]) { + if (!step) + step = PROBE_STEP(newHash, newMask, newPower); + j < step ? (j += newSize - step) : (j -= step); + } newV[j] = table->v[i]; } table->mem->free_fcn(table->v); table->v = newV; + table->power = newPower; table->size = newSize; - table->usedLim = newSize/2; - for (i = h & (table->size - 1); - table->v[i]; - i == 0 ? i = table->size - 1 : --i) - ; + i = h & newMask; + step = 0; + while (table->v[i]) { + if (!step) + step = PROBE_STEP(h, newMask, newPower); + i < step ? (i += newSize - step) : (i -= step); + } } } table->v[i] = (NAMED *)table->mem->malloc_fcn(createSize); @@ -5341,13 +6061,9 @@ hashTableClear(HASH_TABLE *table) { size_t i; for (i = 0; i < table->size; i++) { - NAMED *p = table->v[i]; - if (p) { - table->mem->free_fcn(p); - table->v[i] = NULL; - } + table->mem->free_fcn(table->v[i]); + table->v[i] = NULL; } - table->usedLim = table->size / 2; table->used = 0; } @@ -5355,20 +6071,16 @@ static void FASTCALL hashTableDestroy(HASH_TABLE *table) { size_t i; - for (i = 0; i < table->size; i++) { - NAMED *p = table->v[i]; - if (p) - table->mem->free_fcn(p); - } - if (table->v) - table->mem->free_fcn(table->v); + for (i = 0; i < table->size; i++) + table->mem->free_fcn(table->v[i]); + table->mem->free_fcn(table->v); } static void FASTCALL hashTableInit(HASH_TABLE *p, const XML_Memory_Handling_Suite *ms) { + p->power = 0; p->size = 0; - p->usedLim = 0; p->used = 0; p->v = NULL; p->mem = ms; @@ -5532,13 +6244,14 @@ poolGrow(STRING_POOL *pool) } } if (pool->blocks && pool->start == pool->blocks->s) { - int blockSize = (pool->end - pool->start)*2; - pool->blocks = (BLOCK *) + int blockSize = (int)(pool->end - pool->start)*2; + BLOCK *temp = (BLOCK *) pool->mem->realloc_fcn(pool->blocks, - (offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char))); - if (pool->blocks == NULL) + (offsetof(BLOCK, s) + + blockSize * sizeof(XML_Char))); + if (temp == NULL) return XML_FALSE; + pool->blocks = temp; pool->blocks->size = blockSize; pool->ptr = pool->blocks->s + (pool->ptr - pool->start); pool->start = pool->blocks->s; @@ -5546,13 +6259,13 @@ poolGrow(STRING_POOL *pool) } else { BLOCK *tem; - int blockSize = pool->end - pool->start; + int blockSize = (int)(pool->end - pool->start); if (blockSize < INIT_BLOCK_SIZE) blockSize = INIT_BLOCK_SIZE; else blockSize *= 2; tem = (BLOCK *)pool->mem->malloc_fcn(offsetof(BLOCK, s) - + blockSize * sizeof(XML_Char)); + + blockSize * sizeof(XML_Char)); if (!tem) return XML_FALSE; tem->size = blockSize; @@ -5687,7 +6400,7 @@ getElementType(XML_Parser parser, if (!name) return NULL; - ret = (ELEMENT_TYPE *) lookup(&dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); + ret = (ELEMENT_TYPE *) lookup(parser, &dtd->elementTypes, name, sizeof(ELEMENT_TYPE)); if (!ret) return NULL; if (ret->name != name)