]>
git.saurik.com Git - wxWidgets.git/blob - src/expat/lib/xmltok.h
/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
   See the file COPYING for copying permission.
*/
   5 #ifndef XmlTok_INCLUDED 
   6 #define XmlTok_INCLUDED 1 
  12 /* The following token may be returned by XmlContentTok */ 
  13 #define XML_TOK_TRAILING_RSQB -5 /* ] or ]] at the end of the scan; might be 
  14                                     start of illegal ]]> sequence */ 
  15 /* The following tokens may be returned by both XmlPrologTok and 
  18 #define XML_TOK_NONE -4          /* The string to be scanned is empty */ 
  19 #define XML_TOK_TRAILING_CR -3   /* A CR at the end of the scan; 
  20                                     might be part of CRLF sequence */ 
  21 #define XML_TOK_PARTIAL_CHAR -2  /* only part of a multibyte sequence */ 
  22 #define XML_TOK_PARTIAL -1       /* only part of a token */ 
  23 #define XML_TOK_INVALID 0 
  25 /* The following tokens are returned by XmlContentTok; some are also 
  26    returned by XmlAttributeValueTok, XmlEntityTok, XmlCdataSectionTok. 
  28 #define XML_TOK_START_TAG_WITH_ATTS 1 
  29 #define XML_TOK_START_TAG_NO_ATTS 2 
  30 #define XML_TOK_EMPTY_ELEMENT_WITH_ATTS 3 /* empty element tag <e/> */ 
  31 #define XML_TOK_EMPTY_ELEMENT_NO_ATTS 4 
  32 #define XML_TOK_END_TAG 5 
  33 #define XML_TOK_DATA_CHARS 6 
  34 #define XML_TOK_DATA_NEWLINE 7 
  35 #define XML_TOK_CDATA_SECT_OPEN 8 
  36 #define XML_TOK_ENTITY_REF 9 
  37 #define XML_TOK_CHAR_REF 10               /* numeric character reference */ 
  39 /* The following tokens may be returned by both XmlPrologTok and 
  42 #define XML_TOK_PI 11                     /* processing instruction */ 
  43 #define XML_TOK_XML_DECL 12               /* XML decl or text decl */ 
  44 #define XML_TOK_COMMENT 13 
  45 #define XML_TOK_BOM 14                    /* Byte order mark */ 
  47 /* The following tokens are returned only by XmlPrologTok */ 
  48 #define XML_TOK_PROLOG_S 15 
  49 #define XML_TOK_DECL_OPEN 16              /* <!foo */ 
  50 #define XML_TOK_DECL_CLOSE 17             /* > */ 
  51 #define XML_TOK_NAME 18 
  52 #define XML_TOK_NMTOKEN 19 
  53 #define XML_TOK_POUND_NAME 20             /* #name */ 
  54 #define XML_TOK_OR 21                     /* | */ 
  55 #define XML_TOK_PERCENT 22 
  56 #define XML_TOK_OPEN_PAREN 23 
  57 #define XML_TOK_CLOSE_PAREN 24 
  58 #define XML_TOK_OPEN_BRACKET 25 
  59 #define XML_TOK_CLOSE_BRACKET 26 
  60 #define XML_TOK_LITERAL 27 
  61 #define XML_TOK_PARAM_ENTITY_REF 28 
  62 #define XML_TOK_INSTANCE_START 29 
  64 /* The following occur only in element type declarations */ 
  65 #define XML_TOK_NAME_QUESTION 30          /* name? */ 
  66 #define XML_TOK_NAME_ASTERISK 31          /* name* */ 
  67 #define XML_TOK_NAME_PLUS 32              /* name+ */ 
  68 #define XML_TOK_COND_SECT_OPEN 33         /* <![ */ 
  69 #define XML_TOK_COND_SECT_CLOSE 34        /* ]]> */ 
  70 #define XML_TOK_CLOSE_PAREN_QUESTION 35   /* )? */ 
  71 #define XML_TOK_CLOSE_PAREN_ASTERISK 36   /* )* */ 
  72 #define XML_TOK_CLOSE_PAREN_PLUS 37       /* )+ */ 
  73 #define XML_TOK_COMMA 38 
  75 /* The following token is returned only by XmlAttributeValueTok */ 
  76 #define XML_TOK_ATTRIBUTE_VALUE_S 39 
  78 /* The following token is returned only by XmlCdataSectionTok */ 
  79 #define XML_TOK_CDATA_SECT_CLOSE 40 
  81 /* With namespace processing this is returned by XmlPrologTok for a 
  84 #define XML_TOK_PREFIXED_NAME 41 
  87 #define XML_TOK_IGNORE_SECT 42 
  91 #define XML_N_STATES 4 
  92 #else /* not XML_DTD */ 
  93 #define XML_N_STATES 3 
  94 #endif /* not XML_DTD */ 
  96 #define XML_PROLOG_STATE 0 
  97 #define XML_CONTENT_STATE 1 
  98 #define XML_CDATA_SECTION_STATE 2 
 100 #define XML_IGNORE_SECTION_STATE 3 
 103 #define XML_N_LITERAL_TYPES 2 
 104 #define XML_ATTRIBUTE_VALUE_LITERAL 0 
 105 #define XML_ENTITY_VALUE_LITERAL 1 
 107 /* The size of the buffer passed to XmlUtf8Encode must be at least this. */ 
 108 #define XML_UTF8_ENCODE_MAX 4 
 109 /* The size of the buffer passed to XmlUtf16Encode must be at least this. */ 
 110 #define XML_UTF16_ENCODE_MAX 2 
 112 typedef struct position 
{ 
 113   /* first line and first column are 0 not 1 */ 
 114   unsigned long lineNumber
; 
 115   unsigned long columnNumber
; 
 120   const char *valuePtr
; 
 121   const char *valueEnd
; 
 126 typedef struct encoding ENCODING
; 
 128 typedef int (PTRCALL 
*SCANNER
)(const ENCODING 
*, 
 134   SCANNER scanners
[XML_N_STATES
]; 
 135   SCANNER literalScanners
[XML_N_LITERAL_TYPES
]; 
 136   int (PTRCALL 
*sameName
)(const ENCODING 
*, 
 139   int (PTRCALL 
*nameMatchesAscii
)(const ENCODING 
*, 
 143   int (PTRFASTCALL 
*nameLength
)(const ENCODING 
*, const char *); 
 144   const char *(PTRFASTCALL 
*skipS
)(const ENCODING 
*, const char *); 
 145   int (PTRCALL 
*getAtts
)(const ENCODING 
*enc
, 
 149   int (PTRFASTCALL 
*charRefNumber
)(const ENCODING 
*enc
, const char *ptr
); 
 150   int (PTRCALL 
*predefinedEntityName
)(const ENCODING 
*, 
 153   void (PTRCALL 
*updatePosition
)(const ENCODING 
*, 
 157   int (PTRCALL 
*isPublicId
)(const ENCODING 
*enc
, 
 160                             const char **badPtr
); 
 161   void (PTRCALL 
*utf8Convert
)(const ENCODING 
*enc
, 
 166   void (PTRCALL 
*utf16Convert
)(const ENCODING 
*enc
, 
 169                                unsigned short **toP
, 
 170                                const unsigned short *toLim
); 
/* Scan the string starting at ptr until the end of the next complete
   token, but do not scan past eptr (the end argument).  Return an
   integer giving the type of token.

   Return XML_TOK_NONE when ptr == eptr; nextTokPtr will not be set.

   Return XML_TOK_PARTIAL when the string does not contain a complete
   token; nextTokPtr will not be set.

   Return XML_TOK_INVALID when the string does not start a valid
   token; nextTokPtr will be set to point to the character which made
   the token invalid.

   Otherwise the string starts with a valid token; nextTokPtr will be
   set to point to the character following the end of that token.

   Each data character counts as a single token, but adjacent data
   characters may be returned together.  Similarly for characters in
   the prolog outside literals, comments and processing instructions.
*/
 198 #define XmlTok(enc, state, ptr, end, nextTokPtr) \ 
 199   (((enc)->scanners[state])(enc, ptr, end, nextTokPtr)) 
 201 #define XmlPrologTok(enc, ptr, end, nextTokPtr) \ 
 202    XmlTok(enc, XML_PROLOG_STATE, ptr, end, nextTokPtr) 
 204 #define XmlContentTok(enc, ptr, end, nextTokPtr) \ 
 205    XmlTok(enc, XML_CONTENT_STATE, ptr, end, nextTokPtr) 
 207 #define XmlCdataSectionTok(enc, ptr, end, nextTokPtr) \ 
 208    XmlTok(enc, XML_CDATA_SECTION_STATE, ptr, end, nextTokPtr) 
 212 #define XmlIgnoreSectionTok(enc, ptr, end, nextTokPtr) \ 
 213    XmlTok(enc, XML_IGNORE_SECTION_STATE, ptr, end, nextTokPtr) 
 217 /* This is used for performing a 2nd-level tokenization on the content 
 218    of a literal that has already been returned by XmlTok. 
 220 #define XmlLiteralTok(enc, literalType, ptr, end, nextTokPtr) \ 
 221   (((enc)->literalScanners[literalType])(enc, ptr, end, nextTokPtr)) 
 223 #define XmlAttributeValueTok(enc, ptr, end, nextTokPtr) \ 
 224    XmlLiteralTok(enc, XML_ATTRIBUTE_VALUE_LITERAL, ptr, end, nextTokPtr) 
 226 #define XmlEntityValueTok(enc, ptr, end, nextTokPtr) \ 
 227    XmlLiteralTok(enc, XML_ENTITY_VALUE_LITERAL, ptr, end, nextTokPtr) 
 229 #define XmlSameName(enc, ptr1, ptr2) (((enc)->sameName)(enc, ptr1, ptr2)) 
 231 #define XmlNameMatchesAscii(enc, ptr1, end1, ptr2) \ 
 232   (((enc)->nameMatchesAscii)(enc, ptr1, end1, ptr2)) 
 234 #define XmlNameLength(enc, ptr) \ 
 235   (((enc)->nameLength)(enc, ptr)) 
 237 #define XmlSkipS(enc, ptr) \ 
 238   (((enc)->skipS)(enc, ptr)) 
 240 #define XmlGetAttributes(enc, ptr, attsMax, atts) \ 
 241   (((enc)->getAtts)(enc, ptr, attsMax, atts)) 
 243 #define XmlCharRefNumber(enc, ptr) \ 
 244   (((enc)->charRefNumber)(enc, ptr)) 
 246 #define XmlPredefinedEntityName(enc, ptr, end) \ 
 247   (((enc)->predefinedEntityName)(enc, ptr, end)) 
 249 #define XmlUpdatePosition(enc, ptr, end, pos) \ 
 250   (((enc)->updatePosition)(enc, ptr, end, pos)) 
 252 #define XmlIsPublicId(enc, ptr, end, badPtr) \ 
 253   (((enc)->isPublicId)(enc, ptr, end, badPtr)) 
 255 #define XmlUtf8Convert(enc, fromP, fromLim, toP, toLim) \ 
 256   (((enc)->utf8Convert)(enc, fromP, fromLim, toP, toLim)) 
 258 #define XmlUtf16Convert(enc, fromP, fromLim, toP, toLim) \ 
 259   (((enc)->utf16Convert)(enc, fromP, fromLim, toP, toLim)) 
 263   const ENCODING 
**encPtr
; 
 266 int XmlParseXmlDecl(int isGeneralTextEntity
, 
 271                     const char **versionPtr
, 
 272                     const char **versionEndPtr
, 
 273                     const char **encodingNamePtr
, 
 274                     const ENCODING 
**namedEncodingPtr
, 
 277 int XmlInitEncoding(INIT_ENCODING 
*, const ENCODING 
**, const char *name
); 
 278 const ENCODING 
*XmlGetUtf8InternalEncoding(void); 
 279 const ENCODING 
*XmlGetUtf16InternalEncoding(void); 
 280 int FASTCALL 
XmlUtf8Encode(int charNumber
, char *buf
); 
 281 int FASTCALL 
XmlUtf16Encode(int charNumber
, unsigned short *buf
); 
 282 int XmlSizeOfUnknownEncoding(void); 
 284 typedef int (*CONVERTER
)(void *userData
, const char *p
); 
 287 XmlInitUnknownEncoding(void *mem
, 
 292 int XmlParseXmlDeclNS(int isGeneralTextEntity
, 
 297                       const char **versionPtr
, 
 298                       const char **versionEndPtr
, 
 299                       const char **encodingNamePtr
, 
 300                       const ENCODING 
**namedEncodingPtr
, 
 303 int XmlInitEncodingNS(INIT_ENCODING 
*, const ENCODING 
**, const char *name
); 
 304 const ENCODING 
*XmlGetUtf8InternalEncodingNS(void); 
 305 const ENCODING 
*XmlGetUtf16InternalEncodingNS(void); 
 307 XmlInitUnknownEncodingNS(void *mem
, 
 315 #endif /* not XmlTok_INCLUDED */