1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 
   2    See the file COPYING for copying permission. 
   5 /* This file is included! */ 
   8 #ifndef IS_INVALID_CHAR 
   9 #define IS_INVALID_CHAR(enc, ptr, n) (0) 
  12 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 
  15         return XML_TOK_PARTIAL_CHAR; \ 
  16       if (IS_INVALID_CHAR(enc, ptr, n)) { \ 
  17         *(nextTokPtr) = (ptr); \ 
  18         return XML_TOK_INVALID; \ 
  23 #define INVALID_CASES(ptr, nextTokPtr) \ 
  24   INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 
  25   INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 
  26   INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 
  30     *(nextTokPtr) = (ptr); \ 
  31     return XML_TOK_INVALID; 
  33 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 
  36        return XML_TOK_PARTIAL_CHAR; \ 
  37      if (!IS_NAME_CHAR(enc, ptr, n)) { \ 
  39        return XML_TOK_INVALID; \ 
  44 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 
  46     if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 
  48       return XML_TOK_INVALID; \ 
  57   CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 
  58   CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 
  59   CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 
  61 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 
  64        return XML_TOK_PARTIAL_CHAR; \ 
  65      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ 
  67        return XML_TOK_INVALID; \ 
  72 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 
  74     if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 
  76       return XML_TOK_INVALID; \ 
  82   CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 
  83   CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 
  84   CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 
  87 #define PREFIX(ident) ident 
  90 /* ptr points to character following "<!-" */ 
  93 PREFIX(scanComment
)(const ENCODING 
*enc
, const char *ptr
, 
  94                     const char *end
, const char **nextTokPtr
) 
  97     if (!CHAR_MATCHES(enc
, ptr
, ASCII_MINUS
)) { 
  99       return XML_TOK_INVALID
; 
 103       switch (BYTE_TYPE(enc
, ptr
)) { 
 104       INVALID_CASES(ptr
, nextTokPtr
) 
 106         if ((ptr 
+= MINBPC(enc
)) == end
) 
 107           return XML_TOK_PARTIAL
; 
 108         if (CHAR_MATCHES(enc
, ptr
, ASCII_MINUS
)) { 
 109           if ((ptr 
+= MINBPC(enc
)) == end
) 
 110             return XML_TOK_PARTIAL
; 
 111           if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 113             return XML_TOK_INVALID
; 
 115           *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 116           return XML_TOK_COMMENT
; 
 125   return XML_TOK_PARTIAL
; 
 128 /* ptr points to character following "<!" */ 
 131 PREFIX(scanDecl
)(const ENCODING 
*enc
, const char *ptr
, 
 132                  const char *end
, const char **nextTokPtr
) 
 135     return XML_TOK_PARTIAL
; 
 136   switch (BYTE_TYPE(enc
, ptr
)) { 
 138     return PREFIX(scanComment
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 140     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 141     return XML_TOK_COND_SECT_OPEN
; 
 148     return XML_TOK_INVALID
; 
 151     switch (BYTE_TYPE(enc
, ptr
)) { 
 153       if (ptr 
+ MINBPC(enc
) == end
) 
 154         return XML_TOK_PARTIAL
; 
 155       /* don't allow <!ENTITY% foo "whatever"> */ 
 156       switch (BYTE_TYPE(enc
, ptr 
+ MINBPC(enc
))) { 
 157       case BT_S
: case BT_CR
: case BT_LF
: case BT_PERCNT
: 
 159         return XML_TOK_INVALID
; 
 162     case BT_S
: case BT_CR
: case BT_LF
: 
 164       return XML_TOK_DECL_OPEN
; 
 171       return XML_TOK_INVALID
; 
 174   return XML_TOK_PARTIAL
; 
 178 PREFIX(checkPiTarget
)(const ENCODING 
*enc
, const char *ptr
, 
 179                       const char *end
, int *tokPtr
) 
 182   *tokPtr 
= XML_TOK_PI
; 
 183   if (end 
- ptr 
!= MINBPC(enc
)*3) 
 185   switch (BYTE_TO_ASCII(enc
, ptr
)) { 
 195   switch (BYTE_TO_ASCII(enc
, ptr
)) { 
 205   switch (BYTE_TO_ASCII(enc
, ptr
)) { 
 216   *tokPtr 
= XML_TOK_XML_DECL
; 
 220 /* ptr points to character following "<?" */ 
 223 PREFIX(scanPi
)(const ENCODING 
*enc
, const char *ptr
, 
 224                const char *end
, const char **nextTokPtr
) 
 227   const char *target 
= ptr
; 
 229     return XML_TOK_PARTIAL
; 
 230   switch (BYTE_TYPE(enc
, ptr
)) { 
 231   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 234     return XML_TOK_INVALID
; 
 237     switch (BYTE_TYPE(enc
, ptr
)) { 
 238     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 239     case BT_S
: case BT_CR
: case BT_LF
: 
 240       if (!PREFIX(checkPiTarget
)(enc
, target
, ptr
, &tok
)) { 
 242         return XML_TOK_INVALID
; 
 246         switch (BYTE_TYPE(enc
, ptr
)) { 
 247         INVALID_CASES(ptr
, nextTokPtr
) 
 251             return XML_TOK_PARTIAL
; 
 252           if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 253             *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 262       return XML_TOK_PARTIAL
; 
 264       if (!PREFIX(checkPiTarget
)(enc
, target
, ptr
, &tok
)) { 
 266         return XML_TOK_INVALID
; 
 270         return XML_TOK_PARTIAL
; 
 271       if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 272         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 278       return XML_TOK_INVALID
; 
 281   return XML_TOK_PARTIAL
; 
 285 PREFIX(scanCdataSection
)(const ENCODING 
*enc
, const char *ptr
, 
 286                          const char *end
, const char **nextTokPtr
) 
 288   static const char CDATA_LSQB
[] = { ASCII_C
, ASCII_D
, ASCII_A
, 
 289                                      ASCII_T
, ASCII_A
, ASCII_LSQB 
}; 
 292   if (end 
- ptr 
< 6 * MINBPC(enc
)) 
 293     return XML_TOK_PARTIAL
; 
 294   for (i 
= 0; i 
< 6; i
++, ptr 
+= MINBPC(enc
)) { 
 295     if (!CHAR_MATCHES(enc
, ptr
, CDATA_LSQB
[i
])) { 
 297       return XML_TOK_INVALID
; 
 301   return XML_TOK_CDATA_SECT_OPEN
; 
 305 PREFIX(cdataSectionTok
)(const ENCODING 
*enc
, const char *ptr
, 
 306                         const char *end
, const char **nextTokPtr
) 
 310   if (MINBPC(enc
) > 1) { 
 311     size_t n 
= end 
- ptr
; 
 312     if (n 
& (MINBPC(enc
) - 1)) { 
 313       n 
&= ~(MINBPC(enc
) - 1); 
 315         return XML_TOK_PARTIAL
; 
 319   switch (BYTE_TYPE(enc
, ptr
)) { 
 323       return XML_TOK_PARTIAL
; 
 324     if (!CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) 
 328       return XML_TOK_PARTIAL
; 
 329     if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 333     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 334     return XML_TOK_CDATA_SECT_CLOSE
; 
 338       return XML_TOK_PARTIAL
; 
 339     if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
 342     return XML_TOK_DATA_NEWLINE
; 
 344     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 345     return XML_TOK_DATA_NEWLINE
; 
 346   INVALID_CASES(ptr
, nextTokPtr
) 
 352     switch (BYTE_TYPE(enc
, ptr
)) { 
 353 #define LEAD_CASE(n) \ 
 355       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 
 357         return XML_TOK_DATA_CHARS; \ 
 361     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
 370       return XML_TOK_DATA_CHARS
; 
 377   return XML_TOK_DATA_CHARS
; 
 380 /* ptr points to character following "</" */ 
 383 PREFIX(scanEndTag
)(const ENCODING 
*enc
, const char *ptr
, 
 384                    const char *end
, const char **nextTokPtr
) 
 387     return XML_TOK_PARTIAL
; 
 388   switch (BYTE_TYPE(enc
, ptr
)) { 
 389   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 392     return XML_TOK_INVALID
; 
 395     switch (BYTE_TYPE(enc
, ptr
)) { 
 396     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 397     case BT_S
: case BT_CR
: case BT_LF
: 
 398       for (ptr 
+= MINBPC(enc
); ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
 399         switch (BYTE_TYPE(enc
, ptr
)) { 
 400         case BT_S
: case BT_CR
: case BT_LF
: 
 403           *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 404           return XML_TOK_END_TAG
; 
 407           return XML_TOK_INVALID
; 
 410       return XML_TOK_PARTIAL
; 
 413       /* no need to check qname syntax here, 
 414          since end-tag must match exactly */ 
 419       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 420       return XML_TOK_END_TAG
; 
 423       return XML_TOK_INVALID
; 
 426   return XML_TOK_PARTIAL
; 
 429 /* ptr points to character following "&#X" */ 
 432 PREFIX(scanHexCharRef
)(const ENCODING 
*enc
, const char *ptr
, 
 433                        const char *end
, const char **nextTokPtr
) 
 436     switch (BYTE_TYPE(enc
, ptr
)) { 
 442       return XML_TOK_INVALID
; 
 444     for (ptr 
+= MINBPC(enc
); ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
 445       switch (BYTE_TYPE(enc
, ptr
)) { 
 450         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 451         return XML_TOK_CHAR_REF
; 
 454         return XML_TOK_INVALID
; 
 458   return XML_TOK_PARTIAL
; 
 461 /* ptr points to character following "&#" */ 
 464 PREFIX(scanCharRef
)(const ENCODING 
*enc
, const char *ptr
, 
 465                     const char *end
, const char **nextTokPtr
) 
 468     if (CHAR_MATCHES(enc
, ptr
, ASCII_x
)) 
 469       return PREFIX(scanHexCharRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 470     switch (BYTE_TYPE(enc
, ptr
)) { 
 475       return XML_TOK_INVALID
; 
 477     for (ptr 
+= MINBPC(enc
); ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
 478       switch (BYTE_TYPE(enc
, ptr
)) { 
 482         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 483         return XML_TOK_CHAR_REF
; 
 486         return XML_TOK_INVALID
; 
 490   return XML_TOK_PARTIAL
; 
 493 /* ptr points to character following "&" */ 
 496 PREFIX(scanRef
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 497                 const char **nextTokPtr
) 
 500     return XML_TOK_PARTIAL
; 
 501   switch (BYTE_TYPE(enc
, ptr
)) { 
 502   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 504     return PREFIX(scanCharRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 507     return XML_TOK_INVALID
; 
 510     switch (BYTE_TYPE(enc
, ptr
)) { 
 511     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 513       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 514       return XML_TOK_ENTITY_REF
; 
 517       return XML_TOK_INVALID
; 
 520   return XML_TOK_PARTIAL
; 
 523 /* ptr points to character following first character of attribute name */ 
 526 PREFIX(scanAtts
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 527                  const char **nextTokPtr
) 
 533     switch (BYTE_TYPE(enc
, ptr
)) { 
 534     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 539         return XML_TOK_INVALID
; 
 544         return XML_TOK_PARTIAL
; 
 545       switch (BYTE_TYPE(enc
, ptr
)) { 
 546       CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 549         return XML_TOK_INVALID
; 
 553     case BT_S
: case BT_CR
: case BT_LF
: 
 559           return XML_TOK_PARTIAL
; 
 560         t 
= BYTE_TYPE(enc
, ptr
); 
 570           return XML_TOK_INVALID
; 
 583             return XML_TOK_PARTIAL
; 
 584           open 
= BYTE_TYPE(enc
, ptr
); 
 585           if (open 
== BT_QUOT 
|| open 
== BT_APOS
) 
 594             return XML_TOK_INVALID
; 
 598         /* in attribute value */ 
 602             return XML_TOK_PARTIAL
; 
 603           t 
= BYTE_TYPE(enc
, ptr
); 
 607           INVALID_CASES(ptr
, nextTokPtr
) 
 610               int tok 
= PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, &ptr
); 
 612                 if (tok 
== XML_TOK_INVALID
) 
 620             return XML_TOK_INVALID
; 
 628           return XML_TOK_PARTIAL
; 
 629         switch (BYTE_TYPE(enc
, ptr
)) { 
 640           return XML_TOK_INVALID
; 
 642         /* ptr points to closing quote */ 
 646             return XML_TOK_PARTIAL
; 
 647           switch (BYTE_TYPE(enc
, ptr
)) { 
 648           CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 649           case BT_S
: case BT_CR
: case BT_LF
: 
 653             *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 654             return XML_TOK_START_TAG_WITH_ATTS
; 
 659               return XML_TOK_PARTIAL
; 
 660             if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 662               return XML_TOK_INVALID
; 
 664             *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 665             return XML_TOK_EMPTY_ELEMENT_WITH_ATTS
; 
 668             return XML_TOK_INVALID
; 
 676       return XML_TOK_INVALID
; 
 679   return XML_TOK_PARTIAL
; 
 682 /* ptr points to character following "<" */ 
 685 PREFIX(scanLt
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 686                const char **nextTokPtr
) 
 692     return XML_TOK_PARTIAL
; 
 693   switch (BYTE_TYPE(enc
, ptr
)) { 
 694   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 696     if ((ptr 
+= MINBPC(enc
)) == end
) 
 697       return XML_TOK_PARTIAL
; 
 698     switch (BYTE_TYPE(enc
, ptr
)) { 
 700       return PREFIX(scanComment
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 702       return PREFIX(scanCdataSection
)(enc
, ptr 
+ MINBPC(enc
), 
 706     return XML_TOK_INVALID
; 
 708     return PREFIX(scanPi
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 710     return PREFIX(scanEndTag
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 713     return XML_TOK_INVALID
; 
 718   /* we have a start-tag */ 
 720     switch (BYTE_TYPE(enc
, ptr
)) { 
 721     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 726         return XML_TOK_INVALID
; 
 731         return XML_TOK_PARTIAL
; 
 732       switch (BYTE_TYPE(enc
, ptr
)) { 
 733       CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 736         return XML_TOK_INVALID
; 
 740     case BT_S
: case BT_CR
: case BT_LF
: 
 744           switch (BYTE_TYPE(enc
, ptr
)) { 
 745           CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 750           case BT_S
: case BT_CR
: case BT_LF
: 
 755             return XML_TOK_INVALID
; 
 757           return PREFIX(scanAtts
)(enc
, ptr
, end
, nextTokPtr
); 
 759         return XML_TOK_PARTIAL
; 
 763       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 764       return XML_TOK_START_TAG_NO_ATTS
; 
 769         return XML_TOK_PARTIAL
; 
 770       if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 772         return XML_TOK_INVALID
; 
 774       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 775       return XML_TOK_EMPTY_ELEMENT_NO_ATTS
; 
 778       return XML_TOK_INVALID
; 
 781   return XML_TOK_PARTIAL
; 
 785 PREFIX(contentTok
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 786                    const char **nextTokPtr
) 
 790   if (MINBPC(enc
) > 1) { 
 791     size_t n 
= end 
- ptr
; 
 792     if (n 
& (MINBPC(enc
) - 1)) { 
 793       n 
&= ~(MINBPC(enc
) - 1); 
 795         return XML_TOK_PARTIAL
; 
 799   switch (BYTE_TYPE(enc
, ptr
)) { 
 801     return PREFIX(scanLt
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 803     return PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 807       return XML_TOK_TRAILING_CR
; 
 808     if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
 811     return XML_TOK_DATA_NEWLINE
; 
 813     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 814     return XML_TOK_DATA_NEWLINE
; 
 818       return XML_TOK_TRAILING_RSQB
; 
 819     if (!CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) 
 823       return XML_TOK_TRAILING_RSQB
; 
 824     if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 829     return XML_TOK_INVALID
; 
 830   INVALID_CASES(ptr
, nextTokPtr
) 
 836     switch (BYTE_TYPE(enc
, ptr
)) { 
 837 #define LEAD_CASE(n) \ 
 839       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 
 841         return XML_TOK_DATA_CHARS; \ 
 845     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
 848       if (ptr 
+ MINBPC(enc
) != end
) { 
 849          if (!CHAR_MATCHES(enc
, ptr 
+ MINBPC(enc
), ASCII_RSQB
)) { 
 853          if (ptr 
+ 2*MINBPC(enc
) != end
) { 
 854            if (!CHAR_MATCHES(enc
, ptr 
+ 2*MINBPC(enc
), ASCII_GT
)) { 
 858            *nextTokPtr 
= ptr 
+ 2*MINBPC(enc
); 
 859            return XML_TOK_INVALID
; 
 871       return XML_TOK_DATA_CHARS
; 
 878   return XML_TOK_DATA_CHARS
; 
 881 /* ptr points to character following "%" */ 
 884 PREFIX(scanPercent
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 885                     const char **nextTokPtr
) 
 888     return XML_TOK_PARTIAL
; 
 889   switch (BYTE_TYPE(enc
, ptr
)) { 
 890   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 891   case BT_S
: case BT_LF
: case BT_CR
: case BT_PERCNT
: 
 893     return XML_TOK_PERCENT
; 
 896     return XML_TOK_INVALID
; 
 899     switch (BYTE_TYPE(enc
, ptr
)) { 
 900     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 902       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 903       return XML_TOK_PARAM_ENTITY_REF
; 
 906       return XML_TOK_INVALID
; 
 909   return XML_TOK_PARTIAL
; 
 913 PREFIX(scanPoundName
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 914                       const char **nextTokPtr
) 
 917     return XML_TOK_PARTIAL
; 
 918   switch (BYTE_TYPE(enc
, ptr
)) { 
 919   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 922     return XML_TOK_INVALID
; 
 925     switch (BYTE_TYPE(enc
, ptr
)) { 
 926     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 927     case BT_CR
: case BT_LF
: case BT_S
: 
 928     case BT_RPAR
: case BT_GT
: case BT_PERCNT
: case BT_VERBAR
: 
 930       return XML_TOK_POUND_NAME
; 
 933       return XML_TOK_INVALID
; 
 936   return -XML_TOK_POUND_NAME
; 
 940 PREFIX(scanLit
)(int open
, const ENCODING 
*enc
, 
 941                 const char *ptr
, const char *end
, 
 942                 const char **nextTokPtr
) 
 945     int t 
= BYTE_TYPE(enc
, ptr
); 
 947     INVALID_CASES(ptr
, nextTokPtr
) 
 954         return -XML_TOK_LITERAL
; 
 956       switch (BYTE_TYPE(enc
, ptr
)) { 
 957       case BT_S
: case BT_CR
: case BT_LF
: 
 958       case BT_GT
: case BT_PERCNT
: case BT_LSQB
: 
 959         return XML_TOK_LITERAL
; 
 961         return XML_TOK_INVALID
; 
 968   return XML_TOK_PARTIAL
; 
 972 PREFIX(prologTok
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 973                   const char **nextTokPtr
) 
 978   if (MINBPC(enc
) > 1) { 
 979     size_t n 
= end 
- ptr
; 
 980     if (n 
& (MINBPC(enc
) - 1)) { 
 981       n 
&= ~(MINBPC(enc
) - 1); 
 983         return XML_TOK_PARTIAL
; 
 987   switch (BYTE_TYPE(enc
, ptr
)) { 
 989     return PREFIX(scanLit
)(BT_QUOT
, enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 991     return PREFIX(scanLit
)(BT_APOS
, enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 996         return XML_TOK_PARTIAL
; 
 997       switch (BYTE_TYPE(enc
, ptr
)) { 
 999         return PREFIX(scanDecl
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1001         return PREFIX(scanPi
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1008         *nextTokPtr 
= ptr 
- MINBPC(enc
); 
1009         return XML_TOK_INSTANCE_START
; 
1012       return XML_TOK_INVALID
; 
1015     if (ptr 
+ MINBPC(enc
) == end
) { 
1017       /* indicate that this might be part of a CR/LF pair */ 
1018       return -XML_TOK_PROLOG_S
; 
1021   case BT_S
: case BT_LF
: 
1026       switch (BYTE_TYPE(enc
, ptr
)) { 
1027       case BT_S
: case BT_LF
: 
1030         /* don't split CR/LF pair */ 
1031         if (ptr 
+ MINBPC(enc
) != end
) 
1036         return XML_TOK_PROLOG_S
; 
1040     return XML_TOK_PROLOG_S
; 
1042     return PREFIX(scanPercent
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1044     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1045     return XML_TOK_COMMA
; 
1047     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1048     return XML_TOK_OPEN_BRACKET
; 
1052       return -XML_TOK_CLOSE_BRACKET
; 
1053     if (CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) { 
1054       if (ptr 
+ MINBPC(enc
) == end
) 
1055         return XML_TOK_PARTIAL
; 
1056       if (CHAR_MATCHES(enc
, ptr 
+ MINBPC(enc
), ASCII_GT
)) { 
1057         *nextTokPtr 
= ptr 
+ 2*MINBPC(enc
); 
1058         return XML_TOK_COND_SECT_CLOSE
; 
1062     return XML_TOK_CLOSE_BRACKET
; 
1064     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1065     return XML_TOK_OPEN_PAREN
; 
1069       return -XML_TOK_CLOSE_PAREN
; 
1070     switch (BYTE_TYPE(enc
, ptr
)) { 
1072       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1073       return XML_TOK_CLOSE_PAREN_ASTERISK
; 
1075       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1076       return XML_TOK_CLOSE_PAREN_QUESTION
; 
1078       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1079       return XML_TOK_CLOSE_PAREN_PLUS
; 
1080     case BT_CR
: case BT_LF
: case BT_S
: 
1081     case BT_GT
: case BT_COMMA
: case BT_VERBAR
: 
1084       return XML_TOK_CLOSE_PAREN
; 
1087     return XML_TOK_INVALID
; 
1089     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1092     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1093     return XML_TOK_DECL_CLOSE
; 
1095     return PREFIX(scanPoundName
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1096 #define LEAD_CASE(n) \ 
1097   case BT_LEAD ## n: \ 
1098     if (end - ptr < n) \ 
1099       return XML_TOK_PARTIAL_CHAR; \ 
1100     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 
1102       tok = XML_TOK_NAME; \ 
1105     if (IS_NAME_CHAR(enc, ptr, n)) { \ 
1107       tok = XML_TOK_NMTOKEN; \ 
1110     *nextTokPtr = ptr; \ 
1111     return XML_TOK_INVALID; 
1112     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1125     tok 
= XML_TOK_NMTOKEN
; 
1129     if (IS_NMSTRT_CHAR_MINBPC(enc
, ptr
)) { 
1134     if (IS_NAME_CHAR_MINBPC(enc
, ptr
)) { 
1136       tok 
= XML_TOK_NMTOKEN
; 
1142     return XML_TOK_INVALID
; 
1144   while (ptr 
!= end
) { 
1145     switch (BYTE_TYPE(enc
, ptr
)) { 
1146     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
1147     case BT_GT
: case BT_RPAR
: case BT_COMMA
: 
1148     case BT_VERBAR
: case BT_LSQB
: case BT_PERCNT
: 
1149     case BT_S
: case BT_CR
: case BT_LF
: 
1158           return XML_TOK_PARTIAL
; 
1159         tok 
= XML_TOK_PREFIXED_NAME
; 
1160         switch (BYTE_TYPE(enc
, ptr
)) { 
1161         CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
1163           tok 
= XML_TOK_NMTOKEN
; 
1167       case XML_TOK_PREFIXED_NAME
: 
1168         tok 
= XML_TOK_NMTOKEN
; 
1174       if (tok 
== XML_TOK_NMTOKEN
)  { 
1176         return XML_TOK_INVALID
; 
1178       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1179       return XML_TOK_NAME_PLUS
; 
1181       if (tok 
== XML_TOK_NMTOKEN
)  { 
1183         return XML_TOK_INVALID
; 
1185       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1186       return XML_TOK_NAME_ASTERISK
; 
1188       if (tok 
== XML_TOK_NMTOKEN
)  { 
1190         return XML_TOK_INVALID
; 
1192       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1193       return XML_TOK_NAME_QUESTION
; 
1196       return XML_TOK_INVALID
; 
1203 PREFIX(attributeValueTok
)(const ENCODING 
*enc
, const char *ptr
, 
1204                           const char *end
, const char **nextTokPtr
) 
1208     return XML_TOK_NONE
; 
1210   while (ptr 
!= end
) { 
1211     switch (BYTE_TYPE(enc
, ptr
)) { 
1212 #define LEAD_CASE(n) \ 
1213     case BT_LEAD ## n: ptr += n; break; 
1214     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1218         return PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1220       return XML_TOK_DATA_CHARS
; 
1222       /* this is for inside entity references */ 
1224       return XML_TOK_INVALID
; 
1227         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1228         return XML_TOK_DATA_NEWLINE
; 
1231       return XML_TOK_DATA_CHARS
; 
1236           return XML_TOK_TRAILING_CR
; 
1237         if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
1240         return XML_TOK_DATA_NEWLINE
; 
1243       return XML_TOK_DATA_CHARS
; 
1246         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1247         return XML_TOK_ATTRIBUTE_VALUE_S
; 
1250       return XML_TOK_DATA_CHARS
; 
1257   return XML_TOK_DATA_CHARS
; 
1261 PREFIX(entityValueTok
)(const ENCODING 
*enc
, const char *ptr
, 
1262                        const char *end
, const char **nextTokPtr
) 
1266     return XML_TOK_NONE
; 
1268   while (ptr 
!= end
) { 
1269     switch (BYTE_TYPE(enc
, ptr
)) { 
1270 #define LEAD_CASE(n) \ 
1271     case BT_LEAD ## n: ptr += n; break; 
1272     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1276         return PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1278       return XML_TOK_DATA_CHARS
; 
1281         int tok 
=  PREFIX(scanPercent
)(enc
, ptr 
+ MINBPC(enc
), 
1283         return (tok 
== XML_TOK_PERCENT
) ? XML_TOK_INVALID 
: tok
; 
1286       return XML_TOK_DATA_CHARS
; 
1289         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1290         return XML_TOK_DATA_NEWLINE
; 
1293       return XML_TOK_DATA_CHARS
; 
1298           return XML_TOK_TRAILING_CR
; 
1299         if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
1302         return XML_TOK_DATA_NEWLINE
; 
1305       return XML_TOK_DATA_CHARS
; 
1312   return XML_TOK_DATA_CHARS
; 
1318 PREFIX(ignoreSectionTok
)(const ENCODING 
*enc
, const char *ptr
, 
1319                          const char *end
, const char **nextTokPtr
) 
1322   if (MINBPC(enc
) > 1) { 
1323     size_t n 
= end 
- ptr
; 
1324     if (n 
& (MINBPC(enc
) - 1)) { 
1325       n 
&= ~(MINBPC(enc
) - 1); 
1329   while (ptr 
!= end
) { 
1330     switch (BYTE_TYPE(enc
, ptr
)) { 
1331     INVALID_CASES(ptr
, nextTokPtr
) 
1333       if ((ptr 
+= MINBPC(enc
)) == end
) 
1334         return XML_TOK_PARTIAL
; 
1335       if (CHAR_MATCHES(enc
, ptr
, ASCII_EXCL
)) { 
1336         if ((ptr 
+= MINBPC(enc
)) == end
) 
1337           return XML_TOK_PARTIAL
; 
1338         if (CHAR_MATCHES(enc
, ptr
, ASCII_LSQB
)) { 
1345       if ((ptr 
+= MINBPC(enc
)) == end
) 
1346         return XML_TOK_PARTIAL
; 
1347       if (CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) { 
1348         if ((ptr 
+= MINBPC(enc
)) == end
) 
1349           return XML_TOK_PARTIAL
; 
1350         if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
1354             return XML_TOK_IGNORE_SECT
; 
1365   return XML_TOK_PARTIAL
; 
1368 #endif /* XML_DTD */ 
1371 PREFIX(isPublicId
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
1372                    const char **badPtr
) 
1376   for (; ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
1377     switch (BYTE_TYPE(enc
, ptr
)) { 
1401       if (CHAR_MATCHES(enc
, ptr
, ASCII_TAB
)) { 
1408       if (!(BYTE_TO_ASCII(enc
, ptr
) & ~0x7f)) 
1411       switch (BYTE_TO_ASCII(enc
, ptr
)) { 
1425 /* This must only be called for a well-formed start-tag or empty 
1426    element tag.  Returns the number of attributes.  Pointers to the 
1427    first attsMax attributes are stored in atts. 
1431 PREFIX(getAtts
)(const ENCODING 
*enc
, const char *ptr
, 
1432                 int attsMax
, ATTRIBUTE 
*atts
) 
1434   enum { other
, inName
, inValue 
} state 
= inName
; 
1436   int open 
= 0; /* defined when state == inValue; 
1437                    initialization just to shut up compilers */ 
1439   for (ptr 
+= MINBPC(enc
);; ptr 
+= MINBPC(enc
)) { 
1440     switch (BYTE_TYPE(enc
, ptr
)) { 
1441 #define START_NAME \ 
1442       if (state == other) { \ 
1443         if (nAtts < attsMax) { \ 
1444           atts[nAtts].name = ptr; \ 
1445           atts[nAtts].normalized = 1; \ 
1449 #define LEAD_CASE(n) \ 
1450     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; 
1451     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1460       if (state 
!= inValue
) { 
1461         if (nAtts 
< attsMax
) 
1462           atts
[nAtts
].valuePtr 
= ptr 
+ MINBPC(enc
); 
1466       else if (open 
== BT_QUOT
) { 
1468         if (nAtts 
< attsMax
) 
1469           atts
[nAtts
].valueEnd 
= ptr
; 
1474       if (state 
!= inValue
) { 
1475         if (nAtts 
< attsMax
) 
1476           atts
[nAtts
].valuePtr 
= ptr 
+ MINBPC(enc
); 
1480       else if (open 
== BT_APOS
) { 
1482         if (nAtts 
< attsMax
) 
1483           atts
[nAtts
].valueEnd 
= ptr
; 
1488       if (nAtts 
< attsMax
) 
1489         atts
[nAtts
].normalized 
= 0; 
1492       if (state 
== inName
) 
1494       else if (state 
== inValue
 
1496                && atts
[nAtts
].normalized
 
1497                && (ptr 
== atts
[nAtts
].valuePtr
 
1498                    || BYTE_TO_ASCII(enc
, ptr
) != ASCII_SPACE
 
1499                    || BYTE_TO_ASCII(enc
, ptr 
+ MINBPC(enc
)) == ASCII_SPACE
 
1500                    || BYTE_TYPE(enc
, ptr 
+ MINBPC(enc
)) == open
)) 
1501         atts
[nAtts
].normalized 
= 0; 
1503     case BT_CR
: case BT_LF
: 
1504       /* This case ensures that the first attribute name is counted 
1505          Apart from that we could just change state on the quote. */ 
1506       if (state 
== inName
) 
1508       else if (state 
== inValue 
&& nAtts 
< attsMax
) 
1509         atts
[nAtts
].normalized 
= 0; 
1513       if (state 
!= inValue
) 
1523 static int PTRFASTCALL
 
1524 PREFIX(charRefNumber
)(const ENCODING 
*enc
, const char *ptr
) 
1528   ptr 
+= 2*MINBPC(enc
); 
1529   if (CHAR_MATCHES(enc
, ptr
, ASCII_x
)) { 
1530     for (ptr 
+= MINBPC(enc
); 
1531          !CHAR_MATCHES(enc
, ptr
, ASCII_SEMI
); 
1532          ptr 
+= MINBPC(enc
)) { 
1533       int c 
= BYTE_TO_ASCII(enc
, ptr
); 
1535       case ASCII_0
: case ASCII_1
: case ASCII_2
: case ASCII_3
: case ASCII_4
: 
1536       case ASCII_5
: case ASCII_6
: case ASCII_7
: case ASCII_8
: case ASCII_9
: 
1538         result 
|= (c 
- ASCII_0
); 
1540       case ASCII_A
: case ASCII_B
: case ASCII_C
: 
1541       case ASCII_D
: case ASCII_E
: case ASCII_F
: 
1543         result 
+= 10 + (c 
- ASCII_A
); 
1545       case ASCII_a
: case ASCII_b
: case ASCII_c
: 
1546       case ASCII_d
: case ASCII_e
: case ASCII_f
: 
1548         result 
+= 10 + (c 
- ASCII_a
); 
1551       if (result 
>= 0x110000) 
1556     for (; !CHAR_MATCHES(enc
, ptr
, ASCII_SEMI
); ptr 
+= MINBPC(enc
)) { 
1557       int c 
= BYTE_TO_ASCII(enc
, ptr
); 
1559       result 
+= (c 
- ASCII_0
); 
1560       if (result 
>= 0x110000) 
1564   return checkCharRefNumber(result
); 
1568 PREFIX(predefinedEntityName
)(const ENCODING 
*enc
, const char *ptr
, 
1571   switch ((end 
- ptr
)/MINBPC(enc
)) { 
1573     if (CHAR_MATCHES(enc
, ptr 
+ MINBPC(enc
), ASCII_t
)) { 
1574       switch (BYTE_TO_ASCII(enc
, ptr
)) { 
1583     if (CHAR_MATCHES(enc
, ptr
, ASCII_a
)) { 
1585       if (CHAR_MATCHES(enc
, ptr
, ASCII_m
)) { 
1587         if (CHAR_MATCHES(enc
, ptr
, ASCII_p
)) 
1593     switch (BYTE_TO_ASCII(enc
, ptr
)) { 
1596       if (CHAR_MATCHES(enc
, ptr
, ASCII_u
)) { 
1598         if (CHAR_MATCHES(enc
, ptr
, ASCII_o
)) { 
1600           if (CHAR_MATCHES(enc
, ptr
, ASCII_t
)) 
1607       if (CHAR_MATCHES(enc
, ptr
, ASCII_p
)) { 
1609         if (CHAR_MATCHES(enc
, ptr
, ASCII_o
)) { 
1611           if (CHAR_MATCHES(enc
, ptr
, ASCII_s
)) 
1622 PREFIX(sameName
)(const ENCODING 
*enc
, const char *ptr1
, const char *ptr2
) 
1625     switch (BYTE_TYPE(enc
, ptr1
)) { 
1626 #define LEAD_CASE(n) \ 
1627     case BT_LEAD ## n: \ 
1628       if (*ptr1++ != *ptr2++) \ 
1630     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) 
1633       if (*ptr1
++ != *ptr2
++) 
1645       if (*ptr2
++ != *ptr1
++) 
1647       if (MINBPC(enc
) > 1) { 
1648         if (*ptr2
++ != *ptr1
++) 
1650         if (MINBPC(enc
) > 2) { 
1651           if (*ptr2
++ != *ptr1
++) 
1653           if (MINBPC(enc
) > 3) { 
1654             if (*ptr2
++ != *ptr1
++) 
1661       if (MINBPC(enc
) == 1 && *ptr1 
== *ptr2
) 
1663       switch (BYTE_TYPE(enc
, ptr2
)) { 
1686 PREFIX(nameMatchesAscii
)(const ENCODING 
*enc
, const char *ptr1
, 
1687                          const char *end1
, const char *ptr2
) 
1689   for (; *ptr2
; ptr1 
+= MINBPC(enc
), ptr2
++) { 
1692     if (!CHAR_MATCHES(enc
, ptr1
, *ptr2
)) 
1695   return ptr1 
== end1
; 
1698 static int PTRFASTCALL
 
1699 PREFIX(nameLength
)(const ENCODING 
*enc
, const char *ptr
) 
1701   const char *start 
= ptr
; 
1703     switch (BYTE_TYPE(enc
, ptr
)) { 
1704 #define LEAD_CASE(n) \ 
1705     case BT_LEAD ## n: ptr += n; break; 
1706     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1720       return (int)(ptr 
- start
); 
1725 static const char * PTRFASTCALL
 
1726 PREFIX(skipS
)(const ENCODING 
*enc
, const char *ptr
) 
1729     switch (BYTE_TYPE(enc
, ptr
)) { 
1742 PREFIX(updatePosition
)(const ENCODING 
*enc
, 
1748     switch (BYTE_TYPE(enc
, ptr
)) { 
1749 #define LEAD_CASE(n) \ 
1750     case BT_LEAD ## n: \ 
1753     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1756       pos
->columnNumber 
= (XML_Size
)-1; 
1763       if (ptr 
!= end 
&& BYTE_TYPE(enc
, ptr
) == BT_LF
) 
1765       pos
->columnNumber 
= (XML_Size
)-1; 
1771     pos
->columnNumber
++; 
1776 #undef MULTIBYTE_CASES 
1777 #undef INVALID_CASES 
1778 #undef CHECK_NAME_CASE 
1779 #undef CHECK_NAME_CASES 
1780 #undef CHECK_NMSTRT_CASE 
1781 #undef CHECK_NMSTRT_CASES 
1783 #endif /* XML_TOK_IMPL_C */