1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd 
   2    See the file COPYING for copying permission. 
   5 #ifndef IS_INVALID_CHAR 
   6 #define IS_INVALID_CHAR(enc, ptr, n) (0) 
   9 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \ 
  12         return XML_TOK_PARTIAL_CHAR; \ 
  13       if (IS_INVALID_CHAR(enc, ptr, n)) { \ 
  14         *(nextTokPtr) = (ptr); \ 
  15         return XML_TOK_INVALID; \ 
  20 #define INVALID_CASES(ptr, nextTokPtr) \ 
  21   INVALID_LEAD_CASE(2, ptr, nextTokPtr) \ 
  22   INVALID_LEAD_CASE(3, ptr, nextTokPtr) \ 
  23   INVALID_LEAD_CASE(4, ptr, nextTokPtr) \ 
  27     *(nextTokPtr) = (ptr); \ 
  28     return XML_TOK_INVALID; 
  30 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \ 
  33        return XML_TOK_PARTIAL_CHAR; \ 
  34      if (!IS_NAME_CHAR(enc, ptr, n)) { \ 
  36        return XML_TOK_INVALID; \ 
  41 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \ 
  43     if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \ 
  45       return XML_TOK_INVALID; \ 
  54   CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \ 
  55   CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \ 
  56   CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr) 
  58 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \ 
  61        return XML_TOK_PARTIAL_CHAR; \ 
  62      if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \ 
  64        return XML_TOK_INVALID; \ 
  69 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \ 
  71     if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \ 
  73       return XML_TOK_INVALID; \ 
  79   CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \ 
  80   CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \ 
  81   CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr) 
  84 #define PREFIX(ident) ident 
  87 /* ptr points to character following "<!-" */ 
  90 PREFIX(scanComment
)(const ENCODING 
*enc
, const char *ptr
, 
  91                     const char *end
, const char **nextTokPtr
) 
  94     if (!CHAR_MATCHES(enc
, ptr
, ASCII_MINUS
)) { 
  96       return XML_TOK_INVALID
; 
 100       switch (BYTE_TYPE(enc
, ptr
)) { 
 101       INVALID_CASES(ptr
, nextTokPtr
) 
 103         if ((ptr 
+= MINBPC(enc
)) == end
) 
 104           return XML_TOK_PARTIAL
; 
 105         if (CHAR_MATCHES(enc
, ptr
, ASCII_MINUS
)) { 
 106           if ((ptr 
+= MINBPC(enc
)) == end
) 
 107             return XML_TOK_PARTIAL
; 
 108           if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 110             return XML_TOK_INVALID
; 
 112           *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 113           return XML_TOK_COMMENT
; 
 122   return XML_TOK_PARTIAL
; 
 125 /* ptr points to character following "<!" */ 
 128 PREFIX(scanDecl
)(const ENCODING 
*enc
, const char *ptr
, 
 129                  const char *end
, const char **nextTokPtr
) 
 132     return XML_TOK_PARTIAL
; 
 133   switch (BYTE_TYPE(enc
, ptr
)) { 
 135     return PREFIX(scanComment
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 137     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 138     return XML_TOK_COND_SECT_OPEN
; 
 145     return XML_TOK_INVALID
; 
 148     switch (BYTE_TYPE(enc
, ptr
)) { 
 150       if (ptr 
+ MINBPC(enc
) == end
) 
 151         return XML_TOK_PARTIAL
; 
 152       /* don't allow <!ENTITY% foo "whatever"> */ 
 153       switch (BYTE_TYPE(enc
, ptr 
+ MINBPC(enc
))) { 
 154       case BT_S
: case BT_CR
: case BT_LF
: case BT_PERCNT
: 
 156         return XML_TOK_INVALID
; 
 159     case BT_S
: case BT_CR
: case BT_LF
: 
 161       return XML_TOK_DECL_OPEN
; 
 168       return XML_TOK_INVALID
; 
 171   return XML_TOK_PARTIAL
; 
 175 PREFIX(checkPiTarget
)(const ENCODING 
*enc
, const char *ptr
, 
 176                       const char *end
, int *tokPtr
) 
 179   *tokPtr 
= XML_TOK_PI
; 
 180   if (end 
- ptr 
!= MINBPC(enc
)*3) 
 182   switch (BYTE_TO_ASCII(enc
, ptr
)) { 
 192   switch (BYTE_TO_ASCII(enc
, ptr
)) { 
 202   switch (BYTE_TO_ASCII(enc
, ptr
)) { 
 213   *tokPtr 
= XML_TOK_XML_DECL
; 
 217 /* ptr points to character following "<?" */ 
 220 PREFIX(scanPi
)(const ENCODING 
*enc
, const char *ptr
, 
 221                const char *end
, const char **nextTokPtr
) 
 224   const char *target 
= ptr
; 
 226     return XML_TOK_PARTIAL
; 
 227   switch (BYTE_TYPE(enc
, ptr
)) { 
 228   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 231     return XML_TOK_INVALID
; 
 234     switch (BYTE_TYPE(enc
, ptr
)) { 
 235     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 236     case BT_S
: case BT_CR
: case BT_LF
: 
 237       if (!PREFIX(checkPiTarget
)(enc
, target
, ptr
, &tok
)) { 
 239         return XML_TOK_INVALID
; 
 243         switch (BYTE_TYPE(enc
, ptr
)) { 
 244         INVALID_CASES(ptr
, nextTokPtr
) 
 248             return XML_TOK_PARTIAL
; 
 249           if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 250             *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 259       return XML_TOK_PARTIAL
; 
 261       if (!PREFIX(checkPiTarget
)(enc
, target
, ptr
, &tok
)) { 
 263         return XML_TOK_INVALID
; 
 267         return XML_TOK_PARTIAL
; 
 268       if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 269         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 275       return XML_TOK_INVALID
; 
 278   return XML_TOK_PARTIAL
; 
 282 PREFIX(scanCdataSection
)(const ENCODING 
*enc
, const char *ptr
, 
 283                          const char *end
, const char **nextTokPtr
) 
 285   static const char CDATA_LSQB
[] = { ASCII_C
, ASCII_D
, ASCII_A
, 
 286                                      ASCII_T
, ASCII_A
, ASCII_LSQB 
}; 
 289   if (end 
- ptr 
< 6 * MINBPC(enc
)) 
 290     return XML_TOK_PARTIAL
; 
 291   for (i 
= 0; i 
< 6; i
++, ptr 
+= MINBPC(enc
)) { 
 292     if (!CHAR_MATCHES(enc
, ptr
, CDATA_LSQB
[i
])) { 
 294       return XML_TOK_INVALID
; 
 298   return XML_TOK_CDATA_SECT_OPEN
; 
 302 PREFIX(cdataSectionTok
)(const ENCODING 
*enc
, const char *ptr
, 
 303                         const char *end
, const char **nextTokPtr
) 
 307   if (MINBPC(enc
) > 1) { 
 308     size_t n 
= end 
- ptr
; 
 309     if (n 
& (MINBPC(enc
) - 1)) { 
 310       n 
&= ~(MINBPC(enc
) - 1); 
 312         return XML_TOK_PARTIAL
; 
 316   switch (BYTE_TYPE(enc
, ptr
)) { 
 320       return XML_TOK_PARTIAL
; 
 321     if (!CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) 
 325       return XML_TOK_PARTIAL
; 
 326     if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 330     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 331     return XML_TOK_CDATA_SECT_CLOSE
; 
 335       return XML_TOK_PARTIAL
; 
 336     if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
 339     return XML_TOK_DATA_NEWLINE
; 
 341     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 342     return XML_TOK_DATA_NEWLINE
; 
 343   INVALID_CASES(ptr
, nextTokPtr
) 
 349     switch (BYTE_TYPE(enc
, ptr
)) { 
 350 #define LEAD_CASE(n) \ 
 352       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 
 354         return XML_TOK_DATA_CHARS; \ 
 358     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
 367       return XML_TOK_DATA_CHARS
; 
 374   return XML_TOK_DATA_CHARS
; 
 377 /* ptr points to character following "</" */ 
 380 PREFIX(scanEndTag
)(const ENCODING 
*enc
, const char *ptr
, 
 381                    const char *end
, const char **nextTokPtr
) 
 384     return XML_TOK_PARTIAL
; 
 385   switch (BYTE_TYPE(enc
, ptr
)) { 
 386   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 389     return XML_TOK_INVALID
; 
 392     switch (BYTE_TYPE(enc
, ptr
)) { 
 393     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 394     case BT_S
: case BT_CR
: case BT_LF
: 
 395       for (ptr 
+= MINBPC(enc
); ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
 396         switch (BYTE_TYPE(enc
, ptr
)) { 
 397         case BT_S
: case BT_CR
: case BT_LF
: 
 400           *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 401           return XML_TOK_END_TAG
; 
 404           return XML_TOK_INVALID
; 
 407       return XML_TOK_PARTIAL
; 
 410       /* no need to check qname syntax here, 
 411          since end-tag must match exactly */ 
 416       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 417       return XML_TOK_END_TAG
; 
 420       return XML_TOK_INVALID
; 
 423   return XML_TOK_PARTIAL
; 
 426 /* ptr points to character following "&#x" */
 429 PREFIX(scanHexCharRef
)(const ENCODING 
*enc
, const char *ptr
, 
 430                        const char *end
, const char **nextTokPtr
) 
 433     switch (BYTE_TYPE(enc
, ptr
)) { 
 439       return XML_TOK_INVALID
; 
 441     for (ptr 
+= MINBPC(enc
); ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
 442       switch (BYTE_TYPE(enc
, ptr
)) { 
 447         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 448         return XML_TOK_CHAR_REF
; 
 451         return XML_TOK_INVALID
; 
 455   return XML_TOK_PARTIAL
; 
 458 /* ptr points to character following "&#" */ 
 461 PREFIX(scanCharRef
)(const ENCODING 
*enc
, const char *ptr
, 
 462                     const char *end
, const char **nextTokPtr
) 
 465     if (CHAR_MATCHES(enc
, ptr
, ASCII_x
)) 
 466       return PREFIX(scanHexCharRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 467     switch (BYTE_TYPE(enc
, ptr
)) { 
 472       return XML_TOK_INVALID
; 
 474     for (ptr 
+= MINBPC(enc
); ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
 475       switch (BYTE_TYPE(enc
, ptr
)) { 
 479         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 480         return XML_TOK_CHAR_REF
; 
 483         return XML_TOK_INVALID
; 
 487   return XML_TOK_PARTIAL
; 
 490 /* ptr points to character following "&" */ 
 493 PREFIX(scanRef
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 494                 const char **nextTokPtr
) 
 497     return XML_TOK_PARTIAL
; 
 498   switch (BYTE_TYPE(enc
, ptr
)) { 
 499   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 501     return PREFIX(scanCharRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 504     return XML_TOK_INVALID
; 
 507     switch (BYTE_TYPE(enc
, ptr
)) { 
 508     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 510       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 511       return XML_TOK_ENTITY_REF
; 
 514       return XML_TOK_INVALID
; 
 517   return XML_TOK_PARTIAL
; 
 520 /* ptr points to character following first character of attribute name */ 
 523 PREFIX(scanAtts
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 524                  const char **nextTokPtr
) 
 530     switch (BYTE_TYPE(enc
, ptr
)) { 
 531     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 536         return XML_TOK_INVALID
; 
 541         return XML_TOK_PARTIAL
; 
 542       switch (BYTE_TYPE(enc
, ptr
)) { 
 543       CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 546         return XML_TOK_INVALID
; 
 550     case BT_S
: case BT_CR
: case BT_LF
: 
 556           return XML_TOK_PARTIAL
; 
 557         t 
= BYTE_TYPE(enc
, ptr
); 
 567           return XML_TOK_INVALID
; 
 580             return XML_TOK_PARTIAL
; 
 581           open 
= BYTE_TYPE(enc
, ptr
); 
 582           if (open 
== BT_QUOT 
|| open 
== BT_APOS
) 
 591             return XML_TOK_INVALID
; 
 595         /* in attribute value */ 
 599             return XML_TOK_PARTIAL
; 
 600           t 
= BYTE_TYPE(enc
, ptr
); 
 604           INVALID_CASES(ptr
, nextTokPtr
) 
 607               int tok 
= PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, &ptr
); 
 609                 if (tok 
== XML_TOK_INVALID
) 
 617             return XML_TOK_INVALID
; 
 625           return XML_TOK_PARTIAL
; 
 626         switch (BYTE_TYPE(enc
, ptr
)) { 
 637           return XML_TOK_INVALID
; 
 639         /* ptr points to closing quote */ 
 643             return XML_TOK_PARTIAL
; 
 644           switch (BYTE_TYPE(enc
, ptr
)) { 
 645           CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 646           case BT_S
: case BT_CR
: case BT_LF
: 
 650             *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 651             return XML_TOK_START_TAG_WITH_ATTS
; 
 656               return XML_TOK_PARTIAL
; 
 657             if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 659               return XML_TOK_INVALID
; 
 661             *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 662             return XML_TOK_EMPTY_ELEMENT_WITH_ATTS
; 
 665             return XML_TOK_INVALID
; 
 673       return XML_TOK_INVALID
; 
 676   return XML_TOK_PARTIAL
; 
 679 /* ptr points to character following "<" */ 
 682 PREFIX(scanLt
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 683                const char **nextTokPtr
) 
 689     return XML_TOK_PARTIAL
; 
 690   switch (BYTE_TYPE(enc
, ptr
)) { 
 691   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 693     if ((ptr 
+= MINBPC(enc
)) == end
) 
 694       return XML_TOK_PARTIAL
; 
 695     switch (BYTE_TYPE(enc
, ptr
)) { 
 697       return PREFIX(scanComment
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 699       return PREFIX(scanCdataSection
)(enc
, ptr 
+ MINBPC(enc
), 
 703     return XML_TOK_INVALID
; 
 705     return PREFIX(scanPi
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 707     return PREFIX(scanEndTag
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 710     return XML_TOK_INVALID
; 
 715   /* we have a start-tag */ 
 717     switch (BYTE_TYPE(enc
, ptr
)) { 
 718     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 723         return XML_TOK_INVALID
; 
 728         return XML_TOK_PARTIAL
; 
 729       switch (BYTE_TYPE(enc
, ptr
)) { 
 730       CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 733         return XML_TOK_INVALID
; 
 737     case BT_S
: case BT_CR
: case BT_LF
: 
 741           switch (BYTE_TYPE(enc
, ptr
)) { 
 742           CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 747           case BT_S
: case BT_CR
: case BT_LF
: 
 752             return XML_TOK_INVALID
; 
 754           return PREFIX(scanAtts
)(enc
, ptr
, end
, nextTokPtr
); 
 756         return XML_TOK_PARTIAL
; 
 760       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 761       return XML_TOK_START_TAG_NO_ATTS
; 
 766         return XML_TOK_PARTIAL
; 
 767       if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 769         return XML_TOK_INVALID
; 
 771       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 772       return XML_TOK_EMPTY_ELEMENT_NO_ATTS
; 
 775       return XML_TOK_INVALID
; 
 778   return XML_TOK_PARTIAL
; 
 782 PREFIX(contentTok
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 783                    const char **nextTokPtr
) 
 787   if (MINBPC(enc
) > 1) { 
 788     size_t n 
= end 
- ptr
; 
 789     if (n 
& (MINBPC(enc
) - 1)) { 
 790       n 
&= ~(MINBPC(enc
) - 1); 
 792         return XML_TOK_PARTIAL
; 
 796   switch (BYTE_TYPE(enc
, ptr
)) { 
 798     return PREFIX(scanLt
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 800     return PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 804       return XML_TOK_TRAILING_CR
; 
 805     if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
 808     return XML_TOK_DATA_NEWLINE
; 
 810     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 811     return XML_TOK_DATA_NEWLINE
; 
 815       return XML_TOK_TRAILING_RSQB
; 
 816     if (!CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) 
 820       return XML_TOK_TRAILING_RSQB
; 
 821     if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
 826     return XML_TOK_INVALID
; 
 827   INVALID_CASES(ptr
, nextTokPtr
) 
 833     switch (BYTE_TYPE(enc
, ptr
)) { 
 834 #define LEAD_CASE(n) \ 
 836       if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \ 
 838         return XML_TOK_DATA_CHARS; \ 
 842     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
 845       if (ptr 
+ MINBPC(enc
) != end
) { 
 846          if (!CHAR_MATCHES(enc
, ptr 
+ MINBPC(enc
), ASCII_RSQB
)) { 
 850          if (ptr 
+ 2*MINBPC(enc
) != end
) { 
 851            if (!CHAR_MATCHES(enc
, ptr 
+ 2*MINBPC(enc
), ASCII_GT
)) { 
 855            *nextTokPtr 
= ptr 
+ 2*MINBPC(enc
); 
 856            return XML_TOK_INVALID
; 
 868       return XML_TOK_DATA_CHARS
; 
 875   return XML_TOK_DATA_CHARS
; 
 878 /* ptr points to character following "%" */ 
 881 PREFIX(scanPercent
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 882                     const char **nextTokPtr
) 
 885     return XML_TOK_PARTIAL
; 
 886   switch (BYTE_TYPE(enc
, ptr
)) { 
 887   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 888   case BT_S
: case BT_LF
: case BT_CR
: case BT_PERCNT
: 
 890     return XML_TOK_PERCENT
; 
 893     return XML_TOK_INVALID
; 
 896     switch (BYTE_TYPE(enc
, ptr
)) { 
 897     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 899       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
 900       return XML_TOK_PARAM_ENTITY_REF
; 
 903       return XML_TOK_INVALID
; 
 906   return XML_TOK_PARTIAL
; 
 910 PREFIX(scanPoundName
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 911                       const char **nextTokPtr
) 
 914     return XML_TOK_PARTIAL
; 
 915   switch (BYTE_TYPE(enc
, ptr
)) { 
 916   CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
) 
 919     return XML_TOK_INVALID
; 
 922     switch (BYTE_TYPE(enc
, ptr
)) { 
 923     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
 924     case BT_CR
: case BT_LF
: case BT_S
: 
 925     case BT_RPAR
: case BT_GT
: case BT_PERCNT
: case BT_VERBAR
: 
 927       return XML_TOK_POUND_NAME
; 
 930       return XML_TOK_INVALID
; 
 933   return -XML_TOK_POUND_NAME
; 
 937 PREFIX(scanLit
)(int open
, const ENCODING 
*enc
, 
 938                 const char *ptr
, const char *end
, 
 939                 const char **nextTokPtr
) 
 942     int t 
= BYTE_TYPE(enc
, ptr
); 
 944     INVALID_CASES(ptr
, nextTokPtr
) 
 951         return -XML_TOK_LITERAL
; 
 953       switch (BYTE_TYPE(enc
, ptr
)) { 
 954       case BT_S
: case BT_CR
: case BT_LF
: 
 955       case BT_GT
: case BT_PERCNT
: case BT_LSQB
: 
 956         return XML_TOK_LITERAL
; 
 958         return XML_TOK_INVALID
; 
 965   return XML_TOK_PARTIAL
; 
 969 PREFIX(prologTok
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
 970                   const char **nextTokPtr
) 
 975   if (MINBPC(enc
) > 1) { 
 976     size_t n 
= end 
- ptr
; 
 977     if (n 
& (MINBPC(enc
) - 1)) { 
 978       n 
&= ~(MINBPC(enc
) - 1); 
 980         return XML_TOK_PARTIAL
; 
 984   switch (BYTE_TYPE(enc
, ptr
)) { 
 986     return PREFIX(scanLit
)(BT_QUOT
, enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 988     return PREFIX(scanLit
)(BT_APOS
, enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 993         return XML_TOK_PARTIAL
; 
 994       switch (BYTE_TYPE(enc
, ptr
)) { 
 996         return PREFIX(scanDecl
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
 998         return PREFIX(scanPi
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1005         *nextTokPtr 
= ptr 
- MINBPC(enc
); 
1006         return XML_TOK_INSTANCE_START
; 
1009       return XML_TOK_INVALID
; 
1012     if (ptr 
+ MINBPC(enc
) == end
) { 
1014       /* indicate that this might be part of a CR/LF pair */ 
1015       return -XML_TOK_PROLOG_S
; 
1018   case BT_S
: case BT_LF
: 
1023       switch (BYTE_TYPE(enc
, ptr
)) { 
1024       case BT_S
: case BT_LF
: 
1027         /* don't split CR/LF pair */ 
1028         if (ptr 
+ MINBPC(enc
) != end
) 
1033         return XML_TOK_PROLOG_S
; 
1037     return XML_TOK_PROLOG_S
; 
1039     return PREFIX(scanPercent
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1041     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1042     return XML_TOK_COMMA
; 
1044     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1045     return XML_TOK_OPEN_BRACKET
; 
1049       return -XML_TOK_CLOSE_BRACKET
; 
1050     if (CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) { 
1051       if (ptr 
+ MINBPC(enc
) == end
) 
1052         return XML_TOK_PARTIAL
; 
1053       if (CHAR_MATCHES(enc
, ptr 
+ MINBPC(enc
), ASCII_GT
)) { 
1054         *nextTokPtr 
= ptr 
+ 2*MINBPC(enc
); 
1055         return XML_TOK_COND_SECT_CLOSE
; 
1059     return XML_TOK_CLOSE_BRACKET
; 
1061     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1062     return XML_TOK_OPEN_PAREN
; 
1066       return -XML_TOK_CLOSE_PAREN
; 
1067     switch (BYTE_TYPE(enc
, ptr
)) { 
1069       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1070       return XML_TOK_CLOSE_PAREN_ASTERISK
; 
1072       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1073       return XML_TOK_CLOSE_PAREN_QUESTION
; 
1075       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1076       return XML_TOK_CLOSE_PAREN_PLUS
; 
1077     case BT_CR
: case BT_LF
: case BT_S
: 
1078     case BT_GT
: case BT_COMMA
: case BT_VERBAR
: 
1081       return XML_TOK_CLOSE_PAREN
; 
1084     return XML_TOK_INVALID
; 
1086     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1089     *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1090     return XML_TOK_DECL_CLOSE
; 
1092     return PREFIX(scanPoundName
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1093 #define LEAD_CASE(n) \ 
1094   case BT_LEAD ## n: \ 
1095     if (end - ptr < n) \ 
1096       return XML_TOK_PARTIAL_CHAR; \ 
1097     if (IS_NMSTRT_CHAR(enc, ptr, n)) { \ 
1099       tok = XML_TOK_NAME; \ 
1102     if (IS_NAME_CHAR(enc, ptr, n)) { \ 
1104       tok = XML_TOK_NMTOKEN; \ 
1107     *nextTokPtr = ptr; \ 
1108     return XML_TOK_INVALID; 
1109     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1122     tok 
= XML_TOK_NMTOKEN
; 
1126     if (IS_NMSTRT_CHAR_MINBPC(enc
, ptr
)) { 
1131     if (IS_NAME_CHAR_MINBPC(enc
, ptr
)) { 
1133       tok 
= XML_TOK_NMTOKEN
; 
1139     return XML_TOK_INVALID
; 
1141   while (ptr 
!= end
) { 
1142     switch (BYTE_TYPE(enc
, ptr
)) { 
1143     CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
1144     case BT_GT
: case BT_RPAR
: case BT_COMMA
: 
1145     case BT_VERBAR
: case BT_LSQB
: case BT_PERCNT
: 
1146     case BT_S
: case BT_CR
: case BT_LF
: 
1155           return XML_TOK_PARTIAL
; 
1156         tok 
= XML_TOK_PREFIXED_NAME
; 
1157         switch (BYTE_TYPE(enc
, ptr
)) { 
1158         CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
) 
1160           tok 
= XML_TOK_NMTOKEN
; 
1164       case XML_TOK_PREFIXED_NAME
: 
1165         tok 
= XML_TOK_NMTOKEN
; 
1171       if (tok 
== XML_TOK_NMTOKEN
)  { 
1173         return XML_TOK_INVALID
; 
1175       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1176       return XML_TOK_NAME_PLUS
; 
1178       if (tok 
== XML_TOK_NMTOKEN
)  { 
1180         return XML_TOK_INVALID
; 
1182       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1183       return XML_TOK_NAME_ASTERISK
; 
1185       if (tok 
== XML_TOK_NMTOKEN
)  { 
1187         return XML_TOK_INVALID
; 
1189       *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1190       return XML_TOK_NAME_QUESTION
; 
1193       return XML_TOK_INVALID
; 
1200 PREFIX(attributeValueTok
)(const ENCODING 
*enc
, const char *ptr
, 
1201                           const char *end
, const char **nextTokPtr
) 
1205     return XML_TOK_NONE
; 
1207   while (ptr 
!= end
) { 
1208     switch (BYTE_TYPE(enc
, ptr
)) { 
1209 #define LEAD_CASE(n) \ 
1210     case BT_LEAD ## n: ptr += n; break; 
1211     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1215         return PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1217       return XML_TOK_DATA_CHARS
; 
1219       /* this is for inside entity references */ 
1221       return XML_TOK_INVALID
; 
1224         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1225         return XML_TOK_DATA_NEWLINE
; 
1228       return XML_TOK_DATA_CHARS
; 
1233           return XML_TOK_TRAILING_CR
; 
1234         if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
1237         return XML_TOK_DATA_NEWLINE
; 
1240       return XML_TOK_DATA_CHARS
; 
1243         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1244         return XML_TOK_ATTRIBUTE_VALUE_S
; 
1247       return XML_TOK_DATA_CHARS
; 
1254   return XML_TOK_DATA_CHARS
; 
1258 PREFIX(entityValueTok
)(const ENCODING 
*enc
, const char *ptr
, 
1259                        const char *end
, const char **nextTokPtr
) 
1263     return XML_TOK_NONE
; 
1265   while (ptr 
!= end
) { 
1266     switch (BYTE_TYPE(enc
, ptr
)) { 
1267 #define LEAD_CASE(n) \ 
1268     case BT_LEAD ## n: ptr += n; break; 
1269     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1273         return PREFIX(scanRef
)(enc
, ptr 
+ MINBPC(enc
), end
, nextTokPtr
); 
1275       return XML_TOK_DATA_CHARS
; 
1278         int tok 
=  PREFIX(scanPercent
)(enc
, ptr 
+ MINBPC(enc
), 
1280         return (tok 
== XML_TOK_PERCENT
) ? XML_TOK_INVALID 
: tok
; 
1283       return XML_TOK_DATA_CHARS
; 
1286         *nextTokPtr 
= ptr 
+ MINBPC(enc
); 
1287         return XML_TOK_DATA_NEWLINE
; 
1290       return XML_TOK_DATA_CHARS
; 
1295           return XML_TOK_TRAILING_CR
; 
1296         if (BYTE_TYPE(enc
, ptr
) == BT_LF
) 
1299         return XML_TOK_DATA_NEWLINE
; 
1302       return XML_TOK_DATA_CHARS
; 
1309   return XML_TOK_DATA_CHARS
; 
1315 PREFIX(ignoreSectionTok
)(const ENCODING 
*enc
, const char *ptr
, 
1316                          const char *end
, const char **nextTokPtr
) 
1319   if (MINBPC(enc
) > 1) { 
1320     size_t n 
= end 
- ptr
; 
1321     if (n 
& (MINBPC(enc
) - 1)) { 
1322       n 
&= ~(MINBPC(enc
) - 1); 
1326   while (ptr 
!= end
) { 
1327     switch (BYTE_TYPE(enc
, ptr
)) { 
1328     INVALID_CASES(ptr
, nextTokPtr
) 
1330       if ((ptr 
+= MINBPC(enc
)) == end
) 
1331         return XML_TOK_PARTIAL
; 
1332       if (CHAR_MATCHES(enc
, ptr
, ASCII_EXCL
)) { 
1333         if ((ptr 
+= MINBPC(enc
)) == end
) 
1334           return XML_TOK_PARTIAL
; 
1335         if (CHAR_MATCHES(enc
, ptr
, ASCII_LSQB
)) { 
1342       if ((ptr 
+= MINBPC(enc
)) == end
) 
1343         return XML_TOK_PARTIAL
; 
1344       if (CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) { 
1345         if ((ptr 
+= MINBPC(enc
)) == end
) 
1346           return XML_TOK_PARTIAL
; 
1347         if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) { 
1351             return XML_TOK_IGNORE_SECT
; 
1362   return XML_TOK_PARTIAL
; 
1365 #endif /* XML_DTD */ 
1368 PREFIX(isPublicId
)(const ENCODING 
*enc
, const char *ptr
, const char *end
, 
1369                    const char **badPtr
) 
1373   for (; ptr 
!= end
; ptr 
+= MINBPC(enc
)) { 
1374     switch (BYTE_TYPE(enc
, ptr
)) { 
1398       if (CHAR_MATCHES(enc
, ptr
, ASCII_TAB
)) { 
1405       if (!(BYTE_TO_ASCII(enc
, ptr
) & ~0x7f)) 
1408       switch (BYTE_TO_ASCII(enc
, ptr
)) { 
1422 /* This must only be called for a well-formed start-tag or empty 
1423    element tag.  Returns the number of attributes.  Pointers to the 
1424    first attsMax attributes are stored in atts. 
1428 PREFIX(getAtts
)(const ENCODING 
*enc
, const char *ptr
, 
1429                 int attsMax
, ATTRIBUTE 
*atts
) 
1431   enum { other
, inName
, inValue 
} state 
= inName
; 
1433   int open 
= 0; /* defined when state == inValue; 
1434                    initialization just to shut up compilers */ 
1436   for (ptr 
+= MINBPC(enc
);; ptr 
+= MINBPC(enc
)) { 
1437     switch (BYTE_TYPE(enc
, ptr
)) { 
1438 #define START_NAME \ 
1439       if (state == other) { \ 
1440         if (nAtts < attsMax) { \ 
1441           atts[nAtts].name = ptr; \ 
1442           atts[nAtts].normalized = 1; \ 
1446 #define LEAD_CASE(n) \ 
1447     case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break; 
1448     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1457       if (state 
!= inValue
) { 
1458         if (nAtts 
< attsMax
) 
1459           atts
[nAtts
].valuePtr 
= ptr 
+ MINBPC(enc
); 
1463       else if (open 
== BT_QUOT
) { 
1465         if (nAtts 
< attsMax
) 
1466           atts
[nAtts
].valueEnd 
= ptr
; 
1471       if (state 
!= inValue
) { 
1472         if (nAtts 
< attsMax
) 
1473           atts
[nAtts
].valuePtr 
= ptr 
+ MINBPC(enc
); 
1477       else if (open 
== BT_APOS
) { 
1479         if (nAtts 
< attsMax
) 
1480           atts
[nAtts
].valueEnd 
= ptr
; 
1485       if (nAtts 
< attsMax
) 
1486         atts
[nAtts
].normalized 
= 0; 
1489       if (state 
== inName
) 
1491       else if (state 
== inValue
 
1493                && atts
[nAtts
].normalized
 
1494                && (ptr 
== atts
[nAtts
].valuePtr
 
1495                    || BYTE_TO_ASCII(enc
, ptr
) != ASCII_SPACE
 
1496                    || BYTE_TO_ASCII(enc
, ptr 
+ MINBPC(enc
)) == ASCII_SPACE
 
1497                    || BYTE_TYPE(enc
, ptr 
+ MINBPC(enc
)) == open
)) 
1498         atts
[nAtts
].normalized 
= 0; 
1500     case BT_CR
: case BT_LF
: 
1501       /* This case ensures that the first attribute name is counted.
1502          Apart from that we could just change state on the quote. */
1503       if (state 
== inName
) 
1505       else if (state 
== inValue 
&& nAtts 
< attsMax
) 
1506         atts
[nAtts
].normalized 
= 0; 
1510       if (state 
!= inValue
) 
1520 static int PTRFASTCALL
 
1521 PREFIX(charRefNumber
)(const ENCODING 
*enc
, const char *ptr
) 
1525   ptr 
+= 2*MINBPC(enc
); 
1526   if (CHAR_MATCHES(enc
, ptr
, ASCII_x
)) { 
1527     for (ptr 
+= MINBPC(enc
); 
1528          !CHAR_MATCHES(enc
, ptr
, ASCII_SEMI
); 
1529          ptr 
+= MINBPC(enc
)) { 
1530       int c 
= BYTE_TO_ASCII(enc
, ptr
); 
1532       case ASCII_0
: case ASCII_1
: case ASCII_2
: case ASCII_3
: case ASCII_4
: 
1533       case ASCII_5
: case ASCII_6
: case ASCII_7
: case ASCII_8
: case ASCII_9
: 
1535         result 
|= (c 
- ASCII_0
); 
1537       case ASCII_A
: case ASCII_B
: case ASCII_C
: 
1538       case ASCII_D
: case ASCII_E
: case ASCII_F
: 
1540         result 
+= 10 + (c 
- ASCII_A
); 
1542       case ASCII_a
: case ASCII_b
: case ASCII_c
: 
1543       case ASCII_d
: case ASCII_e
: case ASCII_f
: 
1545         result 
+= 10 + (c 
- ASCII_a
); 
1548       if (result 
>= 0x110000) 
1553     for (; !CHAR_MATCHES(enc
, ptr
, ASCII_SEMI
); ptr 
+= MINBPC(enc
)) { 
1554       int c 
= BYTE_TO_ASCII(enc
, ptr
); 
1556       result 
+= (c 
- ASCII_0
); 
1557       if (result 
>= 0x110000) 
1561   return checkCharRefNumber(result
); 
1565 PREFIX(predefinedEntityName
)(const ENCODING 
*enc
, const char *ptr
, 
1568   switch ((end 
- ptr
)/MINBPC(enc
)) { 
1570     if (CHAR_MATCHES(enc
, ptr 
+ MINBPC(enc
), ASCII_t
)) { 
1571       switch (BYTE_TO_ASCII(enc
, ptr
)) { 
1580     if (CHAR_MATCHES(enc
, ptr
, ASCII_a
)) { 
1582       if (CHAR_MATCHES(enc
, ptr
, ASCII_m
)) { 
1584         if (CHAR_MATCHES(enc
, ptr
, ASCII_p
)) 
1590     switch (BYTE_TO_ASCII(enc
, ptr
)) { 
1593       if (CHAR_MATCHES(enc
, ptr
, ASCII_u
)) { 
1595         if (CHAR_MATCHES(enc
, ptr
, ASCII_o
)) { 
1597           if (CHAR_MATCHES(enc
, ptr
, ASCII_t
)) 
1604       if (CHAR_MATCHES(enc
, ptr
, ASCII_p
)) { 
1606         if (CHAR_MATCHES(enc
, ptr
, ASCII_o
)) { 
1608           if (CHAR_MATCHES(enc
, ptr
, ASCII_s
)) 
1619 PREFIX(sameName
)(const ENCODING 
*enc
, const char *ptr1
, const char *ptr2
) 
1622     switch (BYTE_TYPE(enc
, ptr1
)) { 
1623 #define LEAD_CASE(n) \ 
1624     case BT_LEAD ## n: \ 
1625       if (*ptr1++ != *ptr2++) \ 
1627     LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2) 
1630       if (*ptr1
++ != *ptr2
++) 
1642       if (*ptr2
++ != *ptr1
++) 
1644       if (MINBPC(enc
) > 1) { 
1645         if (*ptr2
++ != *ptr1
++) 
1647         if (MINBPC(enc
) > 2) { 
1648           if (*ptr2
++ != *ptr1
++) 
1650           if (MINBPC(enc
) > 3) { 
1651             if (*ptr2
++ != *ptr1
++) 
1658       if (MINBPC(enc
) == 1 && *ptr1 
== *ptr2
) 
1660       switch (BYTE_TYPE(enc
, ptr2
)) { 
1683 PREFIX(nameMatchesAscii
)(const ENCODING 
*enc
, const char *ptr1
, 
1684                          const char *end1
, const char *ptr2
) 
1686   for (; *ptr2
; ptr1 
+= MINBPC(enc
), ptr2
++) { 
1689     if (!CHAR_MATCHES(enc
, ptr1
, *ptr2
)) 
1692   return ptr1 
== end1
; 
1695 static int PTRFASTCALL
 
1696 PREFIX(nameLength
)(const ENCODING 
*enc
, const char *ptr
) 
1698   const char *start 
= ptr
; 
1700     switch (BYTE_TYPE(enc
, ptr
)) { 
1701 #define LEAD_CASE(n) \ 
1702     case BT_LEAD ## n: ptr += n; break; 
1703     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1722 static const char * PTRFASTCALL
 
1723 PREFIX(skipS
)(const ENCODING 
*enc
, const char *ptr
) 
1726     switch (BYTE_TYPE(enc
, ptr
)) { 
1739 PREFIX(updatePosition
)(const ENCODING 
*enc
, 
1744   while (ptr 
!= end
) { 
1745     switch (BYTE_TYPE(enc
, ptr
)) { 
1746 #define LEAD_CASE(n) \ 
1747     case BT_LEAD ## n: \ 
1750     LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4) 
1753       pos
->columnNumber 
= (unsigned)-1; 
1760       if (ptr 
!= end 
&& BYTE_TYPE(enc
, ptr
) == BT_LF
) 
1762       pos
->columnNumber 
= (unsigned)-1; 
1768     pos
->columnNumber
++; 
1773 #undef MULTIBYTE_CASES 
1774 #undef INVALID_CASES 
1775 #undef CHECK_NAME_CASE 
1776 #undef CHECK_NAME_CASES 
1777 #undef CHECK_NMSTRT_CASE 
1778 #undef CHECK_NMSTRT_CASES