2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file copying.txt for copying permission.
6 #ifndef IS_INVALID_CHAR
7 #define IS_INVALID_CHAR(enc, ptr, n) (0)
10 #define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
13 return XML_TOK_PARTIAL_CHAR; \
14 if (IS_INVALID_CHAR(enc, ptr, n)) { \
15 *(nextTokPtr) = (ptr); \
16 return XML_TOK_INVALID; \
21 #define INVALID_CASES(ptr, nextTokPtr) \
22 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
23 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
24 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
28 *(nextTokPtr) = (ptr); \
29 return XML_TOK_INVALID;
31 #define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
34 return XML_TOK_PARTIAL_CHAR; \
35 if (!IS_NAME_CHAR(enc, ptr, n)) { \
37 return XML_TOK_INVALID; \
42 #define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
44 if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
46 return XML_TOK_INVALID; \
55 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
56 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
57 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
59 #define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
62 return XML_TOK_PARTIAL_CHAR; \
63 if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
65 return XML_TOK_INVALID; \
70 #define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
72 if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
74 return XML_TOK_INVALID; \
80 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
81 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
82 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
85 #define PREFIX(ident) ident
88 /* ptr points to character following "<!-" */
91 int PREFIX(scanComment
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
92 const char **nextTokPtr
)
95 if (!CHAR_MATCHES(enc
, ptr
, ASCII_MINUS
)) {
97 return XML_TOK_INVALID
;
101 switch (BYTE_TYPE(enc
, ptr
)) {
102 INVALID_CASES(ptr
, nextTokPtr
)
104 if ((ptr
+= MINBPC(enc
)) == end
)
105 return XML_TOK_PARTIAL
;
106 if (CHAR_MATCHES(enc
, ptr
, ASCII_MINUS
)) {
107 if ((ptr
+= MINBPC(enc
)) == end
)
108 return XML_TOK_PARTIAL
;
109 if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
111 return XML_TOK_INVALID
;
113 *nextTokPtr
= ptr
+ MINBPC(enc
);
114 return XML_TOK_COMMENT
;
123 return XML_TOK_PARTIAL
;
126 /* ptr points to character following "<!" */
129 int PREFIX(scanDecl
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
130 const char **nextTokPtr
)
133 return XML_TOK_PARTIAL
;
134 switch (BYTE_TYPE(enc
, ptr
)) {
136 return PREFIX(scanComment
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
138 *nextTokPtr
= ptr
+ MINBPC(enc
);
139 return XML_TOK_COND_SECT_OPEN
;
146 return XML_TOK_INVALID
;
149 switch (BYTE_TYPE(enc
, ptr
)) {
151 if (ptr
+ MINBPC(enc
) == end
)
152 return XML_TOK_PARTIAL
;
153 /* don't allow <!ENTITY% foo "whatever"> */
154 switch (BYTE_TYPE(enc
, ptr
+ MINBPC(enc
))) {
155 case BT_S
: case BT_CR
: case BT_LF
: case BT_PERCNT
:
157 return XML_TOK_INVALID
;
160 case BT_S
: case BT_CR
: case BT_LF
:
162 return XML_TOK_DECL_OPEN
;
169 return XML_TOK_INVALID
;
172 return XML_TOK_PARTIAL
;
176 int PREFIX(checkPiTarget
)(const ENCODING
*enc
, const char *ptr
, const char *end
, int *tokPtr
)
179 *tokPtr
= XML_TOK_PI
;
180 if (end
- ptr
!= MINBPC(enc
)*3)
182 switch (BYTE_TO_ASCII(enc
, ptr
)) {
192 switch (BYTE_TO_ASCII(enc
, ptr
)) {
202 switch (BYTE_TO_ASCII(enc
, ptr
)) {
213 *tokPtr
= XML_TOK_XML_DECL
;
217 /* ptr points to character following "<?" */
220 int PREFIX(scanPi
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
221 const char **nextTokPtr
)
224 const char *target
= ptr
;
226 return XML_TOK_PARTIAL
;
227 switch (BYTE_TYPE(enc
, ptr
)) {
228 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
231 return XML_TOK_INVALID
;
234 switch (BYTE_TYPE(enc
, ptr
)) {
235 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
236 case BT_S
: case BT_CR
: case BT_LF
:
237 if (!PREFIX(checkPiTarget
)(enc
, target
, ptr
, &tok
)) {
239 return XML_TOK_INVALID
;
243 switch (BYTE_TYPE(enc
, ptr
)) {
244 INVALID_CASES(ptr
, nextTokPtr
)
248 return XML_TOK_PARTIAL
;
249 if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
250 *nextTokPtr
= ptr
+ MINBPC(enc
);
259 return XML_TOK_PARTIAL
;
261 if (!PREFIX(checkPiTarget
)(enc
, target
, ptr
, &tok
)) {
263 return XML_TOK_INVALID
;
267 return XML_TOK_PARTIAL
;
268 if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
269 *nextTokPtr
= ptr
+ MINBPC(enc
);
275 return XML_TOK_INVALID
;
278 return XML_TOK_PARTIAL
;
283 int PREFIX(scanCdataSection
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
284 const char **nextTokPtr
)
286 static const char CDATA_LSQB
[] = { ASCII_C
, ASCII_D
, ASCII_A
, ASCII_T
, ASCII_A
, ASCII_LSQB
};
289 if (end
- ptr
< 6 * MINBPC(enc
))
290 return XML_TOK_PARTIAL
;
291 for (i
= 0; i
< 6; i
++, ptr
+= MINBPC(enc
)) {
292 if (!CHAR_MATCHES(enc
, ptr
, CDATA_LSQB
[i
])) {
294 return XML_TOK_INVALID
;
298 return XML_TOK_CDATA_SECT_OPEN
;
302 int PREFIX(cdataSectionTok
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
303 const char **nextTokPtr
)
307 if (MINBPC(enc
) > 1) {
308 size_t n
= end
- ptr
;
309 if (n
& (MINBPC(enc
) - 1)) {
310 n
&= ~(MINBPC(enc
) - 1);
312 return XML_TOK_PARTIAL
;
316 switch (BYTE_TYPE(enc
, ptr
)) {
320 return XML_TOK_PARTIAL
;
321 if (!CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
))
325 return XML_TOK_PARTIAL
;
326 if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
330 *nextTokPtr
= ptr
+ MINBPC(enc
);
331 return XML_TOK_CDATA_SECT_CLOSE
;
335 return XML_TOK_PARTIAL
;
336 if (BYTE_TYPE(enc
, ptr
) == BT_LF
)
339 return XML_TOK_DATA_NEWLINE
;
341 *nextTokPtr
= ptr
+ MINBPC(enc
);
342 return XML_TOK_DATA_NEWLINE
;
343 INVALID_CASES(ptr
, nextTokPtr
)
349 switch (BYTE_TYPE(enc
, ptr
)) {
350 #define LEAD_CASE(n) \
352 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
354 return XML_TOK_DATA_CHARS; \
358 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
367 return XML_TOK_DATA_CHARS
;
374 return XML_TOK_DATA_CHARS
;
377 /* ptr points to character following "</" */
380 int PREFIX(scanEndTag
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
381 const char **nextTokPtr
)
384 return XML_TOK_PARTIAL
;
385 switch (BYTE_TYPE(enc
, ptr
)) {
386 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
389 return XML_TOK_INVALID
;
392 switch (BYTE_TYPE(enc
, ptr
)) {
393 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
394 case BT_S
: case BT_CR
: case BT_LF
:
395 for (ptr
+= MINBPC(enc
); ptr
!= end
; ptr
+= MINBPC(enc
)) {
396 switch (BYTE_TYPE(enc
, ptr
)) {
397 case BT_S
: case BT_CR
: case BT_LF
:
400 *nextTokPtr
= ptr
+ MINBPC(enc
);
401 return XML_TOK_END_TAG
;
404 return XML_TOK_INVALID
;
407 return XML_TOK_PARTIAL
;
410 /* no need to check qname syntax here, since end-tag must match exactly */
415 *nextTokPtr
= ptr
+ MINBPC(enc
);
416 return XML_TOK_END_TAG
;
419 return XML_TOK_INVALID
;
422 return XML_TOK_PARTIAL
;
425 /* ptr points to character following "&#X" */
428 int PREFIX(scanHexCharRef
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
429 const char **nextTokPtr
)
432 switch (BYTE_TYPE(enc
, ptr
)) {
438 return XML_TOK_INVALID
;
440 for (ptr
+= MINBPC(enc
); ptr
!= end
; ptr
+= MINBPC(enc
)) {
441 switch (BYTE_TYPE(enc
, ptr
)) {
446 *nextTokPtr
= ptr
+ MINBPC(enc
);
447 return XML_TOK_CHAR_REF
;
450 return XML_TOK_INVALID
;
454 return XML_TOK_PARTIAL
;
457 /* ptr points to character following "&#" */
460 int PREFIX(scanCharRef
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
461 const char **nextTokPtr
)
464 if (CHAR_MATCHES(enc
, ptr
, ASCII_x
))
465 return PREFIX(scanHexCharRef
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
466 switch (BYTE_TYPE(enc
, ptr
)) {
471 return XML_TOK_INVALID
;
473 for (ptr
+= MINBPC(enc
); ptr
!= end
; ptr
+= MINBPC(enc
)) {
474 switch (BYTE_TYPE(enc
, ptr
)) {
478 *nextTokPtr
= ptr
+ MINBPC(enc
);
479 return XML_TOK_CHAR_REF
;
482 return XML_TOK_INVALID
;
486 return XML_TOK_PARTIAL
;
489 /* ptr points to character following "&" */
492 int PREFIX(scanRef
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
493 const char **nextTokPtr
)
496 return XML_TOK_PARTIAL
;
497 switch (BYTE_TYPE(enc
, ptr
)) {
498 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
500 return PREFIX(scanCharRef
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
503 return XML_TOK_INVALID
;
506 switch (BYTE_TYPE(enc
, ptr
)) {
507 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
509 *nextTokPtr
= ptr
+ MINBPC(enc
);
510 return XML_TOK_ENTITY_REF
;
513 return XML_TOK_INVALID
;
516 return XML_TOK_PARTIAL
;
519 /* ptr points to character following first character of attribute name */
522 int PREFIX(scanAtts
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
523 const char **nextTokPtr
)
529 switch (BYTE_TYPE(enc
, ptr
)) {
530 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
535 return XML_TOK_INVALID
;
540 return XML_TOK_PARTIAL
;
541 switch (BYTE_TYPE(enc
, ptr
)) {
542 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
545 return XML_TOK_INVALID
;
549 case BT_S
: case BT_CR
: case BT_LF
:
555 return XML_TOK_PARTIAL
;
556 t
= BYTE_TYPE(enc
, ptr
);
566 return XML_TOK_INVALID
;
580 return XML_TOK_PARTIAL
;
581 open
= BYTE_TYPE(enc
, ptr
);
582 if (open
== BT_QUOT
|| open
== BT_APOS
)
591 return XML_TOK_INVALID
;
595 /* in attribute value */
599 return XML_TOK_PARTIAL
;
600 t
= BYTE_TYPE(enc
, ptr
);
604 INVALID_CASES(ptr
, nextTokPtr
)
607 int tok
= PREFIX(scanRef
)(enc
, ptr
+ MINBPC(enc
), end
, &ptr
);
609 if (tok
== XML_TOK_INVALID
)
617 return XML_TOK_INVALID
;
625 return XML_TOK_PARTIAL
;
626 switch (BYTE_TYPE(enc
, ptr
)) {
637 return XML_TOK_INVALID
;
639 /* ptr points to closing quote */
643 return XML_TOK_PARTIAL
;
644 switch (BYTE_TYPE(enc
, ptr
)) {
645 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
646 case BT_S
: case BT_CR
: case BT_LF
:
650 *nextTokPtr
= ptr
+ MINBPC(enc
);
651 return XML_TOK_START_TAG_WITH_ATTS
;
656 return XML_TOK_PARTIAL
;
657 if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
659 return XML_TOK_INVALID
;
661 *nextTokPtr
= ptr
+ MINBPC(enc
);
662 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS
;
665 return XML_TOK_INVALID
;
673 return XML_TOK_INVALID
;
676 return XML_TOK_PARTIAL
;
679 /* ptr points to character following "<" */
682 int PREFIX(scanLt
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
683 const char **nextTokPtr
)
689 return XML_TOK_PARTIAL
;
690 switch (BYTE_TYPE(enc
, ptr
)) {
691 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
693 if ((ptr
+= MINBPC(enc
)) == end
)
694 return XML_TOK_PARTIAL
;
695 switch (BYTE_TYPE(enc
, ptr
)) {
697 return PREFIX(scanComment
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
699 return PREFIX(scanCdataSection
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
702 return XML_TOK_INVALID
;
704 return PREFIX(scanPi
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
706 return PREFIX(scanEndTag
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
709 return XML_TOK_INVALID
;
714 /* we have a start-tag */
716 switch (BYTE_TYPE(enc
, ptr
)) {
717 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
722 return XML_TOK_INVALID
;
727 return XML_TOK_PARTIAL
;
728 switch (BYTE_TYPE(enc
, ptr
)) {
729 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
732 return XML_TOK_INVALID
;
736 case BT_S
: case BT_CR
: case BT_LF
:
740 switch (BYTE_TYPE(enc
, ptr
)) {
741 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
746 case BT_S
: case BT_CR
: case BT_LF
:
751 return XML_TOK_INVALID
;
753 return PREFIX(scanAtts
)(enc
, ptr
, end
, nextTokPtr
);
755 return XML_TOK_PARTIAL
;
759 *nextTokPtr
= ptr
+ MINBPC(enc
);
760 return XML_TOK_START_TAG_NO_ATTS
;
765 return XML_TOK_PARTIAL
;
766 if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
768 return XML_TOK_INVALID
;
770 *nextTokPtr
= ptr
+ MINBPC(enc
);
771 return XML_TOK_EMPTY_ELEMENT_NO_ATTS
;
774 return XML_TOK_INVALID
;
777 return XML_TOK_PARTIAL
;
781 int PREFIX(contentTok
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
782 const char **nextTokPtr
)
786 if (MINBPC(enc
) > 1) {
787 size_t n
= end
- ptr
;
788 if (n
& (MINBPC(enc
) - 1)) {
789 n
&= ~(MINBPC(enc
) - 1);
791 return XML_TOK_PARTIAL
;
795 switch (BYTE_TYPE(enc
, ptr
)) {
797 return PREFIX(scanLt
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
799 return PREFIX(scanRef
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
803 return XML_TOK_TRAILING_CR
;
804 if (BYTE_TYPE(enc
, ptr
) == BT_LF
)
807 return XML_TOK_DATA_NEWLINE
;
809 *nextTokPtr
= ptr
+ MINBPC(enc
);
810 return XML_TOK_DATA_NEWLINE
;
814 return XML_TOK_TRAILING_RSQB
;
815 if (!CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
))
819 return XML_TOK_TRAILING_RSQB
;
820 if (!CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
825 return XML_TOK_INVALID
;
826 INVALID_CASES(ptr
, nextTokPtr
)
832 switch (BYTE_TYPE(enc
, ptr
)) {
833 #define LEAD_CASE(n) \
835 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
837 return XML_TOK_DATA_CHARS; \
841 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
844 if (ptr
+ MINBPC(enc
) != end
) {
845 if (!CHAR_MATCHES(enc
, ptr
+ MINBPC(enc
), ASCII_RSQB
)) {
849 if (ptr
+ 2*MINBPC(enc
) != end
) {
850 if (!CHAR_MATCHES(enc
, ptr
+ 2*MINBPC(enc
), ASCII_GT
)) {
854 *nextTokPtr
= ptr
+ 2*MINBPC(enc
);
855 return XML_TOK_INVALID
;
867 return XML_TOK_DATA_CHARS
;
874 return XML_TOK_DATA_CHARS
;
877 /* ptr points to character following "%" */
880 int PREFIX(scanPercent
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
881 const char **nextTokPtr
)
884 return XML_TOK_PARTIAL
;
885 switch (BYTE_TYPE(enc
, ptr
)) {
886 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
887 case BT_S
: case BT_LF
: case BT_CR
: case BT_PERCNT
:
889 return XML_TOK_PERCENT
;
892 return XML_TOK_INVALID
;
895 switch (BYTE_TYPE(enc
, ptr
)) {
896 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
898 *nextTokPtr
= ptr
+ MINBPC(enc
);
899 return XML_TOK_PARAM_ENTITY_REF
;
902 return XML_TOK_INVALID
;
905 return XML_TOK_PARTIAL
;
909 int PREFIX(scanPoundName
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
910 const char **nextTokPtr
)
913 return XML_TOK_PARTIAL
;
914 switch (BYTE_TYPE(enc
, ptr
)) {
915 CHECK_NMSTRT_CASES(enc
, ptr
, end
, nextTokPtr
)
918 return XML_TOK_INVALID
;
921 switch (BYTE_TYPE(enc
, ptr
)) {
922 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
923 case BT_CR
: case BT_LF
: case BT_S
:
924 case BT_RPAR
: case BT_GT
: case BT_PERCNT
: case BT_VERBAR
:
926 return XML_TOK_POUND_NAME
;
929 return XML_TOK_INVALID
;
932 return -XML_TOK_POUND_NAME
;
936 int PREFIX(scanLit
)(int open
, const ENCODING
*enc
,
937 const char *ptr
, const char *end
,
938 const char **nextTokPtr
)
941 int t
= BYTE_TYPE(enc
, ptr
);
943 INVALID_CASES(ptr
, nextTokPtr
)
950 return -XML_TOK_LITERAL
;
952 switch (BYTE_TYPE(enc
, ptr
)) {
953 case BT_S
: case BT_CR
: case BT_LF
:
954 case BT_GT
: case BT_PERCNT
: case BT_LSQB
:
955 return XML_TOK_LITERAL
;
957 return XML_TOK_INVALID
;
964 return XML_TOK_PARTIAL
;
968 int PREFIX(prologTok
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
969 const char **nextTokPtr
)
974 if (MINBPC(enc
) > 1) {
975 size_t n
= end
- ptr
;
976 if (n
& (MINBPC(enc
) - 1)) {
977 n
&= ~(MINBPC(enc
) - 1);
979 return XML_TOK_PARTIAL
;
983 switch (BYTE_TYPE(enc
, ptr
)) {
985 return PREFIX(scanLit
)(BT_QUOT
, enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
987 return PREFIX(scanLit
)(BT_APOS
, enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
992 return XML_TOK_PARTIAL
;
993 switch (BYTE_TYPE(enc
, ptr
)) {
995 return PREFIX(scanDecl
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
997 return PREFIX(scanPi
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
1004 *nextTokPtr
= ptr
- MINBPC(enc
);
1005 return XML_TOK_INSTANCE_START
;
1008 return XML_TOK_INVALID
;
1011 if (ptr
+ MINBPC(enc
) == end
)
1012 return -XML_TOK_PROLOG_S
;
1014 case BT_S
: case BT_LF
:
1019 switch (BYTE_TYPE(enc
, ptr
)) {
1020 case BT_S
: case BT_LF
:
1023 /* don't split CR/LF pair */
1024 if (ptr
+ MINBPC(enc
) != end
)
1029 return XML_TOK_PROLOG_S
;
1033 return XML_TOK_PROLOG_S
;
1035 return PREFIX(scanPercent
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
1037 *nextTokPtr
= ptr
+ MINBPC(enc
);
1038 return XML_TOK_COMMA
;
1040 *nextTokPtr
= ptr
+ MINBPC(enc
);
1041 return XML_TOK_OPEN_BRACKET
;
1045 return -XML_TOK_CLOSE_BRACKET
;
1046 if (CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) {
1047 if (ptr
+ MINBPC(enc
) == end
)
1048 return XML_TOK_PARTIAL
;
1049 if (CHAR_MATCHES(enc
, ptr
+ MINBPC(enc
), ASCII_GT
)) {
1050 *nextTokPtr
= ptr
+ 2*MINBPC(enc
);
1051 return XML_TOK_COND_SECT_CLOSE
;
1055 return XML_TOK_CLOSE_BRACKET
;
1057 *nextTokPtr
= ptr
+ MINBPC(enc
);
1058 return XML_TOK_OPEN_PAREN
;
1062 return -XML_TOK_CLOSE_PAREN
;
1063 switch (BYTE_TYPE(enc
, ptr
)) {
1065 *nextTokPtr
= ptr
+ MINBPC(enc
);
1066 return XML_TOK_CLOSE_PAREN_ASTERISK
;
1068 *nextTokPtr
= ptr
+ MINBPC(enc
);
1069 return XML_TOK_CLOSE_PAREN_QUESTION
;
1071 *nextTokPtr
= ptr
+ MINBPC(enc
);
1072 return XML_TOK_CLOSE_PAREN_PLUS
;
1073 case BT_CR
: case BT_LF
: case BT_S
:
1074 case BT_GT
: case BT_COMMA
: case BT_VERBAR
:
1077 return XML_TOK_CLOSE_PAREN
;
1080 return XML_TOK_INVALID
;
1082 *nextTokPtr
= ptr
+ MINBPC(enc
);
1085 *nextTokPtr
= ptr
+ MINBPC(enc
);
1086 return XML_TOK_DECL_CLOSE
;
1088 return PREFIX(scanPoundName
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
1089 #define LEAD_CASE(n) \
1090 case BT_LEAD ## n: \
1091 if (end - ptr < n) \
1092 return XML_TOK_PARTIAL_CHAR; \
1093 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1095 tok = XML_TOK_NAME; \
1098 if (IS_NAME_CHAR(enc, ptr, n)) { \
1100 tok = XML_TOK_NMTOKEN; \
1103 *nextTokPtr = ptr; \
1104 return XML_TOK_INVALID;
1105 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1118 tok
= XML_TOK_NMTOKEN
;
1122 if (IS_NMSTRT_CHAR_MINBPC(enc
, ptr
)) {
1127 if (IS_NAME_CHAR_MINBPC(enc
, ptr
)) {
1129 tok
= XML_TOK_NMTOKEN
;
1135 return XML_TOK_INVALID
;
1137 while (ptr
!= end
) {
1138 switch (BYTE_TYPE(enc
, ptr
)) {
1139 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
1140 case BT_GT
: case BT_RPAR
: case BT_COMMA
:
1141 case BT_VERBAR
: case BT_LSQB
: case BT_PERCNT
:
1142 case BT_S
: case BT_CR
: case BT_LF
:
1151 return XML_TOK_PARTIAL
;
1152 tok
= XML_TOK_PREFIXED_NAME
;
1153 switch (BYTE_TYPE(enc
, ptr
)) {
1154 CHECK_NAME_CASES(enc
, ptr
, end
, nextTokPtr
)
1156 tok
= XML_TOK_NMTOKEN
;
1160 case XML_TOK_PREFIXED_NAME
:
1161 tok
= XML_TOK_NMTOKEN
;
1167 if (tok
== XML_TOK_NMTOKEN
) {
1169 return XML_TOK_INVALID
;
1171 *nextTokPtr
= ptr
+ MINBPC(enc
);
1172 return XML_TOK_NAME_PLUS
;
1174 if (tok
== XML_TOK_NMTOKEN
) {
1176 return XML_TOK_INVALID
;
1178 *nextTokPtr
= ptr
+ MINBPC(enc
);
1179 return XML_TOK_NAME_ASTERISK
;
1181 if (tok
== XML_TOK_NMTOKEN
) {
1183 return XML_TOK_INVALID
;
1185 *nextTokPtr
= ptr
+ MINBPC(enc
);
1186 return XML_TOK_NAME_QUESTION
;
1189 return XML_TOK_INVALID
;
1196 int PREFIX(attributeValueTok
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
1197 const char **nextTokPtr
)
1201 return XML_TOK_NONE
;
1203 while (ptr
!= end
) {
1204 switch (BYTE_TYPE(enc
, ptr
)) {
1205 #define LEAD_CASE(n) \
1206 case BT_LEAD ## n: ptr += n; break;
1207 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1211 return PREFIX(scanRef
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
1213 return XML_TOK_DATA_CHARS
;
1215 /* this is for inside entity references */
1217 return XML_TOK_INVALID
;
1220 *nextTokPtr
= ptr
+ MINBPC(enc
);
1221 return XML_TOK_DATA_NEWLINE
;
1224 return XML_TOK_DATA_CHARS
;
1229 return XML_TOK_TRAILING_CR
;
1230 if (BYTE_TYPE(enc
, ptr
) == BT_LF
)
1233 return XML_TOK_DATA_NEWLINE
;
1236 return XML_TOK_DATA_CHARS
;
1239 *nextTokPtr
= ptr
+ MINBPC(enc
);
1240 return XML_TOK_ATTRIBUTE_VALUE_S
;
1243 return XML_TOK_DATA_CHARS
;
1250 return XML_TOK_DATA_CHARS
;
1254 int PREFIX(entityValueTok
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
1255 const char **nextTokPtr
)
1259 return XML_TOK_NONE
;
1261 while (ptr
!= end
) {
1262 switch (BYTE_TYPE(enc
, ptr
)) {
1263 #define LEAD_CASE(n) \
1264 case BT_LEAD ## n: ptr += n; break;
1265 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1269 return PREFIX(scanRef
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
1271 return XML_TOK_DATA_CHARS
;
1274 return PREFIX(scanPercent
)(enc
, ptr
+ MINBPC(enc
), end
, nextTokPtr
);
1276 return XML_TOK_DATA_CHARS
;
1279 *nextTokPtr
= ptr
+ MINBPC(enc
);
1280 return XML_TOK_DATA_NEWLINE
;
1283 return XML_TOK_DATA_CHARS
;
1288 return XML_TOK_TRAILING_CR
;
1289 if (BYTE_TYPE(enc
, ptr
) == BT_LF
)
1292 return XML_TOK_DATA_NEWLINE
;
1295 return XML_TOK_DATA_CHARS
;
1302 return XML_TOK_DATA_CHARS
;
1308 int PREFIX(ignoreSectionTok
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
1309 const char **nextTokPtr
)
1312 if (MINBPC(enc
) > 1) {
1313 size_t n
= end
- ptr
;
1314 if (n
& (MINBPC(enc
) - 1)) {
1315 n
&= ~(MINBPC(enc
) - 1);
1319 while (ptr
!= end
) {
1320 switch (BYTE_TYPE(enc
, ptr
)) {
1321 INVALID_CASES(ptr
, nextTokPtr
)
1323 if ((ptr
+= MINBPC(enc
)) == end
)
1324 return XML_TOK_PARTIAL
;
1325 if (CHAR_MATCHES(enc
, ptr
, ASCII_EXCL
)) {
1326 if ((ptr
+= MINBPC(enc
)) == end
)
1327 return XML_TOK_PARTIAL
;
1328 if (CHAR_MATCHES(enc
, ptr
, ASCII_LSQB
)) {
1335 if ((ptr
+= MINBPC(enc
)) == end
)
1336 return XML_TOK_PARTIAL
;
1337 if (CHAR_MATCHES(enc
, ptr
, ASCII_RSQB
)) {
1338 if ((ptr
+= MINBPC(enc
)) == end
)
1339 return XML_TOK_PARTIAL
;
1340 if (CHAR_MATCHES(enc
, ptr
, ASCII_GT
)) {
1344 return XML_TOK_IGNORE_SECT
;
1355 return XML_TOK_PARTIAL
;
1358 #endif /* XML_DTD */
1361 int PREFIX(isPublicId
)(const ENCODING
*enc
, const char *ptr
, const char *end
,
1362 const char **badPtr
)
1366 for (; ptr
!= end
; ptr
+= MINBPC(enc
)) {
1367 switch (BYTE_TYPE(enc
, ptr
)) {
1391 if (CHAR_MATCHES(enc
, ptr
, ASCII_TAB
)) {
1398 if (!(BYTE_TO_ASCII(enc
, ptr
) & ~0x7f))
1401 switch (BYTE_TO_ASCII(enc
, ptr
)) {
1415 /* This must only be called for a well-formed start-tag or empty element tag.
1416 Returns the number of attributes. Pointers to the first attsMax attributes
1417 are stored in atts. */
1420 int PREFIX(getAtts
)(const ENCODING
*enc
, const char *ptr
,
1421 int attsMax
, ATTRIBUTE
*atts
)
1423 enum { other
, inName
, inValue
} state
= inName
;
1425 int open
= 0; /* defined when state == inValue;
1426 initialization just to shut up compilers */
1428 for (ptr
+= MINBPC(enc
);; ptr
+= MINBPC(enc
)) {
1429 switch (BYTE_TYPE(enc
, ptr
)) {
1430 #define START_NAME \
1431 if (state == other) { \
1432 if (nAtts < attsMax) { \
1433 atts[nAtts].name = ptr; \
1434 atts[nAtts].normalized = 1; \
1438 #define LEAD_CASE(n) \
1439 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
1440 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1449 if (state
!= inValue
) {
1450 if (nAtts
< attsMax
)
1451 atts
[nAtts
].valuePtr
= ptr
+ MINBPC(enc
);
1455 else if (open
== BT_QUOT
) {
1457 if (nAtts
< attsMax
)
1458 atts
[nAtts
].valueEnd
= ptr
;
1463 if (state
!= inValue
) {
1464 if (nAtts
< attsMax
)
1465 atts
[nAtts
].valuePtr
= ptr
+ MINBPC(enc
);
1469 else if (open
== BT_APOS
) {
1471 if (nAtts
< attsMax
)
1472 atts
[nAtts
].valueEnd
= ptr
;
1477 if (nAtts
< attsMax
)
1478 atts
[nAtts
].normalized
= 0;
1481 if (state
== inName
)
1483 else if (state
== inValue
1485 && atts
[nAtts
].normalized
1486 && (ptr
== atts
[nAtts
].valuePtr
1487 || BYTE_TO_ASCII(enc
, ptr
) != ASCII_SPACE
1488 || BYTE_TO_ASCII(enc
, ptr
+ MINBPC(enc
)) == ASCII_SPACE
1489 || BYTE_TYPE(enc
, ptr
+ MINBPC(enc
)) == open
))
1490 atts
[nAtts
].normalized
= 0;
1492 case BT_CR
: case BT_LF
:
1493 /* This case ensures that the first attribute name is counted
1494 Apart from that we could just change state on the quote. */
1495 if (state
== inName
)
1497 else if (state
== inValue
&& nAtts
< attsMax
)
1498 atts
[nAtts
].normalized
= 0;
1502 if (state
!= inValue
)
1513 int PREFIX(charRefNumber
)(const ENCODING
*enc
, const char *ptr
)
1517 ptr
+= 2*MINBPC(enc
);
1518 if (CHAR_MATCHES(enc
, ptr
, ASCII_x
)) {
1519 for (ptr
+= MINBPC(enc
); !CHAR_MATCHES(enc
, ptr
, ASCII_SEMI
); ptr
+= MINBPC(enc
)) {
1520 int c
= BYTE_TO_ASCII(enc
, ptr
);
1522 case ASCII_0
: case ASCII_1
: case ASCII_2
: case ASCII_3
: case ASCII_4
:
1523 case ASCII_5
: case ASCII_6
: case ASCII_7
: case ASCII_8
: case ASCII_9
:
1525 result
|= (c
- ASCII_0
);
1527 case ASCII_A
: case ASCII_B
: case ASCII_C
: case ASCII_D
: case ASCII_E
: case ASCII_F
:
1529 result
+= 10 + (c
- ASCII_A
);
1531 case ASCII_a
: case ASCII_b
: case ASCII_c
: case ASCII_d
: case ASCII_e
: case ASCII_f
:
1533 result
+= 10 + (c
- ASCII_a
);
1536 if (result
>= 0x110000)
1541 for (; !CHAR_MATCHES(enc
, ptr
, ASCII_SEMI
); ptr
+= MINBPC(enc
)) {
1542 int c
= BYTE_TO_ASCII(enc
, ptr
);
1544 result
+= (c
- ASCII_0
);
1545 if (result
>= 0x110000)
1549 return checkCharRefNumber(result
);
1553 int PREFIX(predefinedEntityName
)(const ENCODING
*enc
, const char *ptr
, const char *end
)
1555 switch ((end
- ptr
)/MINBPC(enc
)) {
1557 if (CHAR_MATCHES(enc
, ptr
+ MINBPC(enc
), ASCII_t
)) {
1558 switch (BYTE_TO_ASCII(enc
, ptr
)) {
1567 if (CHAR_MATCHES(enc
, ptr
, ASCII_a
)) {
1569 if (CHAR_MATCHES(enc
, ptr
, ASCII_m
)) {
1571 if (CHAR_MATCHES(enc
, ptr
, ASCII_p
))
1577 switch (BYTE_TO_ASCII(enc
, ptr
)) {
1580 if (CHAR_MATCHES(enc
, ptr
, ASCII_u
)) {
1582 if (CHAR_MATCHES(enc
, ptr
, ASCII_o
)) {
1584 if (CHAR_MATCHES(enc
, ptr
, ASCII_t
))
1591 if (CHAR_MATCHES(enc
, ptr
, ASCII_p
)) {
1593 if (CHAR_MATCHES(enc
, ptr
, ASCII_o
)) {
1595 if (CHAR_MATCHES(enc
, ptr
, ASCII_s
))
1606 int PREFIX(sameName
)(const ENCODING
*enc
, const char *ptr1
, const char *ptr2
)
1609 switch (BYTE_TYPE(enc
, ptr1
)) {
1610 #define LEAD_CASE(n) \
1611 case BT_LEAD ## n: \
1612 if (*ptr1++ != *ptr2++) \
1614 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
1617 if (*ptr1
++ != *ptr2
++)
1629 if (*ptr2
++ != *ptr1
++)
1631 if (MINBPC(enc
) > 1) {
1632 if (*ptr2
++ != *ptr1
++)
1634 if (MINBPC(enc
) > 2) {
1635 if (*ptr2
++ != *ptr1
++)
1637 if (MINBPC(enc
) > 3) {
1638 if (*ptr2
++ != *ptr1
++)
1645 if (MINBPC(enc
) == 1 && *ptr1
== *ptr2
)
1647 switch (BYTE_TYPE(enc
, ptr2
)) {
1670 int PREFIX(nameMatchesAscii
)(const ENCODING
*enc
, const char *ptr1
,
1671 const char *end1
, const char *ptr2
)
1673 for (; *ptr2
; ptr1
+= MINBPC(enc
), ptr2
++) {
1676 if (!CHAR_MATCHES(enc
, ptr1
, *ptr2
))
1679 return ptr1
== end1
;
1683 int PREFIX(nameLength
)(const ENCODING
*enc
, const char *ptr
)
1685 const char *start
= ptr
;
1687 switch (BYTE_TYPE(enc
, ptr
)) {
1688 #define LEAD_CASE(n) \
1689 case BT_LEAD ## n: ptr += n; break;
1690 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1710 const char *PREFIX(skipS
)(const ENCODING
*enc
, const char *ptr
)
1713 switch (BYTE_TYPE(enc
, ptr
)) {
1726 void PREFIX(updatePosition
)(const ENCODING
*enc
,
1731 while (ptr
!= end
) {
1732 switch (BYTE_TYPE(enc
, ptr
)) {
1733 #define LEAD_CASE(n) \
1734 case BT_LEAD ## n: \
1737 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
1740 pos
->columnNumber
= (unsigned)-1;
1747 if (ptr
!= end
&& BYTE_TYPE(enc
, ptr
) == BT_LF
)
1749 pos
->columnNumber
= (unsigned)-1;
1755 pos
->columnNumber
++;
1760 #undef MULTIBYTE_CASES
1761 #undef INVALID_CASES
1762 #undef CHECK_NAME_CASE
1763 #undef CHECK_NAME_CASES
1764 #undef CHECK_NMSTRT_CASE
1765 #undef CHECK_NMSTRT_CASES