]> git.saurik.com Git - wxWidgets.git/blob - contrib/src/xml/expat/xmltok/xmlrole.c
fixed parsing of comments before root node
[wxWidgets.git] / contrib / src / xml / expat / xmltok / xmlrole.c
1 /*
2 Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
3 See the file copying.txt for copying permission.
4 */
5
6 #include "xmldef.h"
7 #include "xmlrole.h"
8 #include "ascii.h"
9
/* Doesn't check:

   - that ',' and '|' are not mixed in a model group
   - the content of literals

*/
16
17 static const char KW_ANY[] = { ASCII_A, ASCII_N, ASCII_Y, '\0' };
18 static const char KW_ATTLIST[] = { ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
19 static const char KW_CDATA[] = { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
20 static const char KW_DOCTYPE[] = { ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
21 static const char KW_ELEMENT[] = { ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
22 static const char KW_EMPTY[] = { ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
23 static const char KW_ENTITIES[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
24 static const char KW_ENTITY[] = { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
25 static const char KW_FIXED[] = { ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
26 static const char KW_ID[] = { ASCII_I, ASCII_D, '\0' };
27 static const char KW_IDREF[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
28 static const char KW_IDREFS[] = { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
29 static const char KW_IGNORE[] = { ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
30 static const char KW_IMPLIED[] = { ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
31 static const char KW_INCLUDE[] = { ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
32 static const char KW_NDATA[] = { ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
33 static const char KW_NMTOKEN[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
34 static const char KW_NMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
35 static const char KW_NOTATION[] = { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, '\0' };
36 static const char KW_PCDATA[] = { ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
37 static const char KW_PUBLIC[] = { ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
38 static const char KW_REQUIRED[] = { ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D, '\0' };
39 static const char KW_SYSTEM[] = { ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
40
/* Default definition, used only when MIN_BYTES_PER_CHAR has not already
   been defined: read the value from the encoding's minBytesPerChar field. */
#if !defined(MIN_BYTES_PER_CHAR)
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
44
/* setTopLevel(state): install the handler to resume with once a
   declaration has been completed.  With XML_DTD the choice depends on
   state->documentEntity (internalSubset when set, externalSubset1
   otherwise); without XML_DTD it is always internalSubset. */
#if defined(XML_DTD)
#define setTopLevel(state) \
  ((state)->handler = ((state)->documentEntity \
                       ? internalSubset \
                       : externalSubset1))
#else /* not XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* not XML_DTD */
53
54 typedef int PROLOG_HANDLER(PROLOG_STATE *state,
55 int tok,
56 const char *ptr,
57 const char *end,
58 const ENCODING *enc);
59
60 static PROLOG_HANDLER
61 prolog0, prolog1, prolog2,
62 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
63 internalSubset,
64 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
65 entity7, entity8, entity9,
66 notation0, notation1, notation2, notation3, notation4,
67 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
68 attlist7, attlist8, attlist9,
69 element0, element1, element2, element3, element4, element5, element6,
70 element7,
71 #ifdef XML_DTD
72 externalSubset0, externalSubset1,
73 condSect0, condSect1, condSect2,
74 #endif /* XML_DTD */
75 declClose,
76 error;
77
78 static
79 int common(PROLOG_STATE *state, int tok);
80
81 static
82 int prolog0(PROLOG_STATE *state,
83 int tok,
84 const char *ptr,
85 const char *end,
86 const ENCODING *enc)
87 {
88 switch (tok) {
89 case XML_TOK_PROLOG_S:
90 state->handler = prolog1;
91 return XML_ROLE_NONE;
92 case XML_TOK_XML_DECL:
93 state->handler = prolog1;
94 return XML_ROLE_XML_DECL;
95 case XML_TOK_PI:
96 state->handler = prolog1;
97 return XML_ROLE_NONE;
98 case XML_TOK_COMMENT:
99 state->handler = prolog1;
100 case XML_TOK_BOM:
101 return XML_ROLE_NONE;
102 case XML_TOK_DECL_OPEN:
103 if (!XmlNameMatchesAscii(enc,
104 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
105 end,
106 KW_DOCTYPE))
107 break;
108 state->handler = doctype0;
109 return XML_ROLE_NONE;
110 case XML_TOK_INSTANCE_START:
111 state->handler = error;
112 return XML_ROLE_INSTANCE_START;
113 }
114 return common(state, tok);
115 }
116
117 static
118 int prolog1(PROLOG_STATE *state,
119 int tok,
120 const char *ptr,
121 const char *end,
122 const ENCODING *enc)
123 {
124 switch (tok) {
125 case XML_TOK_PROLOG_S:
126 return XML_ROLE_NONE;
127 case XML_TOK_PI:
128 case XML_TOK_COMMENT:
129 case XML_TOK_BOM:
130 return XML_ROLE_NONE;
131 case XML_TOK_DECL_OPEN:
132 if (!XmlNameMatchesAscii(enc,
133 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
134 end,
135 KW_DOCTYPE))
136 break;
137 state->handler = doctype0;
138 return XML_ROLE_NONE;
139 case XML_TOK_INSTANCE_START:
140 state->handler = error;
141 return XML_ROLE_INSTANCE_START;
142 }
143 return common(state, tok);
144 }
145
146 static
147 int prolog2(PROLOG_STATE *state,
148 int tok,
149 const char *ptr,
150 const char *end,
151 const ENCODING *enc)
152 {
153 switch (tok) {
154 case XML_TOK_PROLOG_S:
155 return XML_ROLE_NONE;
156 case XML_TOK_PI:
157 case XML_TOK_COMMENT:
158 return XML_ROLE_NONE;
159 case XML_TOK_INSTANCE_START:
160 state->handler = error;
161 return XML_ROLE_INSTANCE_START;
162 }
163 return common(state, tok);
164 }
165
166 static
167 int doctype0(PROLOG_STATE *state,
168 int tok,
169 const char *ptr,
170 const char *end,
171 const ENCODING *enc)
172 {
173 switch (tok) {
174 case XML_TOK_PROLOG_S:
175 return XML_ROLE_NONE;
176 case XML_TOK_NAME:
177 case XML_TOK_PREFIXED_NAME:
178 state->handler = doctype1;
179 return XML_ROLE_DOCTYPE_NAME;
180 }
181 return common(state, tok);
182 }
183
184 static
185 int doctype1(PROLOG_STATE *state,
186 int tok,
187 const char *ptr,
188 const char *end,
189 const ENCODING *enc)
190 {
191 switch (tok) {
192 case XML_TOK_PROLOG_S:
193 return XML_ROLE_NONE;
194 case XML_TOK_OPEN_BRACKET:
195 state->handler = internalSubset;
196 return XML_ROLE_NONE;
197 case XML_TOK_DECL_CLOSE:
198 state->handler = prolog2;
199 return XML_ROLE_DOCTYPE_CLOSE;
200 case XML_TOK_NAME:
201 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
202 state->handler = doctype3;
203 return XML_ROLE_NONE;
204 }
205 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
206 state->handler = doctype2;
207 return XML_ROLE_NONE;
208 }
209 break;
210 }
211 return common(state, tok);
212 }
213
214 static
215 int doctype2(PROLOG_STATE *state,
216 int tok,
217 const char *ptr,
218 const char *end,
219 const ENCODING *enc)
220 {
221 switch (tok) {
222 case XML_TOK_PROLOG_S:
223 return XML_ROLE_NONE;
224 case XML_TOK_LITERAL:
225 state->handler = doctype3;
226 return XML_ROLE_DOCTYPE_PUBLIC_ID;
227 }
228 return common(state, tok);
229 }
230
231 static
232 int doctype3(PROLOG_STATE *state,
233 int tok,
234 const char *ptr,
235 const char *end,
236 const ENCODING *enc)
237 {
238 switch (tok) {
239 case XML_TOK_PROLOG_S:
240 return XML_ROLE_NONE;
241 case XML_TOK_LITERAL:
242 state->handler = doctype4;
243 return XML_ROLE_DOCTYPE_SYSTEM_ID;
244 }
245 return common(state, tok);
246 }
247
248 static
249 int doctype4(PROLOG_STATE *state,
250 int tok,
251 const char *ptr,
252 const char *end,
253 const ENCODING *enc)
254 {
255 switch (tok) {
256 case XML_TOK_PROLOG_S:
257 return XML_ROLE_NONE;
258 case XML_TOK_OPEN_BRACKET:
259 state->handler = internalSubset;
260 return XML_ROLE_NONE;
261 case XML_TOK_DECL_CLOSE:
262 state->handler = prolog2;
263 return XML_ROLE_DOCTYPE_CLOSE;
264 }
265 return common(state, tok);
266 }
267
268 static
269 int doctype5(PROLOG_STATE *state,
270 int tok,
271 const char *ptr,
272 const char *end,
273 const ENCODING *enc)
274 {
275 switch (tok) {
276 case XML_TOK_PROLOG_S:
277 return XML_ROLE_NONE;
278 case XML_TOK_DECL_CLOSE:
279 state->handler = prolog2;
280 return XML_ROLE_DOCTYPE_CLOSE;
281 }
282 return common(state, tok);
283 }
284
285 static
286 int internalSubset(PROLOG_STATE *state,
287 int tok,
288 const char *ptr,
289 const char *end,
290 const ENCODING *enc)
291 {
292 switch (tok) {
293 case XML_TOK_PROLOG_S:
294 return XML_ROLE_NONE;
295 case XML_TOK_DECL_OPEN:
296 if (XmlNameMatchesAscii(enc,
297 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
298 end,
299 KW_ENTITY)) {
300 state->handler = entity0;
301 return XML_ROLE_NONE;
302 }
303 if (XmlNameMatchesAscii(enc,
304 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
305 end,
306 KW_ATTLIST)) {
307 state->handler = attlist0;
308 return XML_ROLE_NONE;
309 }
310 if (XmlNameMatchesAscii(enc,
311 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
312 end,
313 KW_ELEMENT)) {
314 state->handler = element0;
315 return XML_ROLE_NONE;
316 }
317 if (XmlNameMatchesAscii(enc,
318 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
319 end,
320 KW_NOTATION)) {
321 state->handler = notation0;
322 return XML_ROLE_NONE;
323 }
324 break;
325 case XML_TOK_PI:
326 case XML_TOK_COMMENT:
327 return XML_ROLE_NONE;
328 case XML_TOK_PARAM_ENTITY_REF:
329 return XML_ROLE_PARAM_ENTITY_REF;
330 case XML_TOK_CLOSE_BRACKET:
331 state->handler = doctype5;
332 return XML_ROLE_NONE;
333 }
334 return common(state, tok);
335 }
336
337 #ifdef XML_DTD
338
339 static
340 int externalSubset0(PROLOG_STATE *state,
341 int tok,
342 const char *ptr,
343 const char *end,
344 const ENCODING *enc)
345 {
346 state->handler = externalSubset1;
347 if (tok == XML_TOK_XML_DECL)
348 return XML_ROLE_TEXT_DECL;
349 return externalSubset1(state, tok, ptr, end, enc);
350 }
351
352 static
353 int externalSubset1(PROLOG_STATE *state,
354 int tok,
355 const char *ptr,
356 const char *end,
357 const ENCODING *enc)
358 {
359 switch (tok) {
360 case XML_TOK_COND_SECT_OPEN:
361 state->handler = condSect0;
362 return XML_ROLE_NONE;
363 case XML_TOK_COND_SECT_CLOSE:
364 if (state->includeLevel == 0)
365 break;
366 state->includeLevel -= 1;
367 return XML_ROLE_NONE;
368 case XML_TOK_PROLOG_S:
369 return XML_ROLE_NONE;
370 case XML_TOK_CLOSE_BRACKET:
371 break;
372 case XML_TOK_NONE:
373 if (state->includeLevel)
374 break;
375 return XML_ROLE_NONE;
376 default:
377 return internalSubset(state, tok, ptr, end, enc);
378 }
379 return common(state, tok);
380 }
381
382 #endif /* XML_DTD */
383
384 static
385 int entity0(PROLOG_STATE *state,
386 int tok,
387 const char *ptr,
388 const char *end,
389 const ENCODING *enc)
390 {
391 switch (tok) {
392 case XML_TOK_PROLOG_S:
393 return XML_ROLE_NONE;
394 case XML_TOK_PERCENT:
395 state->handler = entity1;
396 return XML_ROLE_NONE;
397 case XML_TOK_NAME:
398 state->handler = entity2;
399 return XML_ROLE_GENERAL_ENTITY_NAME;
400 }
401 return common(state, tok);
402 }
403
404 static
405 int entity1(PROLOG_STATE *state,
406 int tok,
407 const char *ptr,
408 const char *end,
409 const ENCODING *enc)
410 {
411 switch (tok) {
412 case XML_TOK_PROLOG_S:
413 return XML_ROLE_NONE;
414 case XML_TOK_NAME:
415 state->handler = entity7;
416 return XML_ROLE_PARAM_ENTITY_NAME;
417 }
418 return common(state, tok);
419 }
420
421 static
422 int entity2(PROLOG_STATE *state,
423 int tok,
424 const char *ptr,
425 const char *end,
426 const ENCODING *enc)
427 {
428 switch (tok) {
429 case XML_TOK_PROLOG_S:
430 return XML_ROLE_NONE;
431 case XML_TOK_NAME:
432 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
433 state->handler = entity4;
434 return XML_ROLE_NONE;
435 }
436 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
437 state->handler = entity3;
438 return XML_ROLE_NONE;
439 }
440 break;
441 case XML_TOK_LITERAL:
442 state->handler = declClose;
443 return XML_ROLE_ENTITY_VALUE;
444 }
445 return common(state, tok);
446 }
447
448 static
449 int entity3(PROLOG_STATE *state,
450 int tok,
451 const char *ptr,
452 const char *end,
453 const ENCODING *enc)
454 {
455 switch (tok) {
456 case XML_TOK_PROLOG_S:
457 return XML_ROLE_NONE;
458 case XML_TOK_LITERAL:
459 state->handler = entity4;
460 return XML_ROLE_ENTITY_PUBLIC_ID;
461 }
462 return common(state, tok);
463 }
464
465
466 static
467 int entity4(PROLOG_STATE *state,
468 int tok,
469 const char *ptr,
470 const char *end,
471 const ENCODING *enc)
472 {
473 switch (tok) {
474 case XML_TOK_PROLOG_S:
475 return XML_ROLE_NONE;
476 case XML_TOK_LITERAL:
477 state->handler = entity5;
478 return XML_ROLE_ENTITY_SYSTEM_ID;
479 }
480 return common(state, tok);
481 }
482
483 static
484 int entity5(PROLOG_STATE *state,
485 int tok,
486 const char *ptr,
487 const char *end,
488 const ENCODING *enc)
489 {
490 switch (tok) {
491 case XML_TOK_PROLOG_S:
492 return XML_ROLE_NONE;
493 case XML_TOK_DECL_CLOSE:
494 setTopLevel(state);
495 return XML_ROLE_EXTERNAL_GENERAL_ENTITY_NO_NOTATION;
496 case XML_TOK_NAME:
497 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
498 state->handler = entity6;
499 return XML_ROLE_NONE;
500 }
501 break;
502 }
503 return common(state, tok);
504 }
505
506 static
507 int entity6(PROLOG_STATE *state,
508 int tok,
509 const char *ptr,
510 const char *end,
511 const ENCODING *enc)
512 {
513 switch (tok) {
514 case XML_TOK_PROLOG_S:
515 return XML_ROLE_NONE;
516 case XML_TOK_NAME:
517 state->handler = declClose;
518 return XML_ROLE_ENTITY_NOTATION_NAME;
519 }
520 return common(state, tok);
521 }
522
523 static
524 int entity7(PROLOG_STATE *state,
525 int tok,
526 const char *ptr,
527 const char *end,
528 const ENCODING *enc)
529 {
530 switch (tok) {
531 case XML_TOK_PROLOG_S:
532 return XML_ROLE_NONE;
533 case XML_TOK_NAME:
534 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
535 state->handler = entity9;
536 return XML_ROLE_NONE;
537 }
538 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
539 state->handler = entity8;
540 return XML_ROLE_NONE;
541 }
542 break;
543 case XML_TOK_LITERAL:
544 state->handler = declClose;
545 return XML_ROLE_ENTITY_VALUE;
546 }
547 return common(state, tok);
548 }
549
550 static
551 int entity8(PROLOG_STATE *state,
552 int tok,
553 const char *ptr,
554 const char *end,
555 const ENCODING *enc)
556 {
557 switch (tok) {
558 case XML_TOK_PROLOG_S:
559 return XML_ROLE_NONE;
560 case XML_TOK_LITERAL:
561 state->handler = entity9;
562 return XML_ROLE_ENTITY_PUBLIC_ID;
563 }
564 return common(state, tok);
565 }
566
567 static
568 int entity9(PROLOG_STATE *state,
569 int tok,
570 const char *ptr,
571 const char *end,
572 const ENCODING *enc)
573 {
574 switch (tok) {
575 case XML_TOK_PROLOG_S:
576 return XML_ROLE_NONE;
577 case XML_TOK_LITERAL:
578 state->handler = declClose;
579 return XML_ROLE_ENTITY_SYSTEM_ID;
580 }
581 return common(state, tok);
582 }
583
584 static
585 int notation0(PROLOG_STATE *state,
586 int tok,
587 const char *ptr,
588 const char *end,
589 const ENCODING *enc)
590 {
591 switch (tok) {
592 case XML_TOK_PROLOG_S:
593 return XML_ROLE_NONE;
594 case XML_TOK_NAME:
595 state->handler = notation1;
596 return XML_ROLE_NOTATION_NAME;
597 }
598 return common(state, tok);
599 }
600
601 static
602 int notation1(PROLOG_STATE *state,
603 int tok,
604 const char *ptr,
605 const char *end,
606 const ENCODING *enc)
607 {
608 switch (tok) {
609 case XML_TOK_PROLOG_S:
610 return XML_ROLE_NONE;
611 case XML_TOK_NAME:
612 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
613 state->handler = notation3;
614 return XML_ROLE_NONE;
615 }
616 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
617 state->handler = notation2;
618 return XML_ROLE_NONE;
619 }
620 break;
621 }
622 return common(state, tok);
623 }
624
625 static
626 int notation2(PROLOG_STATE *state,
627 int tok,
628 const char *ptr,
629 const char *end,
630 const ENCODING *enc)
631 {
632 switch (tok) {
633 case XML_TOK_PROLOG_S:
634 return XML_ROLE_NONE;
635 case XML_TOK_LITERAL:
636 state->handler = notation4;
637 return XML_ROLE_NOTATION_PUBLIC_ID;
638 }
639 return common(state, tok);
640 }
641
642 static
643 int notation3(PROLOG_STATE *state,
644 int tok,
645 const char *ptr,
646 const char *end,
647 const ENCODING *enc)
648 {
649 switch (tok) {
650 case XML_TOK_PROLOG_S:
651 return XML_ROLE_NONE;
652 case XML_TOK_LITERAL:
653 state->handler = declClose;
654 return XML_ROLE_NOTATION_SYSTEM_ID;
655 }
656 return common(state, tok);
657 }
658
659 static
660 int notation4(PROLOG_STATE *state,
661 int tok,
662 const char *ptr,
663 const char *end,
664 const ENCODING *enc)
665 {
666 switch (tok) {
667 case XML_TOK_PROLOG_S:
668 return XML_ROLE_NONE;
669 case XML_TOK_LITERAL:
670 state->handler = declClose;
671 return XML_ROLE_NOTATION_SYSTEM_ID;
672 case XML_TOK_DECL_CLOSE:
673 setTopLevel(state);
674 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
675 }
676 return common(state, tok);
677 }
678
679 static
680 int attlist0(PROLOG_STATE *state,
681 int tok,
682 const char *ptr,
683 const char *end,
684 const ENCODING *enc)
685 {
686 switch (tok) {
687 case XML_TOK_PROLOG_S:
688 return XML_ROLE_NONE;
689 case XML_TOK_NAME:
690 case XML_TOK_PREFIXED_NAME:
691 state->handler = attlist1;
692 return XML_ROLE_ATTLIST_ELEMENT_NAME;
693 }
694 return common(state, tok);
695 }
696
697 static
698 int attlist1(PROLOG_STATE *state,
699 int tok,
700 const char *ptr,
701 const char *end,
702 const ENCODING *enc)
703 {
704 switch (tok) {
705 case XML_TOK_PROLOG_S:
706 return XML_ROLE_NONE;
707 case XML_TOK_DECL_CLOSE:
708 setTopLevel(state);
709 return XML_ROLE_NONE;
710 case XML_TOK_NAME:
711 case XML_TOK_PREFIXED_NAME:
712 state->handler = attlist2;
713 return XML_ROLE_ATTRIBUTE_NAME;
714 }
715 return common(state, tok);
716 }
717
718 static
719 int attlist2(PROLOG_STATE *state,
720 int tok,
721 const char *ptr,
722 const char *end,
723 const ENCODING *enc)
724 {
725 switch (tok) {
726 case XML_TOK_PROLOG_S:
727 return XML_ROLE_NONE;
728 case XML_TOK_NAME:
729 {
730 static const char *types[] = {
731 KW_CDATA,
732 KW_ID,
733 KW_IDREF,
734 KW_IDREFS,
735 KW_ENTITY,
736 KW_ENTITIES,
737 KW_NMTOKEN,
738 KW_NMTOKENS,
739 };
740 int i;
741 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
742 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
743 state->handler = attlist8;
744 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
745 }
746 }
747 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
748 state->handler = attlist5;
749 return XML_ROLE_NONE;
750 }
751 break;
752 case XML_TOK_OPEN_PAREN:
753 state->handler = attlist3;
754 return XML_ROLE_NONE;
755 }
756 return common(state, tok);
757 }
758
759 static
760 int attlist3(PROLOG_STATE *state,
761 int tok,
762 const char *ptr,
763 const char *end,
764 const ENCODING *enc)
765 {
766 switch (tok) {
767 case XML_TOK_PROLOG_S:
768 return XML_ROLE_NONE;
769 case XML_TOK_NMTOKEN:
770 case XML_TOK_NAME:
771 case XML_TOK_PREFIXED_NAME:
772 state->handler = attlist4;
773 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
774 }
775 return common(state, tok);
776 }
777
778 static
779 int attlist4(PROLOG_STATE *state,
780 int tok,
781 const char *ptr,
782 const char *end,
783 const ENCODING *enc)
784 {
785 switch (tok) {
786 case XML_TOK_PROLOG_S:
787 return XML_ROLE_NONE;
788 case XML_TOK_CLOSE_PAREN:
789 state->handler = attlist8;
790 return XML_ROLE_NONE;
791 case XML_TOK_OR:
792 state->handler = attlist3;
793 return XML_ROLE_NONE;
794 }
795 return common(state, tok);
796 }
797
798 static
799 int attlist5(PROLOG_STATE *state,
800 int tok,
801 const char *ptr,
802 const char *end,
803 const ENCODING *enc)
804 {
805 switch (tok) {
806 case XML_TOK_PROLOG_S:
807 return XML_ROLE_NONE;
808 case XML_TOK_OPEN_PAREN:
809 state->handler = attlist6;
810 return XML_ROLE_NONE;
811 }
812 return common(state, tok);
813 }
814
815
816 static
817 int attlist6(PROLOG_STATE *state,
818 int tok,
819 const char *ptr,
820 const char *end,
821 const ENCODING *enc)
822 {
823 switch (tok) {
824 case XML_TOK_PROLOG_S:
825 return XML_ROLE_NONE;
826 case XML_TOK_NAME:
827 state->handler = attlist7;
828 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
829 }
830 return common(state, tok);
831 }
832
833 static
834 int attlist7(PROLOG_STATE *state,
835 int tok,
836 const char *ptr,
837 const char *end,
838 const ENCODING *enc)
839 {
840 switch (tok) {
841 case XML_TOK_PROLOG_S:
842 return XML_ROLE_NONE;
843 case XML_TOK_CLOSE_PAREN:
844 state->handler = attlist8;
845 return XML_ROLE_NONE;
846 case XML_TOK_OR:
847 state->handler = attlist6;
848 return XML_ROLE_NONE;
849 }
850 return common(state, tok);
851 }
852
853 /* default value */
854 static
855 int attlist8(PROLOG_STATE *state,
856 int tok,
857 const char *ptr,
858 const char *end,
859 const ENCODING *enc)
860 {
861 switch (tok) {
862 case XML_TOK_PROLOG_S:
863 return XML_ROLE_NONE;
864 case XML_TOK_POUND_NAME:
865 if (XmlNameMatchesAscii(enc,
866 ptr + MIN_BYTES_PER_CHAR(enc),
867 end,
868 KW_IMPLIED)) {
869 state->handler = attlist1;
870 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
871 }
872 if (XmlNameMatchesAscii(enc,
873 ptr + MIN_BYTES_PER_CHAR(enc),
874 end,
875 KW_REQUIRED)) {
876 state->handler = attlist1;
877 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
878 }
879 if (XmlNameMatchesAscii(enc,
880 ptr + MIN_BYTES_PER_CHAR(enc),
881 end,
882 KW_FIXED)) {
883 state->handler = attlist9;
884 return XML_ROLE_NONE;
885 }
886 break;
887 case XML_TOK_LITERAL:
888 state->handler = attlist1;
889 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
890 }
891 return common(state, tok);
892 }
893
894 static
895 int attlist9(PROLOG_STATE *state,
896 int tok,
897 const char *ptr,
898 const char *end,
899 const ENCODING *enc)
900 {
901 switch (tok) {
902 case XML_TOK_PROLOG_S:
903 return XML_ROLE_NONE;
904 case XML_TOK_LITERAL:
905 state->handler = attlist1;
906 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
907 }
908 return common(state, tok);
909 }
910
911 static
912 int element0(PROLOG_STATE *state,
913 int tok,
914 const char *ptr,
915 const char *end,
916 const ENCODING *enc)
917 {
918 switch (tok) {
919 case XML_TOK_PROLOG_S:
920 return XML_ROLE_NONE;
921 case XML_TOK_NAME:
922 case XML_TOK_PREFIXED_NAME:
923 state->handler = element1;
924 return XML_ROLE_ELEMENT_NAME;
925 }
926 return common(state, tok);
927 }
928
929 static
930 int element1(PROLOG_STATE *state,
931 int tok,
932 const char *ptr,
933 const char *end,
934 const ENCODING *enc)
935 {
936 switch (tok) {
937 case XML_TOK_PROLOG_S:
938 return XML_ROLE_NONE;
939 case XML_TOK_NAME:
940 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
941 state->handler = declClose;
942 return XML_ROLE_CONTENT_EMPTY;
943 }
944 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
945 state->handler = declClose;
946 return XML_ROLE_CONTENT_ANY;
947 }
948 break;
949 case XML_TOK_OPEN_PAREN:
950 state->handler = element2;
951 state->level = 1;
952 return XML_ROLE_GROUP_OPEN;
953 }
954 return common(state, tok);
955 }
956
957 static
958 int element2(PROLOG_STATE *state,
959 int tok,
960 const char *ptr,
961 const char *end,
962 const ENCODING *enc)
963 {
964 switch (tok) {
965 case XML_TOK_PROLOG_S:
966 return XML_ROLE_NONE;
967 case XML_TOK_POUND_NAME:
968 if (XmlNameMatchesAscii(enc,
969 ptr + MIN_BYTES_PER_CHAR(enc),
970 end,
971 KW_PCDATA)) {
972 state->handler = element3;
973 return XML_ROLE_CONTENT_PCDATA;
974 }
975 break;
976 case XML_TOK_OPEN_PAREN:
977 state->level = 2;
978 state->handler = element6;
979 return XML_ROLE_GROUP_OPEN;
980 case XML_TOK_NAME:
981 case XML_TOK_PREFIXED_NAME:
982 state->handler = element7;
983 return XML_ROLE_CONTENT_ELEMENT;
984 case XML_TOK_NAME_QUESTION:
985 state->handler = element7;
986 return XML_ROLE_CONTENT_ELEMENT_OPT;
987 case XML_TOK_NAME_ASTERISK:
988 state->handler = element7;
989 return XML_ROLE_CONTENT_ELEMENT_REP;
990 case XML_TOK_NAME_PLUS:
991 state->handler = element7;
992 return XML_ROLE_CONTENT_ELEMENT_PLUS;
993 }
994 return common(state, tok);
995 }
996
997 static
998 int element3(PROLOG_STATE *state,
999 int tok,
1000 const char *ptr,
1001 const char *end,
1002 const ENCODING *enc)
1003 {
1004 switch (tok) {
1005 case XML_TOK_PROLOG_S:
1006 return XML_ROLE_NONE;
1007 case XML_TOK_CLOSE_PAREN:
1008 case XML_TOK_CLOSE_PAREN_ASTERISK:
1009 state->handler = declClose;
1010 return XML_ROLE_GROUP_CLOSE_REP;
1011 case XML_TOK_OR:
1012 state->handler = element4;
1013 return XML_ROLE_NONE;
1014 }
1015 return common(state, tok);
1016 }
1017
1018 static
1019 int element4(PROLOG_STATE *state,
1020 int tok,
1021 const char *ptr,
1022 const char *end,
1023 const ENCODING *enc)
1024 {
1025 switch (tok) {
1026 case XML_TOK_PROLOG_S:
1027 return XML_ROLE_NONE;
1028 case XML_TOK_NAME:
1029 case XML_TOK_PREFIXED_NAME:
1030 state->handler = element5;
1031 return XML_ROLE_CONTENT_ELEMENT;
1032 }
1033 return common(state, tok);
1034 }
1035
1036 static
1037 int element5(PROLOG_STATE *state,
1038 int tok,
1039 const char *ptr,
1040 const char *end,
1041 const ENCODING *enc)
1042 {
1043 switch (tok) {
1044 case XML_TOK_PROLOG_S:
1045 return XML_ROLE_NONE;
1046 case XML_TOK_CLOSE_PAREN_ASTERISK:
1047 state->handler = declClose;
1048 return XML_ROLE_GROUP_CLOSE_REP;
1049 case XML_TOK_OR:
1050 state->handler = element4;
1051 return XML_ROLE_NONE;
1052 }
1053 return common(state, tok);
1054 }
1055
1056 static
1057 int element6(PROLOG_STATE *state,
1058 int tok,
1059 const char *ptr,
1060 const char *end,
1061 const ENCODING *enc)
1062 {
1063 switch (tok) {
1064 case XML_TOK_PROLOG_S:
1065 return XML_ROLE_NONE;
1066 case XML_TOK_OPEN_PAREN:
1067 state->level += 1;
1068 return XML_ROLE_GROUP_OPEN;
1069 case XML_TOK_NAME:
1070 case XML_TOK_PREFIXED_NAME:
1071 state->handler = element7;
1072 return XML_ROLE_CONTENT_ELEMENT;
1073 case XML_TOK_NAME_QUESTION:
1074 state->handler = element7;
1075 return XML_ROLE_CONTENT_ELEMENT_OPT;
1076 case XML_TOK_NAME_ASTERISK:
1077 state->handler = element7;
1078 return XML_ROLE_CONTENT_ELEMENT_REP;
1079 case XML_TOK_NAME_PLUS:
1080 state->handler = element7;
1081 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1082 }
1083 return common(state, tok);
1084 }
1085
1086 static
1087 int element7(PROLOG_STATE *state,
1088 int tok,
1089 const char *ptr,
1090 const char *end,
1091 const ENCODING *enc)
1092 {
1093 switch (tok) {
1094 case XML_TOK_PROLOG_S:
1095 return XML_ROLE_NONE;
1096 case XML_TOK_CLOSE_PAREN:
1097 state->level -= 1;
1098 if (state->level == 0)
1099 state->handler = declClose;
1100 return XML_ROLE_GROUP_CLOSE;
1101 case XML_TOK_CLOSE_PAREN_ASTERISK:
1102 state->level -= 1;
1103 if (state->level == 0)
1104 state->handler = declClose;
1105 return XML_ROLE_GROUP_CLOSE_REP;
1106 case XML_TOK_CLOSE_PAREN_QUESTION:
1107 state->level -= 1;
1108 if (state->level == 0)
1109 state->handler = declClose;
1110 return XML_ROLE_GROUP_CLOSE_OPT;
1111 case XML_TOK_CLOSE_PAREN_PLUS:
1112 state->level -= 1;
1113 if (state->level == 0)
1114 state->handler = declClose;
1115 return XML_ROLE_GROUP_CLOSE_PLUS;
1116 case XML_TOK_COMMA:
1117 state->handler = element6;
1118 return XML_ROLE_GROUP_SEQUENCE;
1119 case XML_TOK_OR:
1120 state->handler = element6;
1121 return XML_ROLE_GROUP_CHOICE;
1122 }
1123 return common(state, tok);
1124 }
1125
1126 #ifdef XML_DTD
1127
1128 static
1129 int condSect0(PROLOG_STATE *state,
1130 int tok,
1131 const char *ptr,
1132 const char *end,
1133 const ENCODING *enc)
1134 {
1135 switch (tok) {
1136 case XML_TOK_PROLOG_S:
1137 return XML_ROLE_NONE;
1138 case XML_TOK_NAME:
1139 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1140 state->handler = condSect1;
1141 return XML_ROLE_NONE;
1142 }
1143 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1144 state->handler = condSect2;
1145 return XML_ROLE_NONE;
1146 }
1147 break;
1148 }
1149 return common(state, tok);
1150 }
1151
1152 static
1153 int condSect1(PROLOG_STATE *state,
1154 int tok,
1155 const char *ptr,
1156 const char *end,
1157 const ENCODING *enc)
1158 {
1159 switch (tok) {
1160 case XML_TOK_PROLOG_S:
1161 return XML_ROLE_NONE;
1162 case XML_TOK_OPEN_BRACKET:
1163 state->handler = externalSubset1;
1164 state->includeLevel += 1;
1165 return XML_ROLE_NONE;
1166 }
1167 return common(state, tok);
1168 }
1169
1170 static
1171 int condSect2(PROLOG_STATE *state,
1172 int tok,
1173 const char *ptr,
1174 const char *end,
1175 const ENCODING *enc)
1176 {
1177 switch (tok) {
1178 case XML_TOK_PROLOG_S:
1179 return XML_ROLE_NONE;
1180 case XML_TOK_OPEN_BRACKET:
1181 state->handler = externalSubset1;
1182 return XML_ROLE_IGNORE_SECT;
1183 }
1184 return common(state, tok);
1185 }
1186
1187 #endif /* XML_DTD */
1188
1189 static
1190 int declClose(PROLOG_STATE *state,
1191 int tok,
1192 const char *ptr,
1193 const char *end,
1194 const ENCODING *enc)
1195 {
1196 switch (tok) {
1197 case XML_TOK_PROLOG_S:
1198 return XML_ROLE_NONE;
1199 case XML_TOK_DECL_CLOSE:
1200 setTopLevel(state);
1201 return XML_ROLE_NONE;
1202 }
1203 return common(state, tok);
1204 }
1205
#if 0

/* Disabled (compiled out).  Accepts every token with XML_ROLE_NONE
   until XML_TOK_DECL_CLOSE, at which point it switches back to
   internalSubset and returns 0. */
static int ignore(PROLOG_STATE *state, int tok, const char *ptr,
                  const char *end, const ENCODING *enc)
{
  if (tok == XML_TOK_DECL_CLOSE) {
    state->handler = internalSubset;
    return 0;
  }
  return XML_ROLE_NONE;
}
#endif
1225
1226 static
1227 int error(PROLOG_STATE *state,
1228 int tok,
1229 const char *ptr,
1230 const char *end,
1231 const ENCODING *enc)
1232 {
1233 return XML_ROLE_NONE;
1234 }
1235
1236 static
1237 int common(PROLOG_STATE *state, int tok)
1238 {
1239 #ifdef XML_DTD
1240 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1241 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1242 #endif
1243 state->handler = error;
1244 return XML_ROLE_ERROR;
1245 }
1246
1247 void XmlPrologStateInit(PROLOG_STATE *state)
1248 {
1249 state->handler = prolog0;
1250 #ifdef XML_DTD
1251 state->documentEntity = 1;
1252 state->includeLevel = 0;
1253 #endif /* XML_DTD */
1254 }
1255
1256 #ifdef XML_DTD
1257
1258 void XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1259 {
1260 state->handler = externalSubset0;
1261 state->documentEntity = 0;
1262 state->includeLevel = 0;
1263 }
1264
1265 #endif /* XML_DTD */