]> git.saurik.com Git - wxWidgets.git/blob - src/expat/lib/xmlrole.c
final sweep over docs - replace & with \&
[wxWidgets.git] / src / expat / lib / xmlrole.c
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
3 */
4
5 #ifdef COMPILED_FROM_DSP
6 #include "winconfig.h"
7 #elif defined(MACOS_CLASSIC)
8 #include "macconfig.h"
9 #else
10 #include "expat_config.h"
11 #endif /* ndef COMPILED_FROM_DSP */
12
13 #include "internal.h"
14 #include "xmlrole.h"
15 #include "ascii.h"
16
/* Known limitations -- this state machine does not check:

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
23
24 static const char KW_ANY[] = {
25 ASCII_A, ASCII_N, ASCII_Y, '\0' };
26 static const char KW_ATTLIST[] = {
27 ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
28 static const char KW_CDATA[] = {
29 ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
30 static const char KW_DOCTYPE[] = {
31 ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
32 static const char KW_ELEMENT[] = {
33 ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
34 static const char KW_EMPTY[] = {
35 ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
36 static const char KW_ENTITIES[] = {
37 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
38 '\0' };
39 static const char KW_ENTITY[] = {
40 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
41 static const char KW_FIXED[] = {
42 ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
43 static const char KW_ID[] = {
44 ASCII_I, ASCII_D, '\0' };
45 static const char KW_IDREF[] = {
46 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
47 static const char KW_IDREFS[] = {
48 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
49 static const char KW_IGNORE[] = {
50 ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
51 static const char KW_IMPLIED[] = {
52 ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
53 static const char KW_INCLUDE[] = {
54 ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
55 static const char KW_NDATA[] = {
56 ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
57 static const char KW_NMTOKEN[] = {
58 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
59 static const char KW_NMTOKENS[] = {
60 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
61 '\0' };
62 static const char KW_NOTATION[] =
63 { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
64 '\0' };
65 static const char KW_PCDATA[] = {
66 ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
67 static const char KW_PUBLIC[] = {
68 ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
69 static const char KW_REQUIRED[] = {
70 ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
71 '\0' };
72 static const char KW_SYSTEM[] = {
73 ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
74
/* Width of the smallest character in the encoding, read from the encoding
   object unless the build has already supplied its own definition. */
#ifndef MIN_BYTES_PER_CHAR
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif

/* Re-install the handler for the level at which declarations occur.
   With XML_DTD the choice depends on state->documentEntity:
   internalSubset when it is set, externalSubset1 otherwise.  Without
   XML_DTD the handler is always internalSubset. */
#ifdef XML_DTD
#define setTopLevel(state) \
  ((state)->handler = \
     ((state)->documentEntity ? internalSubset : externalSubset1))
#else /* !XML_DTD */
#define setTopLevel(state) ((state)->handler = internalSubset)
#endif /* XML_DTD */
87
88 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
89 int tok,
90 const char *ptr,
91 const char *end,
92 const ENCODING *enc);
93
94 static PROLOG_HANDLER
95 prolog0, prolog1, prolog2,
96 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
97 internalSubset,
98 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
99 entity7, entity8, entity9, entity10,
100 notation0, notation1, notation2, notation3, notation4,
101 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
102 attlist7, attlist8, attlist9,
103 element0, element1, element2, element3, element4, element5, element6,
104 element7,
105 #ifdef XML_DTD
106 externalSubset0, externalSubset1,
107 condSect0, condSect1, condSect2,
108 #endif /* XML_DTD */
109 declClose,
110 error;
111
112 static int FASTCALL common(PROLOG_STATE *state, int tok);
113
114 static int PTRCALL
115 prolog0(PROLOG_STATE *state,
116 int tok,
117 const char *ptr,
118 const char *end,
119 const ENCODING *enc)
120 {
121 switch (tok) {
122 case XML_TOK_PROLOG_S:
123 state->handler = prolog1;
124 return XML_ROLE_NONE;
125 case XML_TOK_XML_DECL:
126 state->handler = prolog1;
127 return XML_ROLE_XML_DECL;
128 case XML_TOK_PI:
129 state->handler = prolog1;
130 return XML_ROLE_PI;
131 case XML_TOK_COMMENT:
132 state->handler = prolog1;
133 return XML_ROLE_COMMENT;
134 case XML_TOK_BOM:
135 return XML_ROLE_NONE;
136 case XML_TOK_DECL_OPEN:
137 if (!XmlNameMatchesAscii(enc,
138 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
139 end,
140 KW_DOCTYPE))
141 break;
142 state->handler = doctype0;
143 return XML_ROLE_DOCTYPE_NONE;
144 case XML_TOK_INSTANCE_START:
145 state->handler = error;
146 return XML_ROLE_INSTANCE_START;
147 }
148 return common(state, tok);
149 }
150
151 static int PTRCALL
152 prolog1(PROLOG_STATE *state,
153 int tok,
154 const char *ptr,
155 const char *end,
156 const ENCODING *enc)
157 {
158 switch (tok) {
159 case XML_TOK_PROLOG_S:
160 return XML_ROLE_NONE;
161 case XML_TOK_PI:
162 return XML_ROLE_PI;
163 case XML_TOK_COMMENT:
164 return XML_ROLE_COMMENT;
165 case XML_TOK_BOM:
166 return XML_ROLE_NONE;
167 case XML_TOK_DECL_OPEN:
168 if (!XmlNameMatchesAscii(enc,
169 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
170 end,
171 KW_DOCTYPE))
172 break;
173 state->handler = doctype0;
174 return XML_ROLE_DOCTYPE_NONE;
175 case XML_TOK_INSTANCE_START:
176 state->handler = error;
177 return XML_ROLE_INSTANCE_START;
178 }
179 return common(state, tok);
180 }
181
182 static int PTRCALL
183 prolog2(PROLOG_STATE *state,
184 int tok,
185 const char *ptr,
186 const char *end,
187 const ENCODING *enc)
188 {
189 switch (tok) {
190 case XML_TOK_PROLOG_S:
191 return XML_ROLE_NONE;
192 case XML_TOK_PI:
193 return XML_ROLE_PI;
194 case XML_TOK_COMMENT:
195 return XML_ROLE_COMMENT;
196 case XML_TOK_INSTANCE_START:
197 state->handler = error;
198 return XML_ROLE_INSTANCE_START;
199 }
200 return common(state, tok);
201 }
202
203 static int PTRCALL
204 doctype0(PROLOG_STATE *state,
205 int tok,
206 const char *ptr,
207 const char *end,
208 const ENCODING *enc)
209 {
210 switch (tok) {
211 case XML_TOK_PROLOG_S:
212 return XML_ROLE_DOCTYPE_NONE;
213 case XML_TOK_NAME:
214 case XML_TOK_PREFIXED_NAME:
215 state->handler = doctype1;
216 return XML_ROLE_DOCTYPE_NAME;
217 }
218 return common(state, tok);
219 }
220
221 static int PTRCALL
222 doctype1(PROLOG_STATE *state,
223 int tok,
224 const char *ptr,
225 const char *end,
226 const ENCODING *enc)
227 {
228 switch (tok) {
229 case XML_TOK_PROLOG_S:
230 return XML_ROLE_DOCTYPE_NONE;
231 case XML_TOK_OPEN_BRACKET:
232 state->handler = internalSubset;
233 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
234 case XML_TOK_DECL_CLOSE:
235 state->handler = prolog2;
236 return XML_ROLE_DOCTYPE_CLOSE;
237 case XML_TOK_NAME:
238 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
239 state->handler = doctype3;
240 return XML_ROLE_DOCTYPE_NONE;
241 }
242 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
243 state->handler = doctype2;
244 return XML_ROLE_DOCTYPE_NONE;
245 }
246 break;
247 }
248 return common(state, tok);
249 }
250
251 static int PTRCALL
252 doctype2(PROLOG_STATE *state,
253 int tok,
254 const char *ptr,
255 const char *end,
256 const ENCODING *enc)
257 {
258 switch (tok) {
259 case XML_TOK_PROLOG_S:
260 return XML_ROLE_DOCTYPE_NONE;
261 case XML_TOK_LITERAL:
262 state->handler = doctype3;
263 return XML_ROLE_DOCTYPE_PUBLIC_ID;
264 }
265 return common(state, tok);
266 }
267
268 static int PTRCALL
269 doctype3(PROLOG_STATE *state,
270 int tok,
271 const char *ptr,
272 const char *end,
273 const ENCODING *enc)
274 {
275 switch (tok) {
276 case XML_TOK_PROLOG_S:
277 return XML_ROLE_DOCTYPE_NONE;
278 case XML_TOK_LITERAL:
279 state->handler = doctype4;
280 return XML_ROLE_DOCTYPE_SYSTEM_ID;
281 }
282 return common(state, tok);
283 }
284
285 static int PTRCALL
286 doctype4(PROLOG_STATE *state,
287 int tok,
288 const char *ptr,
289 const char *end,
290 const ENCODING *enc)
291 {
292 switch (tok) {
293 case XML_TOK_PROLOG_S:
294 return XML_ROLE_DOCTYPE_NONE;
295 case XML_TOK_OPEN_BRACKET:
296 state->handler = internalSubset;
297 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
298 case XML_TOK_DECL_CLOSE:
299 state->handler = prolog2;
300 return XML_ROLE_DOCTYPE_CLOSE;
301 }
302 return common(state, tok);
303 }
304
305 static int PTRCALL
306 doctype5(PROLOG_STATE *state,
307 int tok,
308 const char *ptr,
309 const char *end,
310 const ENCODING *enc)
311 {
312 switch (tok) {
313 case XML_TOK_PROLOG_S:
314 return XML_ROLE_DOCTYPE_NONE;
315 case XML_TOK_DECL_CLOSE:
316 state->handler = prolog2;
317 return XML_ROLE_DOCTYPE_CLOSE;
318 }
319 return common(state, tok);
320 }
321
322 static int PTRCALL
323 internalSubset(PROLOG_STATE *state,
324 int tok,
325 const char *ptr,
326 const char *end,
327 const ENCODING *enc)
328 {
329 switch (tok) {
330 case XML_TOK_PROLOG_S:
331 return XML_ROLE_NONE;
332 case XML_TOK_DECL_OPEN:
333 if (XmlNameMatchesAscii(enc,
334 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
335 end,
336 KW_ENTITY)) {
337 state->handler = entity0;
338 return XML_ROLE_ENTITY_NONE;
339 }
340 if (XmlNameMatchesAscii(enc,
341 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
342 end,
343 KW_ATTLIST)) {
344 state->handler = attlist0;
345 return XML_ROLE_ATTLIST_NONE;
346 }
347 if (XmlNameMatchesAscii(enc,
348 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
349 end,
350 KW_ELEMENT)) {
351 state->handler = element0;
352 return XML_ROLE_ELEMENT_NONE;
353 }
354 if (XmlNameMatchesAscii(enc,
355 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
356 end,
357 KW_NOTATION)) {
358 state->handler = notation0;
359 return XML_ROLE_NOTATION_NONE;
360 }
361 break;
362 case XML_TOK_PI:
363 return XML_ROLE_PI;
364 case XML_TOK_COMMENT:
365 return XML_ROLE_COMMENT;
366 case XML_TOK_PARAM_ENTITY_REF:
367 return XML_ROLE_PARAM_ENTITY_REF;
368 case XML_TOK_CLOSE_BRACKET:
369 state->handler = doctype5;
370 return XML_ROLE_DOCTYPE_NONE;
371 }
372 return common(state, tok);
373 }
374
375 #ifdef XML_DTD
376
377 static int PTRCALL
378 externalSubset0(PROLOG_STATE *state,
379 int tok,
380 const char *ptr,
381 const char *end,
382 const ENCODING *enc)
383 {
384 state->handler = externalSubset1;
385 if (tok == XML_TOK_XML_DECL)
386 return XML_ROLE_TEXT_DECL;
387 return externalSubset1(state, tok, ptr, end, enc);
388 }
389
390 static int PTRCALL
391 externalSubset1(PROLOG_STATE *state,
392 int tok,
393 const char *ptr,
394 const char *end,
395 const ENCODING *enc)
396 {
397 switch (tok) {
398 case XML_TOK_COND_SECT_OPEN:
399 state->handler = condSect0;
400 return XML_ROLE_NONE;
401 case XML_TOK_COND_SECT_CLOSE:
402 if (state->includeLevel == 0)
403 break;
404 state->includeLevel -= 1;
405 return XML_ROLE_NONE;
406 case XML_TOK_PROLOG_S:
407 return XML_ROLE_NONE;
408 case XML_TOK_CLOSE_BRACKET:
409 break;
410 case XML_TOK_NONE:
411 if (state->includeLevel)
412 break;
413 return XML_ROLE_NONE;
414 default:
415 return internalSubset(state, tok, ptr, end, enc);
416 }
417 return common(state, tok);
418 }
419
420 #endif /* XML_DTD */
421
422 static int PTRCALL
423 entity0(PROLOG_STATE *state,
424 int tok,
425 const char *ptr,
426 const char *end,
427 const ENCODING *enc)
428 {
429 switch (tok) {
430 case XML_TOK_PROLOG_S:
431 return XML_ROLE_ENTITY_NONE;
432 case XML_TOK_PERCENT:
433 state->handler = entity1;
434 return XML_ROLE_ENTITY_NONE;
435 case XML_TOK_NAME:
436 state->handler = entity2;
437 return XML_ROLE_GENERAL_ENTITY_NAME;
438 }
439 return common(state, tok);
440 }
441
442 static int PTRCALL
443 entity1(PROLOG_STATE *state,
444 int tok,
445 const char *ptr,
446 const char *end,
447 const ENCODING *enc)
448 {
449 switch (tok) {
450 case XML_TOK_PROLOG_S:
451 return XML_ROLE_ENTITY_NONE;
452 case XML_TOK_NAME:
453 state->handler = entity7;
454 return XML_ROLE_PARAM_ENTITY_NAME;
455 }
456 return common(state, tok);
457 }
458
459 static int PTRCALL
460 entity2(PROLOG_STATE *state,
461 int tok,
462 const char *ptr,
463 const char *end,
464 const ENCODING *enc)
465 {
466 switch (tok) {
467 case XML_TOK_PROLOG_S:
468 return XML_ROLE_ENTITY_NONE;
469 case XML_TOK_NAME:
470 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
471 state->handler = entity4;
472 return XML_ROLE_ENTITY_NONE;
473 }
474 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
475 state->handler = entity3;
476 return XML_ROLE_ENTITY_NONE;
477 }
478 break;
479 case XML_TOK_LITERAL:
480 state->handler = declClose;
481 state->role_none = XML_ROLE_ENTITY_NONE;
482 return XML_ROLE_ENTITY_VALUE;
483 }
484 return common(state, tok);
485 }
486
487 static int PTRCALL
488 entity3(PROLOG_STATE *state,
489 int tok,
490 const char *ptr,
491 const char *end,
492 const ENCODING *enc)
493 {
494 switch (tok) {
495 case XML_TOK_PROLOG_S:
496 return XML_ROLE_ENTITY_NONE;
497 case XML_TOK_LITERAL:
498 state->handler = entity4;
499 return XML_ROLE_ENTITY_PUBLIC_ID;
500 }
501 return common(state, tok);
502 }
503
504 static int PTRCALL
505 entity4(PROLOG_STATE *state,
506 int tok,
507 const char *ptr,
508 const char *end,
509 const ENCODING *enc)
510 {
511 switch (tok) {
512 case XML_TOK_PROLOG_S:
513 return XML_ROLE_ENTITY_NONE;
514 case XML_TOK_LITERAL:
515 state->handler = entity5;
516 return XML_ROLE_ENTITY_SYSTEM_ID;
517 }
518 return common(state, tok);
519 }
520
521 static int PTRCALL
522 entity5(PROLOG_STATE *state,
523 int tok,
524 const char *ptr,
525 const char *end,
526 const ENCODING *enc)
527 {
528 switch (tok) {
529 case XML_TOK_PROLOG_S:
530 return XML_ROLE_ENTITY_NONE;
531 case XML_TOK_DECL_CLOSE:
532 setTopLevel(state);
533 return XML_ROLE_ENTITY_COMPLETE;
534 case XML_TOK_NAME:
535 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
536 state->handler = entity6;
537 return XML_ROLE_ENTITY_NONE;
538 }
539 break;
540 }
541 return common(state, tok);
542 }
543
544 static int PTRCALL
545 entity6(PROLOG_STATE *state,
546 int tok,
547 const char *ptr,
548 const char *end,
549 const ENCODING *enc)
550 {
551 switch (tok) {
552 case XML_TOK_PROLOG_S:
553 return XML_ROLE_ENTITY_NONE;
554 case XML_TOK_NAME:
555 state->handler = declClose;
556 state->role_none = XML_ROLE_ENTITY_NONE;
557 return XML_ROLE_ENTITY_NOTATION_NAME;
558 }
559 return common(state, tok);
560 }
561
562 static int PTRCALL
563 entity7(PROLOG_STATE *state,
564 int tok,
565 const char *ptr,
566 const char *end,
567 const ENCODING *enc)
568 {
569 switch (tok) {
570 case XML_TOK_PROLOG_S:
571 return XML_ROLE_ENTITY_NONE;
572 case XML_TOK_NAME:
573 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
574 state->handler = entity9;
575 return XML_ROLE_ENTITY_NONE;
576 }
577 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
578 state->handler = entity8;
579 return XML_ROLE_ENTITY_NONE;
580 }
581 break;
582 case XML_TOK_LITERAL:
583 state->handler = declClose;
584 state->role_none = XML_ROLE_ENTITY_NONE;
585 return XML_ROLE_ENTITY_VALUE;
586 }
587 return common(state, tok);
588 }
589
590 static int PTRCALL
591 entity8(PROLOG_STATE *state,
592 int tok,
593 const char *ptr,
594 const char *end,
595 const ENCODING *enc)
596 {
597 switch (tok) {
598 case XML_TOK_PROLOG_S:
599 return XML_ROLE_ENTITY_NONE;
600 case XML_TOK_LITERAL:
601 state->handler = entity9;
602 return XML_ROLE_ENTITY_PUBLIC_ID;
603 }
604 return common(state, tok);
605 }
606
607 static int PTRCALL
608 entity9(PROLOG_STATE *state,
609 int tok,
610 const char *ptr,
611 const char *end,
612 const ENCODING *enc)
613 {
614 switch (tok) {
615 case XML_TOK_PROLOG_S:
616 return XML_ROLE_ENTITY_NONE;
617 case XML_TOK_LITERAL:
618 state->handler = entity10;
619 return XML_ROLE_ENTITY_SYSTEM_ID;
620 }
621 return common(state, tok);
622 }
623
624 static int PTRCALL
625 entity10(PROLOG_STATE *state,
626 int tok,
627 const char *ptr,
628 const char *end,
629 const ENCODING *enc)
630 {
631 switch (tok) {
632 case XML_TOK_PROLOG_S:
633 return XML_ROLE_ENTITY_NONE;
634 case XML_TOK_DECL_CLOSE:
635 setTopLevel(state);
636 return XML_ROLE_ENTITY_COMPLETE;
637 }
638 return common(state, tok);
639 }
640
641 static int PTRCALL
642 notation0(PROLOG_STATE *state,
643 int tok,
644 const char *ptr,
645 const char *end,
646 const ENCODING *enc)
647 {
648 switch (tok) {
649 case XML_TOK_PROLOG_S:
650 return XML_ROLE_NOTATION_NONE;
651 case XML_TOK_NAME:
652 state->handler = notation1;
653 return XML_ROLE_NOTATION_NAME;
654 }
655 return common(state, tok);
656 }
657
658 static int PTRCALL
659 notation1(PROLOG_STATE *state,
660 int tok,
661 const char *ptr,
662 const char *end,
663 const ENCODING *enc)
664 {
665 switch (tok) {
666 case XML_TOK_PROLOG_S:
667 return XML_ROLE_NOTATION_NONE;
668 case XML_TOK_NAME:
669 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
670 state->handler = notation3;
671 return XML_ROLE_NOTATION_NONE;
672 }
673 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
674 state->handler = notation2;
675 return XML_ROLE_NOTATION_NONE;
676 }
677 break;
678 }
679 return common(state, tok);
680 }
681
682 static int PTRCALL
683 notation2(PROLOG_STATE *state,
684 int tok,
685 const char *ptr,
686 const char *end,
687 const ENCODING *enc)
688 {
689 switch (tok) {
690 case XML_TOK_PROLOG_S:
691 return XML_ROLE_NOTATION_NONE;
692 case XML_TOK_LITERAL:
693 state->handler = notation4;
694 return XML_ROLE_NOTATION_PUBLIC_ID;
695 }
696 return common(state, tok);
697 }
698
699 static int PTRCALL
700 notation3(PROLOG_STATE *state,
701 int tok,
702 const char *ptr,
703 const char *end,
704 const ENCODING *enc)
705 {
706 switch (tok) {
707 case XML_TOK_PROLOG_S:
708 return XML_ROLE_NOTATION_NONE;
709 case XML_TOK_LITERAL:
710 state->handler = declClose;
711 state->role_none = XML_ROLE_NOTATION_NONE;
712 return XML_ROLE_NOTATION_SYSTEM_ID;
713 }
714 return common(state, tok);
715 }
716
717 static int PTRCALL
718 notation4(PROLOG_STATE *state,
719 int tok,
720 const char *ptr,
721 const char *end,
722 const ENCODING *enc)
723 {
724 switch (tok) {
725 case XML_TOK_PROLOG_S:
726 return XML_ROLE_NOTATION_NONE;
727 case XML_TOK_LITERAL:
728 state->handler = declClose;
729 state->role_none = XML_ROLE_NOTATION_NONE;
730 return XML_ROLE_NOTATION_SYSTEM_ID;
731 case XML_TOK_DECL_CLOSE:
732 setTopLevel(state);
733 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
734 }
735 return common(state, tok);
736 }
737
738 static int PTRCALL
739 attlist0(PROLOG_STATE *state,
740 int tok,
741 const char *ptr,
742 const char *end,
743 const ENCODING *enc)
744 {
745 switch (tok) {
746 case XML_TOK_PROLOG_S:
747 return XML_ROLE_ATTLIST_NONE;
748 case XML_TOK_NAME:
749 case XML_TOK_PREFIXED_NAME:
750 state->handler = attlist1;
751 return XML_ROLE_ATTLIST_ELEMENT_NAME;
752 }
753 return common(state, tok);
754 }
755
756 static int PTRCALL
757 attlist1(PROLOG_STATE *state,
758 int tok,
759 const char *ptr,
760 const char *end,
761 const ENCODING *enc)
762 {
763 switch (tok) {
764 case XML_TOK_PROLOG_S:
765 return XML_ROLE_ATTLIST_NONE;
766 case XML_TOK_DECL_CLOSE:
767 setTopLevel(state);
768 return XML_ROLE_ATTLIST_NONE;
769 case XML_TOK_NAME:
770 case XML_TOK_PREFIXED_NAME:
771 state->handler = attlist2;
772 return XML_ROLE_ATTRIBUTE_NAME;
773 }
774 return common(state, tok);
775 }
776
777 static int PTRCALL
778 attlist2(PROLOG_STATE *state,
779 int tok,
780 const char *ptr,
781 const char *end,
782 const ENCODING *enc)
783 {
784 switch (tok) {
785 case XML_TOK_PROLOG_S:
786 return XML_ROLE_ATTLIST_NONE;
787 case XML_TOK_NAME:
788 {
789 static const char *types[] = {
790 KW_CDATA,
791 KW_ID,
792 KW_IDREF,
793 KW_IDREFS,
794 KW_ENTITY,
795 KW_ENTITIES,
796 KW_NMTOKEN,
797 KW_NMTOKENS,
798 };
799 int i;
800 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
801 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
802 state->handler = attlist8;
803 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
804 }
805 }
806 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
807 state->handler = attlist5;
808 return XML_ROLE_ATTLIST_NONE;
809 }
810 break;
811 case XML_TOK_OPEN_PAREN:
812 state->handler = attlist3;
813 return XML_ROLE_ATTLIST_NONE;
814 }
815 return common(state, tok);
816 }
817
818 static int PTRCALL
819 attlist3(PROLOG_STATE *state,
820 int tok,
821 const char *ptr,
822 const char *end,
823 const ENCODING *enc)
824 {
825 switch (tok) {
826 case XML_TOK_PROLOG_S:
827 return XML_ROLE_ATTLIST_NONE;
828 case XML_TOK_NMTOKEN:
829 case XML_TOK_NAME:
830 case XML_TOK_PREFIXED_NAME:
831 state->handler = attlist4;
832 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
833 }
834 return common(state, tok);
835 }
836
837 static int PTRCALL
838 attlist4(PROLOG_STATE *state,
839 int tok,
840 const char *ptr,
841 const char *end,
842 const ENCODING *enc)
843 {
844 switch (tok) {
845 case XML_TOK_PROLOG_S:
846 return XML_ROLE_ATTLIST_NONE;
847 case XML_TOK_CLOSE_PAREN:
848 state->handler = attlist8;
849 return XML_ROLE_ATTLIST_NONE;
850 case XML_TOK_OR:
851 state->handler = attlist3;
852 return XML_ROLE_ATTLIST_NONE;
853 }
854 return common(state, tok);
855 }
856
857 static int PTRCALL
858 attlist5(PROLOG_STATE *state,
859 int tok,
860 const char *ptr,
861 const char *end,
862 const ENCODING *enc)
863 {
864 switch (tok) {
865 case XML_TOK_PROLOG_S:
866 return XML_ROLE_ATTLIST_NONE;
867 case XML_TOK_OPEN_PAREN:
868 state->handler = attlist6;
869 return XML_ROLE_ATTLIST_NONE;
870 }
871 return common(state, tok);
872 }
873
874 static int PTRCALL
875 attlist6(PROLOG_STATE *state,
876 int tok,
877 const char *ptr,
878 const char *end,
879 const ENCODING *enc)
880 {
881 switch (tok) {
882 case XML_TOK_PROLOG_S:
883 return XML_ROLE_ATTLIST_NONE;
884 case XML_TOK_NAME:
885 state->handler = attlist7;
886 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
887 }
888 return common(state, tok);
889 }
890
891 static int PTRCALL
892 attlist7(PROLOG_STATE *state,
893 int tok,
894 const char *ptr,
895 const char *end,
896 const ENCODING *enc)
897 {
898 switch (tok) {
899 case XML_TOK_PROLOG_S:
900 return XML_ROLE_ATTLIST_NONE;
901 case XML_TOK_CLOSE_PAREN:
902 state->handler = attlist8;
903 return XML_ROLE_ATTLIST_NONE;
904 case XML_TOK_OR:
905 state->handler = attlist6;
906 return XML_ROLE_ATTLIST_NONE;
907 }
908 return common(state, tok);
909 }
910
911 /* default value */
912 static int PTRCALL
913 attlist8(PROLOG_STATE *state,
914 int tok,
915 const char *ptr,
916 const char *end,
917 const ENCODING *enc)
918 {
919 switch (tok) {
920 case XML_TOK_PROLOG_S:
921 return XML_ROLE_ATTLIST_NONE;
922 case XML_TOK_POUND_NAME:
923 if (XmlNameMatchesAscii(enc,
924 ptr + MIN_BYTES_PER_CHAR(enc),
925 end,
926 KW_IMPLIED)) {
927 state->handler = attlist1;
928 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
929 }
930 if (XmlNameMatchesAscii(enc,
931 ptr + MIN_BYTES_PER_CHAR(enc),
932 end,
933 KW_REQUIRED)) {
934 state->handler = attlist1;
935 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
936 }
937 if (XmlNameMatchesAscii(enc,
938 ptr + MIN_BYTES_PER_CHAR(enc),
939 end,
940 KW_FIXED)) {
941 state->handler = attlist9;
942 return XML_ROLE_ATTLIST_NONE;
943 }
944 break;
945 case XML_TOK_LITERAL:
946 state->handler = attlist1;
947 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
948 }
949 return common(state, tok);
950 }
951
952 static int PTRCALL
953 attlist9(PROLOG_STATE *state,
954 int tok,
955 const char *ptr,
956 const char *end,
957 const ENCODING *enc)
958 {
959 switch (tok) {
960 case XML_TOK_PROLOG_S:
961 return XML_ROLE_ATTLIST_NONE;
962 case XML_TOK_LITERAL:
963 state->handler = attlist1;
964 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
965 }
966 return common(state, tok);
967 }
968
969 static int PTRCALL
970 element0(PROLOG_STATE *state,
971 int tok,
972 const char *ptr,
973 const char *end,
974 const ENCODING *enc)
975 {
976 switch (tok) {
977 case XML_TOK_PROLOG_S:
978 return XML_ROLE_ELEMENT_NONE;
979 case XML_TOK_NAME:
980 case XML_TOK_PREFIXED_NAME:
981 state->handler = element1;
982 return XML_ROLE_ELEMENT_NAME;
983 }
984 return common(state, tok);
985 }
986
987 static int PTRCALL
988 element1(PROLOG_STATE *state,
989 int tok,
990 const char *ptr,
991 const char *end,
992 const ENCODING *enc)
993 {
994 switch (tok) {
995 case XML_TOK_PROLOG_S:
996 return XML_ROLE_ELEMENT_NONE;
997 case XML_TOK_NAME:
998 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
999 state->handler = declClose;
1000 state->role_none = XML_ROLE_ELEMENT_NONE;
1001 return XML_ROLE_CONTENT_EMPTY;
1002 }
1003 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1004 state->handler = declClose;
1005 state->role_none = XML_ROLE_ELEMENT_NONE;
1006 return XML_ROLE_CONTENT_ANY;
1007 }
1008 break;
1009 case XML_TOK_OPEN_PAREN:
1010 state->handler = element2;
1011 state->level = 1;
1012 return XML_ROLE_GROUP_OPEN;
1013 }
1014 return common(state, tok);
1015 }
1016
1017 static int PTRCALL
1018 element2(PROLOG_STATE *state,
1019 int tok,
1020 const char *ptr,
1021 const char *end,
1022 const ENCODING *enc)
1023 {
1024 switch (tok) {
1025 case XML_TOK_PROLOG_S:
1026 return XML_ROLE_ELEMENT_NONE;
1027 case XML_TOK_POUND_NAME:
1028 if (XmlNameMatchesAscii(enc,
1029 ptr + MIN_BYTES_PER_CHAR(enc),
1030 end,
1031 KW_PCDATA)) {
1032 state->handler = element3;
1033 return XML_ROLE_CONTENT_PCDATA;
1034 }
1035 break;
1036 case XML_TOK_OPEN_PAREN:
1037 state->level = 2;
1038 state->handler = element6;
1039 return XML_ROLE_GROUP_OPEN;
1040 case XML_TOK_NAME:
1041 case XML_TOK_PREFIXED_NAME:
1042 state->handler = element7;
1043 return XML_ROLE_CONTENT_ELEMENT;
1044 case XML_TOK_NAME_QUESTION:
1045 state->handler = element7;
1046 return XML_ROLE_CONTENT_ELEMENT_OPT;
1047 case XML_TOK_NAME_ASTERISK:
1048 state->handler = element7;
1049 return XML_ROLE_CONTENT_ELEMENT_REP;
1050 case XML_TOK_NAME_PLUS:
1051 state->handler = element7;
1052 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1053 }
1054 return common(state, tok);
1055 }
1056
1057 static int PTRCALL
1058 element3(PROLOG_STATE *state,
1059 int tok,
1060 const char *ptr,
1061 const char *end,
1062 const ENCODING *enc)
1063 {
1064 switch (tok) {
1065 case XML_TOK_PROLOG_S:
1066 return XML_ROLE_ELEMENT_NONE;
1067 case XML_TOK_CLOSE_PAREN:
1068 state->handler = declClose;
1069 state->role_none = XML_ROLE_ELEMENT_NONE;
1070 return XML_ROLE_GROUP_CLOSE;
1071 case XML_TOK_CLOSE_PAREN_ASTERISK:
1072 state->handler = declClose;
1073 state->role_none = XML_ROLE_ELEMENT_NONE;
1074 return XML_ROLE_GROUP_CLOSE_REP;
1075 case XML_TOK_OR:
1076 state->handler = element4;
1077 return XML_ROLE_ELEMENT_NONE;
1078 }
1079 return common(state, tok);
1080 }
1081
1082 static int PTRCALL
1083 element4(PROLOG_STATE *state,
1084 int tok,
1085 const char *ptr,
1086 const char *end,
1087 const ENCODING *enc)
1088 {
1089 switch (tok) {
1090 case XML_TOK_PROLOG_S:
1091 return XML_ROLE_ELEMENT_NONE;
1092 case XML_TOK_NAME:
1093 case XML_TOK_PREFIXED_NAME:
1094 state->handler = element5;
1095 return XML_ROLE_CONTENT_ELEMENT;
1096 }
1097 return common(state, tok);
1098 }
1099
1100 static int PTRCALL
1101 element5(PROLOG_STATE *state,
1102 int tok,
1103 const char *ptr,
1104 const char *end,
1105 const ENCODING *enc)
1106 {
1107 switch (tok) {
1108 case XML_TOK_PROLOG_S:
1109 return XML_ROLE_ELEMENT_NONE;
1110 case XML_TOK_CLOSE_PAREN_ASTERISK:
1111 state->handler = declClose;
1112 state->role_none = XML_ROLE_ELEMENT_NONE;
1113 return XML_ROLE_GROUP_CLOSE_REP;
1114 case XML_TOK_OR:
1115 state->handler = element4;
1116 return XML_ROLE_ELEMENT_NONE;
1117 }
1118 return common(state, tok);
1119 }
1120
1121 static int PTRCALL
1122 element6(PROLOG_STATE *state,
1123 int tok,
1124 const char *ptr,
1125 const char *end,
1126 const ENCODING *enc)
1127 {
1128 switch (tok) {
1129 case XML_TOK_PROLOG_S:
1130 return XML_ROLE_ELEMENT_NONE;
1131 case XML_TOK_OPEN_PAREN:
1132 state->level += 1;
1133 return XML_ROLE_GROUP_OPEN;
1134 case XML_TOK_NAME:
1135 case XML_TOK_PREFIXED_NAME:
1136 state->handler = element7;
1137 return XML_ROLE_CONTENT_ELEMENT;
1138 case XML_TOK_NAME_QUESTION:
1139 state->handler = element7;
1140 return XML_ROLE_CONTENT_ELEMENT_OPT;
1141 case XML_TOK_NAME_ASTERISK:
1142 state->handler = element7;
1143 return XML_ROLE_CONTENT_ELEMENT_REP;
1144 case XML_TOK_NAME_PLUS:
1145 state->handler = element7;
1146 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1147 }
1148 return common(state, tok);
1149 }
1150
1151 static int PTRCALL
1152 element7(PROLOG_STATE *state,
1153 int tok,
1154 const char *ptr,
1155 const char *end,
1156 const ENCODING *enc)
1157 {
1158 switch (tok) {
1159 case XML_TOK_PROLOG_S:
1160 return XML_ROLE_ELEMENT_NONE;
1161 case XML_TOK_CLOSE_PAREN:
1162 state->level -= 1;
1163 if (state->level == 0) {
1164 state->handler = declClose;
1165 state->role_none = XML_ROLE_ELEMENT_NONE;
1166 }
1167 return XML_ROLE_GROUP_CLOSE;
1168 case XML_TOK_CLOSE_PAREN_ASTERISK:
1169 state->level -= 1;
1170 if (state->level == 0) {
1171 state->handler = declClose;
1172 state->role_none = XML_ROLE_ELEMENT_NONE;
1173 }
1174 return XML_ROLE_GROUP_CLOSE_REP;
1175 case XML_TOK_CLOSE_PAREN_QUESTION:
1176 state->level -= 1;
1177 if (state->level == 0) {
1178 state->handler = declClose;
1179 state->role_none = XML_ROLE_ELEMENT_NONE;
1180 }
1181 return XML_ROLE_GROUP_CLOSE_OPT;
1182 case XML_TOK_CLOSE_PAREN_PLUS:
1183 state->level -= 1;
1184 if (state->level == 0) {
1185 state->handler = declClose;
1186 state->role_none = XML_ROLE_ELEMENT_NONE;
1187 }
1188 return XML_ROLE_GROUP_CLOSE_PLUS;
1189 case XML_TOK_COMMA:
1190 state->handler = element6;
1191 return XML_ROLE_GROUP_SEQUENCE;
1192 case XML_TOK_OR:
1193 state->handler = element6;
1194 return XML_ROLE_GROUP_CHOICE;
1195 }
1196 return common(state, tok);
1197 }
1198
1199 #ifdef XML_DTD
1200
1201 static int PTRCALL
1202 condSect0(PROLOG_STATE *state,
1203 int tok,
1204 const char *ptr,
1205 const char *end,
1206 const ENCODING *enc)
1207 {
1208 switch (tok) {
1209 case XML_TOK_PROLOG_S:
1210 return XML_ROLE_NONE;
1211 case XML_TOK_NAME:
1212 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1213 state->handler = condSect1;
1214 return XML_ROLE_NONE;
1215 }
1216 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1217 state->handler = condSect2;
1218 return XML_ROLE_NONE;
1219 }
1220 break;
1221 }
1222 return common(state, tok);
1223 }
1224
1225 static int PTRCALL
1226 condSect1(PROLOG_STATE *state,
1227 int tok,
1228 const char *ptr,
1229 const char *end,
1230 const ENCODING *enc)
1231 {
1232 switch (tok) {
1233 case XML_TOK_PROLOG_S:
1234 return XML_ROLE_NONE;
1235 case XML_TOK_OPEN_BRACKET:
1236 state->handler = externalSubset1;
1237 state->includeLevel += 1;
1238 return XML_ROLE_NONE;
1239 }
1240 return common(state, tok);
1241 }
1242
1243 static int PTRCALL
1244 condSect2(PROLOG_STATE *state,
1245 int tok,
1246 const char *ptr,
1247 const char *end,
1248 const ENCODING *enc)
1249 {
1250 switch (tok) {
1251 case XML_TOK_PROLOG_S:
1252 return XML_ROLE_NONE;
1253 case XML_TOK_OPEN_BRACKET:
1254 state->handler = externalSubset1;
1255 return XML_ROLE_IGNORE_SECT;
1256 }
1257 return common(state, tok);
1258 }
1259
1260 #endif /* XML_DTD */
1261
1262 static int PTRCALL
1263 declClose(PROLOG_STATE *state,
1264 int tok,
1265 const char *ptr,
1266 const char *end,
1267 const ENCODING *enc)
1268 {
1269 switch (tok) {
1270 case XML_TOK_PROLOG_S:
1271 return state->role_none;
1272 case XML_TOK_DECL_CLOSE:
1273 setTopLevel(state);
1274 return state->role_none;
1275 }
1276 return common(state, tok);
1277 }
1278
1279 static int PTRCALL
1280 error(PROLOG_STATE *state,
1281 int tok,
1282 const char *ptr,
1283 const char *end,
1284 const ENCODING *enc)
1285 {
1286 return XML_ROLE_NONE;
1287 }
1288
1289 static int FASTCALL
1290 common(PROLOG_STATE *state, int tok)
1291 {
1292 #ifdef XML_DTD
1293 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1294 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1295 #endif
1296 state->handler = error;
1297 return XML_ROLE_ERROR;
1298 }
1299
1300 void
1301 XmlPrologStateInit(PROLOG_STATE *state)
1302 {
1303 state->handler = prolog0;
1304 #ifdef XML_DTD
1305 state->documentEntity = 1;
1306 state->includeLevel = 0;
1307 state->inEntityValue = 0;
1308 #endif /* XML_DTD */
1309 }
1310
1311 #ifdef XML_DTD
1312
1313 void
1314 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1315 {
1316 state->handler = externalSubset0;
1317 state->documentEntity = 0;
1318 state->includeLevel = 0;
1319 }
1320
1321 #endif /* XML_DTD */