]> git.saurik.com Git - wxWidgets.git/blob - src/expat/lib/xmlrole.c
Use wxIsalnum to test for word delimiters
[wxWidgets.git] / src / expat / lib / xmlrole.c
1 /* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
2 See the file COPYING for copying permission.
3 */
4
5 #ifdef COMPILED_FROM_DSP
6 #include "winconfig.h"
7 #elif defined(OS2_32)
8 #include "os2config.h"
9 #elif defined(__MSDOS__)
10 #include "dosconfig.h"
11 #elif defined(MACOS_CLASSIC)
12 #include "macconfig.h"
13 #else
14 #include "expat_config.h"
15 #endif /* ndef COMPILED_FROM_DSP */
16
17 #include "internal.h"
18 #include "xmlrole.h"
19 #include "ascii.h"
20
/* This state machine does not check:

   - that ',' and '|' are not mixed within a single content-model group
   - the content of literals

*/
27
28 static const char KW_ANY[] = {
29 ASCII_A, ASCII_N, ASCII_Y, '\0' };
30 static const char KW_ATTLIST[] = {
31 ASCII_A, ASCII_T, ASCII_T, ASCII_L, ASCII_I, ASCII_S, ASCII_T, '\0' };
32 static const char KW_CDATA[] = {
33 ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
34 static const char KW_DOCTYPE[] = {
35 ASCII_D, ASCII_O, ASCII_C, ASCII_T, ASCII_Y, ASCII_P, ASCII_E, '\0' };
36 static const char KW_ELEMENT[] = {
37 ASCII_E, ASCII_L, ASCII_E, ASCII_M, ASCII_E, ASCII_N, ASCII_T, '\0' };
38 static const char KW_EMPTY[] = {
39 ASCII_E, ASCII_M, ASCII_P, ASCII_T, ASCII_Y, '\0' };
40 static const char KW_ENTITIES[] = {
41 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S,
42 '\0' };
43 static const char KW_ENTITY[] = {
44 ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
45 static const char KW_FIXED[] = {
46 ASCII_F, ASCII_I, ASCII_X, ASCII_E, ASCII_D, '\0' };
47 static const char KW_ID[] = {
48 ASCII_I, ASCII_D, '\0' };
49 static const char KW_IDREF[] = {
50 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
51 static const char KW_IDREFS[] = {
52 ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
53 static const char KW_IGNORE[] = {
54 ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
55 static const char KW_IMPLIED[] = {
56 ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
57 static const char KW_INCLUDE[] = {
58 ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
59 static const char KW_NDATA[] = {
60 ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
61 static const char KW_NMTOKEN[] = {
62 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
63 static const char KW_NMTOKENS[] = {
64 ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S,
65 '\0' };
66 static const char KW_NOTATION[] =
67 { ASCII_N, ASCII_O, ASCII_T, ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N,
68 '\0' };
69 static const char KW_PCDATA[] = {
70 ASCII_P, ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
71 static const char KW_PUBLIC[] = {
72 ASCII_P, ASCII_U, ASCII_B, ASCII_L, ASCII_I, ASCII_C, '\0' };
73 static const char KW_REQUIRED[] = {
74 ASCII_R, ASCII_E, ASCII_Q, ASCII_U, ASCII_I, ASCII_R, ASCII_E, ASCII_D,
75 '\0' };
76 static const char KW_SYSTEM[] = {
77 ASCII_S, ASCII_Y, ASCII_S, ASCII_T, ASCII_E, ASCII_M, '\0' };
78
/* Width of the smallest character in the encoding.  A definition supplied
   before this point takes precedence; otherwise the value is read from the
   ENCODING object. */
#if !defined(MIN_BYTES_PER_CHAR)
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
82
/* setTopLevel(state): return the state machine to its top-level handler once
   a declaration has been completed.  Without XML_DTD that is always
   internalSubset; with XML_DTD it is internalSubset when state->documentEntity
   is non-zero and externalSubset1 otherwise.  The macro is an expression whose
   value is the handler just stored. */
#ifndef XML_DTD
#define setTopLevel(state) ((state)->handler = internalSubset)
#else /* XML_DTD */
#define setTopLevel(state) \
  ((state)->handler = ((state)->documentEntity ? internalSubset \
                                               : externalSubset1))
#endif /* XML_DTD */
91
92 typedef int PTRCALL PROLOG_HANDLER(PROLOG_STATE *state,
93 int tok,
94 const char *ptr,
95 const char *end,
96 const ENCODING *enc);
97
98 static PROLOG_HANDLER
99 prolog0, prolog1, prolog2,
100 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
101 internalSubset,
102 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
103 entity7, entity8, entity9, entity10,
104 notation0, notation1, notation2, notation3, notation4,
105 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
106 attlist7, attlist8, attlist9,
107 element0, element1, element2, element3, element4, element5, element6,
108 element7,
109 #ifdef XML_DTD
110 externalSubset0, externalSubset1,
111 condSect0, condSect1, condSect2,
112 #endif /* XML_DTD */
113 declClose,
114 error;
115
116 static int FASTCALL common(PROLOG_STATE *state, int tok);
117
118 static int PTRCALL
119 prolog0(PROLOG_STATE *state,
120 int tok,
121 const char *ptr,
122 const char *end,
123 const ENCODING *enc)
124 {
125 switch (tok) {
126 case XML_TOK_PROLOG_S:
127 state->handler = prolog1;
128 return XML_ROLE_NONE;
129 case XML_TOK_XML_DECL:
130 state->handler = prolog1;
131 return XML_ROLE_XML_DECL;
132 case XML_TOK_PI:
133 state->handler = prolog1;
134 return XML_ROLE_PI;
135 case XML_TOK_COMMENT:
136 state->handler = prolog1;
137 return XML_ROLE_COMMENT;
138 case XML_TOK_BOM:
139 return XML_ROLE_NONE;
140 case XML_TOK_DECL_OPEN:
141 if (!XmlNameMatchesAscii(enc,
142 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
143 end,
144 KW_DOCTYPE))
145 break;
146 state->handler = doctype0;
147 return XML_ROLE_DOCTYPE_NONE;
148 case XML_TOK_INSTANCE_START:
149 state->handler = error;
150 return XML_ROLE_INSTANCE_START;
151 }
152 return common(state, tok);
153 }
154
155 static int PTRCALL
156 prolog1(PROLOG_STATE *state,
157 int tok,
158 const char *ptr,
159 const char *end,
160 const ENCODING *enc)
161 {
162 switch (tok) {
163 case XML_TOK_PROLOG_S:
164 return XML_ROLE_NONE;
165 case XML_TOK_PI:
166 return XML_ROLE_PI;
167 case XML_TOK_COMMENT:
168 return XML_ROLE_COMMENT;
169 case XML_TOK_BOM:
170 return XML_ROLE_NONE;
171 case XML_TOK_DECL_OPEN:
172 if (!XmlNameMatchesAscii(enc,
173 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
174 end,
175 KW_DOCTYPE))
176 break;
177 state->handler = doctype0;
178 return XML_ROLE_DOCTYPE_NONE;
179 case XML_TOK_INSTANCE_START:
180 state->handler = error;
181 return XML_ROLE_INSTANCE_START;
182 }
183 return common(state, tok);
184 }
185
186 static int PTRCALL
187 prolog2(PROLOG_STATE *state,
188 int tok,
189 const char *ptr,
190 const char *end,
191 const ENCODING *enc)
192 {
193 switch (tok) {
194 case XML_TOK_PROLOG_S:
195 return XML_ROLE_NONE;
196 case XML_TOK_PI:
197 return XML_ROLE_PI;
198 case XML_TOK_COMMENT:
199 return XML_ROLE_COMMENT;
200 case XML_TOK_INSTANCE_START:
201 state->handler = error;
202 return XML_ROLE_INSTANCE_START;
203 }
204 return common(state, tok);
205 }
206
207 static int PTRCALL
208 doctype0(PROLOG_STATE *state,
209 int tok,
210 const char *ptr,
211 const char *end,
212 const ENCODING *enc)
213 {
214 switch (tok) {
215 case XML_TOK_PROLOG_S:
216 return XML_ROLE_DOCTYPE_NONE;
217 case XML_TOK_NAME:
218 case XML_TOK_PREFIXED_NAME:
219 state->handler = doctype1;
220 return XML_ROLE_DOCTYPE_NAME;
221 }
222 return common(state, tok);
223 }
224
225 static int PTRCALL
226 doctype1(PROLOG_STATE *state,
227 int tok,
228 const char *ptr,
229 const char *end,
230 const ENCODING *enc)
231 {
232 switch (tok) {
233 case XML_TOK_PROLOG_S:
234 return XML_ROLE_DOCTYPE_NONE;
235 case XML_TOK_OPEN_BRACKET:
236 state->handler = internalSubset;
237 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
238 case XML_TOK_DECL_CLOSE:
239 state->handler = prolog2;
240 return XML_ROLE_DOCTYPE_CLOSE;
241 case XML_TOK_NAME:
242 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
243 state->handler = doctype3;
244 return XML_ROLE_DOCTYPE_NONE;
245 }
246 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
247 state->handler = doctype2;
248 return XML_ROLE_DOCTYPE_NONE;
249 }
250 break;
251 }
252 return common(state, tok);
253 }
254
255 static int PTRCALL
256 doctype2(PROLOG_STATE *state,
257 int tok,
258 const char *ptr,
259 const char *end,
260 const ENCODING *enc)
261 {
262 switch (tok) {
263 case XML_TOK_PROLOG_S:
264 return XML_ROLE_DOCTYPE_NONE;
265 case XML_TOK_LITERAL:
266 state->handler = doctype3;
267 return XML_ROLE_DOCTYPE_PUBLIC_ID;
268 }
269 return common(state, tok);
270 }
271
272 static int PTRCALL
273 doctype3(PROLOG_STATE *state,
274 int tok,
275 const char *ptr,
276 const char *end,
277 const ENCODING *enc)
278 {
279 switch (tok) {
280 case XML_TOK_PROLOG_S:
281 return XML_ROLE_DOCTYPE_NONE;
282 case XML_TOK_LITERAL:
283 state->handler = doctype4;
284 return XML_ROLE_DOCTYPE_SYSTEM_ID;
285 }
286 return common(state, tok);
287 }
288
289 static int PTRCALL
290 doctype4(PROLOG_STATE *state,
291 int tok,
292 const char *ptr,
293 const char *end,
294 const ENCODING *enc)
295 {
296 switch (tok) {
297 case XML_TOK_PROLOG_S:
298 return XML_ROLE_DOCTYPE_NONE;
299 case XML_TOK_OPEN_BRACKET:
300 state->handler = internalSubset;
301 return XML_ROLE_DOCTYPE_INTERNAL_SUBSET;
302 case XML_TOK_DECL_CLOSE:
303 state->handler = prolog2;
304 return XML_ROLE_DOCTYPE_CLOSE;
305 }
306 return common(state, tok);
307 }
308
309 static int PTRCALL
310 doctype5(PROLOG_STATE *state,
311 int tok,
312 const char *ptr,
313 const char *end,
314 const ENCODING *enc)
315 {
316 switch (tok) {
317 case XML_TOK_PROLOG_S:
318 return XML_ROLE_DOCTYPE_NONE;
319 case XML_TOK_DECL_CLOSE:
320 state->handler = prolog2;
321 return XML_ROLE_DOCTYPE_CLOSE;
322 }
323 return common(state, tok);
324 }
325
326 static int PTRCALL
327 internalSubset(PROLOG_STATE *state,
328 int tok,
329 const char *ptr,
330 const char *end,
331 const ENCODING *enc)
332 {
333 switch (tok) {
334 case XML_TOK_PROLOG_S:
335 return XML_ROLE_NONE;
336 case XML_TOK_DECL_OPEN:
337 if (XmlNameMatchesAscii(enc,
338 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
339 end,
340 KW_ENTITY)) {
341 state->handler = entity0;
342 return XML_ROLE_ENTITY_NONE;
343 }
344 if (XmlNameMatchesAscii(enc,
345 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
346 end,
347 KW_ATTLIST)) {
348 state->handler = attlist0;
349 return XML_ROLE_ATTLIST_NONE;
350 }
351 if (XmlNameMatchesAscii(enc,
352 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
353 end,
354 KW_ELEMENT)) {
355 state->handler = element0;
356 return XML_ROLE_ELEMENT_NONE;
357 }
358 if (XmlNameMatchesAscii(enc,
359 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
360 end,
361 KW_NOTATION)) {
362 state->handler = notation0;
363 return XML_ROLE_NOTATION_NONE;
364 }
365 break;
366 case XML_TOK_PI:
367 return XML_ROLE_PI;
368 case XML_TOK_COMMENT:
369 return XML_ROLE_COMMENT;
370 case XML_TOK_PARAM_ENTITY_REF:
371 return XML_ROLE_PARAM_ENTITY_REF;
372 case XML_TOK_CLOSE_BRACKET:
373 state->handler = doctype5;
374 return XML_ROLE_DOCTYPE_NONE;
375 }
376 return common(state, tok);
377 }
378
379 #ifdef XML_DTD
380
381 static int PTRCALL
382 externalSubset0(PROLOG_STATE *state,
383 int tok,
384 const char *ptr,
385 const char *end,
386 const ENCODING *enc)
387 {
388 state->handler = externalSubset1;
389 if (tok == XML_TOK_XML_DECL)
390 return XML_ROLE_TEXT_DECL;
391 return externalSubset1(state, tok, ptr, end, enc);
392 }
393
394 static int PTRCALL
395 externalSubset1(PROLOG_STATE *state,
396 int tok,
397 const char *ptr,
398 const char *end,
399 const ENCODING *enc)
400 {
401 switch (tok) {
402 case XML_TOK_COND_SECT_OPEN:
403 state->handler = condSect0;
404 return XML_ROLE_NONE;
405 case XML_TOK_COND_SECT_CLOSE:
406 if (state->includeLevel == 0)
407 break;
408 state->includeLevel -= 1;
409 return XML_ROLE_NONE;
410 case XML_TOK_PROLOG_S:
411 return XML_ROLE_NONE;
412 case XML_TOK_CLOSE_BRACKET:
413 break;
414 case XML_TOK_NONE:
415 if (state->includeLevel)
416 break;
417 return XML_ROLE_NONE;
418 default:
419 return internalSubset(state, tok, ptr, end, enc);
420 }
421 return common(state, tok);
422 }
423
424 #endif /* XML_DTD */
425
426 static int PTRCALL
427 entity0(PROLOG_STATE *state,
428 int tok,
429 const char *ptr,
430 const char *end,
431 const ENCODING *enc)
432 {
433 switch (tok) {
434 case XML_TOK_PROLOG_S:
435 return XML_ROLE_ENTITY_NONE;
436 case XML_TOK_PERCENT:
437 state->handler = entity1;
438 return XML_ROLE_ENTITY_NONE;
439 case XML_TOK_NAME:
440 state->handler = entity2;
441 return XML_ROLE_GENERAL_ENTITY_NAME;
442 }
443 return common(state, tok);
444 }
445
446 static int PTRCALL
447 entity1(PROLOG_STATE *state,
448 int tok,
449 const char *ptr,
450 const char *end,
451 const ENCODING *enc)
452 {
453 switch (tok) {
454 case XML_TOK_PROLOG_S:
455 return XML_ROLE_ENTITY_NONE;
456 case XML_TOK_NAME:
457 state->handler = entity7;
458 return XML_ROLE_PARAM_ENTITY_NAME;
459 }
460 return common(state, tok);
461 }
462
463 static int PTRCALL
464 entity2(PROLOG_STATE *state,
465 int tok,
466 const char *ptr,
467 const char *end,
468 const ENCODING *enc)
469 {
470 switch (tok) {
471 case XML_TOK_PROLOG_S:
472 return XML_ROLE_ENTITY_NONE;
473 case XML_TOK_NAME:
474 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
475 state->handler = entity4;
476 return XML_ROLE_ENTITY_NONE;
477 }
478 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
479 state->handler = entity3;
480 return XML_ROLE_ENTITY_NONE;
481 }
482 break;
483 case XML_TOK_LITERAL:
484 state->handler = declClose;
485 state->role_none = XML_ROLE_ENTITY_NONE;
486 return XML_ROLE_ENTITY_VALUE;
487 }
488 return common(state, tok);
489 }
490
491 static int PTRCALL
492 entity3(PROLOG_STATE *state,
493 int tok,
494 const char *ptr,
495 const char *end,
496 const ENCODING *enc)
497 {
498 switch (tok) {
499 case XML_TOK_PROLOG_S:
500 return XML_ROLE_ENTITY_NONE;
501 case XML_TOK_LITERAL:
502 state->handler = entity4;
503 return XML_ROLE_ENTITY_PUBLIC_ID;
504 }
505 return common(state, tok);
506 }
507
508 static int PTRCALL
509 entity4(PROLOG_STATE *state,
510 int tok,
511 const char *ptr,
512 const char *end,
513 const ENCODING *enc)
514 {
515 switch (tok) {
516 case XML_TOK_PROLOG_S:
517 return XML_ROLE_ENTITY_NONE;
518 case XML_TOK_LITERAL:
519 state->handler = entity5;
520 return XML_ROLE_ENTITY_SYSTEM_ID;
521 }
522 return common(state, tok);
523 }
524
525 static int PTRCALL
526 entity5(PROLOG_STATE *state,
527 int tok,
528 const char *ptr,
529 const char *end,
530 const ENCODING *enc)
531 {
532 switch (tok) {
533 case XML_TOK_PROLOG_S:
534 return XML_ROLE_ENTITY_NONE;
535 case XML_TOK_DECL_CLOSE:
536 setTopLevel(state);
537 return XML_ROLE_ENTITY_COMPLETE;
538 case XML_TOK_NAME:
539 if (XmlNameMatchesAscii(enc, ptr, end, KW_NDATA)) {
540 state->handler = entity6;
541 return XML_ROLE_ENTITY_NONE;
542 }
543 break;
544 }
545 return common(state, tok);
546 }
547
548 static int PTRCALL
549 entity6(PROLOG_STATE *state,
550 int tok,
551 const char *ptr,
552 const char *end,
553 const ENCODING *enc)
554 {
555 switch (tok) {
556 case XML_TOK_PROLOG_S:
557 return XML_ROLE_ENTITY_NONE;
558 case XML_TOK_NAME:
559 state->handler = declClose;
560 state->role_none = XML_ROLE_ENTITY_NONE;
561 return XML_ROLE_ENTITY_NOTATION_NAME;
562 }
563 return common(state, tok);
564 }
565
566 static int PTRCALL
567 entity7(PROLOG_STATE *state,
568 int tok,
569 const char *ptr,
570 const char *end,
571 const ENCODING *enc)
572 {
573 switch (tok) {
574 case XML_TOK_PROLOG_S:
575 return XML_ROLE_ENTITY_NONE;
576 case XML_TOK_NAME:
577 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
578 state->handler = entity9;
579 return XML_ROLE_ENTITY_NONE;
580 }
581 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
582 state->handler = entity8;
583 return XML_ROLE_ENTITY_NONE;
584 }
585 break;
586 case XML_TOK_LITERAL:
587 state->handler = declClose;
588 state->role_none = XML_ROLE_ENTITY_NONE;
589 return XML_ROLE_ENTITY_VALUE;
590 }
591 return common(state, tok);
592 }
593
594 static int PTRCALL
595 entity8(PROLOG_STATE *state,
596 int tok,
597 const char *ptr,
598 const char *end,
599 const ENCODING *enc)
600 {
601 switch (tok) {
602 case XML_TOK_PROLOG_S:
603 return XML_ROLE_ENTITY_NONE;
604 case XML_TOK_LITERAL:
605 state->handler = entity9;
606 return XML_ROLE_ENTITY_PUBLIC_ID;
607 }
608 return common(state, tok);
609 }
610
611 static int PTRCALL
612 entity9(PROLOG_STATE *state,
613 int tok,
614 const char *ptr,
615 const char *end,
616 const ENCODING *enc)
617 {
618 switch (tok) {
619 case XML_TOK_PROLOG_S:
620 return XML_ROLE_ENTITY_NONE;
621 case XML_TOK_LITERAL:
622 state->handler = entity10;
623 return XML_ROLE_ENTITY_SYSTEM_ID;
624 }
625 return common(state, tok);
626 }
627
628 static int PTRCALL
629 entity10(PROLOG_STATE *state,
630 int tok,
631 const char *ptr,
632 const char *end,
633 const ENCODING *enc)
634 {
635 switch (tok) {
636 case XML_TOK_PROLOG_S:
637 return XML_ROLE_ENTITY_NONE;
638 case XML_TOK_DECL_CLOSE:
639 setTopLevel(state);
640 return XML_ROLE_ENTITY_COMPLETE;
641 }
642 return common(state, tok);
643 }
644
645 static int PTRCALL
646 notation0(PROLOG_STATE *state,
647 int tok,
648 const char *ptr,
649 const char *end,
650 const ENCODING *enc)
651 {
652 switch (tok) {
653 case XML_TOK_PROLOG_S:
654 return XML_ROLE_NOTATION_NONE;
655 case XML_TOK_NAME:
656 state->handler = notation1;
657 return XML_ROLE_NOTATION_NAME;
658 }
659 return common(state, tok);
660 }
661
662 static int PTRCALL
663 notation1(PROLOG_STATE *state,
664 int tok,
665 const char *ptr,
666 const char *end,
667 const ENCODING *enc)
668 {
669 switch (tok) {
670 case XML_TOK_PROLOG_S:
671 return XML_ROLE_NOTATION_NONE;
672 case XML_TOK_NAME:
673 if (XmlNameMatchesAscii(enc, ptr, end, KW_SYSTEM)) {
674 state->handler = notation3;
675 return XML_ROLE_NOTATION_NONE;
676 }
677 if (XmlNameMatchesAscii(enc, ptr, end, KW_PUBLIC)) {
678 state->handler = notation2;
679 return XML_ROLE_NOTATION_NONE;
680 }
681 break;
682 }
683 return common(state, tok);
684 }
685
686 static int PTRCALL
687 notation2(PROLOG_STATE *state,
688 int tok,
689 const char *ptr,
690 const char *end,
691 const ENCODING *enc)
692 {
693 switch (tok) {
694 case XML_TOK_PROLOG_S:
695 return XML_ROLE_NOTATION_NONE;
696 case XML_TOK_LITERAL:
697 state->handler = notation4;
698 return XML_ROLE_NOTATION_PUBLIC_ID;
699 }
700 return common(state, tok);
701 }
702
703 static int PTRCALL
704 notation3(PROLOG_STATE *state,
705 int tok,
706 const char *ptr,
707 const char *end,
708 const ENCODING *enc)
709 {
710 switch (tok) {
711 case XML_TOK_PROLOG_S:
712 return XML_ROLE_NOTATION_NONE;
713 case XML_TOK_LITERAL:
714 state->handler = declClose;
715 state->role_none = XML_ROLE_NOTATION_NONE;
716 return XML_ROLE_NOTATION_SYSTEM_ID;
717 }
718 return common(state, tok);
719 }
720
721 static int PTRCALL
722 notation4(PROLOG_STATE *state,
723 int tok,
724 const char *ptr,
725 const char *end,
726 const ENCODING *enc)
727 {
728 switch (tok) {
729 case XML_TOK_PROLOG_S:
730 return XML_ROLE_NOTATION_NONE;
731 case XML_TOK_LITERAL:
732 state->handler = declClose;
733 state->role_none = XML_ROLE_NOTATION_NONE;
734 return XML_ROLE_NOTATION_SYSTEM_ID;
735 case XML_TOK_DECL_CLOSE:
736 setTopLevel(state);
737 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
738 }
739 return common(state, tok);
740 }
741
742 static int PTRCALL
743 attlist0(PROLOG_STATE *state,
744 int tok,
745 const char *ptr,
746 const char *end,
747 const ENCODING *enc)
748 {
749 switch (tok) {
750 case XML_TOK_PROLOG_S:
751 return XML_ROLE_ATTLIST_NONE;
752 case XML_TOK_NAME:
753 case XML_TOK_PREFIXED_NAME:
754 state->handler = attlist1;
755 return XML_ROLE_ATTLIST_ELEMENT_NAME;
756 }
757 return common(state, tok);
758 }
759
760 static int PTRCALL
761 attlist1(PROLOG_STATE *state,
762 int tok,
763 const char *ptr,
764 const char *end,
765 const ENCODING *enc)
766 {
767 switch (tok) {
768 case XML_TOK_PROLOG_S:
769 return XML_ROLE_ATTLIST_NONE;
770 case XML_TOK_DECL_CLOSE:
771 setTopLevel(state);
772 return XML_ROLE_ATTLIST_NONE;
773 case XML_TOK_NAME:
774 case XML_TOK_PREFIXED_NAME:
775 state->handler = attlist2;
776 return XML_ROLE_ATTRIBUTE_NAME;
777 }
778 return common(state, tok);
779 }
780
781 static int PTRCALL
782 attlist2(PROLOG_STATE *state,
783 int tok,
784 const char *ptr,
785 const char *end,
786 const ENCODING *enc)
787 {
788 switch (tok) {
789 case XML_TOK_PROLOG_S:
790 return XML_ROLE_ATTLIST_NONE;
791 case XML_TOK_NAME:
792 {
793 static const char *types[] = {
794 KW_CDATA,
795 KW_ID,
796 KW_IDREF,
797 KW_IDREFS,
798 KW_ENTITY,
799 KW_ENTITIES,
800 KW_NMTOKEN,
801 KW_NMTOKENS,
802 };
803 int i;
804 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
805 if (XmlNameMatchesAscii(enc, ptr, end, types[i])) {
806 state->handler = attlist8;
807 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
808 }
809 }
810 if (XmlNameMatchesAscii(enc, ptr, end, KW_NOTATION)) {
811 state->handler = attlist5;
812 return XML_ROLE_ATTLIST_NONE;
813 }
814 break;
815 case XML_TOK_OPEN_PAREN:
816 state->handler = attlist3;
817 return XML_ROLE_ATTLIST_NONE;
818 }
819 return common(state, tok);
820 }
821
822 static int PTRCALL
823 attlist3(PROLOG_STATE *state,
824 int tok,
825 const char *ptr,
826 const char *end,
827 const ENCODING *enc)
828 {
829 switch (tok) {
830 case XML_TOK_PROLOG_S:
831 return XML_ROLE_ATTLIST_NONE;
832 case XML_TOK_NMTOKEN:
833 case XML_TOK_NAME:
834 case XML_TOK_PREFIXED_NAME:
835 state->handler = attlist4;
836 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
837 }
838 return common(state, tok);
839 }
840
841 static int PTRCALL
842 attlist4(PROLOG_STATE *state,
843 int tok,
844 const char *ptr,
845 const char *end,
846 const ENCODING *enc)
847 {
848 switch (tok) {
849 case XML_TOK_PROLOG_S:
850 return XML_ROLE_ATTLIST_NONE;
851 case XML_TOK_CLOSE_PAREN:
852 state->handler = attlist8;
853 return XML_ROLE_ATTLIST_NONE;
854 case XML_TOK_OR:
855 state->handler = attlist3;
856 return XML_ROLE_ATTLIST_NONE;
857 }
858 return common(state, tok);
859 }
860
861 static int PTRCALL
862 attlist5(PROLOG_STATE *state,
863 int tok,
864 const char *ptr,
865 const char *end,
866 const ENCODING *enc)
867 {
868 switch (tok) {
869 case XML_TOK_PROLOG_S:
870 return XML_ROLE_ATTLIST_NONE;
871 case XML_TOK_OPEN_PAREN:
872 state->handler = attlist6;
873 return XML_ROLE_ATTLIST_NONE;
874 }
875 return common(state, tok);
876 }
877
878 static int PTRCALL
879 attlist6(PROLOG_STATE *state,
880 int tok,
881 const char *ptr,
882 const char *end,
883 const ENCODING *enc)
884 {
885 switch (tok) {
886 case XML_TOK_PROLOG_S:
887 return XML_ROLE_ATTLIST_NONE;
888 case XML_TOK_NAME:
889 state->handler = attlist7;
890 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
891 }
892 return common(state, tok);
893 }
894
895 static int PTRCALL
896 attlist7(PROLOG_STATE *state,
897 int tok,
898 const char *ptr,
899 const char *end,
900 const ENCODING *enc)
901 {
902 switch (tok) {
903 case XML_TOK_PROLOG_S:
904 return XML_ROLE_ATTLIST_NONE;
905 case XML_TOK_CLOSE_PAREN:
906 state->handler = attlist8;
907 return XML_ROLE_ATTLIST_NONE;
908 case XML_TOK_OR:
909 state->handler = attlist6;
910 return XML_ROLE_ATTLIST_NONE;
911 }
912 return common(state, tok);
913 }
914
915 /* default value */
916 static int PTRCALL
917 attlist8(PROLOG_STATE *state,
918 int tok,
919 const char *ptr,
920 const char *end,
921 const ENCODING *enc)
922 {
923 switch (tok) {
924 case XML_TOK_PROLOG_S:
925 return XML_ROLE_ATTLIST_NONE;
926 case XML_TOK_POUND_NAME:
927 if (XmlNameMatchesAscii(enc,
928 ptr + MIN_BYTES_PER_CHAR(enc),
929 end,
930 KW_IMPLIED)) {
931 state->handler = attlist1;
932 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
933 }
934 if (XmlNameMatchesAscii(enc,
935 ptr + MIN_BYTES_PER_CHAR(enc),
936 end,
937 KW_REQUIRED)) {
938 state->handler = attlist1;
939 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
940 }
941 if (XmlNameMatchesAscii(enc,
942 ptr + MIN_BYTES_PER_CHAR(enc),
943 end,
944 KW_FIXED)) {
945 state->handler = attlist9;
946 return XML_ROLE_ATTLIST_NONE;
947 }
948 break;
949 case XML_TOK_LITERAL:
950 state->handler = attlist1;
951 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
952 }
953 return common(state, tok);
954 }
955
956 static int PTRCALL
957 attlist9(PROLOG_STATE *state,
958 int tok,
959 const char *ptr,
960 const char *end,
961 const ENCODING *enc)
962 {
963 switch (tok) {
964 case XML_TOK_PROLOG_S:
965 return XML_ROLE_ATTLIST_NONE;
966 case XML_TOK_LITERAL:
967 state->handler = attlist1;
968 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
969 }
970 return common(state, tok);
971 }
972
973 static int PTRCALL
974 element0(PROLOG_STATE *state,
975 int tok,
976 const char *ptr,
977 const char *end,
978 const ENCODING *enc)
979 {
980 switch (tok) {
981 case XML_TOK_PROLOG_S:
982 return XML_ROLE_ELEMENT_NONE;
983 case XML_TOK_NAME:
984 case XML_TOK_PREFIXED_NAME:
985 state->handler = element1;
986 return XML_ROLE_ELEMENT_NAME;
987 }
988 return common(state, tok);
989 }
990
991 static int PTRCALL
992 element1(PROLOG_STATE *state,
993 int tok,
994 const char *ptr,
995 const char *end,
996 const ENCODING *enc)
997 {
998 switch (tok) {
999 case XML_TOK_PROLOG_S:
1000 return XML_ROLE_ELEMENT_NONE;
1001 case XML_TOK_NAME:
1002 if (XmlNameMatchesAscii(enc, ptr, end, KW_EMPTY)) {
1003 state->handler = declClose;
1004 state->role_none = XML_ROLE_ELEMENT_NONE;
1005 return XML_ROLE_CONTENT_EMPTY;
1006 }
1007 if (XmlNameMatchesAscii(enc, ptr, end, KW_ANY)) {
1008 state->handler = declClose;
1009 state->role_none = XML_ROLE_ELEMENT_NONE;
1010 return XML_ROLE_CONTENT_ANY;
1011 }
1012 break;
1013 case XML_TOK_OPEN_PAREN:
1014 state->handler = element2;
1015 state->level = 1;
1016 return XML_ROLE_GROUP_OPEN;
1017 }
1018 return common(state, tok);
1019 }
1020
1021 static int PTRCALL
1022 element2(PROLOG_STATE *state,
1023 int tok,
1024 const char *ptr,
1025 const char *end,
1026 const ENCODING *enc)
1027 {
1028 switch (tok) {
1029 case XML_TOK_PROLOG_S:
1030 return XML_ROLE_ELEMENT_NONE;
1031 case XML_TOK_POUND_NAME:
1032 if (XmlNameMatchesAscii(enc,
1033 ptr + MIN_BYTES_PER_CHAR(enc),
1034 end,
1035 KW_PCDATA)) {
1036 state->handler = element3;
1037 return XML_ROLE_CONTENT_PCDATA;
1038 }
1039 break;
1040 case XML_TOK_OPEN_PAREN:
1041 state->level = 2;
1042 state->handler = element6;
1043 return XML_ROLE_GROUP_OPEN;
1044 case XML_TOK_NAME:
1045 case XML_TOK_PREFIXED_NAME:
1046 state->handler = element7;
1047 return XML_ROLE_CONTENT_ELEMENT;
1048 case XML_TOK_NAME_QUESTION:
1049 state->handler = element7;
1050 return XML_ROLE_CONTENT_ELEMENT_OPT;
1051 case XML_TOK_NAME_ASTERISK:
1052 state->handler = element7;
1053 return XML_ROLE_CONTENT_ELEMENT_REP;
1054 case XML_TOK_NAME_PLUS:
1055 state->handler = element7;
1056 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1057 }
1058 return common(state, tok);
1059 }
1060
1061 static int PTRCALL
1062 element3(PROLOG_STATE *state,
1063 int tok,
1064 const char *ptr,
1065 const char *end,
1066 const ENCODING *enc)
1067 {
1068 switch (tok) {
1069 case XML_TOK_PROLOG_S:
1070 return XML_ROLE_ELEMENT_NONE;
1071 case XML_TOK_CLOSE_PAREN:
1072 state->handler = declClose;
1073 state->role_none = XML_ROLE_ELEMENT_NONE;
1074 return XML_ROLE_GROUP_CLOSE;
1075 case XML_TOK_CLOSE_PAREN_ASTERISK:
1076 state->handler = declClose;
1077 state->role_none = XML_ROLE_ELEMENT_NONE;
1078 return XML_ROLE_GROUP_CLOSE_REP;
1079 case XML_TOK_OR:
1080 state->handler = element4;
1081 return XML_ROLE_ELEMENT_NONE;
1082 }
1083 return common(state, tok);
1084 }
1085
1086 static int PTRCALL
1087 element4(PROLOG_STATE *state,
1088 int tok,
1089 const char *ptr,
1090 const char *end,
1091 const ENCODING *enc)
1092 {
1093 switch (tok) {
1094 case XML_TOK_PROLOG_S:
1095 return XML_ROLE_ELEMENT_NONE;
1096 case XML_TOK_NAME:
1097 case XML_TOK_PREFIXED_NAME:
1098 state->handler = element5;
1099 return XML_ROLE_CONTENT_ELEMENT;
1100 }
1101 return common(state, tok);
1102 }
1103
1104 static int PTRCALL
1105 element5(PROLOG_STATE *state,
1106 int tok,
1107 const char *ptr,
1108 const char *end,
1109 const ENCODING *enc)
1110 {
1111 switch (tok) {
1112 case XML_TOK_PROLOG_S:
1113 return XML_ROLE_ELEMENT_NONE;
1114 case XML_TOK_CLOSE_PAREN_ASTERISK:
1115 state->handler = declClose;
1116 state->role_none = XML_ROLE_ELEMENT_NONE;
1117 return XML_ROLE_GROUP_CLOSE_REP;
1118 case XML_TOK_OR:
1119 state->handler = element4;
1120 return XML_ROLE_ELEMENT_NONE;
1121 }
1122 return common(state, tok);
1123 }
1124
1125 static int PTRCALL
1126 element6(PROLOG_STATE *state,
1127 int tok,
1128 const char *ptr,
1129 const char *end,
1130 const ENCODING *enc)
1131 {
1132 switch (tok) {
1133 case XML_TOK_PROLOG_S:
1134 return XML_ROLE_ELEMENT_NONE;
1135 case XML_TOK_OPEN_PAREN:
1136 state->level += 1;
1137 return XML_ROLE_GROUP_OPEN;
1138 case XML_TOK_NAME:
1139 case XML_TOK_PREFIXED_NAME:
1140 state->handler = element7;
1141 return XML_ROLE_CONTENT_ELEMENT;
1142 case XML_TOK_NAME_QUESTION:
1143 state->handler = element7;
1144 return XML_ROLE_CONTENT_ELEMENT_OPT;
1145 case XML_TOK_NAME_ASTERISK:
1146 state->handler = element7;
1147 return XML_ROLE_CONTENT_ELEMENT_REP;
1148 case XML_TOK_NAME_PLUS:
1149 state->handler = element7;
1150 return XML_ROLE_CONTENT_ELEMENT_PLUS;
1151 }
1152 return common(state, tok);
1153 }
1154
1155 static int PTRCALL
1156 element7(PROLOG_STATE *state,
1157 int tok,
1158 const char *ptr,
1159 const char *end,
1160 const ENCODING *enc)
1161 {
1162 switch (tok) {
1163 case XML_TOK_PROLOG_S:
1164 return XML_ROLE_ELEMENT_NONE;
1165 case XML_TOK_CLOSE_PAREN:
1166 state->level -= 1;
1167 if (state->level == 0) {
1168 state->handler = declClose;
1169 state->role_none = XML_ROLE_ELEMENT_NONE;
1170 }
1171 return XML_ROLE_GROUP_CLOSE;
1172 case XML_TOK_CLOSE_PAREN_ASTERISK:
1173 state->level -= 1;
1174 if (state->level == 0) {
1175 state->handler = declClose;
1176 state->role_none = XML_ROLE_ELEMENT_NONE;
1177 }
1178 return XML_ROLE_GROUP_CLOSE_REP;
1179 case XML_TOK_CLOSE_PAREN_QUESTION:
1180 state->level -= 1;
1181 if (state->level == 0) {
1182 state->handler = declClose;
1183 state->role_none = XML_ROLE_ELEMENT_NONE;
1184 }
1185 return XML_ROLE_GROUP_CLOSE_OPT;
1186 case XML_TOK_CLOSE_PAREN_PLUS:
1187 state->level -= 1;
1188 if (state->level == 0) {
1189 state->handler = declClose;
1190 state->role_none = XML_ROLE_ELEMENT_NONE;
1191 }
1192 return XML_ROLE_GROUP_CLOSE_PLUS;
1193 case XML_TOK_COMMA:
1194 state->handler = element6;
1195 return XML_ROLE_GROUP_SEQUENCE;
1196 case XML_TOK_OR:
1197 state->handler = element6;
1198 return XML_ROLE_GROUP_CHOICE;
1199 }
1200 return common(state, tok);
1201 }
1202
1203 #ifdef XML_DTD
1204
1205 static int PTRCALL
1206 condSect0(PROLOG_STATE *state,
1207 int tok,
1208 const char *ptr,
1209 const char *end,
1210 const ENCODING *enc)
1211 {
1212 switch (tok) {
1213 case XML_TOK_PROLOG_S:
1214 return XML_ROLE_NONE;
1215 case XML_TOK_NAME:
1216 if (XmlNameMatchesAscii(enc, ptr, end, KW_INCLUDE)) {
1217 state->handler = condSect1;
1218 return XML_ROLE_NONE;
1219 }
1220 if (XmlNameMatchesAscii(enc, ptr, end, KW_IGNORE)) {
1221 state->handler = condSect2;
1222 return XML_ROLE_NONE;
1223 }
1224 break;
1225 }
1226 return common(state, tok);
1227 }
1228
1229 static int PTRCALL
1230 condSect1(PROLOG_STATE *state,
1231 int tok,
1232 const char *ptr,
1233 const char *end,
1234 const ENCODING *enc)
1235 {
1236 switch (tok) {
1237 case XML_TOK_PROLOG_S:
1238 return XML_ROLE_NONE;
1239 case XML_TOK_OPEN_BRACKET:
1240 state->handler = externalSubset1;
1241 state->includeLevel += 1;
1242 return XML_ROLE_NONE;
1243 }
1244 return common(state, tok);
1245 }
1246
1247 static int PTRCALL
1248 condSect2(PROLOG_STATE *state,
1249 int tok,
1250 const char *ptr,
1251 const char *end,
1252 const ENCODING *enc)
1253 {
1254 switch (tok) {
1255 case XML_TOK_PROLOG_S:
1256 return XML_ROLE_NONE;
1257 case XML_TOK_OPEN_BRACKET:
1258 state->handler = externalSubset1;
1259 return XML_ROLE_IGNORE_SECT;
1260 }
1261 return common(state, tok);
1262 }
1263
1264 #endif /* XML_DTD */
1265
1266 static int PTRCALL
1267 declClose(PROLOG_STATE *state,
1268 int tok,
1269 const char *ptr,
1270 const char *end,
1271 const ENCODING *enc)
1272 {
1273 switch (tok) {
1274 case XML_TOK_PROLOG_S:
1275 return state->role_none;
1276 case XML_TOK_DECL_CLOSE:
1277 setTopLevel(state);
1278 return state->role_none;
1279 }
1280 return common(state, tok);
1281 }
1282
1283 static int PTRCALL
1284 error(PROLOG_STATE *state,
1285 int tok,
1286 const char *ptr,
1287 const char *end,
1288 const ENCODING *enc)
1289 {
1290 return XML_ROLE_NONE;
1291 }
1292
1293 static int FASTCALL
1294 common(PROLOG_STATE *state, int tok)
1295 {
1296 #ifdef XML_DTD
1297 if (!state->documentEntity && tok == XML_TOK_PARAM_ENTITY_REF)
1298 return XML_ROLE_INNER_PARAM_ENTITY_REF;
1299 #endif
1300 state->handler = error;
1301 return XML_ROLE_ERROR;
1302 }
1303
1304 void
1305 XmlPrologStateInit(PROLOG_STATE *state)
1306 {
1307 state->handler = prolog0;
1308 #ifdef XML_DTD
1309 state->documentEntity = 1;
1310 state->includeLevel = 0;
1311 state->inEntityValue = 0;
1312 #endif /* XML_DTD */
1313 }
1314
1315 #ifdef XML_DTD
1316
1317 void
1318 XmlPrologStateInitExternalEntity(PROLOG_STATE *state)
1319 {
1320 state->handler = externalSubset0;
1321 state->documentEntity = 0;
1322 state->includeLevel = 0;
1323 }
1324
1325 #endif /* XML_DTD */