2086
|
1 /*
|
|
2 The contents of this file are subject to the Mozilla Public License
|
|
3 Version 1.1 (the "License"); you may not use this file except in
|
|
4 compliance with the License. You may obtain a copy of the License at
|
|
5 http://www.mozilla.org/MPL/
|
|
6
|
|
7 Software distributed under the License is distributed on an "AS IS"
|
|
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
9 License for the specific language governing rights and limitations
|
|
10 under the License.
|
|
11
|
|
12 The Original Code is expat.
|
|
13
|
|
14 The Initial Developer of the Original Code is James Clark.
|
|
15 Portions created by James Clark are Copyright (C) 1998, 1999
|
|
16 James Clark. All Rights Reserved.
|
|
17
|
|
18 Contributor(s):
|
|
19
|
|
20 */
|
|
21
|
|
22 #include "xmldef.h"
|
|
23 #include "xmlrole.h"
|
|
24
|
|
/* Known gaps: this state machine does not check

   - that ',' and '|' are not mixed within a single model group
   - the content of literals

*/
|
|
31
|
|
/* Number of bytes in the smallest character of the encoding.  The
   definition is guarded so that a build may supply its own
   MIN_BYTES_PER_CHAR before this point. */
#if !defined(MIN_BYTES_PER_CHAR)
#define MIN_BYTES_PER_CHAR(enc) ((enc)->minBytesPerChar)
#endif
|
|
35
|
|
36 typedef int PROLOG_HANDLER(struct prolog_state *state,
|
|
37 int tok,
|
|
38 const char *ptr,
|
|
39 const char *end,
|
|
40 const ENCODING *enc);
|
|
41
|
|
42 static PROLOG_HANDLER
|
|
43 prolog0, prolog1, prolog2,
|
|
44 doctype0, doctype1, doctype2, doctype3, doctype4, doctype5,
|
|
45 internalSubset,
|
|
46 entity0, entity1, entity2, entity3, entity4, entity5, entity6,
|
|
47 entity7, entity8, entity9,
|
|
48 notation0, notation1, notation2, notation3, notation4,
|
|
49 attlist0, attlist1, attlist2, attlist3, attlist4, attlist5, attlist6,
|
|
50 attlist7, attlist8, attlist9,
|
|
51 element0, element1, element2, element3, element4, element5, element6,
|
|
52 element7,
|
|
53 declClose,
|
|
54 error;
|
|
55
|
|
56 static
|
|
57 int syntaxError(PROLOG_STATE *);
|
|
58
|
|
59 static
|
|
60 int prolog0(PROLOG_STATE *state,
|
|
61 int tok,
|
|
62 const char *ptr,
|
|
63 const char *end,
|
|
64 const ENCODING *enc)
|
|
65 {
|
|
66 switch (tok) {
|
|
67 case XML_TOK_PROLOG_S:
|
|
68 state->handler = prolog1;
|
|
69 return XML_ROLE_NONE;
|
|
70 case XML_TOK_XML_DECL:
|
|
71 state->handler = prolog1;
|
|
72 return XML_ROLE_XML_DECL;
|
|
73 case XML_TOK_PI:
|
|
74 state->handler = prolog1;
|
|
75 return XML_ROLE_NONE;
|
|
76 case XML_TOK_COMMENT:
|
|
77 state->handler = prolog1;
|
|
78 case XML_TOK_BOM:
|
|
79 return XML_ROLE_NONE;
|
|
80 case XML_TOK_DECL_OPEN:
|
|
81 if (!XmlNameMatchesAscii(enc,
|
|
82 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
|
|
83 "DOCTYPE"))
|
|
84 break;
|
|
85 state->handler = doctype0;
|
|
86 return XML_ROLE_NONE;
|
|
87 case XML_TOK_INSTANCE_START:
|
|
88 state->handler = error;
|
|
89 return XML_ROLE_INSTANCE_START;
|
|
90 }
|
|
91 return syntaxError(state);
|
|
92 }
|
|
93
|
|
94 static
|
|
95 int prolog1(PROLOG_STATE *state,
|
|
96 int tok,
|
|
97 const char *ptr,
|
|
98 const char *end,
|
|
99 const ENCODING *enc)
|
|
100 {
|
|
101 switch (tok) {
|
|
102 case XML_TOK_PROLOG_S:
|
|
103 return XML_ROLE_NONE;
|
|
104 case XML_TOK_PI:
|
|
105 case XML_TOK_COMMENT:
|
|
106 case XML_TOK_BOM:
|
|
107 return XML_ROLE_NONE;
|
|
108 case XML_TOK_DECL_OPEN:
|
|
109 if (!XmlNameMatchesAscii(enc,
|
|
110 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
|
|
111 "DOCTYPE"))
|
|
112 break;
|
|
113 state->handler = doctype0;
|
|
114 return XML_ROLE_NONE;
|
|
115 case XML_TOK_INSTANCE_START:
|
|
116 state->handler = error;
|
|
117 return XML_ROLE_INSTANCE_START;
|
|
118 }
|
|
119 return syntaxError(state);
|
|
120 }
|
|
121
|
|
122 static
|
|
123 int prolog2(PROLOG_STATE *state,
|
|
124 int tok,
|
|
125 const char *ptr,
|
|
126 const char *end,
|
|
127 const ENCODING *enc)
|
|
128 {
|
|
129 switch (tok) {
|
|
130 case XML_TOK_PROLOG_S:
|
|
131 return XML_ROLE_NONE;
|
|
132 case XML_TOK_PI:
|
|
133 case XML_TOK_COMMENT:
|
|
134 return XML_ROLE_NONE;
|
|
135 case XML_TOK_INSTANCE_START:
|
|
136 state->handler = error;
|
|
137 return XML_ROLE_INSTANCE_START;
|
|
138 }
|
|
139 return syntaxError(state);
|
|
140 }
|
|
141
|
|
142 static
|
|
143 int doctype0(PROLOG_STATE *state,
|
|
144 int tok,
|
|
145 const char *ptr,
|
|
146 const char *end,
|
|
147 const ENCODING *enc)
|
|
148 {
|
|
149 switch (tok) {
|
|
150 case XML_TOK_PROLOG_S:
|
|
151 return XML_ROLE_NONE;
|
|
152 case XML_TOK_NAME:
|
|
153 case XML_TOK_PREFIXED_NAME:
|
|
154 state->handler = doctype1;
|
|
155 return XML_ROLE_DOCTYPE_NAME;
|
|
156 }
|
|
157 return syntaxError(state);
|
|
158 }
|
|
159
|
|
160 static
|
|
161 int doctype1(PROLOG_STATE *state,
|
|
162 int tok,
|
|
163 const char *ptr,
|
|
164 const char *end,
|
|
165 const ENCODING *enc)
|
|
166 {
|
|
167 switch (tok) {
|
|
168 case XML_TOK_PROLOG_S:
|
|
169 return XML_ROLE_NONE;
|
|
170 case XML_TOK_OPEN_BRACKET:
|
|
171 state->handler = internalSubset;
|
|
172 return XML_ROLE_NONE;
|
|
173 case XML_TOK_DECL_CLOSE:
|
|
174 state->handler = prolog2;
|
|
175 return XML_ROLE_DOCTYPE_CLOSE;
|
|
176 case XML_TOK_NAME:
|
|
177 if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
|
178 state->handler = doctype3;
|
|
179 return XML_ROLE_NONE;
|
|
180 }
|
|
181 if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) {
|
|
182 state->handler = doctype2;
|
|
183 return XML_ROLE_NONE;
|
|
184 }
|
|
185 break;
|
|
186 }
|
|
187 return syntaxError(state);
|
|
188 }
|
|
189
|
|
190 static
|
|
191 int doctype2(PROLOG_STATE *state,
|
|
192 int tok,
|
|
193 const char *ptr,
|
|
194 const char *end,
|
|
195 const ENCODING *enc)
|
|
196 {
|
|
197 switch (tok) {
|
|
198 case XML_TOK_PROLOG_S:
|
|
199 return XML_ROLE_NONE;
|
|
200 case XML_TOK_LITERAL:
|
|
201 state->handler = doctype3;
|
|
202 return XML_ROLE_DOCTYPE_PUBLIC_ID;
|
|
203 }
|
|
204 return syntaxError(state);
|
|
205 }
|
|
206
|
|
207 static
|
|
208 int doctype3(PROLOG_STATE *state,
|
|
209 int tok,
|
|
210 const char *ptr,
|
|
211 const char *end,
|
|
212 const ENCODING *enc)
|
|
213 {
|
|
214 switch (tok) {
|
|
215 case XML_TOK_PROLOG_S:
|
|
216 return XML_ROLE_NONE;
|
|
217 case XML_TOK_LITERAL:
|
|
218 state->handler = doctype4;
|
|
219 return XML_ROLE_DOCTYPE_SYSTEM_ID;
|
|
220 }
|
|
221 return syntaxError(state);
|
|
222 }
|
|
223
|
|
224 static
|
|
225 int doctype4(PROLOG_STATE *state,
|
|
226 int tok,
|
|
227 const char *ptr,
|
|
228 const char *end,
|
|
229 const ENCODING *enc)
|
|
230 {
|
|
231 switch (tok) {
|
|
232 case XML_TOK_PROLOG_S:
|
|
233 return XML_ROLE_NONE;
|
|
234 case XML_TOK_OPEN_BRACKET:
|
|
235 state->handler = internalSubset;
|
|
236 return XML_ROLE_NONE;
|
|
237 case XML_TOK_DECL_CLOSE:
|
|
238 state->handler = prolog2;
|
|
239 return XML_ROLE_DOCTYPE_CLOSE;
|
|
240 }
|
|
241 return syntaxError(state);
|
|
242 }
|
|
243
|
|
244 static
|
|
245 int doctype5(PROLOG_STATE *state,
|
|
246 int tok,
|
|
247 const char *ptr,
|
|
248 const char *end,
|
|
249 const ENCODING *enc)
|
|
250 {
|
|
251 switch (tok) {
|
|
252 case XML_TOK_PROLOG_S:
|
|
253 return XML_ROLE_NONE;
|
|
254 case XML_TOK_DECL_CLOSE:
|
|
255 state->handler = prolog2;
|
|
256 return XML_ROLE_DOCTYPE_CLOSE;
|
|
257 }
|
|
258 return syntaxError(state);
|
|
259 }
|
|
260
|
|
261 static
|
|
262 int internalSubset(PROLOG_STATE *state,
|
|
263 int tok,
|
|
264 const char *ptr,
|
|
265 const char *end,
|
|
266 const ENCODING *enc)
|
|
267 {
|
|
268 switch (tok) {
|
|
269 case XML_TOK_PROLOG_S:
|
|
270 return XML_ROLE_NONE;
|
|
271 case XML_TOK_DECL_OPEN:
|
|
272 if (XmlNameMatchesAscii(enc,
|
|
273 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
|
|
274 "ENTITY")) {
|
|
275 state->handler = entity0;
|
|
276 return XML_ROLE_NONE;
|
|
277 }
|
|
278 if (XmlNameMatchesAscii(enc,
|
|
279 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
|
|
280 "ATTLIST")) {
|
|
281 state->handler = attlist0;
|
|
282 return XML_ROLE_NONE;
|
|
283 }
|
|
284 if (XmlNameMatchesAscii(enc,
|
|
285 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
|
|
286 "ELEMENT")) {
|
|
287 state->handler = element0;
|
|
288 return XML_ROLE_NONE;
|
|
289 }
|
|
290 if (XmlNameMatchesAscii(enc,
|
|
291 ptr + 2 * MIN_BYTES_PER_CHAR(enc),
|
|
292 "NOTATION")) {
|
|
293 state->handler = notation0;
|
|
294 return XML_ROLE_NONE;
|
|
295 }
|
|
296 break;
|
|
297 case XML_TOK_PI:
|
|
298 case XML_TOK_COMMENT:
|
|
299 return XML_ROLE_NONE;
|
|
300 case XML_TOK_PARAM_ENTITY_REF:
|
|
301 return XML_ROLE_PARAM_ENTITY_REF;
|
|
302 case XML_TOK_CLOSE_BRACKET:
|
|
303 state->handler = doctype5;
|
|
304 return XML_ROLE_NONE;
|
|
305 }
|
|
306 return syntaxError(state);
|
|
307 }
|
|
308
|
|
309 static
|
|
310 int entity0(PROLOG_STATE *state,
|
|
311 int tok,
|
|
312 const char *ptr,
|
|
313 const char *end,
|
|
314 const ENCODING *enc)
|
|
315 {
|
|
316 switch (tok) {
|
|
317 case XML_TOK_PROLOG_S:
|
|
318 return XML_ROLE_NONE;
|
|
319 case XML_TOK_PERCENT:
|
|
320 state->handler = entity1;
|
|
321 return XML_ROLE_NONE;
|
|
322 case XML_TOK_NAME:
|
|
323 state->handler = entity2;
|
|
324 return XML_ROLE_GENERAL_ENTITY_NAME;
|
|
325 }
|
|
326 return syntaxError(state);
|
|
327 }
|
|
328
|
|
329 static
|
|
330 int entity1(PROLOG_STATE *state,
|
|
331 int tok,
|
|
332 const char *ptr,
|
|
333 const char *end,
|
|
334 const ENCODING *enc)
|
|
335 {
|
|
336 switch (tok) {
|
|
337 case XML_TOK_PROLOG_S:
|
|
338 return XML_ROLE_NONE;
|
|
339 case XML_TOK_NAME:
|
|
340 state->handler = entity7;
|
|
341 return XML_ROLE_PARAM_ENTITY_NAME;
|
|
342 }
|
|
343 return syntaxError(state);
|
|
344 }
|
|
345
|
|
346 static
|
|
347 int entity2(PROLOG_STATE *state,
|
|
348 int tok,
|
|
349 const char *ptr,
|
|
350 const char *end,
|
|
351 const ENCODING *enc)
|
|
352 {
|
|
353 switch (tok) {
|
|
354 case XML_TOK_PROLOG_S:
|
|
355 return XML_ROLE_NONE;
|
|
356 case XML_TOK_NAME:
|
|
357 if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
|
358 state->handler = entity4;
|
|
359 return XML_ROLE_NONE;
|
|
360 }
|
|
361 if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) {
|
|
362 state->handler = entity3;
|
|
363 return XML_ROLE_NONE;
|
|
364 }
|
|
365 break;
|
|
366 case XML_TOK_LITERAL:
|
|
367 state->handler = declClose;
|
|
368 return XML_ROLE_ENTITY_VALUE;
|
|
369 }
|
|
370 return syntaxError(state);
|
|
371 }
|
|
372
|
|
373 static
|
|
374 int entity3(PROLOG_STATE *state,
|
|
375 int tok,
|
|
376 const char *ptr,
|
|
377 const char *end,
|
|
378 const ENCODING *enc)
|
|
379 {
|
|
380 switch (tok) {
|
|
381 case XML_TOK_PROLOG_S:
|
|
382 return XML_ROLE_NONE;
|
|
383 case XML_TOK_LITERAL:
|
|
384 state->handler = entity4;
|
|
385 return XML_ROLE_ENTITY_PUBLIC_ID;
|
|
386 }
|
|
387 return syntaxError(state);
|
|
388 }
|
|
389
|
|
390
|
|
391 static
|
|
392 int entity4(PROLOG_STATE *state,
|
|
393 int tok,
|
|
394 const char *ptr,
|
|
395 const char *end,
|
|
396 const ENCODING *enc)
|
|
397 {
|
|
398 switch (tok) {
|
|
399 case XML_TOK_PROLOG_S:
|
|
400 return XML_ROLE_NONE;
|
|
401 case XML_TOK_LITERAL:
|
|
402 state->handler = entity5;
|
|
403 return XML_ROLE_ENTITY_SYSTEM_ID;
|
|
404 }
|
|
405 return syntaxError(state);
|
|
406 }
|
|
407
|
|
408 static
|
|
409 int entity5(PROLOG_STATE *state,
|
|
410 int tok,
|
|
411 const char *ptr,
|
|
412 const char *end,
|
|
413 const ENCODING *enc)
|
|
414 {
|
|
415 switch (tok) {
|
|
416 case XML_TOK_PROLOG_S:
|
|
417 return XML_ROLE_NONE;
|
|
418 case XML_TOK_DECL_CLOSE:
|
|
419 state->handler = internalSubset;
|
|
420 return XML_ROLE_NONE;
|
|
421 case XML_TOK_NAME:
|
|
422 if (XmlNameMatchesAscii(enc, ptr, "NDATA")) {
|
|
423 state->handler = entity6;
|
|
424 return XML_ROLE_NONE;
|
|
425 }
|
|
426 break;
|
|
427 }
|
|
428 return syntaxError(state);
|
|
429 }
|
|
430
|
|
431 static
|
|
432 int entity6(PROLOG_STATE *state,
|
|
433 int tok,
|
|
434 const char *ptr,
|
|
435 const char *end,
|
|
436 const ENCODING *enc)
|
|
437 {
|
|
438 switch (tok) {
|
|
439 case XML_TOK_PROLOG_S:
|
|
440 return XML_ROLE_NONE;
|
|
441 case XML_TOK_NAME:
|
|
442 state->handler = declClose;
|
|
443 return XML_ROLE_ENTITY_NOTATION_NAME;
|
|
444 }
|
|
445 return syntaxError(state);
|
|
446 }
|
|
447
|
|
448 static
|
|
449 int entity7(PROLOG_STATE *state,
|
|
450 int tok,
|
|
451 const char *ptr,
|
|
452 const char *end,
|
|
453 const ENCODING *enc)
|
|
454 {
|
|
455 switch (tok) {
|
|
456 case XML_TOK_PROLOG_S:
|
|
457 return XML_ROLE_NONE;
|
|
458 case XML_TOK_NAME:
|
|
459 if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
|
460 state->handler = entity9;
|
|
461 return XML_ROLE_NONE;
|
|
462 }
|
|
463 if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) {
|
|
464 state->handler = entity8;
|
|
465 return XML_ROLE_NONE;
|
|
466 }
|
|
467 break;
|
|
468 case XML_TOK_LITERAL:
|
|
469 state->handler = declClose;
|
|
470 return XML_ROLE_ENTITY_VALUE;
|
|
471 }
|
|
472 return syntaxError(state);
|
|
473 }
|
|
474
|
|
475 static
|
|
476 int entity8(PROLOG_STATE *state,
|
|
477 int tok,
|
|
478 const char *ptr,
|
|
479 const char *end,
|
|
480 const ENCODING *enc)
|
|
481 {
|
|
482 switch (tok) {
|
|
483 case XML_TOK_PROLOG_S:
|
|
484 return XML_ROLE_NONE;
|
|
485 case XML_TOK_LITERAL:
|
|
486 state->handler = entity9;
|
|
487 return XML_ROLE_ENTITY_PUBLIC_ID;
|
|
488 }
|
|
489 return syntaxError(state);
|
|
490 }
|
|
491
|
|
492 static
|
|
493 int entity9(PROLOG_STATE *state,
|
|
494 int tok,
|
|
495 const char *ptr,
|
|
496 const char *end,
|
|
497 const ENCODING *enc)
|
|
498 {
|
|
499 switch (tok) {
|
|
500 case XML_TOK_PROLOG_S:
|
|
501 return XML_ROLE_NONE;
|
|
502 case XML_TOK_LITERAL:
|
|
503 state->handler = declClose;
|
|
504 return XML_ROLE_ENTITY_SYSTEM_ID;
|
|
505 }
|
|
506 return syntaxError(state);
|
|
507 }
|
|
508
|
|
509 static
|
|
510 int notation0(PROLOG_STATE *state,
|
|
511 int tok,
|
|
512 const char *ptr,
|
|
513 const char *end,
|
|
514 const ENCODING *enc)
|
|
515 {
|
|
516 switch (tok) {
|
|
517 case XML_TOK_PROLOG_S:
|
|
518 return XML_ROLE_NONE;
|
|
519 case XML_TOK_NAME:
|
|
520 state->handler = notation1;
|
|
521 return XML_ROLE_NOTATION_NAME;
|
|
522 }
|
|
523 return syntaxError(state);
|
|
524 }
|
|
525
|
|
526 static
|
|
527 int notation1(PROLOG_STATE *state,
|
|
528 int tok,
|
|
529 const char *ptr,
|
|
530 const char *end,
|
|
531 const ENCODING *enc)
|
|
532 {
|
|
533 switch (tok) {
|
|
534 case XML_TOK_PROLOG_S:
|
|
535 return XML_ROLE_NONE;
|
|
536 case XML_TOK_NAME:
|
|
537 if (XmlNameMatchesAscii(enc, ptr, "SYSTEM")) {
|
|
538 state->handler = notation3;
|
|
539 return XML_ROLE_NONE;
|
|
540 }
|
|
541 if (XmlNameMatchesAscii(enc, ptr, "PUBLIC")) {
|
|
542 state->handler = notation2;
|
|
543 return XML_ROLE_NONE;
|
|
544 }
|
|
545 break;
|
|
546 }
|
|
547 return syntaxError(state);
|
|
548 }
|
|
549
|
|
550 static
|
|
551 int notation2(PROLOG_STATE *state,
|
|
552 int tok,
|
|
553 const char *ptr,
|
|
554 const char *end,
|
|
555 const ENCODING *enc)
|
|
556 {
|
|
557 switch (tok) {
|
|
558 case XML_TOK_PROLOG_S:
|
|
559 return XML_ROLE_NONE;
|
|
560 case XML_TOK_LITERAL:
|
|
561 state->handler = notation4;
|
|
562 return XML_ROLE_NOTATION_PUBLIC_ID;
|
|
563 }
|
|
564 return syntaxError(state);
|
|
565 }
|
|
566
|
|
567 static
|
|
568 int notation3(PROLOG_STATE *state,
|
|
569 int tok,
|
|
570 const char *ptr,
|
|
571 const char *end,
|
|
572 const ENCODING *enc)
|
|
573 {
|
|
574 switch (tok) {
|
|
575 case XML_TOK_PROLOG_S:
|
|
576 return XML_ROLE_NONE;
|
|
577 case XML_TOK_LITERAL:
|
|
578 state->handler = declClose;
|
|
579 return XML_ROLE_NOTATION_SYSTEM_ID;
|
|
580 }
|
|
581 return syntaxError(state);
|
|
582 }
|
|
583
|
|
584 static
|
|
585 int notation4(PROLOG_STATE *state,
|
|
586 int tok,
|
|
587 const char *ptr,
|
|
588 const char *end,
|
|
589 const ENCODING *enc)
|
|
590 {
|
|
591 switch (tok) {
|
|
592 case XML_TOK_PROLOG_S:
|
|
593 return XML_ROLE_NONE;
|
|
594 case XML_TOK_LITERAL:
|
|
595 state->handler = declClose;
|
|
596 return XML_ROLE_NOTATION_SYSTEM_ID;
|
|
597 case XML_TOK_DECL_CLOSE:
|
|
598 state->handler = internalSubset;
|
|
599 return XML_ROLE_NOTATION_NO_SYSTEM_ID;
|
|
600 }
|
|
601 return syntaxError(state);
|
|
602 }
|
|
603
|
|
604 static
|
|
605 int attlist0(PROLOG_STATE *state,
|
|
606 int tok,
|
|
607 const char *ptr,
|
|
608 const char *end,
|
|
609 const ENCODING *enc)
|
|
610 {
|
|
611 switch (tok) {
|
|
612 case XML_TOK_PROLOG_S:
|
|
613 return XML_ROLE_NONE;
|
|
614 case XML_TOK_NAME:
|
|
615 case XML_TOK_PREFIXED_NAME:
|
|
616 state->handler = attlist1;
|
|
617 return XML_ROLE_ATTLIST_ELEMENT_NAME;
|
|
618 }
|
|
619 return syntaxError(state);
|
|
620 }
|
|
621
|
|
622 static
|
|
623 int attlist1(PROLOG_STATE *state,
|
|
624 int tok,
|
|
625 const char *ptr,
|
|
626 const char *end,
|
|
627 const ENCODING *enc)
|
|
628 {
|
|
629 switch (tok) {
|
|
630 case XML_TOK_PROLOG_S:
|
|
631 return XML_ROLE_NONE;
|
|
632 case XML_TOK_DECL_CLOSE:
|
|
633 state->handler = internalSubset;
|
|
634 return XML_ROLE_NONE;
|
|
635 case XML_TOK_NAME:
|
|
636 case XML_TOK_PREFIXED_NAME:
|
|
637 state->handler = attlist2;
|
|
638 return XML_ROLE_ATTRIBUTE_NAME;
|
|
639 }
|
|
640 return syntaxError(state);
|
|
641 }
|
|
642
|
|
643 static
|
|
644 int attlist2(PROLOG_STATE *state,
|
|
645 int tok,
|
|
646 const char *ptr,
|
|
647 const char *end,
|
|
648 const ENCODING *enc)
|
|
649 {
|
|
650 switch (tok) {
|
|
651 case XML_TOK_PROLOG_S:
|
|
652 return XML_ROLE_NONE;
|
|
653 case XML_TOK_NAME:
|
|
654 {
|
|
655 static const char *types[] = {
|
|
656 "CDATA",
|
|
657 "ID",
|
|
658 "IDREF",
|
|
659 "IDREFS",
|
|
660 "ENTITY",
|
|
661 "ENTITIES",
|
|
662 "NMTOKEN",
|
|
663 "NMTOKENS",
|
|
664 };
|
|
665 int i;
|
|
666 for (i = 0; i < (int)(sizeof(types)/sizeof(types[0])); i++)
|
|
667 if (XmlNameMatchesAscii(enc, ptr, types[i])) {
|
|
668 state->handler = attlist8;
|
|
669 return XML_ROLE_ATTRIBUTE_TYPE_CDATA + i;
|
|
670 }
|
|
671 }
|
|
672 if (XmlNameMatchesAscii(enc, ptr, "NOTATION")) {
|
|
673 state->handler = attlist5;
|
|
674 return XML_ROLE_NONE;
|
|
675 }
|
|
676 break;
|
|
677 case XML_TOK_OPEN_PAREN:
|
|
678 state->handler = attlist3;
|
|
679 return XML_ROLE_NONE;
|
|
680 }
|
|
681 return syntaxError(state);
|
|
682 }
|
|
683
|
|
684 static
|
|
685 int attlist3(PROLOG_STATE *state,
|
|
686 int tok,
|
|
687 const char *ptr,
|
|
688 const char *end,
|
|
689 const ENCODING *enc)
|
|
690 {
|
|
691 switch (tok) {
|
|
692 case XML_TOK_PROLOG_S:
|
|
693 return XML_ROLE_NONE;
|
|
694 case XML_TOK_NMTOKEN:
|
|
695 case XML_TOK_NAME:
|
|
696 case XML_TOK_PREFIXED_NAME:
|
|
697 state->handler = attlist4;
|
|
698 return XML_ROLE_ATTRIBUTE_ENUM_VALUE;
|
|
699 }
|
|
700 return syntaxError(state);
|
|
701 }
|
|
702
|
|
703 static
|
|
704 int attlist4(PROLOG_STATE *state,
|
|
705 int tok,
|
|
706 const char *ptr,
|
|
707 const char *end,
|
|
708 const ENCODING *enc)
|
|
709 {
|
|
710 switch (tok) {
|
|
711 case XML_TOK_PROLOG_S:
|
|
712 return XML_ROLE_NONE;
|
|
713 case XML_TOK_CLOSE_PAREN:
|
|
714 state->handler = attlist8;
|
|
715 return XML_ROLE_NONE;
|
|
716 case XML_TOK_OR:
|
|
717 state->handler = attlist3;
|
|
718 return XML_ROLE_NONE;
|
|
719 }
|
|
720 return syntaxError(state);
|
|
721 }
|
|
722
|
|
723 static
|
|
724 int attlist5(PROLOG_STATE *state,
|
|
725 int tok,
|
|
726 const char *ptr,
|
|
727 const char *end,
|
|
728 const ENCODING *enc)
|
|
729 {
|
|
730 switch (tok) {
|
|
731 case XML_TOK_PROLOG_S:
|
|
732 return XML_ROLE_NONE;
|
|
733 case XML_TOK_OPEN_PAREN:
|
|
734 state->handler = attlist6;
|
|
735 return XML_ROLE_NONE;
|
|
736 }
|
|
737 return syntaxError(state);
|
|
738 }
|
|
739
|
|
740
|
|
741 static
|
|
742 int attlist6(PROLOG_STATE *state,
|
|
743 int tok,
|
|
744 const char *ptr,
|
|
745 const char *end,
|
|
746 const ENCODING *enc)
|
|
747 {
|
|
748 switch (tok) {
|
|
749 case XML_TOK_PROLOG_S:
|
|
750 return XML_ROLE_NONE;
|
|
751 case XML_TOK_NAME:
|
|
752 state->handler = attlist7;
|
|
753 return XML_ROLE_ATTRIBUTE_NOTATION_VALUE;
|
|
754 }
|
|
755 return syntaxError(state);
|
|
756 }
|
|
757
|
|
758 static
|
|
759 int attlist7(PROLOG_STATE *state,
|
|
760 int tok,
|
|
761 const char *ptr,
|
|
762 const char *end,
|
|
763 const ENCODING *enc)
|
|
764 {
|
|
765 switch (tok) {
|
|
766 case XML_TOK_PROLOG_S:
|
|
767 return XML_ROLE_NONE;
|
|
768 case XML_TOK_CLOSE_PAREN:
|
|
769 state->handler = attlist8;
|
|
770 return XML_ROLE_NONE;
|
|
771 case XML_TOK_OR:
|
|
772 state->handler = attlist6;
|
|
773 return XML_ROLE_NONE;
|
|
774 }
|
|
775 return syntaxError(state);
|
|
776 }
|
|
777
|
|
778 /* default value */
|
|
779 static
|
|
780 int attlist8(PROLOG_STATE *state,
|
|
781 int tok,
|
|
782 const char *ptr,
|
|
783 const char *end,
|
|
784 const ENCODING *enc)
|
|
785 {
|
|
786 switch (tok) {
|
|
787 case XML_TOK_PROLOG_S:
|
|
788 return XML_ROLE_NONE;
|
|
789 case XML_TOK_POUND_NAME:
|
|
790 if (XmlNameMatchesAscii(enc,
|
|
791 ptr + MIN_BYTES_PER_CHAR(enc),
|
|
792 "IMPLIED")) {
|
|
793 state->handler = attlist1;
|
|
794 return XML_ROLE_IMPLIED_ATTRIBUTE_VALUE;
|
|
795 }
|
|
796 if (XmlNameMatchesAscii(enc,
|
|
797 ptr + MIN_BYTES_PER_CHAR(enc),
|
|
798 "REQUIRED")) {
|
|
799 state->handler = attlist1;
|
|
800 return XML_ROLE_REQUIRED_ATTRIBUTE_VALUE;
|
|
801 }
|
|
802 if (XmlNameMatchesAscii(enc,
|
|
803 ptr + MIN_BYTES_PER_CHAR(enc),
|
|
804 "FIXED")) {
|
|
805 state->handler = attlist9;
|
|
806 return XML_ROLE_NONE;
|
|
807 }
|
|
808 break;
|
|
809 case XML_TOK_LITERAL:
|
|
810 state->handler = attlist1;
|
|
811 return XML_ROLE_DEFAULT_ATTRIBUTE_VALUE;
|
|
812 }
|
|
813 return syntaxError(state);
|
|
814 }
|
|
815
|
|
816 static
|
|
817 int attlist9(PROLOG_STATE *state,
|
|
818 int tok,
|
|
819 const char *ptr,
|
|
820 const char *end,
|
|
821 const ENCODING *enc)
|
|
822 {
|
|
823 switch (tok) {
|
|
824 case XML_TOK_PROLOG_S:
|
|
825 return XML_ROLE_NONE;
|
|
826 case XML_TOK_LITERAL:
|
|
827 state->handler = attlist1;
|
|
828 return XML_ROLE_FIXED_ATTRIBUTE_VALUE;
|
|
829 }
|
|
830 return syntaxError(state);
|
|
831 }
|
|
832
|
|
833 static
|
|
834 int element0(PROLOG_STATE *state,
|
|
835 int tok,
|
|
836 const char *ptr,
|
|
837 const char *end,
|
|
838 const ENCODING *enc)
|
|
839 {
|
|
840 switch (tok) {
|
|
841 case XML_TOK_PROLOG_S:
|
|
842 return XML_ROLE_NONE;
|
|
843 case XML_TOK_NAME:
|
|
844 case XML_TOK_PREFIXED_NAME:
|
|
845 state->handler = element1;
|
|
846 return XML_ROLE_ELEMENT_NAME;
|
|
847 }
|
|
848 return syntaxError(state);
|
|
849 }
|
|
850
|
|
851 static
|
|
852 int element1(PROLOG_STATE *state,
|
|
853 int tok,
|
|
854 const char *ptr,
|
|
855 const char *end,
|
|
856 const ENCODING *enc)
|
|
857 {
|
|
858 switch (tok) {
|
|
859 case XML_TOK_PROLOG_S:
|
|
860 return XML_ROLE_NONE;
|
|
861 case XML_TOK_NAME:
|
|
862 if (XmlNameMatchesAscii(enc, ptr, "EMPTY")) {
|
|
863 state->handler = declClose;
|
|
864 return XML_ROLE_CONTENT_EMPTY;
|
|
865 }
|
|
866 if (XmlNameMatchesAscii(enc, ptr, "ANY")) {
|
|
867 state->handler = declClose;
|
|
868 return XML_ROLE_CONTENT_ANY;
|
|
869 }
|
|
870 break;
|
|
871 case XML_TOK_OPEN_PAREN:
|
|
872 state->handler = element2;
|
|
873 state->level = 1;
|
|
874 return XML_ROLE_GROUP_OPEN;
|
|
875 }
|
|
876 return syntaxError(state);
|
|
877 }
|
|
878
|
|
879 static
|
|
880 int element2(PROLOG_STATE *state,
|
|
881 int tok,
|
|
882 const char *ptr,
|
|
883 const char *end,
|
|
884 const ENCODING *enc)
|
|
885 {
|
|
886 switch (tok) {
|
|
887 case XML_TOK_PROLOG_S:
|
|
888 return XML_ROLE_NONE;
|
|
889 case XML_TOK_POUND_NAME:
|
|
890 if (XmlNameMatchesAscii(enc,
|
|
891 ptr + MIN_BYTES_PER_CHAR(enc),
|
|
892 "PCDATA")) {
|
|
893 state->handler = element3;
|
|
894 return XML_ROLE_CONTENT_PCDATA;
|
|
895 }
|
|
896 break;
|
|
897 case XML_TOK_OPEN_PAREN:
|
|
898 state->level = 2;
|
|
899 state->handler = element6;
|
|
900 return XML_ROLE_GROUP_OPEN;
|
|
901 case XML_TOK_NAME:
|
|
902 case XML_TOK_PREFIXED_NAME:
|
|
903 state->handler = element7;
|
|
904 return XML_ROLE_CONTENT_ELEMENT;
|
|
905 case XML_TOK_NAME_QUESTION:
|
|
906 state->handler = element7;
|
|
907 return XML_ROLE_CONTENT_ELEMENT_OPT;
|
|
908 case XML_TOK_NAME_ASTERISK:
|
|
909 state->handler = element7;
|
|
910 return XML_ROLE_CONTENT_ELEMENT_REP;
|
|
911 case XML_TOK_NAME_PLUS:
|
|
912 state->handler = element7;
|
|
913 return XML_ROLE_CONTENT_ELEMENT_PLUS;
|
|
914 }
|
|
915 return syntaxError(state);
|
|
916 }
|
|
917
|
|
918 static
|
|
919 int element3(PROLOG_STATE *state,
|
|
920 int tok,
|
|
921 const char *ptr,
|
|
922 const char *end,
|
|
923 const ENCODING *enc)
|
|
924 {
|
|
925 switch (tok) {
|
|
926 case XML_TOK_PROLOG_S:
|
|
927 return XML_ROLE_NONE;
|
|
928 case XML_TOK_CLOSE_PAREN:
|
|
929 case XML_TOK_CLOSE_PAREN_ASTERISK:
|
|
930 state->handler = declClose;
|
|
931 return XML_ROLE_GROUP_CLOSE_REP;
|
|
932 case XML_TOK_OR:
|
|
933 state->handler = element4;
|
|
934 return XML_ROLE_NONE;
|
|
935 }
|
|
936 return syntaxError(state);
|
|
937 }
|
|
938
|
|
939 static
|
|
940 int element4(PROLOG_STATE *state,
|
|
941 int tok,
|
|
942 const char *ptr,
|
|
943 const char *end,
|
|
944 const ENCODING *enc)
|
|
945 {
|
|
946 switch (tok) {
|
|
947 case XML_TOK_PROLOG_S:
|
|
948 return XML_ROLE_NONE;
|
|
949 case XML_TOK_NAME:
|
|
950 case XML_TOK_PREFIXED_NAME:
|
|
951 state->handler = element5;
|
|
952 return XML_ROLE_CONTENT_ELEMENT;
|
|
953 }
|
|
954 return syntaxError(state);
|
|
955 }
|
|
956
|
|
957 static
|
|
958 int element5(PROLOG_STATE *state,
|
|
959 int tok,
|
|
960 const char *ptr,
|
|
961 const char *end,
|
|
962 const ENCODING *enc)
|
|
963 {
|
|
964 switch (tok) {
|
|
965 case XML_TOK_PROLOG_S:
|
|
966 return XML_ROLE_NONE;
|
|
967 case XML_TOK_CLOSE_PAREN_ASTERISK:
|
|
968 state->handler = declClose;
|
|
969 return XML_ROLE_GROUP_CLOSE_REP;
|
|
970 case XML_TOK_OR:
|
|
971 state->handler = element4;
|
|
972 return XML_ROLE_NONE;
|
|
973 }
|
|
974 return syntaxError(state);
|
|
975 }
|
|
976
|
|
977 static
|
|
978 int element6(PROLOG_STATE *state,
|
|
979 int tok,
|
|
980 const char *ptr,
|
|
981 const char *end,
|
|
982 const ENCODING *enc)
|
|
983 {
|
|
984 switch (tok) {
|
|
985 case XML_TOK_PROLOG_S:
|
|
986 return XML_ROLE_NONE;
|
|
987 case XML_TOK_OPEN_PAREN:
|
|
988 state->level += 1;
|
|
989 return XML_ROLE_GROUP_OPEN;
|
|
990 case XML_TOK_NAME:
|
|
991 case XML_TOK_PREFIXED_NAME:
|
|
992 state->handler = element7;
|
|
993 return XML_ROLE_CONTENT_ELEMENT;
|
|
994 case XML_TOK_NAME_QUESTION:
|
|
995 state->handler = element7;
|
|
996 return XML_ROLE_CONTENT_ELEMENT_OPT;
|
|
997 case XML_TOK_NAME_ASTERISK:
|
|
998 state->handler = element7;
|
|
999 return XML_ROLE_CONTENT_ELEMENT_REP;
|
|
1000 case XML_TOK_NAME_PLUS:
|
|
1001 state->handler = element7;
|
|
1002 return XML_ROLE_CONTENT_ELEMENT_PLUS;
|
|
1003 }
|
|
1004 return syntaxError(state);
|
|
1005 }
|
|
1006
|
|
1007 static
|
|
1008 int element7(PROLOG_STATE *state,
|
|
1009 int tok,
|
|
1010 const char *ptr,
|
|
1011 const char *end,
|
|
1012 const ENCODING *enc)
|
|
1013 {
|
|
1014 switch (tok) {
|
|
1015 case XML_TOK_PROLOG_S:
|
|
1016 return XML_ROLE_NONE;
|
|
1017 case XML_TOK_CLOSE_PAREN:
|
|
1018 state->level -= 1;
|
|
1019 if (state->level == 0)
|
|
1020 state->handler = declClose;
|
|
1021 return XML_ROLE_GROUP_CLOSE;
|
|
1022 case XML_TOK_CLOSE_PAREN_ASTERISK:
|
|
1023 state->level -= 1;
|
|
1024 if (state->level == 0)
|
|
1025 state->handler = declClose;
|
|
1026 return XML_ROLE_GROUP_CLOSE_REP;
|
|
1027 case XML_TOK_CLOSE_PAREN_QUESTION:
|
|
1028 state->level -= 1;
|
|
1029 if (state->level == 0)
|
|
1030 state->handler = declClose;
|
|
1031 return XML_ROLE_GROUP_CLOSE_OPT;
|
|
1032 case XML_TOK_CLOSE_PAREN_PLUS:
|
|
1033 state->level -= 1;
|
|
1034 if (state->level == 0)
|
|
1035 state->handler = declClose;
|
|
1036 return XML_ROLE_GROUP_CLOSE_PLUS;
|
|
1037 case XML_TOK_COMMA:
|
|
1038 state->handler = element6;
|
|
1039 return XML_ROLE_GROUP_SEQUENCE;
|
|
1040 case XML_TOK_OR:
|
|
1041 state->handler = element6;
|
|
1042 return XML_ROLE_GROUP_CHOICE;
|
|
1043 }
|
|
1044 return syntaxError(state);
|
|
1045 }
|
|
1046
|
|
1047 static
|
|
1048 int declClose(PROLOG_STATE *state,
|
|
1049 int tok,
|
|
1050 const char *ptr,
|
|
1051 const char *end,
|
|
1052 const ENCODING *enc)
|
|
1053 {
|
|
1054 switch (tok) {
|
|
1055 case XML_TOK_PROLOG_S:
|
|
1056 return XML_ROLE_NONE;
|
|
1057 case XML_TOK_DECL_CLOSE:
|
|
1058 state->handler = internalSubset;
|
|
1059 return XML_ROLE_NONE;
|
|
1060 }
|
|
1061 return syntaxError(state);
|
|
1062 }
|
|
1063
|
|
#if 0

/* Disabled handler (not compiled): swallows every token until the
   declaration closes, then returns to the internal subset. */
static
int ignore(PROLOG_STATE *state,
           int tok,
           const char *ptr,
           const char *end,
           const ENCODING *enc)
{
  if (tok == XML_TOK_DECL_CLOSE) {
    state->handler = internalSubset;
    return 0;
  }
  return XML_ROLE_NONE;
}
#endif
|
|
1083
|
|
1084 static
|
|
1085 int error(PROLOG_STATE *state,
|
|
1086 int tok,
|
|
1087 const char *ptr,
|
|
1088 const char *end,
|
|
1089 const ENCODING *enc)
|
|
1090 {
|
|
1091 return XML_ROLE_NONE;
|
|
1092 }
|
|
1093
|
|
1094 static
|
|
1095 int syntaxError(PROLOG_STATE *state)
|
|
1096 {
|
|
1097 state->handler = error;
|
|
1098 return XML_ROLE_ERROR;
|
|
1099 }
|
|
1100
|
|
1101 void XmlPrologStateInit(PROLOG_STATE *state)
|
|
1102 {
|
|
1103 state->handler = prolog0;
|
|
1104 }
|