2086
|
1 /*
|
|
2 The contents of this file are subject to the Mozilla Public License
|
|
3 Version 1.1 (the "License"); you may not use this file except in
|
|
4 compliance with the License. You may obtain a copy of the License at
|
|
5 http://www.mozilla.org/MPL/
|
|
6
|
|
7 Software distributed under the License is distributed on an "AS IS"
|
|
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
9 License for the specific language governing rights and limitations
|
|
10 under the License.
|
|
11
|
|
12 The Original Code is expat.
|
|
13
|
|
14 The Initial Developer of the Original Code is James Clark.
|
|
15 Portions created by James Clark are Copyright (C) 1998, 1999
|
|
16 James Clark. All Rights Reserved.
|
|
17
|
|
18 Contributor(s):
|
|
19
|
|
20 Alternatively, the contents of this file may be used under the terms
|
|
21 of the GNU General Public License (the "GPL"), in which case the
|
|
22 provisions of the GPL are applicable instead of those above. If you
|
|
23 wish to allow use of your version of this file only under the terms of
|
|
24 the GPL and not to allow others to use your version of this file under
|
|
25 the MPL, indicate your decision by deleting the provisions above and
|
|
26 replace them with the notice and other provisions required by the
|
|
27 GPL. If you do not delete the provisions above, a recipient may use
|
|
28 your version of this file under either the MPL or the GPL.
|
|
29 */
|
|
30
|
|
/* Fallback validity predicate for n-byte character sequences: unless
   IS_INVALID_CHAR is already defined at this point, no sequence is ever
   reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#  define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
|
|
34
|
|
/* Expands to the `case BT_LEAD<n>:` arm of a byte-type switch for an n-byte
   character sequence that starts at ptr:

     - fewer than n bytes remain before `end`: return XML_TOK_PARTIAL_CHAR;
     - IS_INVALID_CHAR rejects the sequence: store ptr in *nextTokPtr and
       return XML_TOK_INVALID;
     - otherwise: advance ptr by n and break out of the switch.

   `enc` and `end` are NOT parameters; they are picked up from the scope in
   which the macro is expanded.  `n` is token-pasted onto BT_LEAD and handed
   on to IS_INVALID_CHAR as-is, so it must be a plain integer literal and is
   deliberately left unparenthesized. */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
  case BT_LEAD ## n: \
    if (end - (ptr) < n) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_INVALID_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += n; \
    break;
|
|
45
|
|
/* Expands to every switch arm that deals with bytes which may not be taken
   at face value.  The three multi-byte lead types are delegated to
   INVALID_LEAD_CASE; stray trail bytes, malformed bytes and non-XML bytes
   store ptr in *nextTokPtr and return XML_TOK_INVALID right away. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
|
|
55
|
|
/* Expands to the `case BT_LEAD<n>:` arm used while scanning the interior of
   a name, for an n-byte character sequence starting at ptr:

     - fewer than n bytes remain before end: return XML_TOK_PARTIAL_CHAR;
     - IS_NAME_CHAR does not accept the sequence: store ptr in *nextTokPtr
       and return XML_TOK_INVALID;
     - otherwise: advance ptr by n and break out of the switch.

   `n` is token-pasted onto BT_LEAD and forwarded to IS_NAME_CHAR as-is, so
   it must be a plain integer literal and is deliberately left
   unparenthesized.  The other parameters are parenthesized wherever this
   macro uses them in an expression of its own. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < n) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NAME_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += n; \
    break;
|
|
66
|
|
/* Expands to all switch arms that accept one name character at ptr:

     - BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC; a rejected
       character stores ptr in *nextTokPtr and returns XML_TOK_INVALID, an
       accepted one falls through to the single-unit arm below;
     - BT_NMSTRT, BT_HEX, BT_DIGIT, BT_NAME and BT_MINUS advance ptr by
       MINBPC(enc) and break out of the switch;
     - the 2-, 3- and 4-byte lead types are handled by CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
|
|
83
|
|
/* Expands to the `case BT_LEAD<n>:` arm used when a name-start character is
   expected, for an n-byte character sequence starting at ptr:

     - fewer than n bytes remain before end: return XML_TOK_PARTIAL_CHAR;
     - IS_NMSTRT_CHAR does not accept the sequence: store ptr in
       *nextTokPtr and return XML_TOK_INVALID;
     - otherwise: advance ptr by n and break out of the switch.

   `n` is token-pasted onto BT_LEAD and forwarded to IS_NMSTRT_CHAR as-is,
   so it must be a plain integer literal and is deliberately left
   unparenthesized.  The other parameters are parenthesized wherever this
   macro uses them in an expression of its own. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < n) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += n; \
    break;
|
|
94
|
|
/* Expands to all switch arms that accept one name-start character at ptr:

     - BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC; a rejected
       character stores ptr in *nextTokPtr and returns XML_TOK_INVALID, an
       accepted one falls through to the single-unit arm below;
     - BT_NMSTRT and BT_HEX advance ptr by MINBPC(enc) and break out of the
       switch;
     - the 2-, 3- and 4-byte lead types are handled by CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
|
|
108
|
|
/* Name-decoration hook: the functions below are declared as PREFIX(name).
   Unless PREFIX is already defined at this point, it leaves the identifier
   unchanged. */
#if !defined(PREFIX)
#  define PREFIX(ident) ident
#endif
|
|
112
|
|
113 /* ptr points to character following "<!-" */
|
|
114
|
|
115 static
|
|
116 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
|
|
117 const char **nextTokPtr)
|
|
118 {
|
|
119 if (ptr != end) {
|
|
120 if (!CHAR_MATCHES(enc, ptr, '-')) {
|
|
121 *nextTokPtr = ptr;
|
|
122 return XML_TOK_INVALID;
|
|
123 }
|
|
124 ptr += MINBPC(enc);
|
|
125 while (ptr != end) {
|
|
126 switch (BYTE_TYPE(enc, ptr)) {
|
|
127 INVALID_CASES(ptr, nextTokPtr)
|
|
128 case BT_MINUS:
|
|
129 if ((ptr += MINBPC(enc)) == end)
|
|
130 return XML_TOK_PARTIAL;
|
|
131 if (CHAR_MATCHES(enc, ptr, '-')) {
|
|
132 if ((ptr += MINBPC(enc)) == end)
|
|
133 return XML_TOK_PARTIAL;
|
|
134 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
135 *nextTokPtr = ptr;
|
|
136 return XML_TOK_INVALID;
|
|
137 }
|
|
138 *nextTokPtr = ptr + MINBPC(enc);
|
|
139 return XML_TOK_COMMENT;
|
|
140 }
|
|
141 break;
|
|
142 default:
|
|
143 ptr += MINBPC(enc);
|
|
144 break;
|
|
145 }
|
|
146 }
|
|
147 }
|
|
148 return XML_TOK_PARTIAL;
|
|
149 }
|
|
150
|
|
151 /* ptr points to character following "<!" */
|
|
152
|
|
153 static
|
|
154 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
|
|
155 const char **nextTokPtr)
|
|
156 {
|
|
157 if (ptr == end)
|
|
158 return XML_TOK_PARTIAL;
|
|
159 switch (BYTE_TYPE(enc, ptr)) {
|
|
160 case BT_MINUS:
|
|
161 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
162 case BT_LSQB:
|
|
163 *nextTokPtr = ptr + MINBPC(enc);
|
|
164 return XML_TOK_COND_SECT_OPEN;
|
|
165 case BT_NMSTRT:
|
|
166 case BT_HEX:
|
|
167 ptr += MINBPC(enc);
|
|
168 break;
|
|
169 default:
|
|
170 *nextTokPtr = ptr;
|
|
171 return XML_TOK_INVALID;
|
|
172 }
|
|
173 while (ptr != end) {
|
|
174 switch (BYTE_TYPE(enc, ptr)) {
|
|
175 case BT_PERCNT:
|
|
176 if (ptr + MINBPC(enc) == end)
|
|
177 return XML_TOK_PARTIAL;
|
|
178 /* don't allow <!ENTITY% foo "whatever"> */
|
|
179 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
|
|
180 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
|
|
181 *nextTokPtr = ptr;
|
|
182 return XML_TOK_INVALID;
|
|
183 }
|
|
184 /* fall through */
|
|
185 case BT_S: case BT_CR: case BT_LF:
|
|
186 *nextTokPtr = ptr;
|
|
187 return XML_TOK_DECL_OPEN;
|
|
188 case BT_NMSTRT:
|
|
189 case BT_HEX:
|
|
190 ptr += MINBPC(enc);
|
|
191 break;
|
|
192 default:
|
|
193 *nextTokPtr = ptr;
|
|
194 return XML_TOK_INVALID;
|
|
195 }
|
|
196 }
|
|
197 return XML_TOK_PARTIAL;
|
|
198 }
|
|
199
|
|
200 static
|
|
201 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
|
|
202 {
|
|
203 int upper = 0;
|
|
204 *tokPtr = XML_TOK_PI;
|
|
205 if (end - ptr != MINBPC(enc)*3)
|
|
206 return 1;
|
|
207 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
208 case 'x':
|
|
209 break;
|
|
210 case 'X':
|
|
211 upper = 1;
|
|
212 break;
|
|
213 default:
|
|
214 return 1;
|
|
215 }
|
|
216 ptr += MINBPC(enc);
|
|
217 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
218 case 'm':
|
|
219 break;
|
|
220 case 'M':
|
|
221 upper = 1;
|
|
222 break;
|
|
223 default:
|
|
224 return 1;
|
|
225 }
|
|
226 ptr += MINBPC(enc);
|
|
227 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
228 case 'l':
|
|
229 break;
|
|
230 case 'L':
|
|
231 upper = 1;
|
|
232 break;
|
|
233 default:
|
|
234 return 1;
|
|
235 }
|
|
236 if (upper)
|
|
237 return 0;
|
|
238 *tokPtr = XML_TOK_XML_DECL;
|
|
239 return 1;
|
|
240 }
|
|
241
|
|
242 /* ptr points to character following "<?" */
|
|
243
|
|
244 static
|
|
245 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
|
|
246 const char **nextTokPtr)
|
|
247 {
|
|
248 int tok;
|
|
249 const char *target = ptr;
|
|
250 if (ptr == end)
|
|
251 return XML_TOK_PARTIAL;
|
|
252 switch (BYTE_TYPE(enc, ptr)) {
|
|
253 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
254 default:
|
|
255 *nextTokPtr = ptr;
|
|
256 return XML_TOK_INVALID;
|
|
257 }
|
|
258 while (ptr != end) {
|
|
259 switch (BYTE_TYPE(enc, ptr)) {
|
|
260 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
261 case BT_S: case BT_CR: case BT_LF:
|
|
262 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
|
|
263 *nextTokPtr = ptr;
|
|
264 return XML_TOK_INVALID;
|
|
265 }
|
|
266 ptr += MINBPC(enc);
|
|
267 while (ptr != end) {
|
|
268 switch (BYTE_TYPE(enc, ptr)) {
|
|
269 INVALID_CASES(ptr, nextTokPtr)
|
|
270 case BT_QUEST:
|
|
271 ptr += MINBPC(enc);
|
|
272 if (ptr == end)
|
|
273 return XML_TOK_PARTIAL;
|
|
274 if (CHAR_MATCHES(enc, ptr, '>')) {
|
|
275 *nextTokPtr = ptr + MINBPC(enc);
|
|
276 return tok;
|
|
277 }
|
|
278 break;
|
|
279 default:
|
|
280 ptr += MINBPC(enc);
|
|
281 break;
|
|
282 }
|
|
283 }
|
|
284 return XML_TOK_PARTIAL;
|
|
285 case BT_QUEST:
|
|
286 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
|
|
287 *nextTokPtr = ptr;
|
|
288 return XML_TOK_INVALID;
|
|
289 }
|
|
290 ptr += MINBPC(enc);
|
|
291 if (ptr == end)
|
|
292 return XML_TOK_PARTIAL;
|
|
293 if (CHAR_MATCHES(enc, ptr, '>')) {
|
|
294 *nextTokPtr = ptr + MINBPC(enc);
|
|
295 return tok;
|
|
296 }
|
|
297 /* fall through */
|
|
298 default:
|
|
299 *nextTokPtr = ptr;
|
|
300 return XML_TOK_INVALID;
|
|
301 }
|
|
302 }
|
|
303 return XML_TOK_PARTIAL;
|
|
304 }
|
|
305
|
|
306
|
|
307 static
|
|
308 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
|
|
309 const char **nextTokPtr)
|
|
310 {
|
|
311 int i;
|
|
312 /* CDATA[ */
|
|
313 if (end - ptr < 6 * MINBPC(enc))
|
|
314 return XML_TOK_PARTIAL;
|
|
315 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
|
|
316 if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) {
|
|
317 *nextTokPtr = ptr;
|
|
318 return XML_TOK_INVALID;
|
|
319 }
|
|
320 }
|
|
321 *nextTokPtr = ptr;
|
|
322 return XML_TOK_CDATA_SECT_OPEN;
|
|
323 }
|
|
324
|
|
325 static
|
|
326 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
327 const char **nextTokPtr)
|
|
328 {
|
|
329 if (ptr == end)
|
|
330 return XML_TOK_NONE;
|
|
331 if (MINBPC(enc) > 1) {
|
|
332 size_t n = end - ptr;
|
|
333 if (n & (MINBPC(enc) - 1)) {
|
|
334 n &= ~(MINBPC(enc) - 1);
|
|
335 if (n == 0)
|
|
336 return XML_TOK_PARTIAL;
|
|
337 end = ptr + n;
|
|
338 }
|
|
339 }
|
|
340 switch (BYTE_TYPE(enc, ptr)) {
|
|
341 case BT_RSQB:
|
|
342 ptr += MINBPC(enc);
|
|
343 if (ptr == end)
|
|
344 return XML_TOK_PARTIAL;
|
|
345 if (!CHAR_MATCHES(enc, ptr, ']'))
|
|
346 break;
|
|
347 ptr += MINBPC(enc);
|
|
348 if (ptr == end)
|
|
349 return XML_TOK_PARTIAL;
|
|
350 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
351 ptr -= MINBPC(enc);
|
|
352 break;
|
|
353 }
|
|
354 *nextTokPtr = ptr + MINBPC(enc);
|
|
355 return XML_TOK_CDATA_SECT_CLOSE;
|
|
356 case BT_CR:
|
|
357 ptr += MINBPC(enc);
|
|
358 if (ptr == end)
|
|
359 return XML_TOK_PARTIAL;
|
|
360 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
361 ptr += MINBPC(enc);
|
|
362 *nextTokPtr = ptr;
|
|
363 return XML_TOK_DATA_NEWLINE;
|
|
364 case BT_LF:
|
|
365 *nextTokPtr = ptr + MINBPC(enc);
|
|
366 return XML_TOK_DATA_NEWLINE;
|
|
367 INVALID_CASES(ptr, nextTokPtr)
|
|
368 default:
|
|
369 ptr += MINBPC(enc);
|
|
370 break;
|
|
371 }
|
|
372 while (ptr != end) {
|
|
373 switch (BYTE_TYPE(enc, ptr)) {
|
|
374 #define LEAD_CASE(n) \
|
|
375 case BT_LEAD ## n: \
|
|
376 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
|
|
377 *nextTokPtr = ptr; \
|
|
378 return XML_TOK_DATA_CHARS; \
|
|
379 } \
|
|
380 ptr += n; \
|
|
381 break;
|
|
382 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
383 #undef LEAD_CASE
|
|
384 case BT_NONXML:
|
|
385 case BT_MALFORM:
|
|
386 case BT_TRAIL:
|
|
387 case BT_CR:
|
|
388 case BT_LF:
|
|
389 case BT_RSQB:
|
|
390 *nextTokPtr = ptr;
|
|
391 return XML_TOK_DATA_CHARS;
|
|
392 default:
|
|
393 ptr += MINBPC(enc);
|
|
394 break;
|
|
395 }
|
|
396 }
|
|
397 *nextTokPtr = ptr;
|
|
398 return XML_TOK_DATA_CHARS;
|
|
399 }
|
|
400
|
|
401 /* ptr points to character following "</" */
|
|
402
|
|
403 static
|
|
404 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
|
|
405 const char **nextTokPtr)
|
|
406 {
|
|
407 if (ptr == end)
|
|
408 return XML_TOK_PARTIAL;
|
|
409 switch (BYTE_TYPE(enc, ptr)) {
|
|
410 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
411 default:
|
|
412 *nextTokPtr = ptr;
|
|
413 return XML_TOK_INVALID;
|
|
414 }
|
|
415 while (ptr != end) {
|
|
416 switch (BYTE_TYPE(enc, ptr)) {
|
|
417 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
418 case BT_S: case BT_CR: case BT_LF:
|
|
419 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
|
420 switch (BYTE_TYPE(enc, ptr)) {
|
|
421 case BT_S: case BT_CR: case BT_LF:
|
|
422 break;
|
|
423 case BT_GT:
|
|
424 *nextTokPtr = ptr + MINBPC(enc);
|
|
425 return XML_TOK_END_TAG;
|
|
426 default:
|
|
427 *nextTokPtr = ptr;
|
|
428 return XML_TOK_INVALID;
|
|
429 }
|
|
430 }
|
|
431 return XML_TOK_PARTIAL;
|
|
432 #ifdef XML_NS
|
|
433 case BT_COLON:
|
|
434 /* no need to check qname syntax here, since end-tag must match exactly */
|
|
435 ptr += MINBPC(enc);
|
|
436 break;
|
|
437 #endif
|
|
438 case BT_GT:
|
|
439 *nextTokPtr = ptr + MINBPC(enc);
|
|
440 return XML_TOK_END_TAG;
|
|
441 default:
|
|
442 *nextTokPtr = ptr;
|
|
443 return XML_TOK_INVALID;
|
|
444 }
|
|
445 }
|
|
446 return XML_TOK_PARTIAL;
|
|
447 }
|
|
448
|
|
449 /* ptr points to character following "&#X" */
|
|
450
|
|
451 static
|
|
452 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
|
|
453 const char **nextTokPtr)
|
|
454 {
|
|
455 if (ptr != end) {
|
|
456 switch (BYTE_TYPE(enc, ptr)) {
|
|
457 case BT_DIGIT:
|
|
458 case BT_HEX:
|
|
459 break;
|
|
460 default:
|
|
461 *nextTokPtr = ptr;
|
|
462 return XML_TOK_INVALID;
|
|
463 }
|
|
464 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
|
465 switch (BYTE_TYPE(enc, ptr)) {
|
|
466 case BT_DIGIT:
|
|
467 case BT_HEX:
|
|
468 break;
|
|
469 case BT_SEMI:
|
|
470 *nextTokPtr = ptr + MINBPC(enc);
|
|
471 return XML_TOK_CHAR_REF;
|
|
472 default:
|
|
473 *nextTokPtr = ptr;
|
|
474 return XML_TOK_INVALID;
|
|
475 }
|
|
476 }
|
|
477 }
|
|
478 return XML_TOK_PARTIAL;
|
|
479 }
|
|
480
|
|
481 /* ptr points to character following "&#" */
|
|
482
|
|
483 static
|
|
484 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
|
|
485 const char **nextTokPtr)
|
|
486 {
|
|
487 if (ptr != end) {
|
|
488 if (CHAR_MATCHES(enc, ptr, 'x'))
|
|
489 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
490 switch (BYTE_TYPE(enc, ptr)) {
|
|
491 case BT_DIGIT:
|
|
492 break;
|
|
493 default:
|
|
494 *nextTokPtr = ptr;
|
|
495 return XML_TOK_INVALID;
|
|
496 }
|
|
497 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
|
498 switch (BYTE_TYPE(enc, ptr)) {
|
|
499 case BT_DIGIT:
|
|
500 break;
|
|
501 case BT_SEMI:
|
|
502 *nextTokPtr = ptr + MINBPC(enc);
|
|
503 return XML_TOK_CHAR_REF;
|
|
504 default:
|
|
505 *nextTokPtr = ptr;
|
|
506 return XML_TOK_INVALID;
|
|
507 }
|
|
508 }
|
|
509 }
|
|
510 return XML_TOK_PARTIAL;
|
|
511 }
|
|
512
|
|
513 /* ptr points to character following "&" */
|
|
514
|
|
515 static
|
|
516 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
|
517 const char **nextTokPtr)
|
|
518 {
|
|
519 if (ptr == end)
|
|
520 return XML_TOK_PARTIAL;
|
|
521 switch (BYTE_TYPE(enc, ptr)) {
|
|
522 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
523 case BT_NUM:
|
|
524 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
525 default:
|
|
526 *nextTokPtr = ptr;
|
|
527 return XML_TOK_INVALID;
|
|
528 }
|
|
529 while (ptr != end) {
|
|
530 switch (BYTE_TYPE(enc, ptr)) {
|
|
531 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
532 case BT_SEMI:
|
|
533 *nextTokPtr = ptr + MINBPC(enc);
|
|
534 return XML_TOK_ENTITY_REF;
|
|
535 default:
|
|
536 *nextTokPtr = ptr;
|
|
537 return XML_TOK_INVALID;
|
|
538 }
|
|
539 }
|
|
540 return XML_TOK_PARTIAL;
|
|
541 }
|
|
542
|
|
543 /* ptr points to character following first character of attribute name */
|
|
544
|
|
545 static
|
|
546 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
|
547 const char **nextTokPtr)
|
|
548 {
|
|
549 #ifdef XML_NS
|
|
550 int hadColon = 0;
|
|
551 #endif
|
|
552 while (ptr != end) {
|
|
553 switch (BYTE_TYPE(enc, ptr)) {
|
|
554 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
555 #ifdef XML_NS
|
|
556 case BT_COLON:
|
|
557 if (hadColon) {
|
|
558 *nextTokPtr = ptr;
|
|
559 return XML_TOK_INVALID;
|
|
560 }
|
|
561 hadColon = 1;
|
|
562 ptr += MINBPC(enc);
|
|
563 if (ptr == end)
|
|
564 return XML_TOK_PARTIAL;
|
|
565 switch (BYTE_TYPE(enc, ptr)) {
|
|
566 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
567 default:
|
|
568 *nextTokPtr = ptr;
|
|
569 return XML_TOK_INVALID;
|
|
570 }
|
|
571 break;
|
|
572 #endif
|
|
573 case BT_S: case BT_CR: case BT_LF:
|
|
574 for (;;) {
|
|
575 int t;
|
|
576
|
|
577 ptr += MINBPC(enc);
|
|
578 if (ptr == end)
|
|
579 return XML_TOK_PARTIAL;
|
|
580 t = BYTE_TYPE(enc, ptr);
|
|
581 if (t == BT_EQUALS)
|
|
582 break;
|
|
583 switch (t) {
|
|
584 case BT_S:
|
|
585 case BT_LF:
|
|
586 case BT_CR:
|
|
587 break;
|
|
588 default:
|
|
589 *nextTokPtr = ptr;
|
|
590 return XML_TOK_INVALID;
|
|
591 }
|
|
592 }
|
|
593 /* fall through */
|
|
594 case BT_EQUALS:
|
|
595 {
|
|
596 int open;
|
|
597 #ifdef XML_NS
|
|
598 hadColon = 0;
|
|
599 #endif
|
|
600 for (;;) {
|
|
601
|
|
602 ptr += MINBPC(enc);
|
|
603 if (ptr == end)
|
|
604 return XML_TOK_PARTIAL;
|
|
605 open = BYTE_TYPE(enc, ptr);
|
|
606 if (open == BT_QUOT || open == BT_APOS)
|
|
607 break;
|
|
608 switch (open) {
|
|
609 case BT_S:
|
|
610 case BT_LF:
|
|
611 case BT_CR:
|
|
612 break;
|
|
613 default:
|
|
614 *nextTokPtr = ptr;
|
|
615 return XML_TOK_INVALID;
|
|
616 }
|
|
617 }
|
|
618 ptr += MINBPC(enc);
|
|
619 /* in attribute value */
|
|
620 for (;;) {
|
|
621 int t;
|
|
622 if (ptr == end)
|
|
623 return XML_TOK_PARTIAL;
|
|
624 t = BYTE_TYPE(enc, ptr);
|
|
625 if (t == open)
|
|
626 break;
|
|
627 switch (t) {
|
|
628 INVALID_CASES(ptr, nextTokPtr)
|
|
629 case BT_AMP:
|
|
630 {
|
|
631 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
|
|
632 if (tok <= 0) {
|
|
633 if (tok == XML_TOK_INVALID)
|
|
634 *nextTokPtr = ptr;
|
|
635 return tok;
|
|
636 }
|
|
637 break;
|
|
638 }
|
|
639 case BT_LT:
|
|
640 *nextTokPtr = ptr;
|
|
641 return XML_TOK_INVALID;
|
|
642 default:
|
|
643 ptr += MINBPC(enc);
|
|
644 break;
|
|
645 }
|
|
646 }
|
|
647 ptr += MINBPC(enc);
|
|
648 if (ptr == end)
|
|
649 return XML_TOK_PARTIAL;
|
|
650 switch (BYTE_TYPE(enc, ptr)) {
|
|
651 case BT_S:
|
|
652 case BT_CR:
|
|
653 case BT_LF:
|
|
654 break;
|
|
655 case BT_SOL:
|
|
656 goto sol;
|
|
657 case BT_GT:
|
|
658 goto gt;
|
|
659 default:
|
|
660 *nextTokPtr = ptr;
|
|
661 return XML_TOK_INVALID;
|
|
662 }
|
|
663 /* ptr points to closing quote */
|
|
664 for (;;) {
|
|
665 ptr += MINBPC(enc);
|
|
666 if (ptr == end)
|
|
667 return XML_TOK_PARTIAL;
|
|
668 switch (BYTE_TYPE(enc, ptr)) {
|
|
669 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
670 case BT_S: case BT_CR: case BT_LF:
|
|
671 continue;
|
|
672 case BT_GT:
|
|
673 gt:
|
|
674 *nextTokPtr = ptr + MINBPC(enc);
|
|
675 return XML_TOK_START_TAG_WITH_ATTS;
|
|
676 case BT_SOL:
|
|
677 sol:
|
|
678 ptr += MINBPC(enc);
|
|
679 if (ptr == end)
|
|
680 return XML_TOK_PARTIAL;
|
|
681 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
682 *nextTokPtr = ptr;
|
|
683 return XML_TOK_INVALID;
|
|
684 }
|
|
685 *nextTokPtr = ptr + MINBPC(enc);
|
|
686 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
|
|
687 default:
|
|
688 *nextTokPtr = ptr;
|
|
689 return XML_TOK_INVALID;
|
|
690 }
|
|
691 break;
|
|
692 }
|
|
693 break;
|
|
694 }
|
|
695 default:
|
|
696 *nextTokPtr = ptr;
|
|
697 return XML_TOK_INVALID;
|
|
698 }
|
|
699 }
|
|
700 return XML_TOK_PARTIAL;
|
|
701 }
|
|
702
|
|
703 /* ptr points to character following "<" */
|
|
704
|
|
705 static
|
|
706 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
|
707 const char **nextTokPtr)
|
|
708 {
|
|
709 #ifdef XML_NS
|
|
710 int hadColon;
|
|
711 #endif
|
|
712 if (ptr == end)
|
|
713 return XML_TOK_PARTIAL;
|
|
714 switch (BYTE_TYPE(enc, ptr)) {
|
|
715 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
716 case BT_EXCL:
|
|
717 if ((ptr += MINBPC(enc)) == end)
|
|
718 return XML_TOK_PARTIAL;
|
|
719 switch (BYTE_TYPE(enc, ptr)) {
|
|
720 case BT_MINUS:
|
|
721 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
722 case BT_LSQB:
|
|
723 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
724 }
|
|
725 *nextTokPtr = ptr;
|
|
726 return XML_TOK_INVALID;
|
|
727 case BT_QUEST:
|
|
728 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
729 case BT_SOL:
|
|
730 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
731 default:
|
|
732 *nextTokPtr = ptr;
|
|
733 return XML_TOK_INVALID;
|
|
734 }
|
|
735 #ifdef XML_NS
|
|
736 hadColon = 0;
|
|
737 #endif
|
|
738 /* we have a start-tag */
|
|
739 while (ptr != end) {
|
|
740 switch (BYTE_TYPE(enc, ptr)) {
|
|
741 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
742 #ifdef XML_NS
|
|
743 case BT_COLON:
|
|
744 if (hadColon) {
|
|
745 *nextTokPtr = ptr;
|
|
746 return XML_TOK_INVALID;
|
|
747 }
|
|
748 hadColon = 1;
|
|
749 ptr += MINBPC(enc);
|
|
750 if (ptr == end)
|
|
751 return XML_TOK_PARTIAL;
|
|
752 switch (BYTE_TYPE(enc, ptr)) {
|
|
753 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
754 default:
|
|
755 *nextTokPtr = ptr;
|
|
756 return XML_TOK_INVALID;
|
|
757 }
|
|
758 break;
|
|
759 #endif
|
|
760 case BT_S: case BT_CR: case BT_LF:
|
|
761 {
|
|
762 ptr += MINBPC(enc);
|
|
763 while (ptr != end) {
|
|
764 switch (BYTE_TYPE(enc, ptr)) {
|
|
765 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
766 case BT_GT:
|
|
767 goto gt;
|
|
768 case BT_SOL:
|
|
769 goto sol;
|
|
770 case BT_S: case BT_CR: case BT_LF:
|
|
771 ptr += MINBPC(enc);
|
|
772 continue;
|
|
773 default:
|
|
774 *nextTokPtr = ptr;
|
|
775 return XML_TOK_INVALID;
|
|
776 }
|
|
777 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
|
|
778 }
|
|
779 return XML_TOK_PARTIAL;
|
|
780 }
|
|
781 case BT_GT:
|
|
782 gt:
|
|
783 *nextTokPtr = ptr + MINBPC(enc);
|
|
784 return XML_TOK_START_TAG_NO_ATTS;
|
|
785 case BT_SOL:
|
|
786 sol:
|
|
787 ptr += MINBPC(enc);
|
|
788 if (ptr == end)
|
|
789 return XML_TOK_PARTIAL;
|
|
790 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
791 *nextTokPtr = ptr;
|
|
792 return XML_TOK_INVALID;
|
|
793 }
|
|
794 *nextTokPtr = ptr + MINBPC(enc);
|
|
795 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
|
|
796 default:
|
|
797 *nextTokPtr = ptr;
|
|
798 return XML_TOK_INVALID;
|
|
799 }
|
|
800 }
|
|
801 return XML_TOK_PARTIAL;
|
|
802 }
|
|
803
|
|
804 static
|
|
805 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
806 const char **nextTokPtr)
|
|
807 {
|
|
808 if (ptr == end)
|
|
809 return XML_TOK_NONE;
|
|
810 if (MINBPC(enc) > 1) {
|
|
811 size_t n = end - ptr;
|
|
812 if (n & (MINBPC(enc) - 1)) {
|
|
813 n &= ~(MINBPC(enc) - 1);
|
|
814 if (n == 0)
|
|
815 return XML_TOK_PARTIAL;
|
|
816 end = ptr + n;
|
|
817 }
|
|
818 }
|
|
819 switch (BYTE_TYPE(enc, ptr)) {
|
|
820 case BT_LT:
|
|
821 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
822 case BT_AMP:
|
|
823 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
824 case BT_CR:
|
|
825 ptr += MINBPC(enc);
|
|
826 if (ptr == end)
|
|
827 return XML_TOK_TRAILING_CR;
|
|
828 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
829 ptr += MINBPC(enc);
|
|
830 *nextTokPtr = ptr;
|
|
831 return XML_TOK_DATA_NEWLINE;
|
|
832 case BT_LF:
|
|
833 *nextTokPtr = ptr + MINBPC(enc);
|
|
834 return XML_TOK_DATA_NEWLINE;
|
|
835 case BT_RSQB:
|
|
836 ptr += MINBPC(enc);
|
|
837 if (ptr == end)
|
|
838 return XML_TOK_TRAILING_RSQB;
|
|
839 if (!CHAR_MATCHES(enc, ptr, ']'))
|
|
840 break;
|
|
841 ptr += MINBPC(enc);
|
|
842 if (ptr == end)
|
|
843 return XML_TOK_TRAILING_RSQB;
|
|
844 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
845 ptr -= MINBPC(enc);
|
|
846 break;
|
|
847 }
|
|
848 *nextTokPtr = ptr;
|
|
849 return XML_TOK_INVALID;
|
|
850 INVALID_CASES(ptr, nextTokPtr)
|
|
851 default:
|
|
852 ptr += MINBPC(enc);
|
|
853 break;
|
|
854 }
|
|
855 while (ptr != end) {
|
|
856 switch (BYTE_TYPE(enc, ptr)) {
|
|
857 #define LEAD_CASE(n) \
|
|
858 case BT_LEAD ## n: \
|
|
859 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
|
|
860 *nextTokPtr = ptr; \
|
|
861 return XML_TOK_DATA_CHARS; \
|
|
862 } \
|
|
863 ptr += n; \
|
|
864 break;
|
|
865 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
866 #undef LEAD_CASE
|
|
867 case BT_RSQB:
|
|
868 if (ptr + MINBPC(enc) != end) {
|
|
869 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) {
|
|
870 ptr += MINBPC(enc);
|
|
871 break;
|
|
872 }
|
|
873 if (ptr + 2*MINBPC(enc) != end) {
|
|
874 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) {
|
|
875 ptr += MINBPC(enc);
|
|
876 break;
|
|
877 }
|
|
878 *nextTokPtr = ptr + 2*MINBPC(enc);
|
|
879 return XML_TOK_INVALID;
|
|
880 }
|
|
881 }
|
|
882 /* fall through */
|
|
883 case BT_AMP:
|
|
884 case BT_LT:
|
|
885 case BT_NONXML:
|
|
886 case BT_MALFORM:
|
|
887 case BT_TRAIL:
|
|
888 case BT_CR:
|
|
889 case BT_LF:
|
|
890 *nextTokPtr = ptr;
|
|
891 return XML_TOK_DATA_CHARS;
|
|
892 default:
|
|
893 ptr += MINBPC(enc);
|
|
894 break;
|
|
895 }
|
|
896 }
|
|
897 *nextTokPtr = ptr;
|
|
898 return XML_TOK_DATA_CHARS;
|
|
899 }
|
|
900
|
|
901 /* ptr points to character following "%" */
|
|
902
|
|
903 static
|
|
904 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
|
905 const char **nextTokPtr)
|
|
906 {
|
|
907 if (ptr == end)
|
|
908 return XML_TOK_PARTIAL;
|
|
909 switch (BYTE_TYPE(enc, ptr)) {
|
|
910 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
911 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
|
|
912 *nextTokPtr = ptr;
|
|
913 return XML_TOK_PERCENT;
|
|
914 default:
|
|
915 *nextTokPtr = ptr;
|
|
916 return XML_TOK_INVALID;
|
|
917 }
|
|
918 while (ptr != end) {
|
|
919 switch (BYTE_TYPE(enc, ptr)) {
|
|
920 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
921 case BT_SEMI:
|
|
922 *nextTokPtr = ptr + MINBPC(enc);
|
|
923 return XML_TOK_PARAM_ENTITY_REF;
|
|
924 default:
|
|
925 *nextTokPtr = ptr;
|
|
926 return XML_TOK_INVALID;
|
|
927 }
|
|
928 }
|
|
929 return XML_TOK_PARTIAL;
|
|
930 }
|
|
931
|
|
932 static
|
|
933 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
|
|
934 const char **nextTokPtr)
|
|
935 {
|
|
936 if (ptr == end)
|
|
937 return XML_TOK_PARTIAL;
|
|
938 switch (BYTE_TYPE(enc, ptr)) {
|
|
939 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
940 default:
|
|
941 *nextTokPtr = ptr;
|
|
942 return XML_TOK_INVALID;
|
|
943 }
|
|
944 while (ptr != end) {
|
|
945 switch (BYTE_TYPE(enc, ptr)) {
|
|
946 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
947 case BT_CR: case BT_LF: case BT_S:
|
|
948 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
|
|
949 *nextTokPtr = ptr;
|
|
950 return XML_TOK_POUND_NAME;
|
|
951 default:
|
|
952 *nextTokPtr = ptr;
|
|
953 return XML_TOK_INVALID;
|
|
954 }
|
|
955 }
|
|
956 return XML_TOK_PARTIAL;
|
|
957 }
|
|
958
|
|
959 static
|
|
960 int PREFIX(scanLit)(int open, const ENCODING *enc,
|
|
961 const char *ptr, const char *end,
|
|
962 const char **nextTokPtr)
|
|
963 {
|
|
964 while (ptr != end) {
|
|
965 int t = BYTE_TYPE(enc, ptr);
|
|
966 switch (t) {
|
|
967 INVALID_CASES(ptr, nextTokPtr)
|
|
968 case BT_QUOT:
|
|
969 case BT_APOS:
|
|
970 ptr += MINBPC(enc);
|
|
971 if (t != open)
|
|
972 break;
|
|
973 if (ptr == end)
|
|
974 return XML_TOK_PARTIAL;
|
|
975 *nextTokPtr = ptr;
|
|
976 switch (BYTE_TYPE(enc, ptr)) {
|
|
977 case BT_S: case BT_CR: case BT_LF:
|
|
978 case BT_GT: case BT_PERCNT: case BT_LSQB:
|
|
979 return XML_TOK_LITERAL;
|
|
980 default:
|
|
981 return XML_TOK_INVALID;
|
|
982 }
|
|
983 default:
|
|
984 ptr += MINBPC(enc);
|
|
985 break;
|
|
986 }
|
|
987 }
|
|
988 return XML_TOK_PARTIAL;
|
|
989 }
|
|
990
|
|
991 static
|
|
992 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
993 const char **nextTokPtr)
|
|
994 {
|
|
995 int tok;
|
|
996 if (ptr == end)
|
|
997 return XML_TOK_NONE;
|
|
998 if (MINBPC(enc) > 1) {
|
|
999 size_t n = end - ptr;
|
|
1000 if (n & (MINBPC(enc) - 1)) {
|
|
1001 n &= ~(MINBPC(enc) - 1);
|
|
1002 if (n == 0)
|
|
1003 return XML_TOK_PARTIAL;
|
|
1004 end = ptr + n;
|
|
1005 }
|
|
1006 }
|
|
1007 switch (BYTE_TYPE(enc, ptr)) {
|
|
1008 case BT_QUOT:
|
|
1009 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1010 case BT_APOS:
|
|
1011 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1012 case BT_LT:
|
|
1013 {
|
|
1014 ptr += MINBPC(enc);
|
|
1015 if (ptr == end)
|
|
1016 return XML_TOK_PARTIAL;
|
|
1017 switch (BYTE_TYPE(enc, ptr)) {
|
|
1018 case BT_EXCL:
|
|
1019 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1020 case BT_QUEST:
|
|
1021 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1022 case BT_NMSTRT:
|
|
1023 case BT_HEX:
|
|
1024 case BT_NONASCII:
|
|
1025 case BT_LEAD2:
|
|
1026 case BT_LEAD3:
|
|
1027 case BT_LEAD4:
|
|
1028 *nextTokPtr = ptr - MINBPC(enc);
|
|
1029 return XML_TOK_INSTANCE_START;
|
|
1030 }
|
|
1031 *nextTokPtr = ptr;
|
|
1032 return XML_TOK_INVALID;
|
|
1033 }
|
|
1034 case BT_CR:
|
|
1035 if (ptr + MINBPC(enc) == end)
|
|
1036 return XML_TOK_TRAILING_CR;
|
|
1037 /* fall through */
|
|
1038 case BT_S: case BT_LF:
|
|
1039 for (;;) {
|
|
1040 ptr += MINBPC(enc);
|
|
1041 if (ptr == end)
|
|
1042 break;
|
|
1043 switch (BYTE_TYPE(enc, ptr)) {
|
|
1044 case BT_S: case BT_LF:
|
|
1045 break;
|
|
1046 case BT_CR:
|
|
1047 /* don't split CR/LF pair */
|
|
1048 if (ptr + MINBPC(enc) != end)
|
|
1049 break;
|
|
1050 /* fall through */
|
|
1051 default:
|
|
1052 *nextTokPtr = ptr;
|
|
1053 return XML_TOK_PROLOG_S;
|
|
1054 }
|
|
1055 }
|
|
1056 *nextTokPtr = ptr;
|
|
1057 return XML_TOK_PROLOG_S;
|
|
1058 case BT_PERCNT:
|
|
1059 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1060 case BT_COMMA:
|
|
1061 *nextTokPtr = ptr + MINBPC(enc);
|
|
1062 return XML_TOK_COMMA;
|
|
1063 case BT_LSQB:
|
|
1064 *nextTokPtr = ptr + MINBPC(enc);
|
|
1065 return XML_TOK_OPEN_BRACKET;
|
|
1066 case BT_RSQB:
|
|
1067 ptr += MINBPC(enc);
|
|
1068 if (ptr == end)
|
|
1069 return XML_TOK_PARTIAL;
|
|
1070 if (CHAR_MATCHES(enc, ptr, ']')) {
|
|
1071 if (ptr + MINBPC(enc) == end)
|
|
1072 return XML_TOK_PARTIAL;
|
|
1073 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) {
|
|
1074 *nextTokPtr = ptr + 2*MINBPC(enc);
|
|
1075 return XML_TOK_COND_SECT_CLOSE;
|
|
1076 }
|
|
1077 }
|
|
1078 *nextTokPtr = ptr;
|
|
1079 return XML_TOK_CLOSE_BRACKET;
|
|
1080 case BT_LPAR:
|
|
1081 *nextTokPtr = ptr + MINBPC(enc);
|
|
1082 return XML_TOK_OPEN_PAREN;
|
|
1083 case BT_RPAR:
|
|
1084 ptr += MINBPC(enc);
|
|
1085 if (ptr == end)
|
|
1086 return XML_TOK_PARTIAL;
|
|
1087 switch (BYTE_TYPE(enc, ptr)) {
|
|
1088 case BT_AST:
|
|
1089 *nextTokPtr = ptr + MINBPC(enc);
|
|
1090 return XML_TOK_CLOSE_PAREN_ASTERISK;
|
|
1091 case BT_QUEST:
|
|
1092 *nextTokPtr = ptr + MINBPC(enc);
|
|
1093 return XML_TOK_CLOSE_PAREN_QUESTION;
|
|
1094 case BT_PLUS:
|
|
1095 *nextTokPtr = ptr + MINBPC(enc);
|
|
1096 return XML_TOK_CLOSE_PAREN_PLUS;
|
|
1097 case BT_CR: case BT_LF: case BT_S:
|
|
1098 case BT_GT: case BT_COMMA: case BT_VERBAR:
|
|
1099 case BT_RPAR:
|
|
1100 *nextTokPtr = ptr;
|
|
1101 return XML_TOK_CLOSE_PAREN;
|
|
1102 }
|
|
1103 *nextTokPtr = ptr;
|
|
1104 return XML_TOK_INVALID;
|
|
1105 case BT_VERBAR:
|
|
1106 *nextTokPtr = ptr + MINBPC(enc);
|
|
1107 return XML_TOK_OR;
|
|
1108 case BT_GT:
|
|
1109 *nextTokPtr = ptr + MINBPC(enc);
|
|
1110 return XML_TOK_DECL_CLOSE;
|
|
1111 case BT_NUM:
|
|
1112 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1113 #define LEAD_CASE(n) \
|
|
1114 case BT_LEAD ## n: \
|
|
1115 if (end - ptr < n) \
|
|
1116 return XML_TOK_PARTIAL_CHAR; \
|
|
1117 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
1118 ptr += n; \
|
|
1119 tok = XML_TOK_NAME; \
|
|
1120 break; \
|
|
1121 } \
|
|
1122 if (IS_NAME_CHAR(enc, ptr, n)) { \
|
|
1123 ptr += n; \
|
|
1124 tok = XML_TOK_NMTOKEN; \
|
|
1125 break; \
|
|
1126 } \
|
|
1127 *nextTokPtr = ptr; \
|
|
1128 return XML_TOK_INVALID;
|
|
1129 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1130 #undef LEAD_CASE
|
|
1131 case BT_NMSTRT:
|
|
1132 case BT_HEX:
|
|
1133 tok = XML_TOK_NAME;
|
|
1134 ptr += MINBPC(enc);
|
|
1135 break;
|
|
1136 case BT_DIGIT:
|
|
1137 case BT_NAME:
|
|
1138 case BT_MINUS:
|
|
1139 #ifdef XML_NS
|
|
1140 case BT_COLON:
|
|
1141 #endif
|
|
1142 tok = XML_TOK_NMTOKEN;
|
|
1143 ptr += MINBPC(enc);
|
|
1144 break;
|
|
1145 case BT_NONASCII:
|
|
1146 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
|
|
1147 ptr += MINBPC(enc);
|
|
1148 tok = XML_TOK_NAME;
|
|
1149 break;
|
|
1150 }
|
|
1151 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
|
|
1152 ptr += MINBPC(enc);
|
|
1153 tok = XML_TOK_NMTOKEN;
|
|
1154 break;
|
|
1155 }
|
|
1156 /* fall through */
|
|
1157 default:
|
|
1158 *nextTokPtr = ptr;
|
|
1159 return XML_TOK_INVALID;
|
|
1160 }
|
|
1161 while (ptr != end) {
|
|
1162 switch (BYTE_TYPE(enc, ptr)) {
|
|
1163 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
1164 case BT_GT: case BT_RPAR: case BT_COMMA:
|
|
1165 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
|
|
1166 case BT_S: case BT_CR: case BT_LF:
|
|
1167 *nextTokPtr = ptr;
|
|
1168 return tok;
|
|
1169 #ifdef XML_NS
|
|
1170 case BT_COLON:
|
|
1171 ptr += MINBPC(enc);
|
|
1172 switch (tok) {
|
|
1173 case XML_TOK_NAME:
|
|
1174 if (ptr == end)
|
|
1175 return XML_TOK_PARTIAL;
|
|
1176 tok = XML_TOK_PREFIXED_NAME;
|
|
1177 switch (BYTE_TYPE(enc, ptr)) {
|
|
1178 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
1179 default:
|
|
1180 tok = XML_TOK_NMTOKEN;
|
|
1181 break;
|
|
1182 }
|
|
1183 break;
|
|
1184 case XML_TOK_PREFIXED_NAME:
|
|
1185 tok = XML_TOK_NMTOKEN;
|
|
1186 break;
|
|
1187 }
|
|
1188 break;
|
|
1189 #endif
|
|
1190 case BT_PLUS:
|
|
1191 if (tok == XML_TOK_NMTOKEN) {
|
|
1192 *nextTokPtr = ptr;
|
|
1193 return XML_TOK_INVALID;
|
|
1194 }
|
|
1195 *nextTokPtr = ptr + MINBPC(enc);
|
|
1196 return XML_TOK_NAME_PLUS;
|
|
1197 case BT_AST:
|
|
1198 if (tok == XML_TOK_NMTOKEN) {
|
|
1199 *nextTokPtr = ptr;
|
|
1200 return XML_TOK_INVALID;
|
|
1201 }
|
|
1202 *nextTokPtr = ptr + MINBPC(enc);
|
|
1203 return XML_TOK_NAME_ASTERISK;
|
|
1204 case BT_QUEST:
|
|
1205 if (tok == XML_TOK_NMTOKEN) {
|
|
1206 *nextTokPtr = ptr;
|
|
1207 return XML_TOK_INVALID;
|
|
1208 }
|
|
1209 *nextTokPtr = ptr + MINBPC(enc);
|
|
1210 return XML_TOK_NAME_QUESTION;
|
|
1211 default:
|
|
1212 *nextTokPtr = ptr;
|
|
1213 return XML_TOK_INVALID;
|
|
1214 }
|
|
1215 }
|
|
1216 return XML_TOK_PARTIAL;
|
|
1217 }
|
|
1218
|
|
1219 static
|
|
1220 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
1221 const char **nextTokPtr)
|
|
1222 {
|
|
1223 const char *start;
|
|
1224 if (ptr == end)
|
|
1225 return XML_TOK_NONE;
|
|
1226 start = ptr;
|
|
1227 while (ptr != end) {
|
|
1228 switch (BYTE_TYPE(enc, ptr)) {
|
|
1229 #define LEAD_CASE(n) \
|
|
1230 case BT_LEAD ## n: ptr += n; break;
|
|
1231 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1232 #undef LEAD_CASE
|
|
1233 case BT_AMP:
|
|
1234 if (ptr == start)
|
|
1235 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1236 *nextTokPtr = ptr;
|
|
1237 return XML_TOK_DATA_CHARS;
|
|
1238 case BT_LT:
|
|
1239 /* this is for inside entity references */
|
|
1240 *nextTokPtr = ptr;
|
|
1241 return XML_TOK_INVALID;
|
|
1242 case BT_LF:
|
|
1243 if (ptr == start) {
|
|
1244 *nextTokPtr = ptr + MINBPC(enc);
|
|
1245 return XML_TOK_DATA_NEWLINE;
|
|
1246 }
|
|
1247 *nextTokPtr = ptr;
|
|
1248 return XML_TOK_DATA_CHARS;
|
|
1249 case BT_CR:
|
|
1250 if (ptr == start) {
|
|
1251 ptr += MINBPC(enc);
|
|
1252 if (ptr == end)
|
|
1253 return XML_TOK_TRAILING_CR;
|
|
1254 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
1255 ptr += MINBPC(enc);
|
|
1256 *nextTokPtr = ptr;
|
|
1257 return XML_TOK_DATA_NEWLINE;
|
|
1258 }
|
|
1259 *nextTokPtr = ptr;
|
|
1260 return XML_TOK_DATA_CHARS;
|
|
1261 case BT_S:
|
|
1262 if (ptr == start) {
|
|
1263 *nextTokPtr = ptr + MINBPC(enc);
|
|
1264 return XML_TOK_ATTRIBUTE_VALUE_S;
|
|
1265 }
|
|
1266 *nextTokPtr = ptr;
|
|
1267 return XML_TOK_DATA_CHARS;
|
|
1268 default:
|
|
1269 ptr += MINBPC(enc);
|
|
1270 break;
|
|
1271 }
|
|
1272 }
|
|
1273 *nextTokPtr = ptr;
|
|
1274 return XML_TOK_DATA_CHARS;
|
|
1275 }
|
|
1276
|
|
1277 static
|
|
1278 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
1279 const char **nextTokPtr)
|
|
1280 {
|
|
1281 const char *start;
|
|
1282 if (ptr == end)
|
|
1283 return XML_TOK_NONE;
|
|
1284 start = ptr;
|
|
1285 while (ptr != end) {
|
|
1286 switch (BYTE_TYPE(enc, ptr)) {
|
|
1287 #define LEAD_CASE(n) \
|
|
1288 case BT_LEAD ## n: ptr += n; break;
|
|
1289 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1290 #undef LEAD_CASE
|
|
1291 case BT_AMP:
|
|
1292 if (ptr == start)
|
|
1293 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1294 *nextTokPtr = ptr;
|
|
1295 return XML_TOK_DATA_CHARS;
|
|
1296 case BT_PERCNT:
|
|
1297 if (ptr == start)
|
|
1298 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1299 *nextTokPtr = ptr;
|
|
1300 return XML_TOK_DATA_CHARS;
|
|
1301 case BT_LF:
|
|
1302 if (ptr == start) {
|
|
1303 *nextTokPtr = ptr + MINBPC(enc);
|
|
1304 return XML_TOK_DATA_NEWLINE;
|
|
1305 }
|
|
1306 *nextTokPtr = ptr;
|
|
1307 return XML_TOK_DATA_CHARS;
|
|
1308 case BT_CR:
|
|
1309 if (ptr == start) {
|
|
1310 ptr += MINBPC(enc);
|
|
1311 if (ptr == end)
|
|
1312 return XML_TOK_TRAILING_CR;
|
|
1313 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
1314 ptr += MINBPC(enc);
|
|
1315 *nextTokPtr = ptr;
|
|
1316 return XML_TOK_DATA_NEWLINE;
|
|
1317 }
|
|
1318 *nextTokPtr = ptr;
|
|
1319 return XML_TOK_DATA_CHARS;
|
|
1320 default:
|
|
1321 ptr += MINBPC(enc);
|
|
1322 break;
|
|
1323 }
|
|
1324 }
|
|
1325 *nextTokPtr = ptr;
|
|
1326 return XML_TOK_DATA_CHARS;
|
|
1327 }
|
|
1328
|
|
1329 static
|
|
1330 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
|
|
1331 const char **badPtr)
|
|
1332 {
|
|
1333 ptr += MINBPC(enc);
|
|
1334 end -= MINBPC(enc);
|
|
1335 for (; ptr != end; ptr += MINBPC(enc)) {
|
|
1336 switch (BYTE_TYPE(enc, ptr)) {
|
|
1337 case BT_DIGIT:
|
|
1338 case BT_HEX:
|
|
1339 case BT_MINUS:
|
|
1340 case BT_APOS:
|
|
1341 case BT_LPAR:
|
|
1342 case BT_RPAR:
|
|
1343 case BT_PLUS:
|
|
1344 case BT_COMMA:
|
|
1345 case BT_SOL:
|
|
1346 case BT_EQUALS:
|
|
1347 case BT_QUEST:
|
|
1348 case BT_CR:
|
|
1349 case BT_LF:
|
|
1350 case BT_SEMI:
|
|
1351 case BT_EXCL:
|
|
1352 case BT_AST:
|
|
1353 case BT_PERCNT:
|
|
1354 case BT_NUM:
|
|
1355 #ifdef XML_NS
|
|
1356 case BT_COLON:
|
|
1357 #endif
|
|
1358 break;
|
|
1359 case BT_S:
|
|
1360 if (CHAR_MATCHES(enc, ptr, '\t')) {
|
|
1361 *badPtr = ptr;
|
|
1362 return 0;
|
|
1363 }
|
|
1364 break;
|
|
1365 case BT_NAME:
|
|
1366 case BT_NMSTRT:
|
|
1367 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
|
|
1368 break;
|
|
1369 default:
|
|
1370 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
1371 case 0x24: /* $ */
|
|
1372 case 0x40: /* @ */
|
|
1373 break;
|
|
1374 default:
|
|
1375 *badPtr = ptr;
|
|
1376 return 0;
|
|
1377 }
|
|
1378 break;
|
|
1379 }
|
|
1380 }
|
|
1381 return 1;
|
|
1382 }
|
|
1383
|
|
1384 /* This must only be called for a well-formed start-tag or empty element tag.
|
|
1385 Returns the number of attributes. Pointers to the first attsMax attributes
|
|
1386 are stored in atts. */
|
|
1387
|
|
1388 static
|
|
1389 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
|
|
1390 int attsMax, ATTRIBUTE *atts)
|
|
1391 {
|
|
1392 enum { other, inName, inValue } state = inName;
|
|
1393 int nAtts = 0;
|
|
1394 int open;
|
|
1395
|
|
1396 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
|
|
1397 switch (BYTE_TYPE(enc, ptr)) {
|
|
1398 #define START_NAME \
|
|
1399 if (state == other) { \
|
|
1400 if (nAtts < attsMax) { \
|
|
1401 atts[nAtts].name = ptr; \
|
|
1402 atts[nAtts].normalized = 1; \
|
|
1403 } \
|
|
1404 state = inName; \
|
|
1405 }
|
|
1406 #define LEAD_CASE(n) \
|
|
1407 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
|
|
1408 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1409 #undef LEAD_CASE
|
|
1410 case BT_NONASCII:
|
|
1411 case BT_NMSTRT:
|
|
1412 case BT_HEX:
|
|
1413 START_NAME
|
|
1414 break;
|
|
1415 #undef START_NAME
|
|
1416 case BT_QUOT:
|
|
1417 if (state != inValue) {
|
|
1418 if (nAtts < attsMax)
|
|
1419 atts[nAtts].valuePtr = ptr + MINBPC(enc);
|
|
1420 state = inValue;
|
|
1421 open = BT_QUOT;
|
|
1422 }
|
|
1423 else if (open == BT_QUOT) {
|
|
1424 state = other;
|
|
1425 if (nAtts < attsMax)
|
|
1426 atts[nAtts].valueEnd = ptr;
|
|
1427 nAtts++;
|
|
1428 }
|
|
1429 break;
|
|
1430 case BT_APOS:
|
|
1431 if (state != inValue) {
|
|
1432 if (nAtts < attsMax)
|
|
1433 atts[nAtts].valuePtr = ptr + MINBPC(enc);
|
|
1434 state = inValue;
|
|
1435 open = BT_APOS;
|
|
1436 }
|
|
1437 else if (open == BT_APOS) {
|
|
1438 state = other;
|
|
1439 if (nAtts < attsMax)
|
|
1440 atts[nAtts].valueEnd = ptr;
|
|
1441 nAtts++;
|
|
1442 }
|
|
1443 break;
|
|
1444 case BT_AMP:
|
|
1445 if (nAtts < attsMax)
|
|
1446 atts[nAtts].normalized = 0;
|
|
1447 break;
|
|
1448 case BT_S:
|
|
1449 if (state == inName)
|
|
1450 state = other;
|
|
1451 else if (state == inValue
|
|
1452 && nAtts < attsMax
|
|
1453 && atts[nAtts].normalized
|
|
1454 && (ptr == atts[nAtts].valuePtr
|
|
1455 || BYTE_TO_ASCII(enc, ptr) != ' '
|
|
1456 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' '
|
|
1457 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
|
|
1458 atts[nAtts].normalized = 0;
|
|
1459 break;
|
|
1460 case BT_CR: case BT_LF:
|
|
1461 /* This case ensures that the first attribute name is counted
|
|
1462 Apart from that we could just change state on the quote. */
|
|
1463 if (state == inName)
|
|
1464 state = other;
|
|
1465 else if (state == inValue && nAtts < attsMax)
|
|
1466 atts[nAtts].normalized = 0;
|
|
1467 break;
|
|
1468 case BT_GT:
|
|
1469 case BT_SOL:
|
|
1470 if (state != inValue)
|
|
1471 return nAtts;
|
|
1472 break;
|
|
1473 default:
|
|
1474 break;
|
|
1475 }
|
|
1476 }
|
|
1477 /* not reached */
|
|
1478 }
|
|
1479
|
|
1480 static
|
|
1481 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
|
|
1482 {
|
|
1483 int result = 0;
|
|
1484 /* skip &# */
|
|
1485 ptr += 2*MINBPC(enc);
|
|
1486 if (CHAR_MATCHES(enc, ptr, 'x')) {
|
|
1487 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) {
|
|
1488 int c = BYTE_TO_ASCII(enc, ptr);
|
|
1489 switch (c) {
|
|
1490 case '0': case '1': case '2': case '3': case '4':
|
|
1491 case '5': case '6': case '7': case '8': case '9':
|
|
1492 result <<= 4;
|
|
1493 result |= (c - '0');
|
|
1494 break;
|
|
1495 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
1496 result <<= 4;
|
|
1497 result += 10 + (c - 'A');
|
|
1498 break;
|
|
1499 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
1500 result <<= 4;
|
|
1501 result += 10 + (c - 'a');
|
|
1502 break;
|
|
1503 }
|
|
1504 if (result >= 0x110000)
|
|
1505 return -1;
|
|
1506 }
|
|
1507 }
|
|
1508 else {
|
|
1509 for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) {
|
|
1510 int c = BYTE_TO_ASCII(enc, ptr);
|
|
1511 result *= 10;
|
|
1512 result += (c - '0');
|
|
1513 if (result >= 0x110000)
|
|
1514 return -1;
|
|
1515 }
|
|
1516 }
|
|
1517 return checkCharRefNumber(result);
|
|
1518 }
|
|
1519
|
|
1520 static
|
|
1521 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
|
|
1522 {
|
|
1523 switch ((end - ptr)/MINBPC(enc)) {
|
|
1524 case 2:
|
|
1525 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) {
|
|
1526 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
1527 case 'l':
|
|
1528 return '<';
|
|
1529 case 'g':
|
|
1530 return '>';
|
|
1531 }
|
|
1532 }
|
|
1533 break;
|
|
1534 case 3:
|
|
1535 if (CHAR_MATCHES(enc, ptr, 'a')) {
|
|
1536 ptr += MINBPC(enc);
|
|
1537 if (CHAR_MATCHES(enc, ptr, 'm')) {
|
|
1538 ptr += MINBPC(enc);
|
|
1539 if (CHAR_MATCHES(enc, ptr, 'p'))
|
|
1540 return '&';
|
|
1541 }
|
|
1542 }
|
|
1543 break;
|
|
1544 case 4:
|
|
1545 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
1546 case 'q':
|
|
1547 ptr += MINBPC(enc);
|
|
1548 if (CHAR_MATCHES(enc, ptr, 'u')) {
|
|
1549 ptr += MINBPC(enc);
|
|
1550 if (CHAR_MATCHES(enc, ptr, 'o')) {
|
|
1551 ptr += MINBPC(enc);
|
|
1552 if (CHAR_MATCHES(enc, ptr, 't'))
|
|
1553 return '"';
|
|
1554 }
|
|
1555 }
|
|
1556 break;
|
|
1557 case 'a':
|
|
1558 ptr += MINBPC(enc);
|
|
1559 if (CHAR_MATCHES(enc, ptr, 'p')) {
|
|
1560 ptr += MINBPC(enc);
|
|
1561 if (CHAR_MATCHES(enc, ptr, 'o')) {
|
|
1562 ptr += MINBPC(enc);
|
|
1563 if (CHAR_MATCHES(enc, ptr, 's'))
|
|
1564 return '\'';
|
|
1565 }
|
|
1566 }
|
|
1567 break;
|
|
1568 }
|
|
1569 }
|
|
1570 return 0;
|
|
1571 }
|
|
1572
|
|
1573 static
|
|
1574 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
|
1575 {
|
|
1576 for (;;) {
|
|
1577 switch (BYTE_TYPE(enc, ptr1)) {
|
|
1578 #define LEAD_CASE(n) \
|
|
1579 case BT_LEAD ## n: \
|
|
1580 if (*ptr1++ != *ptr2++) \
|
|
1581 return 0;
|
|
1582 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
|
|
1583 #undef LEAD_CASE
|
|
1584 /* fall through */
|
|
1585 if (*ptr1++ != *ptr2++)
|
|
1586 return 0;
|
|
1587 break;
|
|
1588 case BT_NONASCII:
|
|
1589 case BT_NMSTRT:
|
|
1590 #ifdef XML_NS
|
|
1591 case BT_COLON:
|
|
1592 #endif
|
|
1593 case BT_HEX:
|
|
1594 case BT_DIGIT:
|
|
1595 case BT_NAME:
|
|
1596 case BT_MINUS:
|
|
1597 if (*ptr2++ != *ptr1++)
|
|
1598 return 0;
|
|
1599 if (MINBPC(enc) > 1) {
|
|
1600 if (*ptr2++ != *ptr1++)
|
|
1601 return 0;
|
|
1602 if (MINBPC(enc) > 2) {
|
|
1603 if (*ptr2++ != *ptr1++)
|
|
1604 return 0;
|
|
1605 if (MINBPC(enc) > 3) {
|
|
1606 if (*ptr2++ != *ptr1++)
|
|
1607 return 0;
|
|
1608 }
|
|
1609 }
|
|
1610 }
|
|
1611 break;
|
|
1612 default:
|
|
1613 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
|
|
1614 return 1;
|
|
1615 switch (BYTE_TYPE(enc, ptr2)) {
|
|
1616 case BT_LEAD2:
|
|
1617 case BT_LEAD3:
|
|
1618 case BT_LEAD4:
|
|
1619 case BT_NONASCII:
|
|
1620 case BT_NMSTRT:
|
|
1621 #ifdef XML_NS
|
|
1622 case BT_COLON:
|
|
1623 #endif
|
|
1624 case BT_HEX:
|
|
1625 case BT_DIGIT:
|
|
1626 case BT_NAME:
|
|
1627 case BT_MINUS:
|
|
1628 return 0;
|
|
1629 default:
|
|
1630 return 1;
|
|
1631 }
|
|
1632 }
|
|
1633 }
|
|
1634 /* not reached */
|
|
1635 }
|
|
1636
|
|
1637 static
|
|
1638 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
|
1639 {
|
|
1640 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
|
|
1641 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
|
|
1642 return 0;
|
|
1643 }
|
|
1644 switch (BYTE_TYPE(enc, ptr1)) {
|
|
1645 case BT_LEAD2:
|
|
1646 case BT_LEAD3:
|
|
1647 case BT_LEAD4:
|
|
1648 case BT_NONASCII:
|
|
1649 case BT_NMSTRT:
|
|
1650 #ifdef XML_NS
|
|
1651 case BT_COLON:
|
|
1652 #endif
|
|
1653 case BT_HEX:
|
|
1654 case BT_DIGIT:
|
|
1655 case BT_NAME:
|
|
1656 case BT_MINUS:
|
|
1657 return 0;
|
|
1658 default:
|
|
1659 return 1;
|
|
1660 }
|
|
1661 }
|
|
1662
|
|
1663 static
|
|
1664 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
|
|
1665 {
|
|
1666 const char *start = ptr;
|
|
1667 for (;;) {
|
|
1668 switch (BYTE_TYPE(enc, ptr)) {
|
|
1669 #define LEAD_CASE(n) \
|
|
1670 case BT_LEAD ## n: ptr += n; break;
|
|
1671 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1672 #undef LEAD_CASE
|
|
1673 case BT_NONASCII:
|
|
1674 case BT_NMSTRT:
|
|
1675 #ifdef XML_NS
|
|
1676 case BT_COLON:
|
|
1677 #endif
|
|
1678 case BT_HEX:
|
|
1679 case BT_DIGIT:
|
|
1680 case BT_NAME:
|
|
1681 case BT_MINUS:
|
|
1682 ptr += MINBPC(enc);
|
|
1683 break;
|
|
1684 default:
|
|
1685 return ptr - start;
|
|
1686 }
|
|
1687 }
|
|
1688 }
|
|
1689
|
|
1690 static
|
|
1691 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
|
|
1692 {
|
|
1693 for (;;) {
|
|
1694 switch (BYTE_TYPE(enc, ptr)) {
|
|
1695 case BT_LF:
|
|
1696 case BT_CR:
|
|
1697 case BT_S:
|
|
1698 ptr += MINBPC(enc);
|
|
1699 break;
|
|
1700 default:
|
|
1701 return ptr;
|
|
1702 }
|
|
1703 }
|
|
1704 }
|
|
1705
|
|
1706 static
|
|
1707 void PREFIX(updatePosition)(const ENCODING *enc,
|
|
1708 const char *ptr,
|
|
1709 const char *end,
|
|
1710 POSITION *pos)
|
|
1711 {
|
|
1712 while (ptr != end) {
|
|
1713 switch (BYTE_TYPE(enc, ptr)) {
|
|
1714 #define LEAD_CASE(n) \
|
|
1715 case BT_LEAD ## n: \
|
|
1716 ptr += n; \
|
|
1717 break;
|
|
1718 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1719 #undef LEAD_CASE
|
|
1720 case BT_LF:
|
|
1721 pos->columnNumber = (unsigned)-1;
|
|
1722 pos->lineNumber++;
|
|
1723 ptr += MINBPC(enc);
|
|
1724 break;
|
|
1725 case BT_CR:
|
|
1726 pos->lineNumber++;
|
|
1727 ptr += MINBPC(enc);
|
|
1728 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
|
|
1729 ptr += MINBPC(enc);
|
|
1730 pos->columnNumber = (unsigned)-1;
|
|
1731 break;
|
|
1732 default:
|
|
1733 ptr += MINBPC(enc);
|
|
1734 break;
|
|
1735 }
|
|
1736 pos->columnNumber++;
|
|
1737 }
|
|
1738 }
|
|
1739
|
|
1740 #undef DO_LEAD_CASE
|
|
1741 #undef MULTIBYTE_CASES
|
|
1742 #undef INVALID_CASES
|
|
1743 #undef CHECK_NAME_CASE
|
|
1744 #undef CHECK_NAME_CASES
|
|
1745 #undef CHECK_NMSTRT_CASE
|
|
1746 #undef CHECK_NMSTRT_CASES
|