2086
|
1 /*
|
|
2 The contents of this file are subject to the Mozilla Public License
|
|
3 Version 1.1 (the "License"); you may not use this file except in
|
|
4 compliance with the License. You may obtain a copy of the License at
|
|
5 http://www.mozilla.org/MPL/
|
|
6
|
|
7 Software distributed under the License is distributed on an "AS IS"
|
|
8 basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
9 License for the specific language governing rights and limitations
|
|
10 under the License.
|
|
11
|
|
12 The Original Code is expat.
|
|
13
|
|
14 The Initial Developer of the Original Code is James Clark.
|
|
15 Portions created by James Clark are Copyright (C) 1998, 1999
|
|
16 James Clark. All Rights Reserved.
|
|
17
|
|
18 Contributor(s):
|
|
19
|
|
20 */
|
|
21
|
|
/* Default validity hook: unless the including file supplied its own
   IS_INVALID_CHAR, no multi-byte sequence is ever reported as invalid. */
#if !defined(IS_INVALID_CHAR)
#define IS_INVALID_CHAR(enc, ptr, n) (0)
#endif
|
|
25
|
|
/* Switch case for an n-byte lead byte inside raw character data.
   Relies on `enc` and `end` being in scope at the expansion site.
   - fewer than n bytes remain: returns XML_TOK_PARTIAL_CHAR
   - IS_INVALID_CHAR rejects the sequence: stores ptr through nextTokPtr
     and returns XML_TOK_INVALID
   - otherwise: advances ptr by n and leaves the switch */
#define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
  case BT_LEAD ## n: \
    if (end - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (IS_INVALID_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;

/* Switch cases shared by every scanner that walks over raw data:
   the 2-, 3- and 4-byte lead cases above, plus the byte types that are
   never acceptable (non-XML, malformed, stray trail byte), which store
   ptr through nextTokPtr and return XML_TOK_INVALID. */
#define INVALID_CASES(ptr, nextTokPtr) \
  INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
  INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
  case BT_TRAIL: \
  case BT_MALFORM: \
  case BT_NONXML: \
    *(nextTokPtr) = (ptr); \
    return XML_TOK_INVALID;
|
|
46
|
|
/* Switch case for an n-byte lead byte inside a name.
   - fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR
   - IS_NAME_CHAR rejects the sequence: stores ptr through nextTokPtr and
     returns XML_TOK_INVALID
   - otherwise: advances ptr by n and leaves the switch
   Every parameter used in an expression is parenthesized so that
   expression arguments expand safely; n is passed through untouched to
   IS_NAME_CHAR and to the BT_LEAD token paste. */
#define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NAME_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;

/* Switch cases that consume one name character at ptr.
   BT_NONASCII is first checked with IS_NAME_CHAR_MINBPC and, when
   accepted, deliberately falls through into the single-unit name
   character cases, which advance ptr by MINBPC(enc).  Multi-byte lead
   bytes are handled by CHECK_NAME_CASE. */
#define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NAME_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
  case BT_DIGIT: \
  case BT_NAME: \
  case BT_MINUS: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
|
|
74
|
|
/* Switch case for an n-byte lead byte at the start of a name.
   - fewer than n bytes remain before end: returns XML_TOK_PARTIAL_CHAR
   - IS_NMSTRT_CHAR rejects the sequence: stores ptr through nextTokPtr
     and returns XML_TOK_INVALID
   - otherwise: advances ptr by n and leaves the switch
   Every parameter used in an expression is parenthesized so that
   expression arguments expand safely; n is passed through untouched to
   IS_NMSTRT_CHAR and to the BT_LEAD token paste. */
#define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
  case BT_LEAD ## n: \
    if ((end) - (ptr) < (n)) \
      return XML_TOK_PARTIAL_CHAR; \
    if (!IS_NMSTRT_CHAR(enc, ptr, n)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    (ptr) += (n); \
    break;

/* Switch cases that consume one name-start character at ptr.
   BT_NONASCII is first checked with IS_NMSTRT_CHAR_MINBPC and, when
   accepted, deliberately falls through into the single-unit cases,
   which advance ptr by MINBPC(enc).  Multi-byte lead bytes are handled
   by CHECK_NMSTRT_CASE. */
#define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
  case BT_NONASCII: \
    if (!IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
      *(nextTokPtr) = (ptr); \
      return XML_TOK_INVALID; \
    } \
    /* fall through */ \
  case BT_NMSTRT: \
  case BT_HEX: \
    (ptr) += MINBPC(enc); \
    break; \
  CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
  CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
|
|
99
|
|
/* Name-decoration hook: when the including file has not defined PREFIX,
   the functions below keep their plain identifiers. */
#if !defined(PREFIX)
#define PREFIX(ident) ident
#endif
|
|
103
|
|
104 /* ptr points to character following "<!-" */
|
|
105
|
|
106 static
|
|
107 int PREFIX(scanComment)(const ENCODING *enc, const char *ptr, const char *end,
|
|
108 const char **nextTokPtr)
|
|
109 {
|
|
110 if (ptr != end) {
|
|
111 if (!CHAR_MATCHES(enc, ptr, '-')) {
|
|
112 *nextTokPtr = ptr;
|
|
113 return XML_TOK_INVALID;
|
|
114 }
|
|
115 ptr += MINBPC(enc);
|
|
116 while (ptr != end) {
|
|
117 switch (BYTE_TYPE(enc, ptr)) {
|
|
118 INVALID_CASES(ptr, nextTokPtr)
|
|
119 case BT_MINUS:
|
|
120 if ((ptr += MINBPC(enc)) == end)
|
|
121 return XML_TOK_PARTIAL;
|
|
122 if (CHAR_MATCHES(enc, ptr, '-')) {
|
|
123 if ((ptr += MINBPC(enc)) == end)
|
|
124 return XML_TOK_PARTIAL;
|
|
125 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
126 *nextTokPtr = ptr;
|
|
127 return XML_TOK_INVALID;
|
|
128 }
|
|
129 *nextTokPtr = ptr + MINBPC(enc);
|
|
130 return XML_TOK_COMMENT;
|
|
131 }
|
|
132 break;
|
|
133 default:
|
|
134 ptr += MINBPC(enc);
|
|
135 break;
|
|
136 }
|
|
137 }
|
|
138 }
|
|
139 return XML_TOK_PARTIAL;
|
|
140 }
|
|
141
|
|
142 /* ptr points to character following "<!" */
|
|
143
|
|
144 static
|
|
145 int PREFIX(scanDecl)(const ENCODING *enc, const char *ptr, const char *end,
|
|
146 const char **nextTokPtr)
|
|
147 {
|
|
148 if (ptr == end)
|
|
149 return XML_TOK_PARTIAL;
|
|
150 switch (BYTE_TYPE(enc, ptr)) {
|
|
151 case BT_MINUS:
|
|
152 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
153 case BT_LSQB:
|
|
154 *nextTokPtr = ptr + MINBPC(enc);
|
|
155 return XML_TOK_COND_SECT_OPEN;
|
|
156 case BT_NMSTRT:
|
|
157 case BT_HEX:
|
|
158 ptr += MINBPC(enc);
|
|
159 break;
|
|
160 default:
|
|
161 *nextTokPtr = ptr;
|
|
162 return XML_TOK_INVALID;
|
|
163 }
|
|
164 while (ptr != end) {
|
|
165 switch (BYTE_TYPE(enc, ptr)) {
|
|
166 case BT_PERCNT:
|
|
167 if (ptr + MINBPC(enc) == end)
|
|
168 return XML_TOK_PARTIAL;
|
|
169 /* don't allow <!ENTITY% foo "whatever"> */
|
|
170 switch (BYTE_TYPE(enc, ptr + MINBPC(enc))) {
|
|
171 case BT_S: case BT_CR: case BT_LF: case BT_PERCNT:
|
|
172 *nextTokPtr = ptr;
|
|
173 return XML_TOK_INVALID;
|
|
174 }
|
|
175 /* fall through */
|
|
176 case BT_S: case BT_CR: case BT_LF:
|
|
177 *nextTokPtr = ptr;
|
|
178 return XML_TOK_DECL_OPEN;
|
|
179 case BT_NMSTRT:
|
|
180 case BT_HEX:
|
|
181 ptr += MINBPC(enc);
|
|
182 break;
|
|
183 default:
|
|
184 *nextTokPtr = ptr;
|
|
185 return XML_TOK_INVALID;
|
|
186 }
|
|
187 }
|
|
188 return XML_TOK_PARTIAL;
|
|
189 }
|
|
190
|
|
191 static
|
|
192 int PREFIX(checkPiTarget)(const ENCODING *enc, const char *ptr, const char *end, int *tokPtr)
|
|
193 {
|
|
194 int upper = 0;
|
|
195 *tokPtr = XML_TOK_PI;
|
|
196 if (end - ptr != MINBPC(enc)*3)
|
|
197 return 1;
|
|
198 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
199 case 'x':
|
|
200 break;
|
|
201 case 'X':
|
|
202 upper = 1;
|
|
203 break;
|
|
204 default:
|
|
205 return 1;
|
|
206 }
|
|
207 ptr += MINBPC(enc);
|
|
208 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
209 case 'm':
|
|
210 break;
|
|
211 case 'M':
|
|
212 upper = 1;
|
|
213 break;
|
|
214 default:
|
|
215 return 1;
|
|
216 }
|
|
217 ptr += MINBPC(enc);
|
|
218 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
219 case 'l':
|
|
220 break;
|
|
221 case 'L':
|
|
222 upper = 1;
|
|
223 break;
|
|
224 default:
|
|
225 return 1;
|
|
226 }
|
|
227 if (upper)
|
|
228 return 0;
|
|
229 *tokPtr = XML_TOK_XML_DECL;
|
|
230 return 1;
|
|
231 }
|
|
232
|
|
233 /* ptr points to character following "<?" */
|
|
234
|
|
235 static
|
|
236 int PREFIX(scanPi)(const ENCODING *enc, const char *ptr, const char *end,
|
|
237 const char **nextTokPtr)
|
|
238 {
|
|
239 int tok;
|
|
240 const char *target = ptr;
|
|
241 if (ptr == end)
|
|
242 return XML_TOK_PARTIAL;
|
|
243 switch (BYTE_TYPE(enc, ptr)) {
|
|
244 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
245 default:
|
|
246 *nextTokPtr = ptr;
|
|
247 return XML_TOK_INVALID;
|
|
248 }
|
|
249 while (ptr != end) {
|
|
250 switch (BYTE_TYPE(enc, ptr)) {
|
|
251 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
252 case BT_S: case BT_CR: case BT_LF:
|
|
253 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
|
|
254 *nextTokPtr = ptr;
|
|
255 return XML_TOK_INVALID;
|
|
256 }
|
|
257 ptr += MINBPC(enc);
|
|
258 while (ptr != end) {
|
|
259 switch (BYTE_TYPE(enc, ptr)) {
|
|
260 INVALID_CASES(ptr, nextTokPtr)
|
|
261 case BT_QUEST:
|
|
262 ptr += MINBPC(enc);
|
|
263 if (ptr == end)
|
|
264 return XML_TOK_PARTIAL;
|
|
265 if (CHAR_MATCHES(enc, ptr, '>')) {
|
|
266 *nextTokPtr = ptr + MINBPC(enc);
|
|
267 return tok;
|
|
268 }
|
|
269 break;
|
|
270 default:
|
|
271 ptr += MINBPC(enc);
|
|
272 break;
|
|
273 }
|
|
274 }
|
|
275 return XML_TOK_PARTIAL;
|
|
276 case BT_QUEST:
|
|
277 if (!PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
|
|
278 *nextTokPtr = ptr;
|
|
279 return XML_TOK_INVALID;
|
|
280 }
|
|
281 ptr += MINBPC(enc);
|
|
282 if (ptr == end)
|
|
283 return XML_TOK_PARTIAL;
|
|
284 if (CHAR_MATCHES(enc, ptr, '>')) {
|
|
285 *nextTokPtr = ptr + MINBPC(enc);
|
|
286 return tok;
|
|
287 }
|
|
288 /* fall through */
|
|
289 default:
|
|
290 *nextTokPtr = ptr;
|
|
291 return XML_TOK_INVALID;
|
|
292 }
|
|
293 }
|
|
294 return XML_TOK_PARTIAL;
|
|
295 }
|
|
296
|
|
297
|
|
298 static
|
|
299 int PREFIX(scanCdataSection)(const ENCODING *enc, const char *ptr, const char *end,
|
|
300 const char **nextTokPtr)
|
|
301 {
|
|
302 int i;
|
|
303 /* CDATA[ */
|
|
304 if (end - ptr < 6 * MINBPC(enc))
|
|
305 return XML_TOK_PARTIAL;
|
|
306 for (i = 0; i < 6; i++, ptr += MINBPC(enc)) {
|
|
307 if (!CHAR_MATCHES(enc, ptr, "CDATA["[i])) {
|
|
308 *nextTokPtr = ptr;
|
|
309 return XML_TOK_INVALID;
|
|
310 }
|
|
311 }
|
|
312 *nextTokPtr = ptr;
|
|
313 return XML_TOK_CDATA_SECT_OPEN;
|
|
314 }
|
|
315
|
|
316 static
|
|
317 int PREFIX(cdataSectionTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
318 const char **nextTokPtr)
|
|
319 {
|
|
320 if (ptr == end)
|
|
321 return XML_TOK_NONE;
|
|
322 if (MINBPC(enc) > 1) {
|
|
323 size_t n = end - ptr;
|
|
324 if (n & (MINBPC(enc) - 1)) {
|
|
325 n &= ~(MINBPC(enc) - 1);
|
|
326 if (n == 0)
|
|
327 return XML_TOK_PARTIAL;
|
|
328 end = ptr + n;
|
|
329 }
|
|
330 }
|
|
331 switch (BYTE_TYPE(enc, ptr)) {
|
|
332 case BT_RSQB:
|
|
333 ptr += MINBPC(enc);
|
|
334 if (ptr == end)
|
|
335 return XML_TOK_PARTIAL;
|
|
336 if (!CHAR_MATCHES(enc, ptr, ']'))
|
|
337 break;
|
|
338 ptr += MINBPC(enc);
|
|
339 if (ptr == end)
|
|
340 return XML_TOK_PARTIAL;
|
|
341 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
342 ptr -= MINBPC(enc);
|
|
343 break;
|
|
344 }
|
|
345 *nextTokPtr = ptr + MINBPC(enc);
|
|
346 return XML_TOK_CDATA_SECT_CLOSE;
|
|
347 case BT_CR:
|
|
348 ptr += MINBPC(enc);
|
|
349 if (ptr == end)
|
|
350 return XML_TOK_PARTIAL;
|
|
351 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
352 ptr += MINBPC(enc);
|
|
353 *nextTokPtr = ptr;
|
|
354 return XML_TOK_DATA_NEWLINE;
|
|
355 case BT_LF:
|
|
356 *nextTokPtr = ptr + MINBPC(enc);
|
|
357 return XML_TOK_DATA_NEWLINE;
|
|
358 INVALID_CASES(ptr, nextTokPtr)
|
|
359 default:
|
|
360 ptr += MINBPC(enc);
|
|
361 break;
|
|
362 }
|
|
363 while (ptr != end) {
|
|
364 switch (BYTE_TYPE(enc, ptr)) {
|
|
365 #define LEAD_CASE(n) \
|
|
366 case BT_LEAD ## n: \
|
|
367 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
|
|
368 *nextTokPtr = ptr; \
|
|
369 return XML_TOK_DATA_CHARS; \
|
|
370 } \
|
|
371 ptr += n; \
|
|
372 break;
|
|
373 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
374 #undef LEAD_CASE
|
|
375 case BT_NONXML:
|
|
376 case BT_MALFORM:
|
|
377 case BT_TRAIL:
|
|
378 case BT_CR:
|
|
379 case BT_LF:
|
|
380 case BT_RSQB:
|
|
381 *nextTokPtr = ptr;
|
|
382 return XML_TOK_DATA_CHARS;
|
|
383 default:
|
|
384 ptr += MINBPC(enc);
|
|
385 break;
|
|
386 }
|
|
387 }
|
|
388 *nextTokPtr = ptr;
|
|
389 return XML_TOK_DATA_CHARS;
|
|
390 }
|
|
391
|
|
392 /* ptr points to character following "</" */
|
|
393
|
|
394 static
|
|
395 int PREFIX(scanEndTag)(const ENCODING *enc, const char *ptr, const char *end,
|
|
396 const char **nextTokPtr)
|
|
397 {
|
|
398 if (ptr == end)
|
|
399 return XML_TOK_PARTIAL;
|
|
400 switch (BYTE_TYPE(enc, ptr)) {
|
|
401 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
402 default:
|
|
403 *nextTokPtr = ptr;
|
|
404 return XML_TOK_INVALID;
|
|
405 }
|
|
406 while (ptr != end) {
|
|
407 switch (BYTE_TYPE(enc, ptr)) {
|
|
408 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
409 case BT_S: case BT_CR: case BT_LF:
|
|
410 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
|
411 switch (BYTE_TYPE(enc, ptr)) {
|
|
412 case BT_S: case BT_CR: case BT_LF:
|
|
413 break;
|
|
414 case BT_GT:
|
|
415 *nextTokPtr = ptr + MINBPC(enc);
|
|
416 return XML_TOK_END_TAG;
|
|
417 default:
|
|
418 *nextTokPtr = ptr;
|
|
419 return XML_TOK_INVALID;
|
|
420 }
|
|
421 }
|
|
422 return XML_TOK_PARTIAL;
|
|
423 #ifdef XML_NS
|
|
424 case BT_COLON:
|
|
425 /* no need to check qname syntax here, since end-tag must match exactly */
|
|
426 ptr += MINBPC(enc);
|
|
427 break;
|
|
428 #endif
|
|
429 case BT_GT:
|
|
430 *nextTokPtr = ptr + MINBPC(enc);
|
|
431 return XML_TOK_END_TAG;
|
|
432 default:
|
|
433 *nextTokPtr = ptr;
|
|
434 return XML_TOK_INVALID;
|
|
435 }
|
|
436 }
|
|
437 return XML_TOK_PARTIAL;
|
|
438 }
|
|
439
|
|
440 /* ptr points to character following "&#X" */
|
|
441
|
|
442 static
|
|
443 int PREFIX(scanHexCharRef)(const ENCODING *enc, const char *ptr, const char *end,
|
|
444 const char **nextTokPtr)
|
|
445 {
|
|
446 if (ptr != end) {
|
|
447 switch (BYTE_TYPE(enc, ptr)) {
|
|
448 case BT_DIGIT:
|
|
449 case BT_HEX:
|
|
450 break;
|
|
451 default:
|
|
452 *nextTokPtr = ptr;
|
|
453 return XML_TOK_INVALID;
|
|
454 }
|
|
455 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
|
456 switch (BYTE_TYPE(enc, ptr)) {
|
|
457 case BT_DIGIT:
|
|
458 case BT_HEX:
|
|
459 break;
|
|
460 case BT_SEMI:
|
|
461 *nextTokPtr = ptr + MINBPC(enc);
|
|
462 return XML_TOK_CHAR_REF;
|
|
463 default:
|
|
464 *nextTokPtr = ptr;
|
|
465 return XML_TOK_INVALID;
|
|
466 }
|
|
467 }
|
|
468 }
|
|
469 return XML_TOK_PARTIAL;
|
|
470 }
|
|
471
|
|
472 /* ptr points to character following "&#" */
|
|
473
|
|
474 static
|
|
475 int PREFIX(scanCharRef)(const ENCODING *enc, const char *ptr, const char *end,
|
|
476 const char **nextTokPtr)
|
|
477 {
|
|
478 if (ptr != end) {
|
|
479 if (CHAR_MATCHES(enc, ptr, 'x'))
|
|
480 return PREFIX(scanHexCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
481 switch (BYTE_TYPE(enc, ptr)) {
|
|
482 case BT_DIGIT:
|
|
483 break;
|
|
484 default:
|
|
485 *nextTokPtr = ptr;
|
|
486 return XML_TOK_INVALID;
|
|
487 }
|
|
488 for (ptr += MINBPC(enc); ptr != end; ptr += MINBPC(enc)) {
|
|
489 switch (BYTE_TYPE(enc, ptr)) {
|
|
490 case BT_DIGIT:
|
|
491 break;
|
|
492 case BT_SEMI:
|
|
493 *nextTokPtr = ptr + MINBPC(enc);
|
|
494 return XML_TOK_CHAR_REF;
|
|
495 default:
|
|
496 *nextTokPtr = ptr;
|
|
497 return XML_TOK_INVALID;
|
|
498 }
|
|
499 }
|
|
500 }
|
|
501 return XML_TOK_PARTIAL;
|
|
502 }
|
|
503
|
|
504 /* ptr points to character following "&" */
|
|
505
|
|
506 static
|
|
507 int PREFIX(scanRef)(const ENCODING *enc, const char *ptr, const char *end,
|
|
508 const char **nextTokPtr)
|
|
509 {
|
|
510 if (ptr == end)
|
|
511 return XML_TOK_PARTIAL;
|
|
512 switch (BYTE_TYPE(enc, ptr)) {
|
|
513 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
514 case BT_NUM:
|
|
515 return PREFIX(scanCharRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
516 default:
|
|
517 *nextTokPtr = ptr;
|
|
518 return XML_TOK_INVALID;
|
|
519 }
|
|
520 while (ptr != end) {
|
|
521 switch (BYTE_TYPE(enc, ptr)) {
|
|
522 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
523 case BT_SEMI:
|
|
524 *nextTokPtr = ptr + MINBPC(enc);
|
|
525 return XML_TOK_ENTITY_REF;
|
|
526 default:
|
|
527 *nextTokPtr = ptr;
|
|
528 return XML_TOK_INVALID;
|
|
529 }
|
|
530 }
|
|
531 return XML_TOK_PARTIAL;
|
|
532 }
|
|
533
|
|
534 /* ptr points to character following first character of attribute name */
|
|
535
|
|
536 static
|
|
537 int PREFIX(scanAtts)(const ENCODING *enc, const char *ptr, const char *end,
|
|
538 const char **nextTokPtr)
|
|
539 {
|
|
540 #ifdef XML_NS
|
|
541 int hadColon = 0;
|
|
542 #endif
|
|
543 while (ptr != end) {
|
|
544 switch (BYTE_TYPE(enc, ptr)) {
|
|
545 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
546 #ifdef XML_NS
|
|
547 case BT_COLON:
|
|
548 if (hadColon) {
|
|
549 *nextTokPtr = ptr;
|
|
550 return XML_TOK_INVALID;
|
|
551 }
|
|
552 hadColon = 1;
|
|
553 ptr += MINBPC(enc);
|
|
554 if (ptr == end)
|
|
555 return XML_TOK_PARTIAL;
|
|
556 switch (BYTE_TYPE(enc, ptr)) {
|
|
557 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
558 default:
|
|
559 *nextTokPtr = ptr;
|
|
560 return XML_TOK_INVALID;
|
|
561 }
|
|
562 break;
|
|
563 #endif
|
|
564 case BT_S: case BT_CR: case BT_LF:
|
|
565 for (;;) {
|
|
566 int t;
|
|
567
|
|
568 ptr += MINBPC(enc);
|
|
569 if (ptr == end)
|
|
570 return XML_TOK_PARTIAL;
|
|
571 t = BYTE_TYPE(enc, ptr);
|
|
572 if (t == BT_EQUALS)
|
|
573 break;
|
|
574 switch (t) {
|
|
575 case BT_S:
|
|
576 case BT_LF:
|
|
577 case BT_CR:
|
|
578 break;
|
|
579 default:
|
|
580 *nextTokPtr = ptr;
|
|
581 return XML_TOK_INVALID;
|
|
582 }
|
|
583 }
|
|
584 /* fall through */
|
|
585 case BT_EQUALS:
|
|
586 {
|
|
587 int open;
|
|
588 #ifdef XML_NS
|
|
589 hadColon = 0;
|
|
590 #endif
|
|
591 for (;;) {
|
|
592
|
|
593 ptr += MINBPC(enc);
|
|
594 if (ptr == end)
|
|
595 return XML_TOK_PARTIAL;
|
|
596 open = BYTE_TYPE(enc, ptr);
|
|
597 if (open == BT_QUOT || open == BT_APOS)
|
|
598 break;
|
|
599 switch (open) {
|
|
600 case BT_S:
|
|
601 case BT_LF:
|
|
602 case BT_CR:
|
|
603 break;
|
|
604 default:
|
|
605 *nextTokPtr = ptr;
|
|
606 return XML_TOK_INVALID;
|
|
607 }
|
|
608 }
|
|
609 ptr += MINBPC(enc);
|
|
610 /* in attribute value */
|
|
611 for (;;) {
|
|
612 int t;
|
|
613 if (ptr == end)
|
|
614 return XML_TOK_PARTIAL;
|
|
615 t = BYTE_TYPE(enc, ptr);
|
|
616 if (t == open)
|
|
617 break;
|
|
618 switch (t) {
|
|
619 INVALID_CASES(ptr, nextTokPtr)
|
|
620 case BT_AMP:
|
|
621 {
|
|
622 int tok = PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, &ptr);
|
|
623 if (tok <= 0) {
|
|
624 if (tok == XML_TOK_INVALID)
|
|
625 *nextTokPtr = ptr;
|
|
626 return tok;
|
|
627 }
|
|
628 break;
|
|
629 }
|
|
630 case BT_LT:
|
|
631 *nextTokPtr = ptr;
|
|
632 return XML_TOK_INVALID;
|
|
633 default:
|
|
634 ptr += MINBPC(enc);
|
|
635 break;
|
|
636 }
|
|
637 }
|
|
638 ptr += MINBPC(enc);
|
|
639 if (ptr == end)
|
|
640 return XML_TOK_PARTIAL;
|
|
641 switch (BYTE_TYPE(enc, ptr)) {
|
|
642 case BT_S:
|
|
643 case BT_CR:
|
|
644 case BT_LF:
|
|
645 break;
|
|
646 case BT_SOL:
|
|
647 goto sol;
|
|
648 case BT_GT:
|
|
649 goto gt;
|
|
650 default:
|
|
651 *nextTokPtr = ptr;
|
|
652 return XML_TOK_INVALID;
|
|
653 }
|
|
654 /* ptr points to closing quote */
|
|
655 for (;;) {
|
|
656 ptr += MINBPC(enc);
|
|
657 if (ptr == end)
|
|
658 return XML_TOK_PARTIAL;
|
|
659 switch (BYTE_TYPE(enc, ptr)) {
|
|
660 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
661 case BT_S: case BT_CR: case BT_LF:
|
|
662 continue;
|
|
663 case BT_GT:
|
|
664 gt:
|
|
665 *nextTokPtr = ptr + MINBPC(enc);
|
|
666 return XML_TOK_START_TAG_WITH_ATTS;
|
|
667 case BT_SOL:
|
|
668 sol:
|
|
669 ptr += MINBPC(enc);
|
|
670 if (ptr == end)
|
|
671 return XML_TOK_PARTIAL;
|
|
672 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
673 *nextTokPtr = ptr;
|
|
674 return XML_TOK_INVALID;
|
|
675 }
|
|
676 *nextTokPtr = ptr + MINBPC(enc);
|
|
677 return XML_TOK_EMPTY_ELEMENT_WITH_ATTS;
|
|
678 default:
|
|
679 *nextTokPtr = ptr;
|
|
680 return XML_TOK_INVALID;
|
|
681 }
|
|
682 break;
|
|
683 }
|
|
684 break;
|
|
685 }
|
|
686 default:
|
|
687 *nextTokPtr = ptr;
|
|
688 return XML_TOK_INVALID;
|
|
689 }
|
|
690 }
|
|
691 return XML_TOK_PARTIAL;
|
|
692 }
|
|
693
|
|
694 /* ptr points to character following "<" */
|
|
695
|
|
696 static
|
|
697 int PREFIX(scanLt)(const ENCODING *enc, const char *ptr, const char *end,
|
|
698 const char **nextTokPtr)
|
|
699 {
|
|
700 #ifdef XML_NS
|
|
701 int hadColon;
|
|
702 #endif
|
|
703 if (ptr == end)
|
|
704 return XML_TOK_PARTIAL;
|
|
705 switch (BYTE_TYPE(enc, ptr)) {
|
|
706 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
707 case BT_EXCL:
|
|
708 if ((ptr += MINBPC(enc)) == end)
|
|
709 return XML_TOK_PARTIAL;
|
|
710 switch (BYTE_TYPE(enc, ptr)) {
|
|
711 case BT_MINUS:
|
|
712 return PREFIX(scanComment)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
713 case BT_LSQB:
|
|
714 return PREFIX(scanCdataSection)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
715 }
|
|
716 *nextTokPtr = ptr;
|
|
717 return XML_TOK_INVALID;
|
|
718 case BT_QUEST:
|
|
719 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
720 case BT_SOL:
|
|
721 return PREFIX(scanEndTag)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
722 default:
|
|
723 *nextTokPtr = ptr;
|
|
724 return XML_TOK_INVALID;
|
|
725 }
|
|
726 #ifdef XML_NS
|
|
727 hadColon = 0;
|
|
728 #endif
|
|
729 /* we have a start-tag */
|
|
730 while (ptr != end) {
|
|
731 switch (BYTE_TYPE(enc, ptr)) {
|
|
732 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
733 #ifdef XML_NS
|
|
734 case BT_COLON:
|
|
735 if (hadColon) {
|
|
736 *nextTokPtr = ptr;
|
|
737 return XML_TOK_INVALID;
|
|
738 }
|
|
739 hadColon = 1;
|
|
740 ptr += MINBPC(enc);
|
|
741 if (ptr == end)
|
|
742 return XML_TOK_PARTIAL;
|
|
743 switch (BYTE_TYPE(enc, ptr)) {
|
|
744 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
745 default:
|
|
746 *nextTokPtr = ptr;
|
|
747 return XML_TOK_INVALID;
|
|
748 }
|
|
749 break;
|
|
750 #endif
|
|
751 case BT_S: case BT_CR: case BT_LF:
|
|
752 {
|
|
753 ptr += MINBPC(enc);
|
|
754 while (ptr != end) {
|
|
755 switch (BYTE_TYPE(enc, ptr)) {
|
|
756 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
757 case BT_GT:
|
|
758 goto gt;
|
|
759 case BT_SOL:
|
|
760 goto sol;
|
|
761 case BT_S: case BT_CR: case BT_LF:
|
|
762 ptr += MINBPC(enc);
|
|
763 continue;
|
|
764 default:
|
|
765 *nextTokPtr = ptr;
|
|
766 return XML_TOK_INVALID;
|
|
767 }
|
|
768 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
|
|
769 }
|
|
770 return XML_TOK_PARTIAL;
|
|
771 }
|
|
772 case BT_GT:
|
|
773 gt:
|
|
774 *nextTokPtr = ptr + MINBPC(enc);
|
|
775 return XML_TOK_START_TAG_NO_ATTS;
|
|
776 case BT_SOL:
|
|
777 sol:
|
|
778 ptr += MINBPC(enc);
|
|
779 if (ptr == end)
|
|
780 return XML_TOK_PARTIAL;
|
|
781 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
782 *nextTokPtr = ptr;
|
|
783 return XML_TOK_INVALID;
|
|
784 }
|
|
785 *nextTokPtr = ptr + MINBPC(enc);
|
|
786 return XML_TOK_EMPTY_ELEMENT_NO_ATTS;
|
|
787 default:
|
|
788 *nextTokPtr = ptr;
|
|
789 return XML_TOK_INVALID;
|
|
790 }
|
|
791 }
|
|
792 return XML_TOK_PARTIAL;
|
|
793 }
|
|
794
|
|
795 static
|
|
796 int PREFIX(contentTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
797 const char **nextTokPtr)
|
|
798 {
|
|
799 if (ptr == end)
|
|
800 return XML_TOK_NONE;
|
|
801 if (MINBPC(enc) > 1) {
|
|
802 size_t n = end - ptr;
|
|
803 if (n & (MINBPC(enc) - 1)) {
|
|
804 n &= ~(MINBPC(enc) - 1);
|
|
805 if (n == 0)
|
|
806 return XML_TOK_PARTIAL;
|
|
807 end = ptr + n;
|
|
808 }
|
|
809 }
|
|
810 switch (BYTE_TYPE(enc, ptr)) {
|
|
811 case BT_LT:
|
|
812 return PREFIX(scanLt)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
813 case BT_AMP:
|
|
814 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
815 case BT_CR:
|
|
816 ptr += MINBPC(enc);
|
|
817 if (ptr == end)
|
|
818 return XML_TOK_TRAILING_CR;
|
|
819 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
820 ptr += MINBPC(enc);
|
|
821 *nextTokPtr = ptr;
|
|
822 return XML_TOK_DATA_NEWLINE;
|
|
823 case BT_LF:
|
|
824 *nextTokPtr = ptr + MINBPC(enc);
|
|
825 return XML_TOK_DATA_NEWLINE;
|
|
826 case BT_RSQB:
|
|
827 ptr += MINBPC(enc);
|
|
828 if (ptr == end)
|
|
829 return XML_TOK_TRAILING_RSQB;
|
|
830 if (!CHAR_MATCHES(enc, ptr, ']'))
|
|
831 break;
|
|
832 ptr += MINBPC(enc);
|
|
833 if (ptr == end)
|
|
834 return XML_TOK_TRAILING_RSQB;
|
|
835 if (!CHAR_MATCHES(enc, ptr, '>')) {
|
|
836 ptr -= MINBPC(enc);
|
|
837 break;
|
|
838 }
|
|
839 *nextTokPtr = ptr;
|
|
840 return XML_TOK_INVALID;
|
|
841 INVALID_CASES(ptr, nextTokPtr)
|
|
842 default:
|
|
843 ptr += MINBPC(enc);
|
|
844 break;
|
|
845 }
|
|
846 while (ptr != end) {
|
|
847 switch (BYTE_TYPE(enc, ptr)) {
|
|
848 #define LEAD_CASE(n) \
|
|
849 case BT_LEAD ## n: \
|
|
850 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
|
|
851 *nextTokPtr = ptr; \
|
|
852 return XML_TOK_DATA_CHARS; \
|
|
853 } \
|
|
854 ptr += n; \
|
|
855 break;
|
|
856 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
857 #undef LEAD_CASE
|
|
858 case BT_RSQB:
|
|
859 if (ptr + MINBPC(enc) != end) {
|
|
860 if (!CHAR_MATCHES(enc, ptr + MINBPC(enc), ']')) {
|
|
861 ptr += MINBPC(enc);
|
|
862 break;
|
|
863 }
|
|
864 if (ptr + 2*MINBPC(enc) != end) {
|
|
865 if (!CHAR_MATCHES(enc, ptr + 2*MINBPC(enc), '>')) {
|
|
866 ptr += MINBPC(enc);
|
|
867 break;
|
|
868 }
|
|
869 *nextTokPtr = ptr + 2*MINBPC(enc);
|
|
870 return XML_TOK_INVALID;
|
|
871 }
|
|
872 }
|
|
873 /* fall through */
|
|
874 case BT_AMP:
|
|
875 case BT_LT:
|
|
876 case BT_NONXML:
|
|
877 case BT_MALFORM:
|
|
878 case BT_TRAIL:
|
|
879 case BT_CR:
|
|
880 case BT_LF:
|
|
881 *nextTokPtr = ptr;
|
|
882 return XML_TOK_DATA_CHARS;
|
|
883 default:
|
|
884 ptr += MINBPC(enc);
|
|
885 break;
|
|
886 }
|
|
887 }
|
|
888 *nextTokPtr = ptr;
|
|
889 return XML_TOK_DATA_CHARS;
|
|
890 }
|
|
891
|
|
892 /* ptr points to character following "%" */
|
|
893
|
|
894 static
|
|
895 int PREFIX(scanPercent)(const ENCODING *enc, const char *ptr, const char *end,
|
|
896 const char **nextTokPtr)
|
|
897 {
|
|
898 if (ptr == end)
|
|
899 return XML_TOK_PARTIAL;
|
|
900 switch (BYTE_TYPE(enc, ptr)) {
|
|
901 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
902 case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
|
|
903 *nextTokPtr = ptr;
|
|
904 return XML_TOK_PERCENT;
|
|
905 default:
|
|
906 *nextTokPtr = ptr;
|
|
907 return XML_TOK_INVALID;
|
|
908 }
|
|
909 while (ptr != end) {
|
|
910 switch (BYTE_TYPE(enc, ptr)) {
|
|
911 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
912 case BT_SEMI:
|
|
913 *nextTokPtr = ptr + MINBPC(enc);
|
|
914 return XML_TOK_PARAM_ENTITY_REF;
|
|
915 default:
|
|
916 *nextTokPtr = ptr;
|
|
917 return XML_TOK_INVALID;
|
|
918 }
|
|
919 }
|
|
920 return XML_TOK_PARTIAL;
|
|
921 }
|
|
922
|
|
923 static
|
|
924 int PREFIX(scanPoundName)(const ENCODING *enc, const char *ptr, const char *end,
|
|
925 const char **nextTokPtr)
|
|
926 {
|
|
927 if (ptr == end)
|
|
928 return XML_TOK_PARTIAL;
|
|
929 switch (BYTE_TYPE(enc, ptr)) {
|
|
930 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
|
|
931 default:
|
|
932 *nextTokPtr = ptr;
|
|
933 return XML_TOK_INVALID;
|
|
934 }
|
|
935 while (ptr != end) {
|
|
936 switch (BYTE_TYPE(enc, ptr)) {
|
|
937 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
938 case BT_CR: case BT_LF: case BT_S:
|
|
939 case BT_RPAR: case BT_GT: case BT_PERCNT: case BT_VERBAR:
|
|
940 *nextTokPtr = ptr;
|
|
941 return XML_TOK_POUND_NAME;
|
|
942 default:
|
|
943 *nextTokPtr = ptr;
|
|
944 return XML_TOK_INVALID;
|
|
945 }
|
|
946 }
|
|
947 return XML_TOK_PARTIAL;
|
|
948 }
|
|
949
|
|
950 static
|
|
951 int PREFIX(scanLit)(int open, const ENCODING *enc,
|
|
952 const char *ptr, const char *end,
|
|
953 const char **nextTokPtr)
|
|
954 {
|
|
955 while (ptr != end) {
|
|
956 int t = BYTE_TYPE(enc, ptr);
|
|
957 switch (t) {
|
|
958 INVALID_CASES(ptr, nextTokPtr)
|
|
959 case BT_QUOT:
|
|
960 case BT_APOS:
|
|
961 ptr += MINBPC(enc);
|
|
962 if (t != open)
|
|
963 break;
|
|
964 if (ptr == end)
|
|
965 return XML_TOK_PARTIAL;
|
|
966 *nextTokPtr = ptr;
|
|
967 switch (BYTE_TYPE(enc, ptr)) {
|
|
968 case BT_S: case BT_CR: case BT_LF:
|
|
969 case BT_GT: case BT_PERCNT: case BT_LSQB:
|
|
970 return XML_TOK_LITERAL;
|
|
971 default:
|
|
972 return XML_TOK_INVALID;
|
|
973 }
|
|
974 default:
|
|
975 ptr += MINBPC(enc);
|
|
976 break;
|
|
977 }
|
|
978 }
|
|
979 return XML_TOK_PARTIAL;
|
|
980 }
|
|
981
|
|
982 static
|
|
983 int PREFIX(prologTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
984 const char **nextTokPtr)
|
|
985 {
|
|
986 int tok;
|
|
987 if (ptr == end)
|
|
988 return XML_TOK_NONE;
|
|
989 if (MINBPC(enc) > 1) {
|
|
990 size_t n = end - ptr;
|
|
991 if (n & (MINBPC(enc) - 1)) {
|
|
992 n &= ~(MINBPC(enc) - 1);
|
|
993 if (n == 0)
|
|
994 return XML_TOK_PARTIAL;
|
|
995 end = ptr + n;
|
|
996 }
|
|
997 }
|
|
998 switch (BYTE_TYPE(enc, ptr)) {
|
|
999 case BT_QUOT:
|
|
1000 return PREFIX(scanLit)(BT_QUOT, enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1001 case BT_APOS:
|
|
1002 return PREFIX(scanLit)(BT_APOS, enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1003 case BT_LT:
|
|
1004 {
|
|
1005 ptr += MINBPC(enc);
|
|
1006 if (ptr == end)
|
|
1007 return XML_TOK_PARTIAL;
|
|
1008 switch (BYTE_TYPE(enc, ptr)) {
|
|
1009 case BT_EXCL:
|
|
1010 return PREFIX(scanDecl)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1011 case BT_QUEST:
|
|
1012 return PREFIX(scanPi)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1013 case BT_NMSTRT:
|
|
1014 case BT_HEX:
|
|
1015 case BT_NONASCII:
|
|
1016 case BT_LEAD2:
|
|
1017 case BT_LEAD3:
|
|
1018 case BT_LEAD4:
|
|
1019 *nextTokPtr = ptr - MINBPC(enc);
|
|
1020 return XML_TOK_INSTANCE_START;
|
|
1021 }
|
|
1022 *nextTokPtr = ptr;
|
|
1023 return XML_TOK_INVALID;
|
|
1024 }
|
|
1025 case BT_CR:
|
|
1026 if (ptr + MINBPC(enc) == end)
|
|
1027 return XML_TOK_TRAILING_CR;
|
|
1028 /* fall through */
|
|
1029 case BT_S: case BT_LF:
|
|
1030 for (;;) {
|
|
1031 ptr += MINBPC(enc);
|
|
1032 if (ptr == end)
|
|
1033 break;
|
|
1034 switch (BYTE_TYPE(enc, ptr)) {
|
|
1035 case BT_S: case BT_LF:
|
|
1036 break;
|
|
1037 case BT_CR:
|
|
1038 /* don't split CR/LF pair */
|
|
1039 if (ptr + MINBPC(enc) != end)
|
|
1040 break;
|
|
1041 /* fall through */
|
|
1042 default:
|
|
1043 *nextTokPtr = ptr;
|
|
1044 return XML_TOK_PROLOG_S;
|
|
1045 }
|
|
1046 }
|
|
1047 *nextTokPtr = ptr;
|
|
1048 return XML_TOK_PROLOG_S;
|
|
1049 case BT_PERCNT:
|
|
1050 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1051 case BT_COMMA:
|
|
1052 *nextTokPtr = ptr + MINBPC(enc);
|
|
1053 return XML_TOK_COMMA;
|
|
1054 case BT_LSQB:
|
|
1055 *nextTokPtr = ptr + MINBPC(enc);
|
|
1056 return XML_TOK_OPEN_BRACKET;
|
|
1057 case BT_RSQB:
|
|
1058 ptr += MINBPC(enc);
|
|
1059 if (ptr == end)
|
|
1060 return XML_TOK_PARTIAL;
|
|
1061 if (CHAR_MATCHES(enc, ptr, ']')) {
|
|
1062 if (ptr + MINBPC(enc) == end)
|
|
1063 return XML_TOK_PARTIAL;
|
|
1064 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), '>')) {
|
|
1065 *nextTokPtr = ptr + 2*MINBPC(enc);
|
|
1066 return XML_TOK_COND_SECT_CLOSE;
|
|
1067 }
|
|
1068 }
|
|
1069 *nextTokPtr = ptr;
|
|
1070 return XML_TOK_CLOSE_BRACKET;
|
|
1071 case BT_LPAR:
|
|
1072 *nextTokPtr = ptr + MINBPC(enc);
|
|
1073 return XML_TOK_OPEN_PAREN;
|
|
1074 case BT_RPAR:
|
|
1075 ptr += MINBPC(enc);
|
|
1076 if (ptr == end)
|
|
1077 return XML_TOK_PARTIAL;
|
|
1078 switch (BYTE_TYPE(enc, ptr)) {
|
|
1079 case BT_AST:
|
|
1080 *nextTokPtr = ptr + MINBPC(enc);
|
|
1081 return XML_TOK_CLOSE_PAREN_ASTERISK;
|
|
1082 case BT_QUEST:
|
|
1083 *nextTokPtr = ptr + MINBPC(enc);
|
|
1084 return XML_TOK_CLOSE_PAREN_QUESTION;
|
|
1085 case BT_PLUS:
|
|
1086 *nextTokPtr = ptr + MINBPC(enc);
|
|
1087 return XML_TOK_CLOSE_PAREN_PLUS;
|
|
1088 case BT_CR: case BT_LF: case BT_S:
|
|
1089 case BT_GT: case BT_COMMA: case BT_VERBAR:
|
|
1090 case BT_RPAR:
|
|
1091 *nextTokPtr = ptr;
|
|
1092 return XML_TOK_CLOSE_PAREN;
|
|
1093 }
|
|
1094 *nextTokPtr = ptr;
|
|
1095 return XML_TOK_INVALID;
|
|
1096 case BT_VERBAR:
|
|
1097 *nextTokPtr = ptr + MINBPC(enc);
|
|
1098 return XML_TOK_OR;
|
|
1099 case BT_GT:
|
|
1100 *nextTokPtr = ptr + MINBPC(enc);
|
|
1101 return XML_TOK_DECL_CLOSE;
|
|
1102 case BT_NUM:
|
|
1103 return PREFIX(scanPoundName)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1104 #define LEAD_CASE(n) \
|
|
1105 case BT_LEAD ## n: \
|
|
1106 if (end - ptr < n) \
|
|
1107 return XML_TOK_PARTIAL_CHAR; \
|
|
1108 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
|
|
1109 ptr += n; \
|
|
1110 tok = XML_TOK_NAME; \
|
|
1111 break; \
|
|
1112 } \
|
|
1113 if (IS_NAME_CHAR(enc, ptr, n)) { \
|
|
1114 ptr += n; \
|
|
1115 tok = XML_TOK_NMTOKEN; \
|
|
1116 break; \
|
|
1117 } \
|
|
1118 *nextTokPtr = ptr; \
|
|
1119 return XML_TOK_INVALID;
|
|
1120 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1121 #undef LEAD_CASE
|
|
1122 case BT_NMSTRT:
|
|
1123 case BT_HEX:
|
|
1124 tok = XML_TOK_NAME;
|
|
1125 ptr += MINBPC(enc);
|
|
1126 break;
|
|
1127 case BT_DIGIT:
|
|
1128 case BT_NAME:
|
|
1129 case BT_MINUS:
|
|
1130 #ifdef XML_NS
|
|
1131 case BT_COLON:
|
|
1132 #endif
|
|
1133 tok = XML_TOK_NMTOKEN;
|
|
1134 ptr += MINBPC(enc);
|
|
1135 break;
|
|
1136 case BT_NONASCII:
|
|
1137 if (IS_NMSTRT_CHAR_MINBPC(enc, ptr)) {
|
|
1138 ptr += MINBPC(enc);
|
|
1139 tok = XML_TOK_NAME;
|
|
1140 break;
|
|
1141 }
|
|
1142 if (IS_NAME_CHAR_MINBPC(enc, ptr)) {
|
|
1143 ptr += MINBPC(enc);
|
|
1144 tok = XML_TOK_NMTOKEN;
|
|
1145 break;
|
|
1146 }
|
|
1147 /* fall through */
|
|
1148 default:
|
|
1149 *nextTokPtr = ptr;
|
|
1150 return XML_TOK_INVALID;
|
|
1151 }
|
|
1152 while (ptr != end) {
|
|
1153 switch (BYTE_TYPE(enc, ptr)) {
|
|
1154 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
1155 case BT_GT: case BT_RPAR: case BT_COMMA:
|
|
1156 case BT_VERBAR: case BT_LSQB: case BT_PERCNT:
|
|
1157 case BT_S: case BT_CR: case BT_LF:
|
|
1158 *nextTokPtr = ptr;
|
|
1159 return tok;
|
|
1160 #ifdef XML_NS
|
|
1161 case BT_COLON:
|
|
1162 ptr += MINBPC(enc);
|
|
1163 switch (tok) {
|
|
1164 case XML_TOK_NAME:
|
|
1165 if (ptr == end)
|
|
1166 return XML_TOK_PARTIAL;
|
|
1167 tok = XML_TOK_PREFIXED_NAME;
|
|
1168 switch (BYTE_TYPE(enc, ptr)) {
|
|
1169 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
|
|
1170 default:
|
|
1171 tok = XML_TOK_NMTOKEN;
|
|
1172 break;
|
|
1173 }
|
|
1174 break;
|
|
1175 case XML_TOK_PREFIXED_NAME:
|
|
1176 tok = XML_TOK_NMTOKEN;
|
|
1177 break;
|
|
1178 }
|
|
1179 break;
|
|
1180 #endif
|
|
1181 case BT_PLUS:
|
|
1182 if (tok == XML_TOK_NMTOKEN) {
|
|
1183 *nextTokPtr = ptr;
|
|
1184 return XML_TOK_INVALID;
|
|
1185 }
|
|
1186 *nextTokPtr = ptr + MINBPC(enc);
|
|
1187 return XML_TOK_NAME_PLUS;
|
|
1188 case BT_AST:
|
|
1189 if (tok == XML_TOK_NMTOKEN) {
|
|
1190 *nextTokPtr = ptr;
|
|
1191 return XML_TOK_INVALID;
|
|
1192 }
|
|
1193 *nextTokPtr = ptr + MINBPC(enc);
|
|
1194 return XML_TOK_NAME_ASTERISK;
|
|
1195 case BT_QUEST:
|
|
1196 if (tok == XML_TOK_NMTOKEN) {
|
|
1197 *nextTokPtr = ptr;
|
|
1198 return XML_TOK_INVALID;
|
|
1199 }
|
|
1200 *nextTokPtr = ptr + MINBPC(enc);
|
|
1201 return XML_TOK_NAME_QUESTION;
|
|
1202 default:
|
|
1203 *nextTokPtr = ptr;
|
|
1204 return XML_TOK_INVALID;
|
|
1205 }
|
|
1206 }
|
|
1207 return XML_TOK_PARTIAL;
|
|
1208 }
|
|
1209
|
|
1210 static
|
|
1211 int PREFIX(attributeValueTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
1212 const char **nextTokPtr)
|
|
1213 {
|
|
1214 const char *start;
|
|
1215 if (ptr == end)
|
|
1216 return XML_TOK_NONE;
|
|
1217 start = ptr;
|
|
1218 while (ptr != end) {
|
|
1219 switch (BYTE_TYPE(enc, ptr)) {
|
|
1220 #define LEAD_CASE(n) \
|
|
1221 case BT_LEAD ## n: ptr += n; break;
|
|
1222 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1223 #undef LEAD_CASE
|
|
1224 case BT_AMP:
|
|
1225 if (ptr == start)
|
|
1226 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1227 *nextTokPtr = ptr;
|
|
1228 return XML_TOK_DATA_CHARS;
|
|
1229 case BT_LT:
|
|
1230 /* this is for inside entity references */
|
|
1231 *nextTokPtr = ptr;
|
|
1232 return XML_TOK_INVALID;
|
|
1233 case BT_LF:
|
|
1234 if (ptr == start) {
|
|
1235 *nextTokPtr = ptr + MINBPC(enc);
|
|
1236 return XML_TOK_DATA_NEWLINE;
|
|
1237 }
|
|
1238 *nextTokPtr = ptr;
|
|
1239 return XML_TOK_DATA_CHARS;
|
|
1240 case BT_CR:
|
|
1241 if (ptr == start) {
|
|
1242 ptr += MINBPC(enc);
|
|
1243 if (ptr == end)
|
|
1244 return XML_TOK_TRAILING_CR;
|
|
1245 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
1246 ptr += MINBPC(enc);
|
|
1247 *nextTokPtr = ptr;
|
|
1248 return XML_TOK_DATA_NEWLINE;
|
|
1249 }
|
|
1250 *nextTokPtr = ptr;
|
|
1251 return XML_TOK_DATA_CHARS;
|
|
1252 case BT_S:
|
|
1253 if (ptr == start) {
|
|
1254 *nextTokPtr = ptr + MINBPC(enc);
|
|
1255 return XML_TOK_ATTRIBUTE_VALUE_S;
|
|
1256 }
|
|
1257 *nextTokPtr = ptr;
|
|
1258 return XML_TOK_DATA_CHARS;
|
|
1259 default:
|
|
1260 ptr += MINBPC(enc);
|
|
1261 break;
|
|
1262 }
|
|
1263 }
|
|
1264 *nextTokPtr = ptr;
|
|
1265 return XML_TOK_DATA_CHARS;
|
|
1266 }
|
|
1267
|
|
1268 static
|
|
1269 int PREFIX(entityValueTok)(const ENCODING *enc, const char *ptr, const char *end,
|
|
1270 const char **nextTokPtr)
|
|
1271 {
|
|
1272 const char *start;
|
|
1273 if (ptr == end)
|
|
1274 return XML_TOK_NONE;
|
|
1275 start = ptr;
|
|
1276 while (ptr != end) {
|
|
1277 switch (BYTE_TYPE(enc, ptr)) {
|
|
1278 #define LEAD_CASE(n) \
|
|
1279 case BT_LEAD ## n: ptr += n; break;
|
|
1280 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1281 #undef LEAD_CASE
|
|
1282 case BT_AMP:
|
|
1283 if (ptr == start)
|
|
1284 return PREFIX(scanRef)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1285 *nextTokPtr = ptr;
|
|
1286 return XML_TOK_DATA_CHARS;
|
|
1287 case BT_PERCNT:
|
|
1288 if (ptr == start)
|
|
1289 return PREFIX(scanPercent)(enc, ptr + MINBPC(enc), end, nextTokPtr);
|
|
1290 *nextTokPtr = ptr;
|
|
1291 return XML_TOK_DATA_CHARS;
|
|
1292 case BT_LF:
|
|
1293 if (ptr == start) {
|
|
1294 *nextTokPtr = ptr + MINBPC(enc);
|
|
1295 return XML_TOK_DATA_NEWLINE;
|
|
1296 }
|
|
1297 *nextTokPtr = ptr;
|
|
1298 return XML_TOK_DATA_CHARS;
|
|
1299 case BT_CR:
|
|
1300 if (ptr == start) {
|
|
1301 ptr += MINBPC(enc);
|
|
1302 if (ptr == end)
|
|
1303 return XML_TOK_TRAILING_CR;
|
|
1304 if (BYTE_TYPE(enc, ptr) == BT_LF)
|
|
1305 ptr += MINBPC(enc);
|
|
1306 *nextTokPtr = ptr;
|
|
1307 return XML_TOK_DATA_NEWLINE;
|
|
1308 }
|
|
1309 *nextTokPtr = ptr;
|
|
1310 return XML_TOK_DATA_CHARS;
|
|
1311 default:
|
|
1312 ptr += MINBPC(enc);
|
|
1313 break;
|
|
1314 }
|
|
1315 }
|
|
1316 *nextTokPtr = ptr;
|
|
1317 return XML_TOK_DATA_CHARS;
|
|
1318 }
|
|
1319
|
|
1320 static
|
|
1321 int PREFIX(isPublicId)(const ENCODING *enc, const char *ptr, const char *end,
|
|
1322 const char **badPtr)
|
|
1323 {
|
|
1324 ptr += MINBPC(enc);
|
|
1325 end -= MINBPC(enc);
|
|
1326 for (; ptr != end; ptr += MINBPC(enc)) {
|
|
1327 switch (BYTE_TYPE(enc, ptr)) {
|
|
1328 case BT_DIGIT:
|
|
1329 case BT_HEX:
|
|
1330 case BT_MINUS:
|
|
1331 case BT_APOS:
|
|
1332 case BT_LPAR:
|
|
1333 case BT_RPAR:
|
|
1334 case BT_PLUS:
|
|
1335 case BT_COMMA:
|
|
1336 case BT_SOL:
|
|
1337 case BT_EQUALS:
|
|
1338 case BT_QUEST:
|
|
1339 case BT_CR:
|
|
1340 case BT_LF:
|
|
1341 case BT_SEMI:
|
|
1342 case BT_EXCL:
|
|
1343 case BT_AST:
|
|
1344 case BT_PERCNT:
|
|
1345 case BT_NUM:
|
|
1346 #ifdef XML_NS
|
|
1347 case BT_COLON:
|
|
1348 #endif
|
|
1349 break;
|
|
1350 case BT_S:
|
|
1351 if (CHAR_MATCHES(enc, ptr, '\t')) {
|
|
1352 *badPtr = ptr;
|
|
1353 return 0;
|
|
1354 }
|
|
1355 break;
|
|
1356 case BT_NAME:
|
|
1357 case BT_NMSTRT:
|
|
1358 if (!(BYTE_TO_ASCII(enc, ptr) & ~0x7f))
|
|
1359 break;
|
|
1360 default:
|
|
1361 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
1362 case 0x24: /* $ */
|
|
1363 case 0x40: /* @ */
|
|
1364 break;
|
|
1365 default:
|
|
1366 *badPtr = ptr;
|
|
1367 return 0;
|
|
1368 }
|
|
1369 break;
|
|
1370 }
|
|
1371 }
|
|
1372 return 1;
|
|
1373 }
|
|
1374
|
|
1375 /* This must only be called for a well-formed start-tag or empty element tag.
|
|
1376 Returns the number of attributes. Pointers to the first attsMax attributes
|
|
1377 are stored in atts. */
|
|
1378
|
|
1379 static
|
|
1380 int PREFIX(getAtts)(const ENCODING *enc, const char *ptr,
|
|
1381 int attsMax, ATTRIBUTE *atts)
|
|
1382 {
|
|
1383 enum { other, inName, inValue } state = inName;
|
|
1384 int nAtts = 0;
|
3127
|
1385 int open = 0;
|
2086
|
1386
|
|
1387 for (ptr += MINBPC(enc);; ptr += MINBPC(enc)) {
|
|
1388 switch (BYTE_TYPE(enc, ptr)) {
|
|
1389 #define START_NAME \
|
|
1390 if (state == other) { \
|
|
1391 if (nAtts < attsMax) { \
|
|
1392 atts[nAtts].name = ptr; \
|
|
1393 atts[nAtts].normalized = 1; \
|
|
1394 } \
|
|
1395 state = inName; \
|
|
1396 }
|
|
1397 #define LEAD_CASE(n) \
|
|
1398 case BT_LEAD ## n: START_NAME ptr += (n - MINBPC(enc)); break;
|
|
1399 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1400 #undef LEAD_CASE
|
|
1401 case BT_NONASCII:
|
|
1402 case BT_NMSTRT:
|
|
1403 case BT_HEX:
|
|
1404 START_NAME
|
|
1405 break;
|
|
1406 #undef START_NAME
|
|
1407 case BT_QUOT:
|
|
1408 if (state != inValue) {
|
|
1409 if (nAtts < attsMax)
|
|
1410 atts[nAtts].valuePtr = ptr + MINBPC(enc);
|
|
1411 state = inValue;
|
|
1412 open = BT_QUOT;
|
|
1413 }
|
|
1414 else if (open == BT_QUOT) {
|
|
1415 state = other;
|
|
1416 if (nAtts < attsMax)
|
|
1417 atts[nAtts].valueEnd = ptr;
|
|
1418 nAtts++;
|
|
1419 }
|
|
1420 break;
|
|
1421 case BT_APOS:
|
|
1422 if (state != inValue) {
|
|
1423 if (nAtts < attsMax)
|
|
1424 atts[nAtts].valuePtr = ptr + MINBPC(enc);
|
|
1425 state = inValue;
|
|
1426 open = BT_APOS;
|
|
1427 }
|
|
1428 else if (open == BT_APOS) {
|
|
1429 state = other;
|
|
1430 if (nAtts < attsMax)
|
|
1431 atts[nAtts].valueEnd = ptr;
|
|
1432 nAtts++;
|
|
1433 }
|
|
1434 break;
|
|
1435 case BT_AMP:
|
|
1436 if (nAtts < attsMax)
|
|
1437 atts[nAtts].normalized = 0;
|
|
1438 break;
|
|
1439 case BT_S:
|
|
1440 if (state == inName)
|
|
1441 state = other;
|
|
1442 else if (state == inValue
|
|
1443 && nAtts < attsMax
|
|
1444 && atts[nAtts].normalized
|
|
1445 && (ptr == atts[nAtts].valuePtr
|
|
1446 || BYTE_TO_ASCII(enc, ptr) != ' '
|
|
1447 || BYTE_TO_ASCII(enc, ptr + MINBPC(enc)) == ' '
|
|
1448 || BYTE_TYPE(enc, ptr + MINBPC(enc)) == open))
|
|
1449 atts[nAtts].normalized = 0;
|
|
1450 break;
|
|
1451 case BT_CR: case BT_LF:
|
|
1452 /* This case ensures that the first attribute name is counted
|
|
1453 Apart from that we could just change state on the quote. */
|
|
1454 if (state == inName)
|
|
1455 state = other;
|
|
1456 else if (state == inValue && nAtts < attsMax)
|
|
1457 atts[nAtts].normalized = 0;
|
|
1458 break;
|
|
1459 case BT_GT:
|
|
1460 case BT_SOL:
|
|
1461 if (state != inValue)
|
|
1462 return nAtts;
|
|
1463 break;
|
|
1464 default:
|
|
1465 break;
|
|
1466 }
|
|
1467 }
|
|
1468 /* not reached */
|
|
1469 }
|
|
1470
|
|
1471 static
|
|
1472 int PREFIX(charRefNumber)(const ENCODING *enc, const char *ptr)
|
|
1473 {
|
|
1474 int result = 0;
|
|
1475 /* skip &# */
|
|
1476 ptr += 2*MINBPC(enc);
|
|
1477 if (CHAR_MATCHES(enc, ptr, 'x')) {
|
|
1478 for (ptr += MINBPC(enc); !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) {
|
|
1479 int c = BYTE_TO_ASCII(enc, ptr);
|
|
1480 switch (c) {
|
|
1481 case '0': case '1': case '2': case '3': case '4':
|
|
1482 case '5': case '6': case '7': case '8': case '9':
|
|
1483 result <<= 4;
|
|
1484 result |= (c - '0');
|
|
1485 break;
|
|
1486 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
|
|
1487 result <<= 4;
|
|
1488 result += 10 + (c - 'A');
|
|
1489 break;
|
|
1490 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
|
|
1491 result <<= 4;
|
|
1492 result += 10 + (c - 'a');
|
|
1493 break;
|
|
1494 }
|
|
1495 if (result >= 0x110000)
|
|
1496 return -1;
|
|
1497 }
|
|
1498 }
|
|
1499 else {
|
|
1500 for (; !CHAR_MATCHES(enc, ptr, ';'); ptr += MINBPC(enc)) {
|
|
1501 int c = BYTE_TO_ASCII(enc, ptr);
|
|
1502 result *= 10;
|
|
1503 result += (c - '0');
|
|
1504 if (result >= 0x110000)
|
|
1505 return -1;
|
|
1506 }
|
|
1507 }
|
|
1508 return checkCharRefNumber(result);
|
|
1509 }
|
|
1510
|
|
1511 static
|
|
1512 int PREFIX(predefinedEntityName)(const ENCODING *enc, const char *ptr, const char *end)
|
|
1513 {
|
|
1514 switch ((end - ptr)/MINBPC(enc)) {
|
|
1515 case 2:
|
|
1516 if (CHAR_MATCHES(enc, ptr + MINBPC(enc), 't')) {
|
|
1517 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
1518 case 'l':
|
|
1519 return '<';
|
|
1520 case 'g':
|
|
1521 return '>';
|
|
1522 }
|
|
1523 }
|
|
1524 break;
|
|
1525 case 3:
|
|
1526 if (CHAR_MATCHES(enc, ptr, 'a')) {
|
|
1527 ptr += MINBPC(enc);
|
|
1528 if (CHAR_MATCHES(enc, ptr, 'm')) {
|
|
1529 ptr += MINBPC(enc);
|
|
1530 if (CHAR_MATCHES(enc, ptr, 'p'))
|
|
1531 return '&';
|
|
1532 }
|
|
1533 }
|
|
1534 break;
|
|
1535 case 4:
|
|
1536 switch (BYTE_TO_ASCII(enc, ptr)) {
|
|
1537 case 'q':
|
|
1538 ptr += MINBPC(enc);
|
|
1539 if (CHAR_MATCHES(enc, ptr, 'u')) {
|
|
1540 ptr += MINBPC(enc);
|
|
1541 if (CHAR_MATCHES(enc, ptr, 'o')) {
|
|
1542 ptr += MINBPC(enc);
|
|
1543 if (CHAR_MATCHES(enc, ptr, 't'))
|
|
1544 return '"';
|
|
1545 }
|
|
1546 }
|
|
1547 break;
|
|
1548 case 'a':
|
|
1549 ptr += MINBPC(enc);
|
|
1550 if (CHAR_MATCHES(enc, ptr, 'p')) {
|
|
1551 ptr += MINBPC(enc);
|
|
1552 if (CHAR_MATCHES(enc, ptr, 'o')) {
|
|
1553 ptr += MINBPC(enc);
|
|
1554 if (CHAR_MATCHES(enc, ptr, 's'))
|
|
1555 return '\'';
|
|
1556 }
|
|
1557 }
|
|
1558 break;
|
|
1559 }
|
|
1560 }
|
|
1561 return 0;
|
|
1562 }
|
|
1563
|
|
1564 static
|
|
1565 int PREFIX(sameName)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
|
1566 {
|
|
1567 for (;;) {
|
|
1568 switch (BYTE_TYPE(enc, ptr1)) {
|
|
1569 #define LEAD_CASE(n) \
|
|
1570 case BT_LEAD ## n: \
|
|
1571 if (*ptr1++ != *ptr2++) \
|
|
1572 return 0;
|
|
1573 LEAD_CASE(4) LEAD_CASE(3) LEAD_CASE(2)
|
|
1574 #undef LEAD_CASE
|
|
1575 /* fall through */
|
|
1576 if (*ptr1++ != *ptr2++)
|
|
1577 return 0;
|
|
1578 break;
|
|
1579 case BT_NONASCII:
|
|
1580 case BT_NMSTRT:
|
|
1581 #ifdef XML_NS
|
|
1582 case BT_COLON:
|
|
1583 #endif
|
|
1584 case BT_HEX:
|
|
1585 case BT_DIGIT:
|
|
1586 case BT_NAME:
|
|
1587 case BT_MINUS:
|
|
1588 if (*ptr2++ != *ptr1++)
|
|
1589 return 0;
|
|
1590 if (MINBPC(enc) > 1) {
|
|
1591 if (*ptr2++ != *ptr1++)
|
|
1592 return 0;
|
|
1593 if (MINBPC(enc) > 2) {
|
|
1594 if (*ptr2++ != *ptr1++)
|
|
1595 return 0;
|
|
1596 if (MINBPC(enc) > 3) {
|
|
1597 if (*ptr2++ != *ptr1++)
|
|
1598 return 0;
|
|
1599 }
|
|
1600 }
|
|
1601 }
|
|
1602 break;
|
|
1603 default:
|
|
1604 if (MINBPC(enc) == 1 && *ptr1 == *ptr2)
|
|
1605 return 1;
|
|
1606 switch (BYTE_TYPE(enc, ptr2)) {
|
|
1607 case BT_LEAD2:
|
|
1608 case BT_LEAD3:
|
|
1609 case BT_LEAD4:
|
|
1610 case BT_NONASCII:
|
|
1611 case BT_NMSTRT:
|
|
1612 #ifdef XML_NS
|
|
1613 case BT_COLON:
|
|
1614 #endif
|
|
1615 case BT_HEX:
|
|
1616 case BT_DIGIT:
|
|
1617 case BT_NAME:
|
|
1618 case BT_MINUS:
|
|
1619 return 0;
|
|
1620 default:
|
|
1621 return 1;
|
|
1622 }
|
|
1623 }
|
|
1624 }
|
|
1625 /* not reached */
|
|
1626 }
|
|
1627
|
|
1628 static
|
|
1629 int PREFIX(nameMatchesAscii)(const ENCODING *enc, const char *ptr1, const char *ptr2)
|
|
1630 {
|
|
1631 for (; *ptr2; ptr1 += MINBPC(enc), ptr2++) {
|
|
1632 if (!CHAR_MATCHES(enc, ptr1, *ptr2))
|
|
1633 return 0;
|
|
1634 }
|
|
1635 switch (BYTE_TYPE(enc, ptr1)) {
|
|
1636 case BT_LEAD2:
|
|
1637 case BT_LEAD3:
|
|
1638 case BT_LEAD4:
|
|
1639 case BT_NONASCII:
|
|
1640 case BT_NMSTRT:
|
|
1641 #ifdef XML_NS
|
|
1642 case BT_COLON:
|
|
1643 #endif
|
|
1644 case BT_HEX:
|
|
1645 case BT_DIGIT:
|
|
1646 case BT_NAME:
|
|
1647 case BT_MINUS:
|
|
1648 return 0;
|
|
1649 default:
|
|
1650 return 1;
|
|
1651 }
|
|
1652 }
|
|
1653
|
|
1654 static
|
|
1655 int PREFIX(nameLength)(const ENCODING *enc, const char *ptr)
|
|
1656 {
|
|
1657 const char *start = ptr;
|
|
1658 for (;;) {
|
|
1659 switch (BYTE_TYPE(enc, ptr)) {
|
|
1660 #define LEAD_CASE(n) \
|
|
1661 case BT_LEAD ## n: ptr += n; break;
|
|
1662 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1663 #undef LEAD_CASE
|
|
1664 case BT_NONASCII:
|
|
1665 case BT_NMSTRT:
|
|
1666 #ifdef XML_NS
|
|
1667 case BT_COLON:
|
|
1668 #endif
|
|
1669 case BT_HEX:
|
|
1670 case BT_DIGIT:
|
|
1671 case BT_NAME:
|
|
1672 case BT_MINUS:
|
|
1673 ptr += MINBPC(enc);
|
|
1674 break;
|
|
1675 default:
|
|
1676 return ptr - start;
|
|
1677 }
|
|
1678 }
|
|
1679 }
|
|
1680
|
|
1681 static
|
|
1682 const char *PREFIX(skipS)(const ENCODING *enc, const char *ptr)
|
|
1683 {
|
|
1684 for (;;) {
|
|
1685 switch (BYTE_TYPE(enc, ptr)) {
|
|
1686 case BT_LF:
|
|
1687 case BT_CR:
|
|
1688 case BT_S:
|
|
1689 ptr += MINBPC(enc);
|
|
1690 break;
|
|
1691 default:
|
|
1692 return ptr;
|
|
1693 }
|
|
1694 }
|
|
1695 }
|
|
1696
|
|
1697 static
|
|
1698 void PREFIX(updatePosition)(const ENCODING *enc,
|
|
1699 const char *ptr,
|
|
1700 const char *end,
|
|
1701 POSITION *pos)
|
|
1702 {
|
|
1703 while (ptr != end) {
|
|
1704 switch (BYTE_TYPE(enc, ptr)) {
|
|
1705 #define LEAD_CASE(n) \
|
|
1706 case BT_LEAD ## n: \
|
|
1707 ptr += n; \
|
|
1708 break;
|
|
1709 LEAD_CASE(2) LEAD_CASE(3) LEAD_CASE(4)
|
|
1710 #undef LEAD_CASE
|
|
1711 case BT_LF:
|
|
1712 pos->columnNumber = (unsigned)-1;
|
|
1713 pos->lineNumber++;
|
|
1714 ptr += MINBPC(enc);
|
|
1715 break;
|
|
1716 case BT_CR:
|
|
1717 pos->lineNumber++;
|
|
1718 ptr += MINBPC(enc);
|
|
1719 if (ptr != end && BYTE_TYPE(enc, ptr) == BT_LF)
|
|
1720 ptr += MINBPC(enc);
|
|
1721 pos->columnNumber = (unsigned)-1;
|
|
1722 break;
|
|
1723 default:
|
|
1724 ptr += MINBPC(enc);
|
|
1725 break;
|
|
1726 }
|
|
1727 pos->columnNumber++;
|
|
1728 }
|
|
1729 }
|
|
1730
|
|
1731 #undef DO_LEAD_CASE
|
|
1732 #undef MULTIBYTE_CASES
|
|
1733 #undef INVALID_CASES
|
|
1734 #undef CHECK_NAME_CASE
|
|
1735 #undef CHECK_NAME_CASES
|
|
1736 #undef CHECK_NMSTRT_CASE
|
|
1737 #undef CHECK_NMSTRT_CASES
|