comparison src/regex.c @ 18262:e5e99de79a88

Fix up whitespace.
author Richard M. Stallman <rms@gnu.org>
date Sun, 15 Jun 1997 19:05:59 +0000
parents a642c99198ec
children 5e9d099a4751
comparison
equal deleted inserted replaced
18261:2f0b00246056 18262:e5e99de79a88
9 the Free Software Foundation; either version 2, or (at your option) 9 the Free Software Foundation; either version 2, or (at your option)
10 any later version. 10 any later version.
11 11
12 This program is distributed in the hope that it will be useful, 12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details. 15 GNU General Public License for more details.
16 16
17 You should have received a copy of the GNU General Public License 17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software 18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, 19 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20 USA. */ 20 USA. */
21 21
22 /* AIX requires this to be the first thing in the file. */ 22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC) 23 #if defined (_AIX) && !defined (REGEX_MALLOC)
24 #pragma alloca 24 #pragma alloca
25 #endif 25 #endif
26 26
27 #undef _GNU_SOURCE 27 #undef _GNU_SOURCE
28 #define _GNU_SOURCE 28 #define _GNU_SOURCE
29 29
30 /* Converts the pointer to the char to BEG-based offset from the start. */ 30 /* Converts the pointer to the char to BEG-based offset from the start. */
31 #define PTR_TO_OFFSET(d) \ 31 #define PTR_TO_OFFSET(d) \
32 POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \ 32 POS_AS_IN_BUFFER (MATCHING_IN_FIRST_STRING \
33 ? (d) - string1 : (d) - (string2 - size1)) 33 ? (d) - string1 : (d) - (string2 - size1))
34 #define POS_AS_IN_BUFFER(p) ((p) + 1) 34 #define POS_AS_IN_BUFFER(p) ((p) + 1)
35 35
36 #ifdef HAVE_CONFIG_H 36 #ifdef HAVE_CONFIG_H
37 #include <config.h> 37 #include <config.h>
38 #endif 38 #endif
39 39
40 /* We need this for `regex.h', and perhaps for the Emacs include files. */ 40 /* We need this for `regex.h', and perhaps for the Emacs include files. */
41 #include <sys/types.h> 41 #include <sys/types.h>
42 42
43 /* This is for other GNU distributions with internationalized messages. */ 43 /* This is for other GNU distributions with internationalized messages. */
44 #if HAVE_LIBINTL_H || defined (_LIBC) 44 #if HAVE_LIBINTL_H || defined (_LIBC)
45 # include <libintl.h> 45 # include <libintl.h>
46 #else 46 #else
47 # define gettext(msgid) (msgid) 47 # define gettext(msgid) (msgid)
48 #endif 48 #endif
83 char *malloc (); 83 char *malloc ();
84 char *realloc (); 84 char *realloc ();
85 #endif 85 #endif
86 86
87 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. 87 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
88 If nothing else has been done, use the method below. */ 88 If nothing else has been done, use the method below. */
89 #ifdef INHIBIT_STRING_HEADER 89 #ifdef INHIBIT_STRING_HEADER
90 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY)) 90 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
91 #if !defined (bzero) && !defined (bcopy) 91 #if !defined (bzero) && !defined (bcopy)
92 #undef INHIBIT_STRING_HEADER 92 #undef INHIBIT_STRING_HEADER
93 #endif 93 #endif
192 /* Jim Meyering writes: 192 /* Jim Meyering writes:
193 193
194 "... Some ctype macros are valid only for character codes that 194 "... Some ctype macros are valid only for character codes that
195 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when 195 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
196 using /bin/cc or gcc but without giving an ansi option). So, all 196 using /bin/cc or gcc but without giving an ansi option). So, all
197 ctype uses should be through macros like ISPRINT... If 197 ctype uses should be through macros like ISPRINT... If
198 STDC_HEADERS is defined, then autoconf has verified that the ctype 198 STDC_HEADERS is defined, then autoconf has verified that the ctype
199 macros don't need to be guarded with references to isascii. ... 199 macros don't need to be guarded with references to isascii. ...
200 Defining isascii to 1 should let any compiler worth its salt 200 Defining isascii to 1 should let any compiler worth its salt
201 eliminate the && through constant folding." */ 201 eliminate the && through constant folding." */
202 202
203 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) 203 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
204 #define ISASCII(c) 1 204 #define ISASCII(c) 1
205 #else 205 #else
206 #define ISASCII(c) isascii(c) 206 #define ISASCII(c) isascii(c)
233 #endif 233 #endif
234 234
235 /* We remove any previous definition of `SIGN_EXTEND_CHAR', 235 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
236 since ours (we hope) works properly with all combinations of 236 since ours (we hope) works properly with all combinations of
237 machines, compilers, `char' and `unsigned char' argument types. 237 machines, compilers, `char' and `unsigned char' argument types.
238 (Per Bothner suggested the basic approach.) */ 238 (Per Bothner suggested the basic approach.) */
239 #undef SIGN_EXTEND_CHAR 239 #undef SIGN_EXTEND_CHAR
240 #if __STDC__ 240 #if __STDC__
241 #define SIGN_EXTEND_CHAR(c) ((signed char) (c)) 241 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
242 #else /* not __STDC__ */ 242 #else /* not __STDC__ */
243 /* As in Harbison and Steele. */ 243 /* As in Harbison and Steele. */
271 #else /* not __GNUC__ */ 271 #else /* not __GNUC__ */
272 #if HAVE_ALLOCA_H 272 #if HAVE_ALLOCA_H
273 #include <alloca.h> 273 #include <alloca.h>
274 #else /* not __GNUC__ or HAVE_ALLOCA_H */ 274 #else /* not __GNUC__ or HAVE_ALLOCA_H */
275 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */ 275 #if 0 /* It is a bad idea to declare alloca. We always cast the result. */
276 #ifndef _AIX /* Already did AIX, up at the top. */ 276 #ifndef _AIX /* Already did AIX, up at the top. */
277 char *alloca (); 277 char *alloca ();
278 #endif /* not _AIX */ 278 #endif /* not _AIX */
279 #endif 279 #endif
280 #endif /* not HAVE_ALLOCA_H */ 280 #endif /* not HAVE_ALLOCA_H */
281 #endif /* not __GNUC__ */ 281 #endif /* not __GNUC__ */
318 318
319 #define REGEX_ALLOCATE_STACK alloca 319 #define REGEX_ALLOCATE_STACK alloca
320 320
321 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \ 321 #define REGEX_REALLOCATE_STACK(source, osize, nsize) \
322 REGEX_REALLOCATE (source, osize, nsize) 322 REGEX_REALLOCATE (source, osize, nsize)
323 /* No need to explicitly free anything. */ 323 /* No need to explicitly free anything. */
324 #define REGEX_FREE_STACK(arg) 324 #define REGEX_FREE_STACK(arg)
325 325
326 #endif /* not REGEX_MALLOC */ 326 #endif /* not REGEX_MALLOC */
327 #endif /* not using relocating allocator */ 327 #endif /* not using relocating allocator */
328 328
329 329
330 /* True if `size1' is nonzero and PTR is pointing anywhere inside 330 /* True if `size1' is nonzero and PTR is pointing anywhere inside
331 `string1' or just past its end. This works if PTR is NULL, which is 331 `string1' or just past its end. This works if PTR is NULL, which is
332 a good thing. */ 332 a good thing. */
333 #define FIRST_STRING_P(ptr) \ 333 #define FIRST_STRING_P(ptr) \
334 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1) 334 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
335 335
336 /* (Re)Allocate N items of type T using malloc, or fail. */ 336 /* (Re)Allocate N items of type T using malloc, or fail. */
337 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) 337 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
338 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) 338 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
339 #define RETALLOC_IF(addr, n, t) \ 339 #define RETALLOC_IF(addr, n, t) \
340 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) 340 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
341 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) 341 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
342 342
343 #define BYTEWIDTH 8 /* In bits. */ 343 #define BYTEWIDTH 8 /* In bits. */
344 344
345 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) 345 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
346 346
347 #undef MAX 347 #undef MAX
348 #undef MIN 348 #undef MIN
354 #define true 1 354 #define true 1
355 355
356 static int re_match_2_internal (); 356 static int re_match_2_internal ();
357 357
358 /* These are the command codes that appear in compiled regular 358 /* These are the command codes that appear in compiled regular
359 expressions. Some opcodes are followed by argument bytes. A 359 expressions. Some opcodes are followed by argument bytes. A
360 command code can specify any interpretation whatsoever for its 360 command code can specify any interpretation whatsoever for its
361 arguments. Zero bytes may appear in the compiled regular expression. */ 361 arguments. Zero bytes may appear in the compiled regular expression. */
362 362
363 typedef enum 363 typedef enum
364 { 364 {
365 no_op = 0, 365 no_op = 0,
366 366
367 /* Succeed right away--no more backtracking. */ 367 /* Succeed right away--no more backtracking. */
368 succeed, 368 succeed,
369 369
370 /* Followed by one byte giving n, then by n literal bytes. */ 370 /* Followed by one byte giving n, then by n literal bytes. */
371 exactn, 371 exactn,
372 372
373 /* Matches any (more or less) character. */ 373 /* Matches any (more or less) character. */
374 anychar, 374 anychar,
375 375
376 /* Matches any one char belonging to specified set. First 376 /* Matches any one char belonging to specified set. First
377 following byte is number of bitmap bytes. Then come bytes 377 following byte is number of bitmap bytes. Then come bytes
378 for a bitmap saying which chars are in. Bits in each byte 378 for a bitmap saying which chars are in. Bits in each byte
379 are ordered low-bit-first. A character is in the set if its 379 are ordered low-bit-first. A character is in the set if its
380 bit is 1. A character too large to have a bit in the map is 380 bit is 1. A character too large to have a bit in the map is
381 automatically not in the set. */ 381 automatically not in the set. */
382 charset, 382 charset,
383 383
384 /* Same parameters as charset, but match any character that is 384 /* Same parameters as charset, but match any character that is
385 not one of those specified. */ 385 not one of those specified. */
386 charset_not, 386 charset_not,
387 387
388 /* Start remembering the text that is matched, for storing in a 388 /* Start remembering the text that is matched, for storing in a
389 register. Followed by one byte with the register number, in 389 register. Followed by one byte with the register number, in
390 the range 0 to one less than the pattern buffer's re_nsub 390 the range 0 to one less than the pattern buffer's re_nsub
391 field. Then followed by one byte with the number of groups 391 field. Then followed by one byte with the number of groups
392 inner to this one. (This last has to be part of the 392 inner to this one. (This last has to be part of the
393 start_memory only because we need it in the on_failure_jump 393 start_memory only because we need it in the on_failure_jump
394 of re_match_2.) */ 394 of re_match_2.) */
395 start_memory, 395 start_memory,
396 396
397 /* Stop remembering the text that is matched and store it in a 397 /* Stop remembering the text that is matched and store it in a
398 memory register. Followed by one byte with the register 398 memory register. Followed by one byte with the register
399 number, in the range 0 to one less than `re_nsub' in the 399 number, in the range 0 to one less than `re_nsub' in the
400 pattern buffer, and one byte with the number of inner groups, 400 pattern buffer, and one byte with the number of inner groups,
401 just like `start_memory'. (We need the number of inner 401 just like `start_memory'. (We need the number of inner
402 groups here because we don't have any easy way of finding the 402 groups here because we don't have any easy way of finding the
403 corresponding start_memory when we're at a stop_memory.) */ 403 corresponding start_memory when we're at a stop_memory.) */
404 stop_memory, 404 stop_memory,
405 405
406 /* Match a duplicate of something remembered. Followed by one 406 /* Match a duplicate of something remembered. Followed by one
407 byte containing the register number. */ 407 byte containing the register number. */
408 duplicate, 408 duplicate,
409 409
410 /* Fail unless at beginning of line. */ 410 /* Fail unless at beginning of line. */
411 begline, 411 begline,
412 412
413 /* Fail unless at end of line. */ 413 /* Fail unless at end of line. */
414 endline, 414 endline,
415 415
416 /* Succeeds if at beginning of buffer (if emacs) or at beginning 416 /* Succeeds if at beginning of buffer (if emacs) or at beginning
417 of string to be matched (if not). */ 417 of string to be matched (if not). */
418 begbuf, 418 begbuf,
419 419
420 /* Analogously, for end of buffer/string. */ 420 /* Analogously, for end of buffer/string. */
421 endbuf, 421 endbuf,
422 422
423 /* Followed by two byte relative address to which to jump. */ 423 /* Followed by two byte relative address to which to jump. */
424 jump, 424 jump,
425 425
426 /* Same as jump, but marks the end of an alternative. */ 426 /* Same as jump, but marks the end of an alternative. */
427 jump_past_alt, 427 jump_past_alt,
428 428
429 /* Followed by two-byte relative address of place to resume at 429 /* Followed by two-byte relative address of place to resume at
430 in case of failure. */ 430 in case of failure. */
431 on_failure_jump, 431 on_failure_jump,
432 432
433 /* Like on_failure_jump, but pushes a placeholder instead of the 433 /* Like on_failure_jump, but pushes a placeholder instead of the
434 current string position when executed. */ 434 current string position when executed. */
435 on_failure_keep_string_jump, 435 on_failure_keep_string_jump,
436 436
437 /* Throw away latest failure point and then jump to following 437 /* Throw away latest failure point and then jump to following
438 two-byte relative address. */ 438 two-byte relative address. */
439 pop_failure_jump, 439 pop_failure_jump,
440 440
441 /* Change to pop_failure_jump if we know we won't have to backtrack to 441 /* Change to pop_failure_jump if we know we won't have to backtrack to
442 match; otherwise change to jump. This is used to jump 442 match; otherwise change to jump. This is used to jump
443 back to the beginning of a repeat. If what follows this jump 443 back to the beginning of a repeat. If what follows this jump
444 clearly won't match what the repeat does, such that we can be 444 clearly won't match what the repeat does, such that we can be
445 sure that there is no use backtracking out of repetitions 445 sure that there is no use backtracking out of repetitions
446 already matched, then we change it to a pop_failure_jump. 446 already matched, then we change it to a pop_failure_jump.
447 Followed by two-byte address. */ 447 Followed by two-byte address. */
448 maybe_pop_jump, 448 maybe_pop_jump,
449 449
450 /* Jump to following two-byte address, and push a dummy failure 450 /* Jump to following two-byte address, and push a dummy failure
451 point. This failure point will be thrown away if an attempt 451 point. This failure point will be thrown away if an attempt
452 is made to use it for a failure. A `+' construct makes this 452 is made to use it for a failure. A `+' construct makes this
453 before the first repeat. Also used as an intermediary kind 453 before the first repeat. Also used as an intermediary kind
454 of jump when compiling an alternative. */ 454 of jump when compiling an alternative. */
455 dummy_failure_jump, 455 dummy_failure_jump,
456 456
457 /* Push a dummy failure point and continue. Used at the end of 457 /* Push a dummy failure point and continue. Used at the end of
458 alternatives. */ 458 alternatives. */
459 push_dummy_failure, 459 push_dummy_failure,
460 460
461 /* Followed by two-byte relative address and two-byte number n. 461 /* Followed by two-byte relative address and two-byte number n.
462 After matching N times, jump to the address upon failure. */ 462 After matching N times, jump to the address upon failure. */
463 succeed_n, 463 succeed_n,
464 464
465 /* Followed by two-byte relative address, and two-byte number n. 465 /* Followed by two-byte relative address, and two-byte number n.
466 Jump to the address N times, then fail. */ 466 Jump to the address N times, then fail. */
467 jump_n, 467 jump_n,
468 468
469 /* Set the following two-byte relative address to the 469 /* Set the following two-byte relative address to the
470 subsequent two-byte number. The address *includes* the two 470 subsequent two-byte number. The address *includes* the two
471 bytes of number. */ 471 bytes of number. */
472 set_number_at, 472 set_number_at,
473 473
474 wordchar, /* Matches any word-constituent character. */ 474 wordchar, /* Matches any word-constituent character. */
475 notwordchar, /* Matches any char that is not a word-constituent. */ 475 notwordchar, /* Matches any char that is not a word-constituent. */
476 476
477 wordbeg, /* Succeeds if at word beginning. */ 477 wordbeg, /* Succeeds if at word beginning. */
478 wordend, /* Succeeds if at word end. */ 478 wordend, /* Succeeds if at word end. */
479 479
480 wordbound, /* Succeeds if at a word boundary. */ 480 wordbound, /* Succeeds if at a word boundary. */
481 notwordbound /* Succeeds if not at a word boundary. */ 481 notwordbound /* Succeeds if not at a word boundary. */
482 482
483 #ifdef emacs 483 #ifdef emacs
484 ,before_dot, /* Succeeds if before point. */ 484 ,before_dot, /* Succeeds if before point. */
485 at_dot, /* Succeeds if at point. */ 485 at_dot, /* Succeeds if at point. */
486 after_dot, /* Succeeds if after point. */ 486 after_dot, /* Succeeds if after point. */
487 487
488 /* Matches any character whose syntax is specified. Followed by 488 /* Matches any character whose syntax is specified. Followed by
489 a byte which contains a syntax code, e.g., Sword. */ 489 a byte which contains a syntax code, e.g., Sword. */
490 syntaxspec, 490 syntaxspec,
491 491
492 /* Matches any character whose syntax is not that specified. */ 492 /* Matches any character whose syntax is not that specified. */
493 notsyntaxspec, 493 notsyntaxspec,
494 494
495 /* Matches any character whose category-set contains the specified 495 /* Matches any character whose category-set contains the specified
496 category. The operator is followed by a byte which contains a 496 category. The operator is followed by a byte which contains a
497 category code (mnemonic ASCII character). */ 497 category code (mnemonic ASCII character). */
498 categoryspec, 498 categoryspec,
499 499
500 /* Matches any character whose category-set does not contain the 500 /* Matches any character whose category-set does not contain the
501 specified category. The operator is followed by a byte which 501 specified category. The operator is followed by a byte which
502 contains the category code (mnemonic ASCII character). */ 502 contains the category code (mnemonic ASCII character). */
542 int temp = SIGN_EXTEND_CHAR (*(source + 1)); 542 int temp = SIGN_EXTEND_CHAR (*(source + 1));
543 *dest = *source & 0377; 543 *dest = *source & 0377;
544 *dest += temp << 8; 544 *dest += temp << 8;
545 } 545 }
546 546
547 #ifndef EXTRACT_MACROS /* To debug the macros. */ 547 #ifndef EXTRACT_MACROS /* To debug the macros. */
548 #undef EXTRACT_NUMBER 548 #undef EXTRACT_NUMBER
549 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) 549 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
550 #endif /* not EXTRACT_MACROS */ 550 #endif /* not EXTRACT_MACROS */
551 551
552 #endif /* DEBUG */ 552 #endif /* DEBUG */
555 SOURCE must be an lvalue. */ 555 SOURCE must be an lvalue. */
556 556
557 #define EXTRACT_NUMBER_AND_INCR(destination, source) \ 557 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
558 do { \ 558 do { \
559 EXTRACT_NUMBER (destination, source); \ 559 EXTRACT_NUMBER (destination, source); \
560 (source) += 2; \ 560 (source) += 2; \
561 } while (0) 561 } while (0)
562 562
563 #ifdef DEBUG 563 #ifdef DEBUG
564 static void 564 static void
565 extract_number_and_incr (destination, source) 565 extract_number_and_incr (destination, source)
578 578
579 #endif /* DEBUG */ 579 #endif /* DEBUG */
580 580
581 /* Store a multibyte character in three contiguous bytes starting 581 /* Store a multibyte character in three contiguous bytes starting
582 DESTINATION, and increment DESTINATION to the byte after where the 582 DESTINATION, and increment DESTINATION to the byte after where the
583 character is stored. Therefore, DESTINATION must be an lvalue. */ 583 character is stored. Therefore, DESTINATION must be an lvalue. */
584 584
585 #define STORE_CHARACTER_AND_INCR(destination, character) \ 585 #define STORE_CHARACTER_AND_INCR(destination, character) \
586 do { \ 586 do { \
587 (destination)[0] = (character) & 0377; \ 587 (destination)[0] = (character) & 0377; \
588 (destination)[1] = ((character) >> 8) & 0377; \ 588 (destination)[1] = ((character) >> 8) & 0377; \
589 (destination)[2] = (character) >> 16; \ 589 (destination)[2] = (character) >> 16; \
590 (destination) += 3; \ 590 (destination) += 3; \
591 } while (0) 591 } while (0)
592 592
593 /* Put into DESTINATION a character stored in three contiguous bytes 593 /* Put into DESTINATION a character stored in three contiguous bytes
594 starting at SOURCE. */ 594 starting at SOURCE. */
595 595
596 #define EXTRACT_CHARACTER(destination, source) \ 596 #define EXTRACT_CHARACTER(destination, source) \
597 do { \ 597 do { \
598 (destination) = ((source)[0] \ 598 (destination) = ((source)[0] \
599 | ((source)[1] << 8) \ 599 | ((source)[1] << 8) \
606 /* Size of bitmap of charset P in bytes. P is a start of charset, 606 /* Size of bitmap of charset P in bytes. P is a start of charset,
607 i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not. */ 607 i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not. */
608 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F) 608 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
609 609
610 /* Nonzero if charset P has range table. */ 610 /* Nonzero if charset P has range table. */
611 #define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80) 611 #define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80)
612 612
613 /* Return the address of the range table of charset P. This is not the start 613 /* Return the address of the range table of charset P. This is not the start
614 of the table itself, but the location just before it, where the number of ranges is 614 of the table itself, but the location just before it, where the number of ranges is
615 stored. `2 +' means to skip re_opcode_t and size of bitmap. */ 615 stored. `2 +' means to skip re_opcode_t and size of bitmap. */
616 #define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)]) 616 #define CHARSET_RANGE_TABLE(p) (&(p)[2 + CHARSET_BITMAP_SIZE (p)])
617 617
618 /* Test if C is listed in the bitmap of charset P. */ 618 /* Test if C is listed in the bitmap of charset P. */
619 #define CHARSET_LOOKUP_BITMAP(p, c) \ 619 #define CHARSET_LOOKUP_BITMAP(p, c) \
620 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \ 620 ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH \
621 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH))) 621 && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
622 622
623 /* Return the address of end of RANGE_TABLE. COUNT is number of 623 /* Return the address of end of RANGE_TABLE. COUNT is number of
624 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2' 624 ranges (which is a pair of (start, end)) in the RANGE_TABLE. `* 2'
625 is start of range and end of range. `* 3' is size of each start 625 is start of range and end of range. `* 3' is size of each start
626 and end. */ 626 and end. */
627 #define CHARSET_RANGE_TABLE_END(range_table, count) \ 627 #define CHARSET_RANGE_TABLE_END(range_table, count) \
628 ((range_table) + (count) * 2 * 3) 628 ((range_table) + (count) * 2 * 3)
629 629
630 /* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in. 630 /* Test if C is in RANGE_TABLE. A flag NOT is negated if C is in.
631 COUNT is number of ranges in RANGE_TABLE. */ 631 COUNT is number of ranges in RANGE_TABLE. */
632 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \ 632 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count) \
633 do \ 633 do \
634 { \ 634 { \
635 int range_start, range_end; \ 635 int range_start, range_end; \
667 667
668 /* If DEBUG is defined, Regex prints many voluminous messages about what 668 /* If DEBUG is defined, Regex prints many voluminous messages about what
669 it is doing (if the variable `debug' is nonzero). If linked with the 669 it is doing (if the variable `debug' is nonzero). If linked with the
670 main program in `iregex.c', you can enter patterns and strings 670 main program in `iregex.c', you can enter patterns and strings
671 interactively. And if linked with the main program in `main.c' and 671 interactively. And if linked with the main program in `main.c' and
672 the other test files, you can run the already-written tests. */ 672 the other test files, you can run the already-written tests. */
673 673
674 #ifdef DEBUG 674 #ifdef DEBUG
675 675
676 /* We use standard I/O for debugging. */ 676 /* We use standard I/O for debugging. */
677 #include <stdio.h> 677 #include <stdio.h>
684 #define DEBUG_STATEMENT(e) e 684 #define DEBUG_STATEMENT(e) e
685 #define DEBUG_PRINT1(x) if (debug) printf (x) 685 #define DEBUG_PRINT1(x) if (debug) printf (x)
686 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) 686 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
687 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) 687 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
688 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) 688 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
689 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ 689 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
690 if (debug) print_partial_compiled_pattern (s, e) 690 if (debug) print_partial_compiled_pattern (s, e)
691 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ 691 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
692 if (debug) print_double_string (w, s1, sz1, s2, sz2) 692 if (debug) print_double_string (w, s1, sz1, s2, sz2)
693 693
694 694
704 while (i < (1 << BYTEWIDTH)) 704 while (i < (1 << BYTEWIDTH))
705 { 705 {
706 if (fastmap[i++]) 706 if (fastmap[i++])
707 { 707 {
708 was_a_range = 0; 708 was_a_range = 0;
709 putchar (i - 1); 709 putchar (i - 1);
710 while (i < (1 << BYTEWIDTH) && fastmap[i]) 710 while (i < (1 << BYTEWIDTH) && fastmap[i])
711 { 711 {
712 was_a_range = 1; 712 was_a_range = 1;
713 i++; 713 i++;
714 } 714 }
715 if (was_a_range) 715 if (was_a_range)
716 { 716 {
717 printf ("-"); 717 printf ("-");
718 putchar (i - 1); 718 putchar (i - 1);
719 } 719 }
720 } 720 }
721 } 721 }
722 putchar ('\n'); 722 putchar ('\n');
723 } 723 }
724 724
725 725
746 { 746 {
747 printf ("%d:\t", p - start); 747 printf ("%d:\t", p - start);
748 748
749 switch ((re_opcode_t) *p++) 749 switch ((re_opcode_t) *p++)
750 { 750 {
751 case no_op: 751 case no_op:
752 printf ("/no_op"); 752 printf ("/no_op");
753 break; 753 break;
754 754
755 case exactn: 755 case exactn:
756 mcnt = *p++; 756 mcnt = *p++;
757 printf ("/exactn/%d", mcnt); 757 printf ("/exactn/%d", mcnt);
758 do 758 do
759 { 759 {
760 putchar ('/'); 760 putchar ('/');
761 putchar (*p++); 761 putchar (*p++);
762 } 762 }
763 while (--mcnt); 763 while (--mcnt);
764 break; 764 break;
765 765
766 case start_memory: 766 case start_memory:
767 mcnt = *p++; 767 mcnt = *p++;
768 printf ("/start_memory/%d/%d", mcnt, *p++); 768 printf ("/start_memory/%d/%d", mcnt, *p++);
769 break; 769 break;
770 770
771 case stop_memory: 771 case stop_memory:
772 mcnt = *p++; 772 mcnt = *p++;
773 printf ("/stop_memory/%d/%d", mcnt, *p++); 773 printf ("/stop_memory/%d/%d", mcnt, *p++);
774 break; 774 break;
775 775
776 case duplicate: 776 case duplicate:
777 printf ("/duplicate/%d", *p++); 777 printf ("/duplicate/%d", *p++);
778 break; 778 break;
779 779
780 case anychar: 780 case anychar:
781 printf ("/anychar"); 781 printf ("/anychar");
782 break; 782 break;
783 783
784 case charset: 784 case charset:
785 case charset_not: 785 case charset_not:
786 { 786 {
787 register int c, last = -100; 787 register int c, last = -100;
788 register int in_range = 0; 788 register int in_range = 0;
789 789
790 printf ("/charset [%s", 790 printf ("/charset [%s",
791 (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); 791 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
792 792
793 assert (p + *p < pend); 793 assert (p + *p < pend);
794 794
795 for (c = 0; c < 256; c++) 795 for (c = 0; c < 256; c++)
796 if (c / 8 < *p 796 if (c / 8 < *p
797 && (p[1 + (c/8)] & (1 << (c % 8)))) 797 && (p[1 + (c/8)] & (1 << (c % 8))))
798 { 798 {
799 /* Are we starting a range? */ 799 /* Are we starting a range? */
800 if (last + 1 == c && ! in_range) 800 if (last + 1 == c && ! in_range)
802 putchar ('-'); 802 putchar ('-');
803 in_range = 1; 803 in_range = 1;
804 } 804 }
805 /* Have we broken a range? */ 805 /* Have we broken a range? */
806 else if (last + 1 != c && in_range) 806 else if (last + 1 != c && in_range)
807 { 807 {
808 putchar (last); 808 putchar (last);
809 in_range = 0; 809 in_range = 0;
810 } 810 }
811 811
812 if (! in_range) 812 if (! in_range)
813 putchar (c); 813 putchar (c);
814 814
815 last = c; 815 last = c;
816 } 816 }
817 817
818 if (in_range) 818 if (in_range)
819 putchar (last); 819 putchar (last);
820 820
821 putchar (']'); 821 putchar (']');
824 } 824 }
825 break; 825 break;
826 826
827 case begline: 827 case begline:
828 printf ("/begline"); 828 printf ("/begline");
829 break; 829 break;
830 830
831 case endline: 831 case endline:
832 printf ("/endline"); 832 printf ("/endline");
833 break; 833 break;
834 834
835 case on_failure_jump: 835 case on_failure_jump:
836 extract_number_and_incr (&mcnt, &p); 836 extract_number_and_incr (&mcnt, &p);
837 printf ("/on_failure_jump to %d", p + mcnt - start); 837 printf ("/on_failure_jump to %d", p + mcnt - start);
838 break; 838 break;
839 839
840 case on_failure_keep_string_jump: 840 case on_failure_keep_string_jump:
841 extract_number_and_incr (&mcnt, &p); 841 extract_number_and_incr (&mcnt, &p);
842 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start); 842 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
843 break; 843 break;
844 844
845 case dummy_failure_jump: 845 case dummy_failure_jump:
846 extract_number_and_incr (&mcnt, &p); 846 extract_number_and_incr (&mcnt, &p);
847 printf ("/dummy_failure_jump to %d", p + mcnt - start); 847 printf ("/dummy_failure_jump to %d", p + mcnt - start);
848 break; 848 break;
849 849
850 case push_dummy_failure: 850 case push_dummy_failure:
851 printf ("/push_dummy_failure"); 851 printf ("/push_dummy_failure");
852 break; 852 break;
853 853
854 case maybe_pop_jump: 854 case maybe_pop_jump:
855 extract_number_and_incr (&mcnt, &p);
856 printf ("/maybe_pop_jump to %d", p + mcnt - start);
857 break;
858
859 case pop_failure_jump:
860 extract_number_and_incr (&mcnt, &p); 855 extract_number_and_incr (&mcnt, &p);
861 printf ("/pop_failure_jump to %d", p + mcnt - start); 856 printf ("/maybe_pop_jump to %d", p + mcnt - start);
862 break; 857 break;
863 858
864 case jump_past_alt: 859 case pop_failure_jump:
865 extract_number_and_incr (&mcnt, &p); 860 extract_number_and_incr (&mcnt, &p);
866 printf ("/jump_past_alt to %d", p + mcnt - start); 861 printf ("/pop_failure_jump to %d", p + mcnt - start);
867 break; 862 break;
868 863
869 case jump: 864 case jump_past_alt:
870 extract_number_and_incr (&mcnt, &p); 865 extract_number_and_incr (&mcnt, &p);
871 printf ("/jump to %d", p + mcnt - start); 866 printf ("/jump_past_alt to %d", p + mcnt - start);
872 break; 867 break;
873 868
874 case succeed_n: 869 case jump:
875 extract_number_and_incr (&mcnt, &p); 870 extract_number_and_incr (&mcnt, &p);
876 extract_number_and_incr (&mcnt2, &p); 871 printf ("/jump to %d", p + mcnt - start);
872 break;
873
874 case succeed_n:
875 extract_number_and_incr (&mcnt, &p);
876 extract_number_and_incr (&mcnt2, &p);
877 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); 877 printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
878 break; 878 break;
879 879
880 case jump_n: 880 case jump_n:
881 extract_number_and_incr (&mcnt, &p); 881 extract_number_and_incr (&mcnt, &p);
882 extract_number_and_incr (&mcnt2, &p); 882 extract_number_and_incr (&mcnt2, &p);
883 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); 883 printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
884 break; 884 break;
885 885
886 case set_number_at: 886 case set_number_at:
887 extract_number_and_incr (&mcnt, &p); 887 extract_number_and_incr (&mcnt, &p);
888 extract_number_and_incr (&mcnt2, &p); 888 extract_number_and_incr (&mcnt2, &p);
889 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); 889 printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
890 break; 890 break;
891 891
892 case wordbound: 892 case wordbound:
893 printf ("/wordbound"); 893 printf ("/wordbound");
894 break; 894 break;
895 895
896 case notwordbound: 896 case notwordbound:
897 printf ("/notwordbound"); 897 printf ("/notwordbound");
898 break; 898 break;
899 899
900 case wordbeg: 900 case wordbeg:
901 printf ("/wordbeg"); 901 printf ("/wordbeg");
902 break; 902 break;
903 903
905 printf ("/wordend"); 905 printf ("/wordend");
906 906
907 #ifdef emacs 907 #ifdef emacs
908 case before_dot: 908 case before_dot:
909 printf ("/before_dot"); 909 printf ("/before_dot");
910 break; 910 break;
911 911
912 case at_dot: 912 case at_dot:
913 printf ("/at_dot"); 913 printf ("/at_dot");
914 break; 914 break;
915 915
916 case after_dot: 916 case after_dot:
917 printf ("/after_dot"); 917 printf ("/after_dot");
918 break; 918 break;
919 919
920 case syntaxspec: 920 case syntaxspec:
921 printf ("/syntaxspec"); 921 printf ("/syntaxspec");
922 mcnt = *p++; 922 mcnt = *p++;
923 printf ("/%d", mcnt); 923 printf ("/%d", mcnt);
924 break; 924 break;
925 925
926 case notsyntaxspec: 926 case notsyntaxspec:
927 printf ("/notsyntaxspec"); 927 printf ("/notsyntaxspec");
928 mcnt = *p++; 928 mcnt = *p++;
929 printf ("/%d", mcnt); 929 printf ("/%d", mcnt);
930 break; 930 break;
931 #endif /* emacs */ 931 #endif /* emacs */
932 932
933 case wordchar: 933 case wordchar:
934 printf ("/wordchar"); 934 printf ("/wordchar");
935 break; 935 break;
936 936
937 case notwordchar: 937 case notwordchar:
938 printf ("/notwordchar"); 938 printf ("/notwordchar");
939 break; 939 break;
940 940
941 case begbuf: 941 case begbuf:
942 printf ("/begbuf"); 942 printf ("/begbuf");
943 break; 943 break;
944 944
945 case endbuf: 945 case endbuf:
946 printf ("/endbuf"); 946 printf ("/endbuf");
947 break; 947 break;
948 948
949 default: 949 default:
950 printf ("?%d", *(p-1)); 950 printf ("?%d", *(p-1));
951 } 951 }
952 952
953 putchar ('\n'); 953 putchar ('\n');
954 } 954 }
955 955
997 if (where == NULL) 997 if (where == NULL)
998 printf ("(null)"); 998 printf ("(null)");
999 else 999 else
1000 { 1000 {
1001 if (FIRST_STRING_P (where)) 1001 if (FIRST_STRING_P (where))
1002 { 1002 {
1003 for (this_char = where - string1; this_char < size1; this_char++) 1003 for (this_char = where - string1; this_char < size1; this_char++)
1004 putchar (string1[this_char]); 1004 putchar (string1[this_char]);
1005 1005
1006 where = string2; 1006 where = string2;
1007 } 1007 }
1008 1008
1009 for (this_char = where - string2; this_char < size2; this_char++) 1009 for (this_char = where - string2; this_char < size2; this_char++)
1010 putchar (string2[this_char]); 1010 putchar (string2[this_char]);
1011 } 1011 }
1012 } 1012 }
1013 1013
1014 #else /* not DEBUG */ 1014 #else /* not DEBUG */
1015 1015
1037 /* Specify the precise syntax of regexps for compilation. This provides 1037 /* Specify the precise syntax of regexps for compilation. This provides
1038 for compatibility for various utilities which historically have 1038 for compatibility for various utilities which historically have
1039 different, incompatible syntaxes. 1039 different, incompatible syntaxes.
1040 1040
1041 The argument SYNTAX is a bit mask comprised of the various bits 1041 The argument SYNTAX is a bit mask comprised of the various bits
1042 defined in regex.h. We return the old syntax. */ 1042 defined in regex.h. We return the old syntax. */
1043 1043
1044 reg_syntax_t 1044 reg_syntax_t
1045 re_set_syntax (syntax) 1045 re_set_syntax (syntax)
1046 reg_syntax_t syntax; 1046 reg_syntax_t syntax;
1047 { 1047 {
1050 re_syntax_options = syntax; 1050 re_syntax_options = syntax;
1051 return ret; 1051 return ret;
1052 } 1052 }
1053 1053
1054 /* This table gives an error message for each of the error codes listed 1054 /* This table gives an error message for each of the error codes listed
1055 in regex.h. Obviously the order here has to be same as there. 1055 in regex.h. Obviously the order here has to be same as there.
1056 POSIX doesn't require that we do anything for REG_NOERROR, 1056 POSIX doesn't require that we do anything for REG_NOERROR,
1057 but why not be nice? */ 1057 but why not be nice? */
1058 1058
1059 static const char *re_error_msgid[] = 1059 static const char *re_error_msgid[] =
1060 { 1060 {
1061 gettext_noop ("Success"), /* REG_NOERROR */ 1061 gettext_noop ("Success"), /* REG_NOERROR */
1062 gettext_noop ("No match"), /* REG_NOMATCH */ 1062 gettext_noop ("No match"), /* REG_NOMATCH */
1075 gettext_noop ("Premature end of regular expression"), /* REG_EEND */ 1075 gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1076 gettext_noop ("Regular expression too big"), /* REG_ESIZE */ 1076 gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1077 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */ 1077 gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1078 }; 1078 };
1079 1079
1080 /* Avoiding alloca during matching, to placate r_alloc. */ 1080 /* Avoiding alloca during matching, to placate r_alloc. */
1081 1081
1082 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the 1082 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1083 searching and matching functions should not call alloca. On some 1083 searching and matching functions should not call alloca. On some
1084 systems, alloca is implemented in terms of malloc, and if we're 1084 systems, alloca is implemented in terms of malloc, and if we're
1085 using the relocating allocator routines, then malloc could cause a 1085 using the relocating allocator routines, then malloc could cause a
1107 1107
1108 /* The match routines may not allocate if (1) they would do it with malloc 1108 /* The match routines may not allocate if (1) they would do it with malloc
1109 and (2) it's not safe for them to use malloc. 1109 and (2) it's not safe for them to use malloc.
1110 Note that if REL_ALLOC is defined, matching would not use malloc for the 1110 Note that if REL_ALLOC is defined, matching would not use malloc for the
1111 failure stack, but we would still use it for the register vectors; 1111 failure stack, but we would still use it for the register vectors;
1112 so REL_ALLOC should not affect this. */ 1112 so REL_ALLOC should not affect this. */
1113 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs) 1113 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1114 #undef MATCH_MAY_ALLOCATE 1114 #undef MATCH_MAY_ALLOCATE
1115 #endif 1115 #endif
1116 1116
1117 1117
1128 #endif 1128 #endif
1129 1129
1130 /* Roughly the maximum number of failure points on the stack. Would be 1130 /* Roughly the maximum number of failure points on the stack. Would be
1131 exactly that if always used MAX_FAILURE_ITEMS items each time we failed. 1131 exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1132 This is a variable only so users of regex can assign to it; we never 1132 This is a variable only so users of regex can assign to it; we never
1133 change it ourselves. */ 1133 change it ourselves. */
1134 #if defined (MATCH_MAY_ALLOCATE) 1134 #if defined (MATCH_MAY_ALLOCATE)
1135 /* 4400 was enough to cause a crash on Alpha OSF/1, 1135 /* 4400 was enough to cause a crash on Alpha OSF/1,
1136 whose default stack limit is 2mb. */ 1136 whose default stack limit is 2mb. */
1137 int re_max_failures = 20000; 1137 int re_max_failures = 20000;
1138 #else 1138 #else
1189 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. 1189 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1190 1190
1191 Return 1 if succeeds, and 0 if either ran out of memory 1191 Return 1 if succeeds, and 0 if either ran out of memory
1192 allocating space for it or it was already too large. 1192 allocating space for it or it was already too large.
1193 1193
1194 REGEX_REALLOCATE_STACK requires `destination' be declared. */ 1194 REGEX_REALLOCATE_STACK requires `destination' be declared. */
1195 1195
1196 #define DOUBLE_FAIL_STACK(fail_stack) \ 1196 #define DOUBLE_FAIL_STACK(fail_stack) \
1197 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ 1197 ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \
1198 ? 0 \ 1198 ? 0 \
1199 : ((fail_stack).stack = (fail_stack_elt_t *) \ 1199 : ((fail_stack).stack = (fail_stack_elt_t *) \
1200 REGEX_REALLOCATE_STACK ((fail_stack).stack, \ 1200 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
1201 (fail_stack).size * sizeof (fail_stack_elt_t), \ 1201 (fail_stack).size * sizeof (fail_stack_elt_t), \
1202 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ 1202 ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
1203 \ 1203 \
1204 (fail_stack).stack == NULL \ 1204 (fail_stack).stack == NULL \
1205 ? 0 \ 1205 ? 0 \
1206 : ((fail_stack).size <<= 1, \ 1206 : ((fail_stack).size <<= 1, \
1207 1))) 1207 1)))
1208 1208
1209 1209
1210 /* Push pointer POINTER on FAIL_STACK. 1210 /* Push pointer POINTER on FAIL_STACK.
1211 Return 1 if was able to do so and 0 if ran out of memory allocating 1211 Return 1 if was able to do so and 0 if ran out of memory allocating
1212 space to do so. */ 1212 space to do so. */
1217 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ 1217 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
1218 1)) 1218 1))
1219 1219
1220 /* Push a pointer value onto the failure stack. 1220 /* Push a pointer value onto the failure stack.
1221 Assumes the variable `fail_stack'. Probably should only 1221 Assumes the variable `fail_stack'. Probably should only
1222 be called from within `PUSH_FAILURE_POINT'. */ 1222 be called from within `PUSH_FAILURE_POINT'. */
1223 #define PUSH_FAILURE_POINTER(item) \ 1223 #define PUSH_FAILURE_POINTER(item) \
1224 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) 1224 fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1225 1225
1226 /* This pushes an integer-valued item onto the failure stack. 1226 /* This pushes an integer-valued item onto the failure stack.
1227 Assumes the variable `fail_stack'. Probably should only 1227 Assumes the variable `fail_stack'. Probably should only
1228 be called from within `PUSH_FAILURE_POINT'. */ 1228 be called from within `PUSH_FAILURE_POINT'. */
1229 #define PUSH_FAILURE_INT(item) \ 1229 #define PUSH_FAILURE_INT(item) \
1230 fail_stack.stack[fail_stack.avail++].integer = (item) 1230 fail_stack.stack[fail_stack.avail++].integer = (item)
1231 1231
1232 /* Push a fail_stack_elt_t value onto the failure stack. 1232 /* Push a fail_stack_elt_t value onto the failure stack.
1233 Assumes the variable `fail_stack'. Probably should only 1233 Assumes the variable `fail_stack'. Probably should only
1234 be called from within `PUSH_FAILURE_POINT'. */ 1234 be called from within `PUSH_FAILURE_POINT'. */
1235 #define PUSH_FAILURE_ELT(item) \ 1235 #define PUSH_FAILURE_ELT(item) \
1236 fail_stack.stack[fail_stack.avail++] = (item) 1236 fail_stack.stack[fail_stack.avail++] = (item)
1237 1237
1238 /* These three POP... operations complement the three PUSH... operations. 1238 /* These three POP... operations complement the three PUSH... operations.
1239 All assume that `fail_stack' is nonempty. */ 1239 All assume that `fail_stack' is nonempty. */
1264 do { \ 1264 do { \
1265 char *destination; \ 1265 char *destination; \
1266 /* Must be int, so when we don't save any registers, the arithmetic \ 1266 /* Must be int, so when we don't save any registers, the arithmetic \
1267 of 0 + -1 isn't done as unsigned. */ \ 1267 of 0 + -1 isn't done as unsigned. */ \
1268 int this_reg; \ 1268 int this_reg; \
1269 \ 1269 \
1270 DEBUG_STATEMENT (failure_id++); \ 1270 DEBUG_STATEMENT (failure_id++); \
1271 DEBUG_STATEMENT (nfailure_points_pushed++); \ 1271 DEBUG_STATEMENT (nfailure_points_pushed++); \
1272 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ 1272 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
1273 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ 1273 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
1274 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ 1274 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
1275 \ 1275 \
1276 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ 1276 DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \
1277 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ 1277 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
1278 \ 1278 \
1279 /* Ensure we have enough space allocated for what we will push. */ \ 1279 /* Ensure we have enough space allocated for what we will push. */ \
1280 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ 1280 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
1281 { \ 1281 { \
1282 if (!DOUBLE_FAIL_STACK (fail_stack)) \ 1282 if (!DOUBLE_FAIL_STACK (fail_stack)) \
1283 return failure_code; \ 1283 return failure_code; \
1284 \ 1284 \
1285 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ 1285 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
1286 (fail_stack).size); \ 1286 (fail_stack).size); \
1287 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ 1287 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1288 } \ 1288 } \
1289 \ 1289 \
1290 /* Push the info, starting with the registers. */ \ 1290 /* Push the info, starting with the registers. */ \
1291 DEBUG_PRINT1 ("\n"); \ 1291 DEBUG_PRINT1 ("\n"); \
1292 \ 1292 \
1324 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ 1324 DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \
1325 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ 1325 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
1326 PUSH_FAILURE_POINTER (pattern_place); \ 1326 PUSH_FAILURE_POINTER (pattern_place); \
1327 \ 1327 \
1328 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ 1328 DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \
1329 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ 1329 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
1330 size2); \ 1330 size2); \
1331 DEBUG_PRINT1 ("'\n"); \ 1331 DEBUG_PRINT1 ("'\n"); \
1332 PUSH_FAILURE_POINTER (string_place); \ 1332 PUSH_FAILURE_POINTER (string_place); \
1333 \ 1333 \
1334 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ 1334 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
1371 LOW_REG, HIGH_REG -- the highest and lowest active registers. 1371 LOW_REG, HIGH_REG -- the highest and lowest active registers.
1372 REGSTART, REGEND -- arrays of string positions. 1372 REGSTART, REGEND -- arrays of string positions.
1373 REG_INFO -- array of information about each subexpression. 1373 REG_INFO -- array of information about each subexpression.
1374 1374
1375 Also assumes the variables `fail_stack' and (if debugging), `bufp', 1375 Also assumes the variables `fail_stack' and (if debugging), `bufp',
1376 `pend', `string1', `size1', `string2', and `size2'. */ 1376 `pend', `string1', `size1', `string2', and `size2'. */
1377 1377
1378 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ 1378 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1379 { \ 1379 { \
1380 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ 1380 DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \
1381 int this_reg; \ 1381 int this_reg; \
1384 assert (!FAIL_STACK_EMPTY ()); \ 1384 assert (!FAIL_STACK_EMPTY ()); \
1385 \ 1385 \
1386 /* Remove failure points and point to how many regs pushed. */ \ 1386 /* Remove failure points and point to how many regs pushed. */ \
1387 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ 1387 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
1388 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ 1388 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
1389 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ 1389 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
1390 \ 1390 \
1391 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ 1391 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
1392 \ 1392 \
1393 DEBUG_POP (&failure_id); \ 1393 DEBUG_POP (&failure_id); \
1394 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ 1394 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
1416 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ 1416 DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \
1417 \ 1417 \
1418 if (1) \ 1418 if (1) \
1419 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ 1419 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
1420 { \ 1420 { \
1421 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ 1421 DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \
1422 \ 1422 \
1423 reg_info[this_reg].word = POP_FAILURE_ELT (); \ 1423 reg_info[this_reg].word = POP_FAILURE_ELT (); \
1424 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ 1424 DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \
1425 \ 1425 \
1426 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1426 regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1427 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ 1427 DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \
1428 \ 1428 \
1429 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ 1429 regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
1430 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ 1430 DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \
1431 } \ 1431 } \
1432 else \ 1432 else \
1433 { \ 1433 { \
1434 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ 1434 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1435 { \ 1435 { \
1461 { 1461 {
1462 fail_stack_elt_t word; 1462 fail_stack_elt_t word;
1463 struct 1463 struct
1464 { 1464 {
1465 /* This field is one if this group can match the empty string, 1465 /* This field is one if this group can match the empty string,
1466 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ 1466 zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */
1467 #define MATCH_NULL_UNSET_VALUE 3 1467 #define MATCH_NULL_UNSET_VALUE 3
1468 unsigned match_null_string_p : 2; 1468 unsigned match_null_string_p : 2;
1469 unsigned is_active : 1; 1469 unsigned is_active : 1;
1470 unsigned matched_something : 1; 1470 unsigned matched_something : 1;
1471 unsigned ever_matched_something : 1; 1471 unsigned ever_matched_something : 1;
1522 if (translate) c = (unsigned char) translate[c]; \ 1522 if (translate) c = (unsigned char) translate[c]; \
1523 } while (0) 1523 } while (0)
1524 #endif 1524 #endif
1525 1525
1526 /* Fetch the next character in the uncompiled pattern, with no 1526 /* Fetch the next character in the uncompiled pattern, with no
1527 translation. */ 1527 translation. */
1528 #define PATFETCH_RAW(c) \ 1528 #define PATFETCH_RAW(c) \
1529 do {if (p == pend) return REG_EEND; \ 1529 do {if (p == pend) return REG_EEND; \
1530 c = (unsigned char) *p++; \ 1530 c = (unsigned char) *p++; \
1531 } while (0) 1531 } while (0)
1532 1532
1533 /* Go backwards one character in the pattern. */ 1533 /* Go backwards one character in the pattern. */
1534 #define PATUNFETCH p-- 1534 #define PATUNFETCH p--
1535 1535
1547 /* Macros for outputting the compiled pattern into `buffer'. */ 1547 /* Macros for outputting the compiled pattern into `buffer'. */
1548 1548
1549 /* If the buffer isn't allocated when it comes in, use this. */ 1549 /* If the buffer isn't allocated when it comes in, use this. */
1550 #define INIT_BUF_SIZE 32 1550 #define INIT_BUF_SIZE 32
1551 1551
1552 /* Make sure we have at least N more bytes of space in buffer. */ 1552 /* Make sure we have at least N more bytes of space in buffer. */
1553 #define GET_BUFFER_SPACE(n) \ 1553 #define GET_BUFFER_SPACE(n) \
1554 while (b - bufp->buffer + (n) > bufp->allocated) \ 1554 while (b - bufp->buffer + (n) > bufp->allocated) \
1555 EXTEND_BUFFER () 1555 EXTEND_BUFFER ()
1556 1556
1557 /* Make sure we have one more byte of buffer space and then add C to it. */ 1557 /* Make sure we have one more byte of buffer space and then add C to it. */
1569 *b++ = (unsigned char) (c1); \ 1569 *b++ = (unsigned char) (c1); \
1570 *b++ = (unsigned char) (c2); \ 1570 *b++ = (unsigned char) (c2); \
1571 } while (0) 1571 } while (0)
1572 1572
1573 1573
1574 /* As with BUF_PUSH_2, except for three bytes. */ 1574 /* As with BUF_PUSH_2, except for three bytes. */
1575 #define BUF_PUSH_3(c1, c2, c3) \ 1575 #define BUF_PUSH_3(c1, c2, c3) \
1576 do { \ 1576 do { \
1577 GET_BUFFER_SPACE (3); \ 1577 GET_BUFFER_SPACE (3); \
1578 *b++ = (unsigned char) (c1); \ 1578 *b++ = (unsigned char) (c1); \
1579 *b++ = (unsigned char) (c2); \ 1579 *b++ = (unsigned char) (c2); \
1580 *b++ = (unsigned char) (c3); \ 1580 *b++ = (unsigned char) (c3); \
1581 } while (0) 1581 } while (0)
1582 1582
1583 1583
1584 /* Store a jump with opcode OP at LOC to location TO. We store a 1584 /* Store a jump with opcode OP at LOC to location TO. We store a
1585 relative address offset by the three bytes the jump itself occupies. */ 1585 relative address offset by the three bytes the jump itself occupies. */
1586 #define STORE_JUMP(op, loc, to) \ 1586 #define STORE_JUMP(op, loc, to) \
1587 store_op1 (op, loc, (to) - (loc) - 3) 1587 store_op1 (op, loc, (to) - (loc) - 3)
1588 1588
1589 /* Likewise, for a two-argument jump. */ 1589 /* Likewise, for a two-argument jump. */
1590 #define STORE_JUMP2(op, loc, to, arg) \ 1590 #define STORE_JUMP2(op, loc, to, arg) \
1591 store_op2 (op, loc, (to) - (loc) - 3, arg) 1591 store_op2 (op, loc, (to) - (loc) - 3, arg)
1592 1592
1593 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ 1593 /* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */
1594 #define INSERT_JUMP(op, loc, to) \ 1594 #define INSERT_JUMP(op, loc, to) \
1595 insert_op1 (op, loc, (to) - (loc) - 3, b) 1595 insert_op1 (op, loc, (to) - (loc) - 3, b)
1596 1596
1597 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ 1597 /* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */
1598 #define INSERT_JUMP2(op, loc, to, arg) \ 1598 #define INSERT_JUMP2(op, loc, to, arg) \
1599 insert_op2 (op, loc, (to) - (loc) - 3, arg, b) 1599 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1600 1600
1601 1601
1602 /* This is not an arbitrary limit: the arguments which represent offsets 1602 /* This is not an arbitrary limit: the arguments which represent offsets
1603 into the pattern are two bytes long. So if 2^16 bytes turns out to 1603 into the pattern are two bytes long. So if 2^16 bytes turns out to
1604 be too small, many things would have to change. */ 1604 be too small, many things would have to change. */
1605 #define MAX_BUF_SIZE (1L << 16) 1605 #define MAX_BUF_SIZE (1L << 16)
1606 1606
1607 1607
1608 /* Extend the buffer by twice its current size via realloc and 1608 /* Extend the buffer by twice its current size via realloc and
1609 reset the pointers that pointed into the old block to point to the 1609 reset the pointers that pointed into the old block to point to the
1610 correct places in the new one. If extending the buffer results in it 1610 correct places in the new one. If extending the buffer results in it
1611 being larger than MAX_BUF_SIZE, then flag memory exhausted. */ 1611 being larger than MAX_BUF_SIZE, then flag memory exhausted. */
1612 #define EXTEND_BUFFER() \ 1612 #define EXTEND_BUFFER() \
1613 do { \ 1613 do { \
1614 unsigned char *old_buffer = bufp->buffer; \ 1614 unsigned char *old_buffer = bufp->buffer; \
1615 if (bufp->allocated == MAX_BUF_SIZE) \ 1615 if (bufp->allocated == MAX_BUF_SIZE) \
1616 return REG_ESIZE; \ 1616 return REG_ESIZE; \
1617 bufp->allocated <<= 1; \ 1617 bufp->allocated <<= 1; \
1618 if (bufp->allocated > MAX_BUF_SIZE) \ 1618 if (bufp->allocated > MAX_BUF_SIZE) \
1619 bufp->allocated = MAX_BUF_SIZE; \ 1619 bufp->allocated = MAX_BUF_SIZE; \
1620 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ 1620 bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1621 if (bufp->buffer == NULL) \ 1621 if (bufp->buffer == NULL) \
1622 return REG_ESPACE; \ 1622 return REG_ESPACE; \
1623 /* If the buffer moved, move all the pointers into it. */ \ 1623 /* If the buffer moved, move all the pointers into it. */ \
1624 if (old_buffer != bufp->buffer) \ 1624 if (old_buffer != bufp->buffer) \
1625 { \ 1625 { \
1626 b = (b - old_buffer) + bufp->buffer; \ 1626 b = (b - old_buffer) + bufp->buffer; \
1627 begalt = (begalt - old_buffer) + bufp->buffer; \ 1627 begalt = (begalt - old_buffer) + bufp->buffer; \
1628 if (fixup_alt_jump) \ 1628 if (fixup_alt_jump) \
1629 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\ 1629 fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1630 if (laststart) \ 1630 if (laststart) \
1631 laststart = (laststart - old_buffer) + bufp->buffer; \ 1631 laststart = (laststart - old_buffer) + bufp->buffer; \
1632 if (pending_exact) \ 1632 if (pending_exact) \
1633 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \ 1633 pending_exact = (pending_exact - old_buffer) + bufp->buffer; \
1634 } \ 1634 } \
1635 } while (0) 1635 } while (0)
1636 1636
1637 1637
1638 /* Since we have one byte reserved for the register number argument to 1638 /* Since we have one byte reserved for the register number argument to
1646 1646
1647 1647
1648 /* Macros for the compile stack. */ 1648 /* Macros for the compile stack. */
1649 1649
1650 /* Since offsets can go either forwards or backwards, this type needs to 1650 /* Since offsets can go either forwards or backwards, this type needs to
1651 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ 1651 be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */
1652 typedef int pattern_offset_t; 1652 typedef int pattern_offset_t;
1653 1653
1654 typedef struct 1654 typedef struct
1655 { 1655 {
1656 pattern_offset_t begalt_offset; 1656 pattern_offset_t begalt_offset;
1672 #define INIT_COMPILE_STACK_SIZE 32 1672 #define INIT_COMPILE_STACK_SIZE 32
1673 1673
1674 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0) 1674 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1675 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size) 1675 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1676 1676
1677 /* The next available element. */ 1677 /* The next available element. */
1678 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail]) 1678 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1679 1679
1680 1680
1681 /* Structure to manage work area for range table. */ 1681 /* Structure to manage work area for range table. */
1682 struct range_table_work_area 1682 struct range_table_work_area
1683 { 1683 {
1684 int *table; /* actual work area. */ 1684 int *table; /* actual work area. */
1685 int allocated; /* allocated size for work area in bytes. */ 1685 int allocated; /* allocated size for work area in bytes. */
1686 int used; /* actually used size in words. */ 1686 int used; /* actually used size in words. */
1687 }; 1687 };
1688 1688
1689 /* Make sure that WORK_AREA can hold more N multibyte characters. */ 1689 /* Make sure that WORK_AREA can hold more N multibyte characters. */
1690 #define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \ 1690 #define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n) \
1691 do { \ 1691 do { \
1709 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \ 1709 EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2); \
1710 (work_area).table[(work_area).used++] = (range_start); \ 1710 (work_area).table[(work_area).used++] = (range_start); \
1711 (work_area).table[(work_area).used++] = (range_end); \ 1711 (work_area).table[(work_area).used++] = (range_end); \
1712 } while (0) 1712 } while (0)
1713 1713
1714 /* Free allocated memory for WORK_AREA. */ 1714 /* Free allocated memory for WORK_AREA. */
1715 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \ 1715 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1716 do { \ 1716 do { \
1717 if ((work_area).table) \ 1717 if ((work_area).table) \
1718 free ((work_area).table); \ 1718 free ((work_area).table); \
1719 } while (0) 1719 } while (0)
1722 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used) 1722 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1723 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i]) 1723 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1724 1724
1725 1725
1726 /* Set the bit for character C in a list. */ 1726 /* Set the bit for character C in a list. */
1727 #define SET_LIST_BIT(c) \ 1727 #define SET_LIST_BIT(c) \
1728 (b[((unsigned char) (c)) / BYTEWIDTH] \ 1728 (b[((unsigned char) (c)) / BYTEWIDTH] \
1729 |= 1 << (((unsigned char) c) % BYTEWIDTH)) 1729 |= 1 << (((unsigned char) c) % BYTEWIDTH))
1730 1730
1731 1731
1732 /* Get the next unsigned number in the uncompiled pattern. */ 1732 /* Get the next unsigned number in the uncompiled pattern. */
1733 #define GET_UNSIGNED_NUMBER(num) \ 1733 #define GET_UNSIGNED_NUMBER(num) \
1734 { if (p != pend) \ 1734 { if (p != pend) \
1735 { \ 1735 { \
1736 PATFETCH (c); \ 1736 PATFETCH (c); \
1737 while (ISDIGIT (c)) \ 1737 while (ISDIGIT (c)) \
1738 { \ 1738 { \
1739 if (num < 0) \ 1739 if (num < 0) \
1740 num = 0; \ 1740 num = 0; \
1741 num = num * 10 + c - '0'; \ 1741 num = num * 10 + c - '0'; \
1742 if (p == pend) \ 1742 if (p == pend) \
1743 break; \ 1743 break; \
1744 PATFETCH (c); \ 1744 PATFETCH (c); \
1745 } \ 1745 } \
1746 } \ 1746 } \
1747 } 1747 }
1748 1748
1749 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ 1749 #define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
1750 1750
1751 #define IS_CHAR_CLASS(string) \ 1751 #define IS_CHAR_CLASS(string) \
1767 1767
1768 static fail_stack_type fail_stack; 1768 static fail_stack_type fail_stack;
1769 1769
1770 /* Size with which the following vectors are currently allocated. 1770 /* Size with which the following vectors are currently allocated.
1771 That is so we can make them bigger as needed, 1771 That is so we can make them bigger as needed,
1772 but never make them smaller. */ 1772 but never make them smaller. */
1773 static int regs_allocated_size; 1773 static int regs_allocated_size;
1774 1774
1775 static const char ** regstart, ** regend; 1775 static const char ** regstart, ** regend;
1776 static const char ** old_regstart, ** old_regend; 1776 static const char ** old_regstart, ** old_regend;
1777 static const char **best_regstart, **best_regend; 1777 static const char **best_regstart, **best_regend;
1778 static register_info_type *reg_info; 1778 static register_info_type *reg_info;
1779 static const char **reg_dummy; 1779 static const char **reg_dummy;
1780 static register_info_type *reg_info_dummy; 1780 static register_info_type *reg_info_dummy;
1781 1781
1782 /* Make the register vectors big enough for NUM_REGS registers, 1782 /* Make the register vectors big enough for NUM_REGS registers,
1783 but don't make them smaller. */ 1783 but don't make them smaller. */
1784 1784
1785 static 1785 static
1786 regex_grow_registers (num_regs) 1786 regex_grow_registers (num_regs)
1787 int num_regs; 1787 int num_regs;
1788 { 1788 {
1875 /* Place in the uncompiled pattern (i.e., the {) to 1875 /* Place in the uncompiled pattern (i.e., the {) to
1876 which to go back if the interval is invalid. */ 1876 which to go back if the interval is invalid. */
1877 const char *beg_interval; 1877 const char *beg_interval;
1878 1878
1879 /* Address of the place where a forward jump should go to the end of 1879 /* Address of the place where a forward jump should go to the end of
1880 the containing expression. Each alternative of an `or' -- except the 1880 the containing expression. Each alternative of an `or' -- except the
1881 last -- ends with a forward jump of this sort. */ 1881 last -- ends with a forward jump of this sort. */
1882 unsigned char *fixup_alt_jump = 0; 1882 unsigned char *fixup_alt_jump = 0;
1883 1883
1884 /* Counts open-groups as they are encountered. Remembered for the 1884 /* Counts open-groups as they are encountered. Remembered for the
1885 matching close-group on the compile stack, so the same register 1885 matching close-group on the compile stack, so the same register
1894 if (debug) 1894 if (debug)
1895 { 1895 {
1896 unsigned debug_count; 1896 unsigned debug_count;
1897 1897
1898 for (debug_count = 0; debug_count < size; debug_count++) 1898 for (debug_count = 0; debug_count < size; debug_count++)
1899 putchar (pattern[debug_count]); 1899 putchar (pattern[debug_count]);
1900 putchar ('\n'); 1900 putchar ('\n');
1901 } 1901 }
1902 #endif /* DEBUG */ 1902 #endif /* DEBUG */
1903 1903
1904 /* Initialize the compile stack. */ 1904 /* Initialize the compile stack. */
1940 1940
1941 if (bufp->allocated == 0) 1941 if (bufp->allocated == 0)
1942 { 1942 {
1943 if (bufp->buffer) 1943 if (bufp->buffer)
1944 { /* If zero allocated, but buffer is non-null, try to realloc 1944 { /* If zero allocated, but buffer is non-null, try to realloc
1945 enough space. This loses if buffer's address is bogus, but 1945 enough space. This loses if buffer's address is bogus, but
1946 that is the user's responsibility. */ 1946 that is the user's responsibility. */
1947 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char); 1947 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1948 } 1948 }
1949 else 1949 else
1950 { /* Caller did not allocate a buffer. Do it for them. */ 1950 { /* Caller did not allocate a buffer. Do it for them. */
1951 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); 1951 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1952 } 1952 }
1953 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); 1953 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1954 1954
1955 bufp->allocated = INIT_BUF_SIZE; 1955 bufp->allocated = INIT_BUF_SIZE;
1956 } 1956 }
1957 1957
1961 while (p != pend) 1961 while (p != pend)
1962 { 1962 {
1963 PATFETCH (c); 1963 PATFETCH (c);
1964 1964
1965 switch (c) 1965 switch (c)
1966 { 1966 {
1967 case '^': 1967 case '^':
1968 { 1968 {
1969 if ( /* If at start of pattern, it's an operator. */ 1969 if ( /* If at start of pattern, it's an operator. */
1970 p == pattern + 1 1970 p == pattern + 1
1971 /* If context independent, it's an operator. */ 1971 /* If context independent, it's an operator. */
1972 || syntax & RE_CONTEXT_INDEP_ANCHORS 1972 || syntax & RE_CONTEXT_INDEP_ANCHORS
1973 /* Otherwise, depends on what's come before. */ 1973 /* Otherwise, depends on what's come before. */
1974 || at_begline_loc_p (pattern, p, syntax)) 1974 || at_begline_loc_p (pattern, p, syntax))
1975 BUF_PUSH (begline); 1975 BUF_PUSH (begline);
1976 else 1976 else
1977 goto normal_char; 1977 goto normal_char;
1978 } 1978 }
1979 break; 1979 break;
1980 1980
1981 1981
1982 case '$': 1982 case '$':
1983 { 1983 {
1984 if ( /* If at end of pattern, it's an operator. */ 1984 if ( /* If at end of pattern, it's an operator. */
1985 p == pend 1985 p == pend
1986 /* If context independent, it's an operator. */ 1986 /* If context independent, it's an operator. */
1987 || syntax & RE_CONTEXT_INDEP_ANCHORS 1987 || syntax & RE_CONTEXT_INDEP_ANCHORS
1988 /* Otherwise, depends on what's next. */ 1988 /* Otherwise, depends on what's next. */
1989 || at_endline_loc_p (p, pend, syntax)) 1989 || at_endline_loc_p (p, pend, syntax))
1990 BUF_PUSH (endline); 1990 BUF_PUSH (endline);
1991 else 1991 else
1992 goto normal_char; 1992 goto normal_char;
1993 } 1993 }
1994 break; 1994 break;
1995 1995
1996 1996
1997 case '+': 1997 case '+':
1998 case '?': 1998 case '?':
1999 if ((syntax & RE_BK_PLUS_QM) 1999 if ((syntax & RE_BK_PLUS_QM)
2000 || (syntax & RE_LIMITED_OPS)) 2000 || (syntax & RE_LIMITED_OPS))
2001 goto normal_char; 2001 goto normal_char;
2002 handle_plus: 2002 handle_plus:
2003 case '*': 2003 case '*':
2004 /* If there is no previous pattern... */ 2004 /* If there is no previous pattern... */
2005 if (!laststart) 2005 if (!laststart)
2006 { 2006 {
2007 if (syntax & RE_CONTEXT_INVALID_OPS) 2007 if (syntax & RE_CONTEXT_INVALID_OPS)
2008 FREE_STACK_RETURN (REG_BADRPT); 2008 FREE_STACK_RETURN (REG_BADRPT);
2009 else if (!(syntax & RE_CONTEXT_INDEP_OPS)) 2009 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2010 goto normal_char; 2010 goto normal_char;
2011 } 2011 }
2012 2012
2013 { 2013 {
2014 /* Are we optimizing this jump? */ 2014 /* Are we optimizing this jump? */
2015 boolean keep_string_p = false; 2015 boolean keep_string_p = false;
2016 2016
2017 /* 1 means zero (many) matches is allowed. */ 2017 /* 1 means zero (many) matches is allowed. */
2018 char zero_times_ok = 0, many_times_ok = 0; 2018 char zero_times_ok = 0, many_times_ok = 0;
2019 2019
2020 /* If there is a sequence of repetition chars, collapse it 2020 /* If there is a sequence of repetition chars, collapse it
2021 down to just one (the right one). We can't combine 2021 down to just one (the right one). We can't combine
2022 interval operators with these because of, e.g., `a{2}*', 2022 interval operators with these because of, e.g., `a{2}*',
2023 which should only match an even number of `a's. */ 2023 which should only match an even number of `a's. */
2024 2024
2025 for (;;) 2025 for (;;)
2026 { 2026 {
2027 zero_times_ok |= c != '+'; 2027 zero_times_ok |= c != '+';
2028 many_times_ok |= c != '?'; 2028 many_times_ok |= c != '?';
2029 2029
2030 if (p == pend) 2030 if (p == pend)
2031 break; 2031 break;
2032 2032
2033 PATFETCH (c); 2033 PATFETCH (c);
2034 2034
2035 if (c == '*' 2035 if (c == '*'
2036 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?'))) 2036 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2037 ; 2037 ;
2038 2038
2039 else if (syntax & RE_BK_PLUS_QM && c == '\\') 2039 else if (syntax & RE_BK_PLUS_QM && c == '\\')
2040 { 2040 {
2041 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2041 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2042 2042
2043 PATFETCH (c1); 2043 PATFETCH (c1);
2044 if (!(c1 == '+' || c1 == '?')) 2044 if (!(c1 == '+' || c1 == '?'))
2045 { 2045 {
2046 PATUNFETCH; 2046 PATUNFETCH;
2047 PATUNFETCH; 2047 PATUNFETCH;
2048 break; 2048 break;
2049 } 2049 }
2050 2050
2051 c = c1; 2051 c = c1;
2052 } 2052 }
2053 else 2053 else
2054 { 2054 {
2055 PATUNFETCH; 2055 PATUNFETCH;
2056 break; 2056 break;
2057 } 2057 }
2058 2058
2059 /* If we get here, we found another repeat character. */ 2059 /* If we get here, we found another repeat character. */
2060 } 2060 }
2061 2061
2062 /* Star, etc. applied to an empty pattern is equivalent 2062 /* Star, etc. applied to an empty pattern is equivalent
2063 to an empty pattern. */ 2063 to an empty pattern. */
2064 if (!laststart) 2064 if (!laststart)
2065 break; 2065 break;
2066 2066
2067 /* Now we know whether or not zero matches is allowed 2067 /* Now we know whether or not zero matches is allowed
2068 and also whether or not two or more matches is allowed. */ 2068 and also whether or not two or more matches is allowed. */
2069 if (many_times_ok) 2069 if (many_times_ok)
2070 { /* More than one repetition is allowed, so put in at the 2070 { /* More than one repetition is allowed, so put in at the
2071 end a backward relative jump from `b' to before the next 2071 end a backward relative jump from `b' to before the next
2072 jump we're going to put in below (which jumps from 2072 jump we're going to put in below (which jumps from
2073 laststart to after this jump). 2073 laststart to after this jump).
2074 2074
2075 But if we are at the `*' in the exact sequence `.*\n', 2075 But if we are at the `*' in the exact sequence `.*\n',
2076 insert an unconditional jump backwards to the ., 2076 insert an unconditional jump backwards to the .,
2077 instead of the beginning of the loop. This way we only 2077 instead of the beginning of the loop. This way we only
2078 push a failure point once, instead of every time 2078 push a failure point once, instead of every time
2079 through the loop. */ 2079 through the loop. */
2080 assert (p - 1 > pattern); 2080 assert (p - 1 > pattern);
2081 2081
2082 /* Allocate the space for the jump. */ 2082 /* Allocate the space for the jump. */
2083 GET_BUFFER_SPACE (3); 2083 GET_BUFFER_SPACE (3);
2084 2084
2085 /* We know we are not at the first character of the pattern, 2085 /* We know we are not at the first character of the pattern,
2086 because laststart was nonzero. And we've already 2086 because laststart was nonzero. And we've already
2087 incremented `p', by the way, to be the character after 2087 incremented `p', by the way, to be the character after
2088 the `*'. Do we have to do something analogous here 2088 the `*'. Do we have to do something analogous here
2089 for null bytes, because of RE_DOT_NOT_NULL? */ 2089 for null bytes, because of RE_DOT_NOT_NULL? */
2090 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.') 2090 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2091 && zero_times_ok 2091 && zero_times_ok
2092 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n') 2092 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2093 && !(syntax & RE_DOT_NEWLINE)) 2093 && !(syntax & RE_DOT_NEWLINE))
2094 { /* We have .*\n. */ 2094 { /* We have .*\n. */
2095 STORE_JUMP (jump, b, laststart); 2095 STORE_JUMP (jump, b, laststart);
2096 keep_string_p = true; 2096 keep_string_p = true;
2097 } 2097 }
2098 else 2098 else
2099 /* Anything else. */ 2099 /* Anything else. */
2100 STORE_JUMP (maybe_pop_jump, b, laststart - 3); 2100 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2101 2101
2102 /* We've added more stuff to the buffer. */ 2102 /* We've added more stuff to the buffer. */
2103 b += 3; 2103 b += 3;
2104 } 2104 }
2105 2105
2106 /* On failure, jump from laststart to b + 3, which will be the 2106 /* On failure, jump from laststart to b + 3, which will be the
2107 end of the buffer after this jump is inserted. */ 2107 end of the buffer after this jump is inserted. */
2108 GET_BUFFER_SPACE (3); 2108 GET_BUFFER_SPACE (3);
2109 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump 2109 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2110 : on_failure_jump, 2110 : on_failure_jump,
2111 laststart, b + 3); 2111 laststart, b + 3);
2112 pending_exact = 0; 2112 pending_exact = 0;
2113 b += 3; 2113 b += 3;
2114 2114
2115 if (!zero_times_ok) 2115 if (!zero_times_ok)
2116 { 2116 {
2117 /* At least one repetition is required, so insert a 2117 /* At least one repetition is required, so insert a
2118 `dummy_failure_jump' before the initial 2118 `dummy_failure_jump' before the initial
2119 `on_failure_jump' instruction of the loop. This 2119 `on_failure_jump' instruction of the loop. This
2120 effects a skip over that instruction the first time 2120 effects a skip over that instruction the first time
2121 we hit that loop. */ 2121 we hit that loop. */
2122 GET_BUFFER_SPACE (3); 2122 GET_BUFFER_SPACE (3);
2123 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6); 2123 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2124 b += 3; 2124 b += 3;
2125 } 2125 }
2126 } 2126 }
2127 break; 2127 break;
2128 2128
2129 2129
2130 case '.': 2130 case '.':
2131 laststart = b; 2131 laststart = b;
2132 BUF_PUSH (anychar); 2132 BUF_PUSH (anychar);
2133 break; 2133 break;
2134 2134
2135 2135
2136 case '[': 2136 case '[':
2137 { 2137 {
2138 CLEAR_RANGE_TABLE_WORK_USED (range_table_work); 2138 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2139 2139
2140 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2140 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2141 2141
2142 /* Ensure that we have enough space to push a charset: the 2142 /* Ensure that we have enough space to push a charset: the
2143 opcode, the length count, and the bitset; 34 bytes in all. */ 2143 opcode, the length count, and the bitset; 34 bytes in all. */
2144 GET_BUFFER_SPACE (34); 2144 GET_BUFFER_SPACE (34);
2145 2145
2146 laststart = b; 2146 laststart = b;
2147 2147
2148 /* We test `*p == '^' twice, instead of using an if 2148 /* We test `*p == '^' twice, instead of using an if
2149 statement, so we only need one BUF_PUSH. */ 2149 statement, so we only need one BUF_PUSH. */
2150 BUF_PUSH (*p == '^' ? charset_not : charset); 2150 BUF_PUSH (*p == '^' ? charset_not : charset);
2151 if (*p == '^') 2151 if (*p == '^')
2152 p++; 2152 p++;
2153 2153
2154 /* Remember the first position in the bracket expression. */ 2154 /* Remember the first position in the bracket expression. */
2155 p1 = p; 2155 p1 = p;
2156 2156
2157 /* Push the number of bytes in the bitmap. */ 2157 /* Push the number of bytes in the bitmap. */
2158 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH); 2158 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2159 2159
2160 /* Clear the whole map. */ 2160 /* Clear the whole map. */
2161 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH); 2161 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2162 2162
2163 /* charset_not matches newline according to a syntax bit. */ 2163 /* charset_not matches newline according to a syntax bit. */
2164 if ((re_opcode_t) b[-2] == charset_not 2164 if ((re_opcode_t) b[-2] == charset_not
2165 && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) 2165 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2166 SET_LIST_BIT ('\n'); 2166 SET_LIST_BIT ('\n');
2167 2167
2168 /* Read in characters and ranges, setting map bits. */ 2168 /* Read in characters and ranges, setting map bits. */
2169 for (;;) 2169 for (;;)
2170 { 2170 {
2171 int len; 2171 int len;
2172 boolean escaped_char = false; 2172 boolean escaped_char = false;
2173 2173
2174 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2174 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2175 2175
2176 PATFETCH (c); 2176 PATFETCH (c);
2177 2177
2178 /* \ might escape characters inside [...] and [^...]. */ 2178 /* \ might escape characters inside [...] and [^...]. */
2179 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') 2179 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2180 { 2180 {
2181 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2181 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2182 2182
2183 PATFETCH (c); 2183 PATFETCH (c);
2184 escaped_char = true; 2184 escaped_char = true;
2185 } 2185 }
2186 else 2186 else
2187 { 2187 {
2188 /* Could be the end of the bracket expression. If it's 2188 /* Could be the end of the bracket expression. If it's
2189 not (i.e., when the bracket expression is `[]' so 2189 not (i.e., when the bracket expression is `[]' so
2190 far), the ']' character bit gets set way below. */ 2190 far), the ']' character bit gets set way below. */
2191 if (c == ']' && p != p1 + 1) 2191 if (c == ']' && p != p1 + 1)
2192 break; 2192 break;
2193 } 2193 }
2194 2194
2195 /* If C indicates start of multibyte char, get the 2195 /* If C indicates start of multibyte char, get the
2196 actual character code in C, and set the pattern 2196 actual character code in C, and set the pattern
2197 pointer P to the next character boundary. */ 2197 pointer P to the next character boundary. */
2198 if (bufp->multibyte && BASE_LEADING_CODE_P (c)) 2198 if (bufp->multibyte && BASE_LEADING_CODE_P (c))
2199 { 2199 {
2200 PATUNFETCH; 2200 PATUNFETCH;
2201 c = STRING_CHAR_AND_LENGTH (p, pend - p, len); 2201 c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
2202 p += len; 2202 p += len;
2203 } 2203 }
2204 /* What should we do for the character which is 2204 /* What should we do for the character which is
2205 greater than 0x7F, but not BASE_LEADING_CODE_P? 2205 greater than 0x7F, but not BASE_LEADING_CODE_P?
2206 XXX */ 2206 XXX */
2207 2207
2208 /* See if we're at the beginning of a possible character 2208 /* See if we're at the beginning of a possible character
2209 class. */ 2209 class. */
2210 2210
2211 else if (!escaped_char && 2211 else if (!escaped_char &&
2212 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') 2212 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2213 { /* Leave room for the null. */ 2213 { /* Leave room for the null. */
2214 char str[CHAR_CLASS_MAX_LENGTH + 1]; 2214 char str[CHAR_CLASS_MAX_LENGTH + 1];
2215 2215
2216 PATFETCH (c); 2216 PATFETCH (c);
2217 c1 = 0; 2217 c1 = 0;
2218 2218
2219 /* If pattern is `[[:'. */ 2219 /* If pattern is `[[:'. */
2220 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2220 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2221 2221
2222 for (;;) 2222 for (;;)
2223 { 2223 {
2224 PATFETCH (c); 2224 PATFETCH (c);
2225 if (c == ':' || c == ']' || p == pend 2225 if (c == ':' || c == ']' || p == pend
2226 || c1 == CHAR_CLASS_MAX_LENGTH) 2226 || c1 == CHAR_CLASS_MAX_LENGTH)
2227 break; 2227 break;
2228 str[c1++] = c; 2228 str[c1++] = c;
2229 } 2229 }
2230 str[c1] = '\0'; 2230 str[c1] = '\0';
2231 2231
2232 /* If it isn't a word bracketed by `[:' and `:]': 2232 /* If it isn't a word bracketed by `[:' and `:]':
2233 undo the ending character, the letters, and 2233 undo the ending character, the letters, and
2234 leave the leading `:' and `[' (but set bits for 2234 leave the leading `:' and `[' (but set bits for
2235 them). */ 2235 them). */
2236 if (c == ':' && *p == ']') 2236 if (c == ':' && *p == ']')
2237 { 2237 {
2238 int ch; 2238 int ch;
2239 boolean is_alnum = STREQ (str, "alnum"); 2239 boolean is_alnum = STREQ (str, "alnum");
2240 boolean is_alpha = STREQ (str, "alpha"); 2240 boolean is_alpha = STREQ (str, "alpha");
2241 boolean is_blank = STREQ (str, "blank"); 2241 boolean is_blank = STREQ (str, "blank");
2242 boolean is_cntrl = STREQ (str, "cntrl"); 2242 boolean is_cntrl = STREQ (str, "cntrl");
2243 boolean is_digit = STREQ (str, "digit"); 2243 boolean is_digit = STREQ (str, "digit");
2244 boolean is_graph = STREQ (str, "graph"); 2244 boolean is_graph = STREQ (str, "graph");
2245 boolean is_lower = STREQ (str, "lower"); 2245 boolean is_lower = STREQ (str, "lower");
2246 boolean is_print = STREQ (str, "print"); 2246 boolean is_print = STREQ (str, "print");
2247 boolean is_punct = STREQ (str, "punct"); 2247 boolean is_punct = STREQ (str, "punct");
2248 boolean is_space = STREQ (str, "space"); 2248 boolean is_space = STREQ (str, "space");
2249 boolean is_upper = STREQ (str, "upper"); 2249 boolean is_upper = STREQ (str, "upper");
2250 boolean is_xdigit = STREQ (str, "xdigit"); 2250 boolean is_xdigit = STREQ (str, "xdigit");
2251 2251
2252 if (!IS_CHAR_CLASS (str)) 2252 if (!IS_CHAR_CLASS (str))
2253 FREE_STACK_RETURN (REG_ECTYPE); 2253 FREE_STACK_RETURN (REG_ECTYPE);
2254 2254
2255 /* Throw away the ] at the end of the character 2255 /* Throw away the ] at the end of the character
2256 class. */ 2256 class. */
2257 PATFETCH (c); 2257 PATFETCH (c);
2258 2258
2259 if (p == pend) FREE_STACK_RETURN (REG_EBRACK); 2259 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2260 2260
2261 for (ch = 0; ch < 1 << BYTEWIDTH; ch++) 2261 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2262 { 2262 {
2263 int translated = TRANSLATE (ch); 2263 int translated = TRANSLATE (ch);
2264 /* This was split into 3 if's to 2264 /* This was split into 3 if's to
2265 avoid an arbitrary limit in some compiler. */ 2265 avoid an arbitrary limit in some compiler. */
2266 if ( (is_alnum && ISALNUM (ch)) 2266 if ( (is_alnum && ISALNUM (ch))
2267 || (is_alpha && ISALPHA (ch)) 2267 || (is_alpha && ISALPHA (ch))
2268 || (is_blank && ISBLANK (ch)) 2268 || (is_blank && ISBLANK (ch))
2269 || (is_cntrl && ISCNTRL (ch))) 2269 || (is_cntrl && ISCNTRL (ch)))
2270 SET_LIST_BIT (translated); 2270 SET_LIST_BIT (translated);
2271 if ( (is_digit && ISDIGIT (ch)) 2271 if ( (is_digit && ISDIGIT (ch))
2272 || (is_graph && ISGRAPH (ch)) 2272 || (is_graph && ISGRAPH (ch))
2273 || (is_lower && ISLOWER (ch)) 2273 || (is_lower && ISLOWER (ch))
2274 || (is_print && ISPRINT (ch))) 2274 || (is_print && ISPRINT (ch)))
2275 SET_LIST_BIT (translated); 2275 SET_LIST_BIT (translated);
2276 if ( (is_punct && ISPUNCT (ch)) 2276 if ( (is_punct && ISPUNCT (ch))
2277 || (is_space && ISSPACE (ch)) 2277 || (is_space && ISSPACE (ch))
2278 || (is_upper && ISUPPER (ch)) 2278 || (is_upper && ISUPPER (ch))
2279 || (is_xdigit && ISXDIGIT (ch))) 2279 || (is_xdigit && ISXDIGIT (ch)))
2280 SET_LIST_BIT (translated); 2280 SET_LIST_BIT (translated);
2281 } 2281 }
2282 2282
2283 /* Repeat the loop. */ 2283 /* Repeat the loop. */
2284 continue; 2284 continue;
2285 } 2285 }
2286 else 2286 else
2287 { 2287 {
2288 c1++; 2288 c1++;
2289 while (c1--) 2289 while (c1--)
2290 PATUNFETCH; 2290 PATUNFETCH;
2291 SET_LIST_BIT ('['); 2291 SET_LIST_BIT ('[');
2292 2292
2293 /* Because the `:' may start a range, we 2293 /* Because the `:' may start a range, we
2294 can't simply set bit and repeat the loop. 2294 can't simply set bit and repeat the loop.
2295 Instead, just set it to C and handle below. */ 2295 Instead, just set it to C and handle below. */
2296 c = ':'; 2296 c = ':';
2297 } 2297 }
2298 } 2298 }
2299 2299
2300 if (p < pend && p[0] == '-' && p[1] != ']') 2300 if (p < pend && p[0] == '-' && p[1] != ']')
2301 { 2301 {
2302 2302
2303 /* Discard the `-'. */ 2303 /* Discard the `-'. */
2313 } 2313 }
2314 2314
2315 if (!SAME_CHARSET_P (c, c1)) 2315 if (!SAME_CHARSET_P (c, c1))
2316 FREE_STACK_RETURN (REG_ERANGE); 2316 FREE_STACK_RETURN (REG_ERANGE);
2317 } 2317 }
2318 else 2318 else
2319 /* Range from C to C. */ 2319 /* Range from C to C. */
2320 c1 = c; 2320 c1 = c;
2321 2321
2322 /* Set the range ... */ 2322 /* Set the range ... */
2323 if (SINGLE_BYTE_CHAR_P (c)) 2323 if (SINGLE_BYTE_CHAR_P (c))
2324 /* ... into bitmap. */ 2324 /* ... into bitmap. */
2325 { 2325 {
2326 unsigned this_char; 2326 unsigned this_char;
2327 int range_start = c, range_end = c1; 2327 int range_start = c, range_end = c1;
2328 2328
2329 /* If the start is after the end, the range is empty. */ 2329 /* If the start is after the end, the range is empty. */
2330 if (range_start > range_end) 2330 if (range_start > range_end)
2336 else 2336 else
2337 { 2337 {
2338 for (this_char = range_start; this_char <= range_end; 2338 for (this_char = range_start; this_char <= range_end;
2339 this_char++) 2339 this_char++)
2340 SET_LIST_BIT (TRANSLATE (this_char)); 2340 SET_LIST_BIT (TRANSLATE (this_char));
2341 } 2341 }
2342 } 2342 }
2343 else 2343 else
2344 /* ... into range table. */ 2344 /* ... into range table. */
2345 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1); 2345 SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2346 } 2346 }
2347 2347
2348 /* Discard any (non)matching list bytes that are all 0 at the 2348 /* Discard any (non)matching list bytes that are all 0 at the
2349 end of the map. Decrease the map-length byte too. */ 2349 end of the map. Decrease the map-length byte too. */
2350 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) 2350 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2351 b[-1]--; 2351 b[-1]--;
2352 b += b[-1]; 2352 b += b[-1];
2353 2353
2354 /* Build real range table from work area. */ 2354 /* Build real range table from work area. */
2355 if (RANGE_TABLE_WORK_USED (range_table_work)) 2355 if (RANGE_TABLE_WORK_USED (range_table_work))
2356 { 2356 {
2357 int i; 2357 int i;
2358 int used = RANGE_TABLE_WORK_USED (range_table_work); 2358 int used = RANGE_TABLE_WORK_USED (range_table_work);
2359 2359
2360 /* Allocate space for COUNT + RANGE_TABLE. Needs two 2360 /* Allocate space for COUNT + RANGE_TABLE. Needs two
2361 bytes for COUNT and three bytes for each character. */ 2361 bytes for COUNT and three bytes for each character. */
2362 GET_BUFFER_SPACE (2 + used * 3); 2362 GET_BUFFER_SPACE (2 + used * 3);
2363 2363
2364 /* Indicate the existence of range table. */ 2364 /* Indicate the existence of range table. */
2365 laststart[1] |= 0x80; 2365 laststart[1] |= 0x80;
2366 2366
2367 STORE_NUMBER_AND_INCR (b, used / 2); 2367 STORE_NUMBER_AND_INCR (b, used / 2);
2368 for (i = 0; i < used; i++) 2368 for (i = 0; i < used; i++)
2369 STORE_CHARACTER_AND_INCR 2369 STORE_CHARACTER_AND_INCR
2370 (b, RANGE_TABLE_WORK_ELT (range_table_work, i)); 2370 (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2371 } 2371 }
2372 } 2372 }
2373 break; 2373 break;
2374 2374
2375 2375
2376 case '(': 2376 case '(':
2377 if (syntax & RE_NO_BK_PARENS) 2377 if (syntax & RE_NO_BK_PARENS)
2378 goto handle_open; 2378 goto handle_open;
2379 else 2379 else
2380 goto normal_char; 2380 goto normal_char;
2381 2381
2382 2382
2383 case ')': 2383 case ')':
2384 if (syntax & RE_NO_BK_PARENS) 2384 if (syntax & RE_NO_BK_PARENS)
2385 goto handle_close; 2385 goto handle_close;
2386 else 2386 else
2387 goto normal_char; 2387 goto normal_char;
2388 2388
2389 2389
2390 case '\n': 2390 case '\n':
2391 if (syntax & RE_NEWLINE_ALT) 2391 if (syntax & RE_NEWLINE_ALT)
2392 goto handle_alt; 2392 goto handle_alt;
2393 else 2393 else
2394 goto normal_char; 2394 goto normal_char;
2395 2395
2396 2396
2397 case '|': 2397 case '|':
2398 if (syntax & RE_NO_BK_VBAR) 2398 if (syntax & RE_NO_BK_VBAR)
2399 goto handle_alt; 2399 goto handle_alt;
2400 else 2400 else
2401 goto normal_char; 2401 goto normal_char;
2402 2402
2403 2403
2404 case '{': 2404 case '{':
2405 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) 2405 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2406 goto handle_interval; 2406 goto handle_interval;
2407 else 2407 else
2408 goto normal_char; 2408 goto normal_char;
2409 2409
2410 2410
2411 case '\\': 2411 case '\\':
2412 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); 2412 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2413 2413
2414 /* Do not translate the character after the \, so that we can 2414 /* Do not translate the character after the \, so that we can
2415 distinguish, e.g., \B from \b, even if we normally would 2415 distinguish, e.g., \B from \b, even if we normally would
2416 translate, e.g., B to b. */ 2416 translate, e.g., B to b. */
2417 PATFETCH_RAW (c); 2417 PATFETCH_RAW (c);
2418 2418
2419 switch (c) 2419 switch (c)
2420 { 2420 {
2421 case '(': 2421 case '(':
2422 if (syntax & RE_NO_BK_PARENS) 2422 if (syntax & RE_NO_BK_PARENS)
2423 goto normal_backslash; 2423 goto normal_backslash;
2424 2424
2425 handle_open: 2425 handle_open:
2426 bufp->re_nsub++; 2426 bufp->re_nsub++;
2427 regnum++; 2427 regnum++;
2428 2428
2429 if (COMPILE_STACK_FULL) 2429 if (COMPILE_STACK_FULL)
2430 { 2430 {
2431 RETALLOC (compile_stack.stack, compile_stack.size << 1, 2431 RETALLOC (compile_stack.stack, compile_stack.size << 1,
2432 compile_stack_elt_t); 2432 compile_stack_elt_t);
2433 if (compile_stack.stack == NULL) return REG_ESPACE; 2433 if (compile_stack.stack == NULL) return REG_ESPACE;
2434 2434
2435 compile_stack.size <<= 1; 2435 compile_stack.size <<= 1;
2436 } 2436 }
2437 2437
2438 /* These are the values to restore when we hit end of this 2438 /* These are the values to restore when we hit end of this
2439 group. They are all relative offsets, so that if the 2439 group. They are all relative offsets, so that if the
2440 whole pattern moves because of realloc, they will still 2440 whole pattern moves because of realloc, they will still
2441 be valid. */ 2441 be valid. */
2442 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 2442 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2443 COMPILE_STACK_TOP.fixup_alt_jump 2443 COMPILE_STACK_TOP.fixup_alt_jump
2444 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 2444 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2445 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 2445 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2446 COMPILE_STACK_TOP.regnum = regnum; 2446 COMPILE_STACK_TOP.regnum = regnum;
2447 2447
2448 /* We will eventually replace the 0 with the number of 2448 /* We will eventually replace the 0 with the number of
2449 groups inner to this one. But do not push a 2449 groups inner to this one. But do not push a
2450 start_memory for groups beyond the last one we can 2450 start_memory for groups beyond the last one we can
2451 represent in the compiled pattern. */ 2451 represent in the compiled pattern. */
2452 if (regnum <= MAX_REGNUM) 2452 if (regnum <= MAX_REGNUM)
2453 { 2453 {
2454 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; 2454 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2455 BUF_PUSH_3 (start_memory, regnum, 0); 2455 BUF_PUSH_3 (start_memory, regnum, 0);
2456 } 2456 }
2457 2457
2458 compile_stack.avail++; 2458 compile_stack.avail++;
2459 2459
2460 fixup_alt_jump = 0; 2460 fixup_alt_jump = 0;
2461 laststart = 0; 2461 laststart = 0;
2462 begalt = b; 2462 begalt = b;
2463 /* If we've reached MAX_REGNUM groups, then this open 2463 /* If we've reached MAX_REGNUM groups, then this open
2464 won't actually generate any code, so we'll have to 2464 won't actually generate any code, so we'll have to
2465 clear pending_exact explicitly. */ 2465 clear pending_exact explicitly. */
2466 pending_exact = 0; 2466 pending_exact = 0;
2467 break; 2467 break;
2468 2468
2469 2469
2470 case ')': 2470 case ')':
2471 if (syntax & RE_NO_BK_PARENS) goto normal_backslash; 2471 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2472 2472
2473 if (COMPILE_STACK_EMPTY) 2473 if (COMPILE_STACK_EMPTY)
2474 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2474 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2475 goto normal_backslash; 2475 goto normal_backslash;
2476 else 2476 else
2477 FREE_STACK_RETURN (REG_ERPAREN); 2477 FREE_STACK_RETURN (REG_ERPAREN);
2478 2478
2479 handle_close: 2479 handle_close:
2480 if (fixup_alt_jump) 2480 if (fixup_alt_jump)
2481 { /* Push a dummy failure point at the end of the 2481 { /* Push a dummy failure point at the end of the
2482 alternative for a possible future 2482 alternative for a possible future
2483 `pop_failure_jump' to pop. See comments at 2483 `pop_failure_jump' to pop. See comments at
2484 `push_dummy_failure' in `re_match_2'. */ 2484 `push_dummy_failure' in `re_match_2'. */
2485 BUF_PUSH (push_dummy_failure); 2485 BUF_PUSH (push_dummy_failure);
2486 2486
2487 /* We allocated space for this jump when we assigned 2487 /* We allocated space for this jump when we assigned
2488 to `fixup_alt_jump', in the `handle_alt' case below. */ 2488 to `fixup_alt_jump', in the `handle_alt' case below. */
2489 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); 2489 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2490 } 2490 }
2491 2491
2492 /* See similar code for backslashed left paren above. */ 2492 /* See similar code for backslashed left paren above. */
2493 if (COMPILE_STACK_EMPTY) 2493 if (COMPILE_STACK_EMPTY)
2494 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) 2494 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2495 goto normal_char; 2495 goto normal_char;
2496 else 2496 else
2497 FREE_STACK_RETURN (REG_ERPAREN); 2497 FREE_STACK_RETURN (REG_ERPAREN);
2498 2498
2499 /* Since we just checked for an empty stack above, this 2499 /* Since we just checked for an empty stack above, this
2500 ``can't happen''. */ 2500 ``can't happen''. */
2501 assert (compile_stack.avail != 0); 2501 assert (compile_stack.avail != 0);
2502 { 2502 {
2503 /* We don't just want to restore into `regnum', because 2503 /* We don't just want to restore into `regnum', because
2504 later groups should continue to be numbered higher, 2504 later groups should continue to be numbered higher,
2505 as in `(ab)c(de)' -- the second group is #2. */ 2505 as in `(ab)c(de)' -- the second group is #2. */
2506 regnum_t this_group_regnum; 2506 regnum_t this_group_regnum;
2507 2507
2508 compile_stack.avail--; 2508 compile_stack.avail--;
2509 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 2509 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2510 fixup_alt_jump 2510 fixup_alt_jump
2511 = COMPILE_STACK_TOP.fixup_alt_jump 2511 = COMPILE_STACK_TOP.fixup_alt_jump
2512 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 2512 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2513 : 0; 2513 : 0;
2514 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 2514 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2515 this_group_regnum = COMPILE_STACK_TOP.regnum; 2515 this_group_regnum = COMPILE_STACK_TOP.regnum;
2516 /* If we've reached MAX_REGNUM groups, then this open 2516 /* If we've reached MAX_REGNUM groups, then this open
2517 won't actually generate any code, so we'll have to 2517 won't actually generate any code, so we'll have to
2518 clear pending_exact explicitly. */ 2518 clear pending_exact explicitly. */
2519 pending_exact = 0; 2519 pending_exact = 0;
2520 2520
2521 /* We're at the end of the group, so now we know how many 2521 /* We're at the end of the group, so now we know how many
2522 groups were inside this one. */ 2522 groups were inside this one. */
2523 if (this_group_regnum <= MAX_REGNUM) 2523 if (this_group_regnum <= MAX_REGNUM)
2524 { 2524 {
2525 unsigned char *inner_group_loc 2525 unsigned char *inner_group_loc
2526 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; 2526 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2527 2527
2528 *inner_group_loc = regnum - this_group_regnum; 2528 *inner_group_loc = regnum - this_group_regnum;
2529 BUF_PUSH_3 (stop_memory, this_group_regnum, 2529 BUF_PUSH_3 (stop_memory, this_group_regnum,
2530 regnum - this_group_regnum); 2530 regnum - this_group_regnum);
2531 } 2531 }
2532 } 2532 }
2533 break; 2533 break;
2534 2534
2535 2535
2536 case '|': /* `\|'. */ 2536 case '|': /* `\|'. */
2537 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) 2537 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2538 goto normal_backslash; 2538 goto normal_backslash;
2539 handle_alt: 2539 handle_alt:
2540 if (syntax & RE_LIMITED_OPS) 2540 if (syntax & RE_LIMITED_OPS)
2541 goto normal_char; 2541 goto normal_char;
2542 2542
2543 /* Insert before the previous alternative a jump which 2543 /* Insert before the previous alternative a jump which
2544 jumps to this alternative if the former fails. */ 2544 jumps to this alternative if the former fails. */
2545 GET_BUFFER_SPACE (3); 2545 GET_BUFFER_SPACE (3);
2546 INSERT_JUMP (on_failure_jump, begalt, b + 6); 2546 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2547 pending_exact = 0; 2547 pending_exact = 0;
2548 b += 3; 2548 b += 3;
2549 2549
2550 /* The alternative before this one has a jump after it 2550 /* The alternative before this one has a jump after it
2551 which gets executed if it gets matched. Adjust that 2551 which gets executed if it gets matched. Adjust that
2552 jump so it will jump to this alternative's analogous 2552 jump so it will jump to this alternative's analogous
2553 jump (put in below, which in turn will jump to the next 2553 jump (put in below, which in turn will jump to the next
2554 (if any) alternative's such jump, etc.). The last such 2554 (if any) alternative's such jump, etc.). The last such
2555 jump jumps to the correct final destination. A picture: 2555 jump jumps to the correct final destination. A picture:
2556 _____ _____ 2556 _____ _____
2557 | | | | 2557 | | | |
2558 | v | v 2558 | v | v
2559 a | b | c 2559 a | b | c
2560 2560
2561 If we are at `b', then fixup_alt_jump right now points to a 2561 If we are at `b', then fixup_alt_jump right now points to a
2562 three-byte space after `a'. We'll put in the jump, set 2562 three-byte space after `a'. We'll put in the jump, set
2563 fixup_alt_jump to right after `b', and leave behind three 2563 fixup_alt_jump to right after `b', and leave behind three
2564 bytes which we'll fill in when we get to after `c'. */ 2564 bytes which we'll fill in when we get to after `c'. */
2565 2565
2566 if (fixup_alt_jump) 2566 if (fixup_alt_jump)
2567 STORE_JUMP (jump_past_alt, fixup_alt_jump, b); 2567 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2568 2568
2569 /* Mark and leave space for a jump after this alternative, 2569 /* Mark and leave space for a jump after this alternative,
2570 to be filled in later either by next alternative or 2570 to be filled in later either by next alternative or
2571 when we know we're at the end of a series of alternatives. */ 2571 when we know we're at the end of a series of alternatives. */
2572 fixup_alt_jump = b; 2572 fixup_alt_jump = b;
2573 GET_BUFFER_SPACE (3); 2573 GET_BUFFER_SPACE (3);
2574 b += 3; 2574 b += 3;
2575 2575
2576 laststart = 0; 2576 laststart = 0;
2577 begalt = b; 2577 begalt = b;
2578 break; 2578 break;
2579 2579
2580 2580
2581 case '{': 2581 case '{':
2582 /* If \{ is a literal. */ 2582 /* If \{ is a literal. */
2583 if (!(syntax & RE_INTERVALS) 2583 if (!(syntax & RE_INTERVALS)
2584 /* If we're at `\{' and it's not the open-interval 2584 /* If we're at `\{' and it's not the open-interval
2585 operator. */ 2585 operator. */
2586 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) 2586 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2587 || (p - 2 == pattern && p == pend)) 2587 || (p - 2 == pattern && p == pend))
2588 goto normal_backslash; 2588 goto normal_backslash;
2589 2589
2590 handle_interval: 2590 handle_interval:
2591 { 2591 {
2592 /* If we got here, then the syntax allows intervals. */ 2592 /* If we got here, then the syntax allows intervals. */
2593 2593
2594 /* At least (most) this many matches must be made. */ 2594 /* At least (most) this many matches must be made. */
2595 int lower_bound = -1, upper_bound = -1; 2595 int lower_bound = -1, upper_bound = -1;
2596 2596
2597 beg_interval = p - 1; 2597 beg_interval = p - 1;
2598 2598
2599 if (p == pend) 2599 if (p == pend)
2600 { 2600 {
2601 if (syntax & RE_NO_BK_BRACES) 2601 if (syntax & RE_NO_BK_BRACES)
2602 goto unfetch_interval; 2602 goto unfetch_interval;
2603 else 2603 else
2604 FREE_STACK_RETURN (REG_EBRACE); 2604 FREE_STACK_RETURN (REG_EBRACE);
2605 } 2605 }
2606 2606
2607 GET_UNSIGNED_NUMBER (lower_bound); 2607 GET_UNSIGNED_NUMBER (lower_bound);
2608 2608
2609 if (c == ',') 2609 if (c == ',')
2610 { 2610 {
2611 GET_UNSIGNED_NUMBER (upper_bound); 2611 GET_UNSIGNED_NUMBER (upper_bound);
2612 if (upper_bound < 0) upper_bound = RE_DUP_MAX; 2612 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2613 } 2613 }
2614 else 2614 else
2615 /* Interval such as `{1}' => match exactly once. */ 2615 /* Interval such as `{1}' => match exactly once. */
2616 upper_bound = lower_bound; 2616 upper_bound = lower_bound;
2617 2617
2618 if (lower_bound < 0 || upper_bound > RE_DUP_MAX 2618 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2619 || lower_bound > upper_bound) 2619 || lower_bound > upper_bound)
2620 { 2620 {
2621 if (syntax & RE_NO_BK_BRACES) 2621 if (syntax & RE_NO_BK_BRACES)
2622 goto unfetch_interval; 2622 goto unfetch_interval;
2623 else 2623 else
2624 FREE_STACK_RETURN (REG_BADBR); 2624 FREE_STACK_RETURN (REG_BADBR);
2625 } 2625 }
2626 2626
2627 if (!(syntax & RE_NO_BK_BRACES)) 2627 if (!(syntax & RE_NO_BK_BRACES))
2628 { 2628 {
2629 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); 2629 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2630 2630
2631 PATFETCH (c); 2631 PATFETCH (c);
2632 } 2632 }
2633 2633
2634 if (c != '}') 2634 if (c != '}')
2635 { 2635 {
2636 if (syntax & RE_NO_BK_BRACES) 2636 if (syntax & RE_NO_BK_BRACES)
2637 goto unfetch_interval; 2637 goto unfetch_interval;
2638 else 2638 else
2639 FREE_STACK_RETURN (REG_BADBR); 2639 FREE_STACK_RETURN (REG_BADBR);
2640 } 2640 }
2641 2641
2642 /* We just parsed a valid interval. */ 2642 /* We just parsed a valid interval. */
2643 2643
2644 /* If it's invalid to have no preceding re. */ 2644 /* If it's invalid to have no preceding re. */
2645 if (!laststart) 2645 if (!laststart)
2646 { 2646 {
2647 if (syntax & RE_CONTEXT_INVALID_OPS) 2647 if (syntax & RE_CONTEXT_INVALID_OPS)
2648 FREE_STACK_RETURN (REG_BADRPT); 2648 FREE_STACK_RETURN (REG_BADRPT);
2649 else if (syntax & RE_CONTEXT_INDEP_OPS) 2649 else if (syntax & RE_CONTEXT_INDEP_OPS)
2650 laststart = b; 2650 laststart = b;
2651 else 2651 else
2652 goto unfetch_interval; 2652 goto unfetch_interval;
2653 } 2653 }
2654 2654
2655 /* If the upper bound is zero, don't want to succeed at 2655 /* If the upper bound is zero, don't want to succeed at
2656 all; jump from `laststart' to `b + 3', which will be 2656 all; jump from `laststart' to `b + 3', which will be
2657 the end of the buffer after we insert the jump. */ 2657 the end of the buffer after we insert the jump. */
2658 if (upper_bound == 0) 2658 if (upper_bound == 0)
2659 { 2659 {
2660 GET_BUFFER_SPACE (3); 2660 GET_BUFFER_SPACE (3);
2661 INSERT_JUMP (jump, laststart, b + 3); 2661 INSERT_JUMP (jump, laststart, b + 3);
2662 b += 3; 2662 b += 3;
2663 } 2663 }
2664 2664
2665 /* Otherwise, we have a nontrivial interval. When 2665 /* Otherwise, we have a nontrivial interval. When
2666 we're all done, the pattern will look like: 2666 we're all done, the pattern will look like:
2667 set_number_at <jump count> <upper bound> 2667 set_number_at <jump count> <upper bound>
2668 set_number_at <succeed_n count> <lower bound> 2668 set_number_at <succeed_n count> <lower bound>
2669 succeed_n <after jump addr> <succeed_n count> 2669 succeed_n <after jump addr> <succeed_n count>
2670 <body of loop> 2670 <body of loop>
2671 jump_n <succeed_n addr> <jump count> 2671 jump_n <succeed_n addr> <jump count>
2672 (The upper bound and `jump_n' are omitted if 2672 (The upper bound and `jump_n' are omitted if
2673 `upper_bound' is 1, though.) */ 2673 `upper_bound' is 1, though.) */
2674 else 2674 else
2675 { /* If the upper bound is > 1, we need to insert 2675 { /* If the upper bound is > 1, we need to insert
2676 more at the end of the loop. */ 2676 more at the end of the loop. */
2677 unsigned nbytes = 10 + (upper_bound > 1) * 10; 2677 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2678 2678
2679 GET_BUFFER_SPACE (nbytes); 2679 GET_BUFFER_SPACE (nbytes);
2680 2680
2681 /* Initialize lower bound of the `succeed_n', even 2681 /* Initialize lower bound of the `succeed_n', even
2682 though it will be set during matching by its 2682 though it will be set during matching by its
2683 attendant `set_number_at' (inserted next), 2683 attendant `set_number_at' (inserted next),
2684 because `re_compile_fastmap' needs to know. 2684 because `re_compile_fastmap' needs to know.
2685 Jump to the `jump_n' we might insert below. */ 2685 Jump to the `jump_n' we might insert below. */
2686 INSERT_JUMP2 (succeed_n, laststart, 2686 INSERT_JUMP2 (succeed_n, laststart,
2687 b + 5 + (upper_bound > 1) * 5, 2687 b + 5 + (upper_bound > 1) * 5,
2688 lower_bound); 2688 lower_bound);
2689 b += 5; 2689 b += 5;
2690 2690
2691 /* Code to initialize the lower bound. Insert 2691 /* Code to initialize the lower bound. Insert
2692 before the `succeed_n'. The `5' is the last two 2692 before the `succeed_n'. The `5' is the last two
2693 bytes of this `set_number_at', plus 3 bytes of 2693 bytes of this `set_number_at', plus 3 bytes of
2694 the following `succeed_n'. */ 2694 the following `succeed_n'. */
2695 insert_op2 (set_number_at, laststart, 5, lower_bound, b); 2695 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2696 b += 5; 2696 b += 5;
2697 2697
2698 if (upper_bound > 1) 2698 if (upper_bound > 1)
2699 { /* More than one repetition is allowed, so 2699 { /* More than one repetition is allowed, so
2700 append a backward jump to the `succeed_n' 2700 append a backward jump to the `succeed_n'
2701 that starts this interval. 2701 that starts this interval.
2702 2702
2703 When we've reached this during matching, 2703 When we've reached this during matching,
2704 we'll have matched the interval once, so 2704 we'll have matched the interval once, so
2705 jump back only `upper_bound - 1' times. */ 2705 jump back only `upper_bound - 1' times. */
2706 STORE_JUMP2 (jump_n, b, laststart + 5, 2706 STORE_JUMP2 (jump_n, b, laststart + 5,
2707 upper_bound - 1); 2707 upper_bound - 1);
2708 b += 5; 2708 b += 5;
2709 2709
2710 /* The location we want to set is the second 2710 /* The location we want to set is the second
2711 parameter of the `jump_n'; that is `b-2' as 2711 parameter of the `jump_n'; that is `b-2' as
2712 an absolute address. `laststart' will be 2712 an absolute address. `laststart' will be
2713 the `set_number_at' we're about to insert; 2713 the `set_number_at' we're about to insert;
2714 `laststart+3' the number to set, the source 2714 `laststart+3' the number to set, the source
2715 for the relative address. But we are 2715 for the relative address. But we are
2716 inserting into the middle of the pattern -- 2716 inserting into the middle of the pattern --
2717 so everything is getting moved up by 5. 2717 so everything is getting moved up by 5.
2718 Conclusion: (b - 2) - (laststart + 3) + 5, 2718 Conclusion: (b - 2) - (laststart + 3) + 5,
2719 i.e., b - laststart. 2719 i.e., b - laststart.
2720 2720
2721 We insert this at the beginning of the loop 2721 We insert this at the beginning of the loop
2722 so that if we fail during matching, we'll 2722 so that if we fail during matching, we'll
2723 reinitialize the bounds. */ 2723 reinitialize the bounds. */
2724 insert_op2 (set_number_at, laststart, b - laststart, 2724 insert_op2 (set_number_at, laststart, b - laststart,
2725 upper_bound - 1, b); 2725 upper_bound - 1, b);
2726 b += 5; 2726 b += 5;
2727 } 2727 }
2728 } 2728 }
2729 pending_exact = 0; 2729 pending_exact = 0;
2730 beg_interval = NULL; 2730 beg_interval = NULL;
2731 } 2731 }
2732 break; 2732 break;
2733 2733
2734 unfetch_interval: 2734 unfetch_interval:
2735 /* If an invalid interval, match the characters as literals. */ 2735 /* If an invalid interval, match the characters as literals. */
2736 assert (beg_interval); 2736 assert (beg_interval);
2737 p = beg_interval; 2737 p = beg_interval;
2738 beg_interval = NULL; 2738 beg_interval = NULL;
2739 2739
2740 /* normal_char and normal_backslash need `c'. */ 2740 /* normal_char and normal_backslash need `c'. */
2741 PATFETCH (c); 2741 PATFETCH (c);
2742 2742
2743 if (!(syntax & RE_NO_BK_BRACES)) 2743 if (!(syntax & RE_NO_BK_BRACES))
2744 { 2744 {
2745 if (p > pattern && p[-1] == '\\') 2745 if (p > pattern && p[-1] == '\\')
2746 goto normal_backslash; 2746 goto normal_backslash;
2747 } 2747 }
2748 goto normal_char; 2748 goto normal_char;
2749 2749
2750 #ifdef emacs 2750 #ifdef emacs
2751 /* There is no way to specify the before_dot and after_dot 2751 /* There is no way to specify the before_dot and after_dot
2752 operators. rms says this is ok. --karl */ 2752 operators. rms says this is ok. --karl */
2753 case '=': 2753 case '=':
2754 BUF_PUSH (at_dot); 2754 BUF_PUSH (at_dot);
2755 break; 2755 break;
2756 2756
2757 case 's': 2757 case 's':
2758 laststart = b; 2758 laststart = b;
2759 PATFETCH (c); 2759 PATFETCH (c);
2760 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); 2760 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2761 break; 2761 break;
2762 2762
2763 case 'S': 2763 case 'S':
2764 laststart = b; 2764 laststart = b;
2765 PATFETCH (c); 2765 PATFETCH (c);
2766 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]); 2766 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2767 break; 2767 break;
2768 2768
2769 case 'c': 2769 case 'c':
2770 laststart = b; 2770 laststart = b;
2771 PATFETCH_RAW (c); 2771 PATFETCH_RAW (c);
2772 BUF_PUSH_2 (categoryspec, c); 2772 BUF_PUSH_2 (categoryspec, c);
2778 BUF_PUSH_2 (notcategoryspec, c); 2778 BUF_PUSH_2 (notcategoryspec, c);
2779 break; 2779 break;
2780 #endif /* emacs */ 2780 #endif /* emacs */
2781 2781
2782 2782
2783 case 'w': 2783 case 'w':
2784 laststart = b; 2784 laststart = b;
2785 BUF_PUSH (wordchar); 2785 BUF_PUSH (wordchar);
2786 break; 2786 break;
2787 2787
2788 2788
2789 case 'W': 2789 case 'W':
2790 laststart = b; 2790 laststart = b;
2791 BUF_PUSH (notwordchar); 2791 BUF_PUSH (notwordchar);
2792 break; 2792 break;
2793 2793
2794 2794
2795 case '<': 2795 case '<':
2796 BUF_PUSH (wordbeg); 2796 BUF_PUSH (wordbeg);
2797 break; 2797 break;
2798 2798
2799 case '>': 2799 case '>':
2800 BUF_PUSH (wordend); 2800 BUF_PUSH (wordend);
2801 break; 2801 break;
2802 2802
2803 case 'b': 2803 case 'b':
2804 BUF_PUSH (wordbound); 2804 BUF_PUSH (wordbound);
2805 break; 2805 break;
2806 2806
2807 case 'B': 2807 case 'B':
2808 BUF_PUSH (notwordbound); 2808 BUF_PUSH (notwordbound);
2809 break; 2809 break;
2810 2810
2811 case '`': 2811 case '`':
2812 BUF_PUSH (begbuf); 2812 BUF_PUSH (begbuf);
2813 break; 2813 break;
2814 2814
2815 case '\'': 2815 case '\'':
2816 BUF_PUSH (endbuf); 2816 BUF_PUSH (endbuf);
2817 break; 2817 break;
2818 2818
2819 case '1': case '2': case '3': case '4': case '5': 2819 case '1': case '2': case '3': case '4': case '5':
2820 case '6': case '7': case '8': case '9': 2820 case '6': case '7': case '8': case '9':
2821 if (syntax & RE_NO_BK_REFS) 2821 if (syntax & RE_NO_BK_REFS)
2822 goto normal_char; 2822 goto normal_char;
2823 2823
2824 c1 = c - '0'; 2824 c1 = c - '0';
2825 2825
2826 if (c1 > regnum) 2826 if (c1 > regnum)
2827 FREE_STACK_RETURN (REG_ESUBREG); 2827 FREE_STACK_RETURN (REG_ESUBREG);
2828 2828
2829 /* Can't back reference to a subexpression if inside of it. */ 2829 /* Can't back reference to a subexpression if inside of it. */
2830 if (group_in_compile_stack (compile_stack, c1)) 2830 if (group_in_compile_stack (compile_stack, c1))
2831 goto normal_char; 2831 goto normal_char;
2832 2832
2833 laststart = b; 2833 laststart = b;
2834 BUF_PUSH_2 (duplicate, c1); 2834 BUF_PUSH_2 (duplicate, c1);
2835 break; 2835 break;
2836 2836
2837 2837
2838 case '+': 2838 case '+':
2839 case '?': 2839 case '?':
2840 if (syntax & RE_BK_PLUS_QM) 2840 if (syntax & RE_BK_PLUS_QM)
2841 goto handle_plus; 2841 goto handle_plus;
2842 else 2842 else
2843 goto normal_backslash; 2843 goto normal_backslash;
2844 2844
2845 default: 2845 default:
2846 normal_backslash: 2846 normal_backslash:
2847 /* You might think it would be useful for \ to mean 2847 /* You might think it would be useful for \ to mean
2848 not to translate; but if we don't translate it 2848 not to translate; but if we don't translate it
2849 it will never match anything. */ 2849 it will never match anything. */
2850 c = TRANSLATE (c); 2850 c = TRANSLATE (c);
2851 goto normal_char; 2851 goto normal_char;
2852 } 2852 }
2853 break; 2853 break;
2854 2854
2855 2855
2856 default: 2856 default:
2857 /* Expects the character in `c'. */ 2857 /* Expects the character in `c'. */
2858 normal_char: 2858 normal_char:
2859 p1 = p - 1; /* P1 points the head of C. */ 2859 p1 = p - 1; /* P1 points the head of C. */
2860 #ifdef emacs 2860 #ifdef emacs
2861 if (bufp->multibyte) 2861 if (bufp->multibyte)
2862 /* Set P to the next character boundary. */ 2862 /* Set P to the next character boundary. */
2863 p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1; 2863 p += MULTIBYTE_FORM_LENGTH (p1, pend - p1) - 1;
2864 #endif 2864 #endif
2865 /* If no exactn currently being built. */ 2865 /* If no exactn currently being built. */
2866 if (!pending_exact 2866 if (!pending_exact
2867 2867
2868 /* If last exactn not at current position. */ 2868 /* If last exactn not at current position. */
2869 || pending_exact + *pending_exact + 1 != b 2869 || pending_exact + *pending_exact + 1 != b
2870 2870
2871 /* We have only one byte following the exactn for the count. */ 2871 /* We have only one byte following the exactn for the count. */
2872 || *pending_exact >= (1 << BYTEWIDTH) - (p - p1) 2872 || *pending_exact >= (1 << BYTEWIDTH) - (p - p1)
2873 2873
2874 /* If followed by a repetition operator. */ 2874 /* If followed by a repetition operator. */
2875 || *p == '*' || *p == '^' 2875 || *p == '*' || *p == '^'
2876 || ((syntax & RE_BK_PLUS_QM) 2876 || ((syntax & RE_BK_PLUS_QM)
2877 ? *p == '\\' && (p[1] == '+' || p[1] == '?') 2877 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2878 : (*p == '+' || *p == '?')) 2878 : (*p == '+' || *p == '?'))
2879 || ((syntax & RE_INTERVALS) 2879 || ((syntax & RE_INTERVALS)
2880 && ((syntax & RE_NO_BK_BRACES) 2880 && ((syntax & RE_NO_BK_BRACES)
2881 ? *p == '{' 2881 ? *p == '{'
2882 : (p[0] == '\\' && p[1] == '{')))) 2882 : (p[0] == '\\' && p[1] == '{'))))
2883 { 2883 {
2884 /* Start building a new exactn. */ 2884 /* Start building a new exactn. */
2885 2885
2886 laststart = b; 2886 laststart = b;
2887 2887
2888 BUF_PUSH_2 (exactn, 0); 2888 BUF_PUSH_2 (exactn, 0);
2889 pending_exact = b - 1; 2889 pending_exact = b - 1;
2890 } 2890 }
2891 2891
2892 /* Here, C may have been translated, so C may not be equal to *P1. */ 2892 /* Here, C may have been translated, so C may not be equal to *P1. */
2893 while (1) 2893 while (1)
2894 { 2894 {
2895 BUF_PUSH (c); 2895 BUF_PUSH (c);
2896 (*pending_exact)++; 2896 (*pending_exact)++;
2897 if (++p1 == p) 2897 if (++p1 == p)
2898 break; 2898 break;
2899 2899
2900 /* Rest of multibyte form should be copied literally. */ 2900 /* Rest of multibyte form should be copied literally. */
2901 c = *(unsigned char *)p1; 2901 c = *(unsigned char *)p1;
2902 } 2902 }
2903 break; 2903 break;
2904 } /* switch (c) */ 2904 } /* switch (c) */
2905 } /* while p != pend */ 2905 } /* while p != pend */
2906 2906
2907 2907
2908 /* Through the pattern now. */ 2908 /* Through the pattern now. */
2909 2909
2975 return REG_NOERROR; 2975 return REG_NOERROR;
2976 } /* regex_compile */ 2976 } /* regex_compile */
2977 2977
2978 /* Subroutines for `regex_compile'. */ 2978 /* Subroutines for `regex_compile'. */
2979 2979
2980 /* Store OP at LOC followed by two-byte integer parameter ARG. */ 2980 /* Store OP at LOC followed by two-byte integer parameter ARG. */
2981 2981
2982 static void 2982 static void
2983 store_op1 (op, loc, arg) 2983 store_op1 (op, loc, arg)
2984 re_opcode_t op; 2984 re_opcode_t op;
2985 unsigned char *loc; 2985 unsigned char *loc;
3056 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; 3056 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3057 3057
3058 return 3058 return
3059 /* After a subexpression? */ 3059 /* After a subexpression? */
3060 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) 3060 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3061 /* After an alternative? */ 3061 /* After an alternative? */
3062 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash)); 3062 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3063 } 3063 }
3064 3064
3065 3065
3066 /* The dual of at_begline_loc_p. This one is for $. We assume there is 3066 /* The dual of at_begline_loc_p. This one is for $. We assume there is
3076 const char *next_next = p + 1 < pend ? p + 1 : 0; 3076 const char *next_next = p + 1 < pend ? p + 1 : 0;
3077 3077
3078 return 3078 return
3079 /* Before a subexpression? */ 3079 /* Before a subexpression? */
3080 (syntax & RE_NO_BK_PARENS ? *next == ')' 3080 (syntax & RE_NO_BK_PARENS ? *next == ')'
3081 : next_backslash && next_next && *next_next == ')') 3081 : next_backslash && next_next && *next_next == ')')
3082 /* Before an alternative? */ 3082 /* Before an alternative? */
3083 || (syntax & RE_NO_BK_VBAR ? *next == '|' 3083 || (syntax & RE_NO_BK_VBAR ? *next == '|'
3084 : next_backslash && next_next && *next_next == '|'); 3084 : next_backslash && next_next && *next_next == '|');
3085 } 3085 }
3086 3086
3087 3087
3088 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and 3088 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3089 false if it's not. */ 3089 false if it's not. */
3144 3144
3145 /* Have to increment the pointer into the pattern string, so the 3145 /* Have to increment the pointer into the pattern string, so the
3146 caller isn't still at the ending character. */ 3146 caller isn't still at the ending character. */
3147 (*p_ptr)++; 3147 (*p_ptr)++;
3148 3148
3149 /* If the start is after the end, the range is empty. */ 3149 /* If the start is after the end, the range is empty. */
3150 if (range_start > range_end) 3150 if (range_start > range_end)
3151 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; 3151 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3152 3152
3153 /* Here we see why `this_char' has to be larger than an `unsigned 3153 /* Here we see why `this_char' has to be larger than an `unsigned
3154 char' -- the range is inclusive, so if `range_end' == 0xff 3154 char' -- the range is inclusive, so if `range_end' == 0xff
3198 /* This holds the pointer to the failure stack, when 3198 /* This holds the pointer to the failure stack, when
3199 it is allocated relocatably. */ 3199 it is allocated relocatably. */
3200 fail_stack_elt_t *failure_stack_ptr; 3200 fail_stack_elt_t *failure_stack_ptr;
3201 3201
3202 /* Assume that each path through the pattern can be null until 3202 /* Assume that each path through the pattern can be null until
3203 proven otherwise. We set this false at the bottom of switch 3203 proven otherwise. We set this false at the bottom of switch
3204 statement, to which we get only if a particular path doesn't 3204 statement, to which we get only if a particular path doesn't
3205 match the empty string. */ 3205 match the empty string. */
3206 boolean path_can_be_null = true; 3206 boolean path_can_be_null = true;
3207 3207
3208 /* We aren't doing a `succeed_n' to begin with. */ 3208 /* We aren't doing a `succeed_n' to begin with. */
3209 boolean succeed_n_p = false; 3209 boolean succeed_n_p = false;
3210 3210
3211 /* If all elements for base leading-codes in fastmap are set, this 3211 /* If all elements for base leading-codes in fastmap are set, this
3212 flag is set true. */ 3212 flag is set true. */
3213 boolean match_any_multibyte_characters = false; 3213 boolean match_any_multibyte_characters = false;
3214 3214
3215 /* Maximum code of simple (single byte) character. */ 3215 /* Maximum code of simple (single byte) character. */
3216 int simple_char_max; 3216 int simple_char_max;
3217 3217
3218 assert (fastmap != NULL && p != NULL); 3218 assert (fastmap != NULL && p != NULL);
3219 3219
3220 INIT_FAIL_STACK (); 3220 INIT_FAIL_STACK ();
3221 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ 3221 bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */
3222 bufp->fastmap_accurate = 1; /* It will be when we're done. */ 3222 bufp->fastmap_accurate = 1; /* It will be when we're done. */
3223 bufp->can_be_null = 0; 3223 bufp->can_be_null = 0;
3224 3224
3225 while (1) 3225 while (1)
3226 { 3226 {
3240 } 3240 }
3241 else 3241 else
3242 break; 3242 break;
3243 } 3243 }
3244 3244
3245 /* We should never be about to go beyond the end of the pattern. */ 3245 /* We should never be about to go beyond the end of the pattern. */
3246 assert (p < pend); 3246 assert (p < pend);
3247 3247
3248 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 3248 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3249 { 3249 {
3250 3250
3251 /* I guess the idea here is to simply not bother with a fastmap 3251 /* I guess the idea here is to simply not bother with a fastmap
3252 if a backreference is used, since it's too hard to figure out 3252 if a backreference is used, since it's too hard to figure out
3253 the fastmap for the corresponding group. Setting 3253 the fastmap for the corresponding group. Setting
3254 `can_be_null' stops `re_search_2' from using the fastmap, so 3254 `can_be_null' stops `re_search_2' from using the fastmap, so
3255 that is all we do. */ 3255 that is all we do. */
3256 case duplicate: 3256 case duplicate:
3257 bufp->can_be_null = 1; 3257 bufp->can_be_null = 1;
3258 goto done; 3258 goto done;
3259 3259
3260 3260
3261 /* Following are the cases which match a character. These end 3261 /* Following are the cases which match a character. These end
3262 with `break'. */ 3262 with `break'. */
3263 3263
3264 case exactn: 3264 case exactn:
3265 fastmap[p[1]] = 1; 3265 fastmap[p[1]] = 1;
3266 break; 3266 break;
3267 3267
3268 3268
3269 #ifndef emacs 3269 #ifndef emacs
3270 case charset: 3270 case charset:
3271 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 3271 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3272 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) 3272 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3273 fastmap[j] = 1; 3273 fastmap[j] = 1;
3274 break; 3274 break;
3275 3275
3276 3276
3277 case charset_not: 3277 case charset_not:
3278 /* Chars beyond end of map must be allowed. */ 3278 /* Chars beyond end of map must be allowed. */
3279 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++) 3279 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3280 fastmap[j] = 1; 3280 fastmap[j] = 1;
3281 3281
3282 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--) 3282 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3283 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) 3283 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3284 fastmap[j] = 1; 3284 fastmap[j] = 1;
3285 break; 3285 break;
3286 3286
3287 3287
3288 case wordchar: 3288 case wordchar:
3289 for (j = 0; j < (1 << BYTEWIDTH); j++) 3289 for (j = 0; j < (1 << BYTEWIDTH); j++)
3290 if (SYNTAX (j) == Sword) 3290 if (SYNTAX (j) == Sword)
3313 3313
3314 /* Make P point to the range table. */ 3314 /* Make P point to the range table. */
3315 p += CHARSET_BITMAP_SIZE (&p[-2]); 3315 p += CHARSET_BITMAP_SIZE (&p[-2]);
3316 3316
3317 /* Extract the number of ranges in range table into 3317 /* Extract the number of ranges in range table into
3318 COUNT. */ 3318 COUNT. */
3319 EXTRACT_NUMBER_AND_INCR (count, p); 3319 EXTRACT_NUMBER_AND_INCR (count, p);
3320 for (; count > 0; count--, p += 2 * 3) /* XXX */ 3320 for (; count > 0; count--, p += 2 * 3) /* XXX */
3321 { 3321 {
3322 /* Extract the start of each range. */ 3322 /* Extract the start of each range. */
3323 EXTRACT_CHARACTER (c, p); 3323 EXTRACT_CHARACTER (c, p);
3363 if (SYNTAX (j) == Sword) 3363 if (SYNTAX (j) == Sword)
3364 fastmap[j] = 1; 3364 fastmap[j] = 1;
3365 3365
3366 if (bufp->multibyte) 3366 if (bufp->multibyte)
3367 /* Any character set can possibly contain a character 3367 /* Any character set can possibly contain a character
3368 whose syntax is `Sword'. */ 3368 whose syntax is `Sword'. */
3369 goto set_fastmap_for_multibyte_characters; 3369 goto set_fastmap_for_multibyte_characters;
3370 break; 3370 break;
3371 3371
3372 3372
3373 case notwordchar: 3373 case notwordchar:
3381 whose syntax is not `Sword'. */ 3381 whose syntax is not `Sword'. */
3382 goto set_fastmap_for_multibyte_characters; 3382 goto set_fastmap_for_multibyte_characters;
3383 break; 3383 break;
3384 #endif 3384 #endif
3385 3385
3386 case anychar: 3386 case anychar:
3387 { 3387 {
3388 int fastmap_newline = fastmap['\n']; 3388 int fastmap_newline = fastmap['\n'];
3389 3389
3390 /* `.' matches anything (but if bufp->multibyte is 3390 /* `.' matches anything (but if bufp->multibyte is
3391 nonzero, matches `\000' .. `\127' and possible multibyte 3391 nonzero, matches `\000' .. `\127' and possible multibyte
3408 /* ... except perhaps newline. */ 3408 /* ... except perhaps newline. */
3409 if (!(bufp->syntax & RE_DOT_NEWLINE)) 3409 if (!(bufp->syntax & RE_DOT_NEWLINE))
3410 fastmap['\n'] = fastmap_newline; 3410 fastmap['\n'] = fastmap_newline;
3411 3411
3412 /* Return if we have already set `can_be_null'; if we have, 3412 /* Return if we have already set `can_be_null'; if we have,
3413 then the fastmap is irrelevant. Something's wrong here. */ 3413 then the fastmap is irrelevant. Something's wrong here. */
3414 else if (bufp->can_be_null) 3414 else if (bufp->can_be_null)
3415 goto done; 3415 goto done;
3416 3416
3417 /* Otherwise, have to check alternative paths. */ 3417 /* Otherwise, have to check alternative paths. */
3418 break; 3418 break;
3422 case wordbound: 3422 case wordbound:
3423 case notwordbound: 3423 case notwordbound:
3424 case wordbeg: 3424 case wordbeg:
3425 case wordend: 3425 case wordend:
3426 case notsyntaxspec: 3426 case notsyntaxspec:
3427 case syntaxspec: 3427 case syntaxspec:
3428 /* This match depends on text properties. These end with 3428 /* This match depends on text properties. These end with
3429 aborting optimizations. */ 3429 aborting optimizations. */
3430 bufp->can_be_null = 1; 3430 bufp->can_be_null = 1;
3431 goto done; 3431 goto done;
3432 #if 0 3432 #if 0
3433 k = *p++; 3433 k = *p++;
3434 simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH); 3434 simple_char_max = bufp->multibyte ? 0x80 : (1 << BYTEWIDTH);
3435 for (j = 0; j < simple_char_max; j++) 3435 for (j = 0; j < simple_char_max; j++)
3436 if (SYNTAX (j) == (enum syntaxcode) k) 3436 if (SYNTAX (j) == (enum syntaxcode) k)
3478 if (!CHAR_HAS_CATEGORY (j, k)) 3478 if (!CHAR_HAS_CATEGORY (j, k))
3479 fastmap[j] = 1; 3479 fastmap[j] = 1;
3480 3480
3481 if (bufp->multibyte) 3481 if (bufp->multibyte)
3482 /* Any character set can possibly contain a character 3482 /* Any character set can possibly contain a character
3483 whose category is not K. */ 3483 whose category is not K. */
3484 goto set_fastmap_for_multibyte_characters; 3484 goto set_fastmap_for_multibyte_characters;
3485 break; 3485 break;
3486 3486
3487 /* All cases after this match the empty string. These end with 3487 /* All cases after this match the empty string. These end with
3488 `continue'. */ 3488 `continue'. */
3489 3489
3490 3490
3491 case before_dot: 3491 case before_dot:
3492 case at_dot: 3492 case at_dot:
3493 case after_dot: 3493 case after_dot:
3494 continue; 3494 continue;
3495 #endif /* emacs */ 3495 #endif /* emacs */
3496 3496
3497 3497
3498 case no_op: 3498 case no_op:
3499 case begline: 3499 case begline:
3500 case endline: 3500 case endline:
3501 case begbuf: 3501 case begbuf:
3502 case endbuf: 3502 case endbuf:
3503 #ifndef emacs 3503 #ifndef emacs
3504 case wordbound: 3504 case wordbound:
3505 case notwordbound: 3505 case notwordbound:
3506 case wordbeg: 3506 case wordbeg:
3507 case wordend: 3507 case wordend:
3508 #endif 3508 #endif
3509 case push_dummy_failure: 3509 case push_dummy_failure:
3510 continue; 3510 continue;
3511 3511
3512 3512
3513 case jump_n: 3513 case jump_n:
3514 case pop_failure_jump: 3514 case pop_failure_jump:
3515 case maybe_pop_jump: 3515 case maybe_pop_jump:
3516 case jump: 3516 case jump:
3517 case jump_past_alt: 3517 case jump_past_alt:
3518 case dummy_failure_jump: 3518 case dummy_failure_jump:
3519 EXTRACT_NUMBER_AND_INCR (j, p); 3519 EXTRACT_NUMBER_AND_INCR (j, p);
3520 p += j; 3520 p += j;
3521 if (j > 0) 3521 if (j > 0)
3522 continue; 3522 continue;
3523 3523
3524 /* Jump backward implies we just went through the body of a 3524 /* Jump backward implies we just went through the body of a
3525 loop and matched nothing. Opcode jumped to should be 3525 loop and matched nothing. Opcode jumped to should be
3526 `on_failure_jump' or `succeed_n'. Just treat it like an 3526 `on_failure_jump' or `succeed_n'. Just treat it like an
3527 ordinary jump. For a * loop, it has pushed its failure 3527 ordinary jump. For a * loop, it has pushed its failure
3528 point already; if so, discard that as redundant. */ 3528 point already; if so, discard that as redundant. */
3529 if ((re_opcode_t) *p != on_failure_jump 3529 if ((re_opcode_t) *p != on_failure_jump
3530 && (re_opcode_t) *p != succeed_n) 3530 && (re_opcode_t) *p != succeed_n)
3531 continue; 3531 continue;
3532 3532
3533 p++; 3533 p++;
3534 EXTRACT_NUMBER_AND_INCR (j, p); 3534 EXTRACT_NUMBER_AND_INCR (j, p);
3535 p += j; 3535 p += j;
3536 3536
3537 /* If what's on the stack is where we are now, pop it. */ 3537 /* If what's on the stack is where we are now, pop it. */
3538 if (!FAIL_STACK_EMPTY () 3538 if (!FAIL_STACK_EMPTY ()
3539 && fail_stack.stack[fail_stack.avail - 1].pointer == p) 3539 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3540 fail_stack.avail--; 3540 fail_stack.avail--;
3541 3541
3542 continue; 3542 continue;
3543 3543
3544 3544
3545 case on_failure_jump: 3545 case on_failure_jump:
3546 case on_failure_keep_string_jump: 3546 case on_failure_keep_string_jump:
3547 handle_on_failure_jump: 3547 handle_on_failure_jump:
3548 EXTRACT_NUMBER_AND_INCR (j, p); 3548 EXTRACT_NUMBER_AND_INCR (j, p);
3549 3549
3550 /* For some patterns, e.g., `(a?)?', `p+j' here points to the 3550 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3551 end of the pattern. We don't want to push such a point, 3551 end of the pattern. We don't want to push such a point,
3552 since when we restore it above, entering the switch will 3552 since when we restore it above, entering the switch will
3553 increment `p' past the end of the pattern. We don't need 3553 increment `p' past the end of the pattern. We don't need
3554 to push such a point since we obviously won't find any more 3554 to push such a point since we obviously won't find any more
3555 fastmap entries beyond `pend'. Such a pattern can match 3555 fastmap entries beyond `pend'. Such a pattern can match
3556 the null string, though. */ 3556 the null string, though. */
3557 if (p + j < pend) 3557 if (p + j < pend)
3558 { 3558 {
3559 if (!PUSH_PATTERN_OP (p + j, fail_stack)) 3559 if (!PUSH_PATTERN_OP (p + j, fail_stack))
3560 { 3560 {
3561 RESET_FAIL_STACK (); 3561 RESET_FAIL_STACK ();
3562 return -2; 3562 return -2;
3563 } 3563 }
3564 }
3565 else
3566 bufp->can_be_null = 1;
3567
3568 if (succeed_n_p)
3569 {
3570 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3571 succeed_n_p = false;
3572 } 3564 }
3573 3565 else
3574 continue; 3566 bufp->can_be_null = 1;
3567
3568 if (succeed_n_p)
3569 {
3570 EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n. */
3571 succeed_n_p = false;
3572 }
3573
3574 continue;
3575 3575
3576 3576
3577 case succeed_n: 3577 case succeed_n:
3578 /* Get to the number of times to succeed. */ 3578 /* Get to the number of times to succeed. */
3579 p += 2; 3579 p += 2;
3580 3580
3581 /* Increment p past the n for when k != 0. */ 3581 /* Increment p past the n for when k != 0. */
3582 EXTRACT_NUMBER_AND_INCR (k, p); 3582 EXTRACT_NUMBER_AND_INCR (k, p);
3583 if (k == 0) 3583 if (k == 0)
3584 { 3584 {
3585 p -= 4; 3585 p -= 4;
3586 succeed_n_p = true; /* Spaghetti code alert. */ 3586 succeed_n_p = true; /* Spaghetti code alert. */
3587 goto handle_on_failure_jump; 3587 goto handle_on_failure_jump;
3588 } 3588 }
3589 continue; 3589 continue;
3590 3590
3591 3591
3592 case set_number_at: 3592 case set_number_at:
3593 p += 4; 3593 p += 4;
3594 continue; 3594 continue;
3595 3595
3596 3596
3597 case start_memory: 3597 case start_memory:
3598 case stop_memory: 3598 case stop_memory:
3599 p += 2; 3599 p += 2;
3600 continue; 3600 continue;
3601 3601
3602 3602
3603 default: 3603 default:
3604 abort (); /* We have listed all the cases. */ 3604 abort (); /* We have listed all the cases. */
3605 } /* switch *p++ */ 3605 } /* switch *p++ */
3606 3606
3607 /* Getting here means we have found the possible starting 3607 /* Getting here means we have found the possible starting
3608 characters for one path of the pattern -- and that the empty 3608 characters for one path of the pattern -- and that the empty
3609 string does not match. We need not follow this path further. 3609 string does not match. We need not follow this path further.
3610 Instead, look at the next alternative (remembered on the 3610 Instead, look at the next alternative (remembered on the
3611 stack), or quit if no more. The test at the top of the loop 3611 stack), or quit if no more. The test at the top of the loop
3612 does these things. */ 3612 does these things. */
3613 path_can_be_null = false; 3613 path_can_be_null = false;
3614 p = pend; 3614 p = pend;
3615 } /* while p */ 3615 } /* while p */
3616 3616
3617 /* Set `can_be_null' for the last path (also the first path, if the 3617 /* Set `can_be_null' for the last path (also the first path, if the
3618 pattern is empty). */ 3618 pattern is empty). */
3619 bufp->can_be_null |= path_can_be_null; 3619 bufp->can_be_null |= path_can_be_null;
3620 3620
3621 done: 3621 done:
3622 RESET_FAIL_STACK (); 3622 RESET_FAIL_STACK ();
3623 return 0; 3623 return 0;
3656 regs->num_regs = 0; 3656 regs->num_regs = 0;
3657 regs->start = regs->end = (regoff_t *) 0; 3657 regs->start = regs->end = (regoff_t *) 0;
3658 } 3658 }
3659 } 3659 }
3660 3660
3661 /* Searching routines. */ 3661 /* Searching routines. */
3662 3662
3663 /* Like re_search_2, below, but only one string is specified, and 3663 /* Like re_search_2, below, but only one string is specified, and
3664 doesn't let you say where to stop matching. */ 3664 doesn't let you say where to stop matching. */
3665 3665
3666 int 3666 int
3718 register RE_TRANSLATE_TYPE translate = bufp->translate; 3718 register RE_TRANSLATE_TYPE translate = bufp->translate;
3719 int total_size = size1 + size2; 3719 int total_size = size1 + size2;
3720 int endpos = startpos + range; 3720 int endpos = startpos + range;
3721 int anchored_start = 0; 3721 int anchored_start = 0;
3722 3722
3723 /* Nonzero if we have to deal with multibyte characters. */ 3723 /* Nonzero if we have to deal with multibyte characters. */
3724 int multibyte = bufp->multibyte; 3724 int multibyte = bufp->multibyte;
3725 3725
3726 /* Check for out-of-range STARTPOS. */ 3726 /* Check for out-of-range STARTPOS. */
3727 if (startpos < 0 || startpos > total_size) 3727 if (startpos < 0 || startpos > total_size)
3728 return -1; 3728 return -1;
3765 if (bufp->buffer[0] == begline) 3765 if (bufp->buffer[0] == begline)
3766 anchored_start = 1; 3766 anchored_start = 1;
3767 3767
3768 #ifdef emacs 3768 #ifdef emacs
3769 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, 3769 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
3770 POS_AS_IN_BUFFER (startpos > 0 3770 POS_AS_IN_BUFFER (startpos > 0
3771 ? startpos - 1 : startpos), 3771 ? startpos - 1 : startpos),
3772 1); 3772 1);
3773 #endif 3773 #endif
3774 3774
3775 /* Loop through the string, looking for a place to start matching. */ 3775 /* Loop through the string, looking for a place to start matching. */
3787 == '\n'))) 3787 == '\n')))
3788 goto advance; 3788 goto advance;
3789 } 3789 }
3790 3790
3791 /* If a fastmap is supplied, skip quickly over characters that 3791 /* If a fastmap is supplied, skip quickly over characters that
3792 cannot be the start of a match. If the pattern can match the 3792 cannot be the start of a match. If the pattern can match the
3793 null string, however, we don't need to skip characters; we want 3793 null string, however, we don't need to skip characters; we want
3794 the first null string. */ 3794 the first null string. */
3795 if (fastmap && startpos < total_size && !bufp->can_be_null) 3795 if (fastmap && startpos < total_size && !bufp->can_be_null)
3796 { 3796 {
3797 if (range > 0) /* Searching forwards. */ 3797 if (range > 0) /* Searching forwards. */
3798 { 3798 {
3799 register const char *d; 3799 register const char *d;
3800 register int lim = 0; 3800 register int lim = 0;
3801 int irange = range; 3801 int irange = range;
3802 3802
3803 if (startpos < size1 && startpos + range >= size1) 3803 if (startpos < size1 && startpos + range >= size1)
3804 lim = range - (size1 - startpos); 3804 lim = range - (size1 - startpos);
3805 3805
3806 d = POS_ADDR_VSTRING (startpos); 3806 d = POS_ADDR_VSTRING (startpos);
3807 3807
3808 /* Written out as an if-else to avoid testing `translate' 3808 /* Written out as an if-else to avoid testing `translate'
3809 inside the loop. */ 3809 inside the loop. */
3810 if (translate) 3810 if (translate)
3811 while (range > lim 3811 while (range > lim
3812 && !fastmap[(unsigned char) 3812 && !fastmap[(unsigned char)
3813 translate[(unsigned char) *d++]]) 3813 translate[(unsigned char) *d++]])
3814 range--; 3814 range--;
3815 else 3815 else
3816 while (range > lim && !fastmap[(unsigned char) *d++]) 3816 while (range > lim && !fastmap[(unsigned char) *d++])
3817 range--; 3817 range--;
3818 3818
3819 startpos += irange - range; 3819 startpos += irange - range;
3820 } 3820 }
3821 else /* Searching backwards. */ 3821 else /* Searching backwards. */
3822 { 3822 {
3823 register char c = (size1 == 0 || startpos >= size1 3823 register char c = (size1 == 0 || startpos >= size1
3824 ? string2[startpos - size1] 3824 ? string2[startpos - size1]
3825 : string1[startpos]); 3825 : string1[startpos]);
3826 3826
3827 if (!fastmap[(unsigned char) TRANSLATE (c)]) 3827 if (!fastmap[(unsigned char) TRANSLATE (c)])
3828 goto advance; 3828 goto advance;
3829 } 3829 }
3830 } 3830 }
3831 3831
3832 /* If can't match the null string, and that's all we have left, fail. */ 3832 /* If can't match the null string, and that's all we have left, fail. */
3833 if (range >= 0 && startpos == total_size && fastmap 3833 if (range >= 0 && startpos == total_size && fastmap
3834 && !bufp->can_be_null) 3834 && !bufp->can_be_null)
3835 return -1; 3835 return -1;
3836 3836
3837 val = re_match_2_internal (bufp, string1, size1, string2, size2, 3837 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3838 startpos, regs, stop); 3838 startpos, regs, stop);
3839 #ifndef REGEX_MALLOC 3839 #ifndef REGEX_MALLOC
3848 if (val == -2) 3848 if (val == -2)
3849 return -2; 3849 return -2;
3850 3850
3851 advance: 3851 advance:
3852 if (!range) 3852 if (!range)
3853 break; 3853 break;
3854 else if (range > 0) 3854 else if (range > 0)
3855 { 3855 {
3856 /* Update STARTPOS to the next character boundary. */ 3856 /* Update STARTPOS to the next character boundary. */
3857 if (multibyte) 3857 if (multibyte)
3858 { 3858 {
3859 const unsigned char *p = POS_ADDR_VSTRING (startpos); 3859 const unsigned char *p = POS_ADDR_VSTRING (startpos);
3860 const unsigned char *pend = STOP_ADDR_VSTRING (startpos); 3860 const unsigned char *pend = STOP_ADDR_VSTRING (startpos);
3865 break; 3865 break;
3866 startpos += len; 3866 startpos += len;
3867 } 3867 }
3868 else 3868 else
3869 { 3869 {
3870 range--; 3870 range--;
3871 startpos++; 3871 startpos++;
3872 } 3872 }
3873 } 3873 }
3874 else 3874 else
3875 { 3875 {
3876 range++; 3876 range++;
3877 startpos--; 3877 startpos--;
3878 3878
3879 /* Update STARTPOS to the previous character boundary. */ 3879 /* Update STARTPOS to the previous character boundary. */
3880 if (multibyte) 3880 if (multibyte)
3881 { 3881 {
3882 const unsigned char *p = POS_ADDR_VSTRING (startpos); 3882 const unsigned char *p = POS_ADDR_VSTRING (startpos);
3898 break; 3898 break;
3899 3899
3900 startpos -= len; 3900 startpos -= len;
3901 } 3901 }
3902 } 3902 }
3903 } 3903 }
3904 } 3904 }
3905 return -1; 3905 return -1;
3906 } /* re_search_2 */ 3906 } /* re_search_2 */
3907 3907
3908 /* Declarations and macros for re_match_2. */ 3908 /* Declarations and macros for re_match_2. */
3909 3909
3910 static int bcmp_translate (); 3910 static int bcmp_translate ();
3911 static boolean alt_match_null_string_p (), 3911 static boolean alt_match_null_string_p (),
3912 common_op_match_null_string_p (), 3912 common_op_match_null_string_p (),
3913 group_match_null_string_p (); 3913 group_match_null_string_p ();
3914 3914
3915 /* This converts PTR, a pointer into one of the search strings `string1' 3915 /* This converts PTR, a pointer into one of the search strings `string1'
3916 and `string2' into an offset from the beginning of that string. */ 3916 and `string2' into an offset from the beginning of that string. */
3917 #define POINTER_TO_OFFSET(ptr) \ 3917 #define POINTER_TO_OFFSET(ptr) \
3918 (FIRST_STRING_P (ptr) \ 3918 (FIRST_STRING_P (ptr) \
3924 #define MATCHING_IN_FIRST_STRING (dend == end_match_1) 3924 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
3925 3925
3926 /* Call before fetching a character with *d. This switches over to 3926 /* Call before fetching a character with *d. This switches over to
3927 string2 if necessary. */ 3927 string2 if necessary. */
3928 #define PREFETCH() \ 3928 #define PREFETCH() \
3929 while (d == dend) \ 3929 while (d == dend) \
3930 { \ 3930 { \
3931 /* End of string2 => fail. */ \ 3931 /* End of string2 => fail. */ \
3932 if (dend == end_match_2) \ 3932 if (dend == end_match_2) \
3933 goto fail; \ 3933 goto fail; \
3934 /* End of string1 => advance to string2. */ \ 3934 /* End of string1 => advance to string2. */ \
3935 d = string2; \ 3935 d = string2; \
3936 dend = end_match_2; \ 3936 dend = end_match_2; \
3937 } 3937 }
3938 3938
3939 3939
3940 /* Test if at very beginning or at very end of the virtual concatenation 3940 /* Test if at very beginning or at very end of the virtual concatenation
3941 of `string1' and `string2'. If only one string, it's `string2'. */ 3941 of `string1' and `string2'. If only one string, it's `string2'. */
3942 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) 3942 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3943 #define AT_STRINGS_END(d) ((d) == end2) 3943 #define AT_STRINGS_END(d) ((d) == end2)
3944 3944
3945 3945
3946 /* Test if D points to a character which is word-constituent. We have 3946 /* Test if D points to a character which is word-constituent. We have
3947 two special cases to check for: if past the end of string1, look at 3947 two special cases to check for: if past the end of string1, look at
3948 the first character in string2; and if before the beginning of 3948 the first character in string2; and if before the beginning of
3949 string2, look at the last character in string1. */ 3949 string2, look at the last character in string1. */
3950 #define WORDCHAR_P(d) \ 3950 #define WORDCHAR_P(d) \
3951 (SYNTAX ((d) == end1 ? *string2 \ 3951 (SYNTAX ((d) == end1 ? *string2 \
3952 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ 3952 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3953 == Sword) 3953 == Sword)
3954 3954
3955 /* Disabled due to a compiler bug -- see comment at case wordbound */ 3955 /* Disabled due to a compiler bug -- see comment at case wordbound */
3956 3956
3957 /* The comment at case wordbound is the following one, but we no longer 3957 /* The comment at case wordbound is the following one, but we no longer
3958 use AT_WORD_BOUNDARY, so that multibyte form can be supported. 3958 use AT_WORD_BOUNDARY, so that multibyte form can be supported.
3959 3959
3960 The DEC Alpha C compiler 3.x generates incorrect code for the 3960 The DEC Alpha C compiler 3.x generates incorrect code for the
3961 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of 3961 test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of
3962 AT_WORD_BOUNDARY, so this code is disabled. Expanding the 3962 AT_WORD_BOUNDARY, so this code is disabled. Expanding the
3963 macro and introducing temporary variables works around the bug. */ 3963 macro and introducing temporary variables works around the bug. */
3964 3964
3965 #if 0 3965 #if 0
3966 /* Test if the character before D and the one at D differ with respect 3966 /* Test if the character before D and the one at D differ with respect
3967 to being word-constituent. */ 3967 to being word-constituent. */
3988 } while (0) 3988 } while (0)
3989 #else 3989 #else
3990 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ 3990 #define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */
3991 #endif /* not MATCH_MAY_ALLOCATE */ 3991 #endif /* not MATCH_MAY_ALLOCATE */
3992 3992
3993 /* These values must meet several constraints. They must not be valid 3993 /* These values must meet several constraints. They must not be valid
3994 register values; since we have a limit of 255 registers (because 3994 register values; since we have a limit of 255 registers (because
3995 we use only one byte in the pattern for the register number), we can 3995 we use only one byte in the pattern for the register number), we can
3996 use numbers larger than 255. They must differ by 1, because of 3996 use numbers larger than 255. They must differ by 1, because of
3997 NUM_FAILURE_ITEMS above. And the value for the lowest register must 3997 NUM_FAILURE_ITEMS above. And the value for the lowest register must
3998 be larger than the value for the highest register, so we do not try 3998 be larger than the value for the highest register, so we do not try
3999 to actually save any registers when none are active. */ 3999 to actually save any registers when none are active. */
4000 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH) 4000 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4001 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1) 4001 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4002 4002
4003 /* Matching routines. */ 4003 /* Matching routines. */
4004 4004
4005 #ifndef emacs /* Emacs never uses this. */ 4005 #ifndef emacs /* Emacs never uses this. */
4006 /* re_match is like re_match_2 except it takes only a single string. */ 4006 /* re_match is like re_match_2 except it takes only a single string. */
4007 4007
4008 int 4008 int
4009 re_match (bufp, string, size, pos, regs) 4009 re_match (bufp, string, size, pos, regs)
4010 struct re_pattern_buffer *bufp; 4010 struct re_pattern_buffer *bufp;
4019 } 4019 }
4020 #endif /* not emacs */ 4020 #endif /* not emacs */
4021 4021
4022 #ifdef emacs 4022 #ifdef emacs
4023 /* In Emacs, this is the string or buffer in which we 4023 /* In Emacs, this is the string or buffer in which we
4024 are matching. It is used for looking up syntax properties. */ 4024 are matching. It is used for looking up syntax properties. */
4025 Lisp_Object re_match_object; 4025 Lisp_Object re_match_object;
4026 #endif 4026 #endif
4027 4027
4028 /* re_match_2 matches the compiled pattern in BUFP against the 4028 /* re_match_2 matches the compiled pattern in BUFP against the
4029 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 4029 (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4030 and SIZE2, respectively). We start matching at POS, and stop 4030 and SIZE2, respectively). We start matching at POS, and stop
4031 matching at STOP. 4031 matching at STOP.
4032 4032
4033 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we 4033 If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4034 store offsets for the substring each group matched in REGS. See the 4034 store offsets for the substring each group matched in REGS. See the
4035 documentation for exactly how many groups we fill. 4035 documentation for exactly how many groups we fill.
4036 4036
4037 We return -1 if no match, -2 if an internal error (such as the 4037 We return -1 if no match, -2 if an internal error (such as the
4038 failure stack overflowing). Otherwise, we return the length of the 4038 failure stack overflowing). Otherwise, we return the length of the
4039 matched substring. */ 4039 matched substring. */
4040 4040
4041 int 4041 int
4042 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) 4042 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4043 struct re_pattern_buffer *bufp; 4043 struct re_pattern_buffer *bufp;
4046 int pos; 4046 int pos;
4047 struct re_registers *regs; 4047 struct re_registers *regs;
4048 int stop; 4048 int stop;
4049 { 4049 {
4050 int result; 4050 int result;
4051 4051
4052 #ifdef emacs 4052 #ifdef emacs
4053 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, 4053 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object,
4054 POS_AS_IN_BUFFER (pos > 0 ? pos - 1 : pos), 4054 POS_AS_IN_BUFFER (pos > 0 ? pos - 1 : pos),
4055 1); 4055 1);
4056 #endif 4056 #endif
4057 4057
4058 result = re_match_2_internal (bufp, string1, size1, string2, size2, 4058 result = re_match_2_internal (bufp, string1, size1, string2, size2,
4059 pos, regs, stop); 4059 pos, regs, stop);
4060 alloca (0); 4060 alloca (0);
4061 return result; 4061 return result;
4062 } 4062 }
4063 4063
4064 /* This is a separate function so that we can force an alloca cleanup 4064 /* This is a separate function so that we can force an alloca cleanup
4065 afterwards. */ 4065 afterwards. */
4066 static int 4066 static int
4067 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) 4067 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4068 struct re_pattern_buffer *bufp; 4068 struct re_pattern_buffer *bufp;
4069 const char *string1, *string2; 4069 const char *string1, *string2;
4070 int size1, size2; 4070 int size1, size2;
4078 4078
4079 /* Just past the end of the corresponding string. */ 4079 /* Just past the end of the corresponding string. */
4080 const char *end1, *end2; 4080 const char *end1, *end2;
4081 4081
4082 /* Pointers into string1 and string2, just past the last characters in 4082 /* Pointers into string1 and string2, just past the last characters in
4083 each to consider matching. */ 4083 each to consider matching. */
4084 const char *end_match_1, *end_match_2; 4084 const char *end_match_1, *end_match_2;
4085 4085
4086 /* Where we are in the data, and the end of the current string. */ 4086 /* Where we are in the data, and the end of the current string. */
4087 const char *d, *dend; 4087 const char *d, *dend;
4088 4088
4092 4092
4093 /* Mark the opcode just after a start_memory, so we can test for an 4093 /* Mark the opcode just after a start_memory, so we can test for an
4094 empty subpattern when we get to the stop_memory. */ 4094 empty subpattern when we get to the stop_memory. */
4095 unsigned char *just_past_start_mem = 0; 4095 unsigned char *just_past_start_mem = 0;
4096 4096
4097 /* We use this to map every character in the string. */ 4097 /* We use this to map every character in the string. */
4098 RE_TRANSLATE_TYPE translate = bufp->translate; 4098 RE_TRANSLATE_TYPE translate = bufp->translate;
4099 4099
4100 /* Nonzero if we have to deal with multibyte characters. */ 4100 /* Nonzero if we have to deal with multibyte characters. */
4101 int multibyte = bufp->multibyte; 4101 int multibyte = bufp->multibyte;
4102 4102
4103 /* Failure point stack. Each place that can handle a failure further 4103 /* Failure point stack. Each place that can handle a failure further
4104 down the line pushes a failure point on this stack. It consists of 4104 down the line pushes a failure point on this stack. It consists of
4105 restart, regend, and reg_info for all registers corresponding to 4105 restart, regend, and reg_info for all registers corresponding to
4106 the subexpressions we're currently inside, plus the number of such 4106 the subexpressions we're currently inside, plus the number of such
4107 registers, and, finally, two char *'s. The first char * is where 4107 registers, and, finally, two char *'s. The first char * is where
4108 to resume scanning the pattern; the second one is where to resume 4108 to resume scanning the pattern; the second one is where to resume
4109 scanning the strings. If the latter is zero, the failure point is 4109 scanning the strings. If the latter is zero, the failure point is
4110 a ``dummy''; if a failure happens and the failure point is a dummy, 4110 a ``dummy''; if a failure happens and the failure point is a dummy,
4111 it gets discarded and the next one is tried. */ 4111 it gets discarded and the next one is tried. */
4112 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 4112 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4113 fail_stack_type fail_stack; 4113 fail_stack_type fail_stack;
4114 #endif 4114 #endif
4115 #ifdef DEBUG 4115 #ifdef DEBUG
4116 static unsigned failure_id = 0; 4116 static unsigned failure_id = 0;
4117 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; 4117 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4120 /* This holds the pointer to the failure stack, when 4120 /* This holds the pointer to the failure stack, when
4121 it is allocated relocatably. */ 4121 it is allocated relocatably. */
4122 fail_stack_elt_t *failure_stack_ptr; 4122 fail_stack_elt_t *failure_stack_ptr;
4123 4123
4124 /* We fill all the registers internally, independent of what we 4124 /* We fill all the registers internally, independent of what we
4125 return, for use in backreferences. The number here includes 4125 return, for use in backreferences. The number here includes
4126 an element for register zero. */ 4126 an element for register zero. */
4127 unsigned num_regs = bufp->re_nsub + 1; 4127 unsigned num_regs = bufp->re_nsub + 1;
4128 4128
4129 /* The currently active registers. */ 4129 /* The currently active registers. */
4130 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; 4130 unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4153 /* The is_active field of reg_info helps us keep track of which (possibly 4153 /* The is_active field of reg_info helps us keep track of which (possibly
4154 nested) subexpressions we are currently in. The matched_something 4154 nested) subexpressions we are currently in. The matched_something
4155 field of reg_info[reg_num] helps us tell whether or not we have 4155 field of reg_info[reg_num] helps us tell whether or not we have
4156 matched any of the pattern so far this time through the reg_num-th 4156 matched any of the pattern so far this time through the reg_num-th
4157 subexpression. These two fields get reset each time through any 4157 subexpression. These two fields get reset each time through any
4158 loop their register is in. */ 4158 loop their register is in. */
4159 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ 4159 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */
4160 register_info_type *reg_info; 4160 register_info_type *reg_info;
4161 #endif 4161 #endif
4162 4162
4163 /* The following record the register info as found in the above 4163 /* The following record the register info as found in the above
4164 variables when we find a match better than any we've seen before. 4164 variables when we find a match better than any we've seen before.
4169 const char **best_regstart, **best_regend; 4169 const char **best_regstart, **best_regend;
4170 #endif 4170 #endif
4171 4171
4172 /* Logically, this is `best_regend[0]'. But we don't want to have to 4172 /* Logically, this is `best_regend[0]'. But we don't want to have to
4173 allocate space for that if we're not allocating space for anything 4173 allocate space for that if we're not allocating space for anything
4174 else (see below). Also, we never need info about register 0 for 4174 else (see below). Also, we never need info about register 0 for
4175 any of the other register vectors, and it seems rather a kludge to 4175 any of the other register vectors, and it seems rather a kludge to
4176 treat `best_regend' differently than the rest. So we keep track of 4176 treat `best_regend' differently than the rest. So we keep track of
4177 the end of the best match so far in a separate variable. We 4177 the end of the best match so far in a separate variable. We
4178 initialize this to NULL so that when we backtrack the first time 4178 initialize this to NULL so that when we backtrack the first time
4179 and need to test it, it's not garbage. */ 4179 and need to test it, it's not garbage. */
4214 reg_info = REGEX_TALLOC (num_regs, register_info_type); 4214 reg_info = REGEX_TALLOC (num_regs, register_info_type);
4215 reg_dummy = REGEX_TALLOC (num_regs, const char *); 4215 reg_dummy = REGEX_TALLOC (num_regs, const char *);
4216 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); 4216 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4217 4217
4218 if (!(regstart && regend && old_regstart && old_regend && reg_info 4218 if (!(regstart && regend && old_regstart && old_regend && reg_info
4219 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) 4219 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4220 { 4220 {
4221 FREE_VARIABLES (); 4221 FREE_VARIABLES ();
4222 return -2; 4222 return -2;
4223 } 4223 }
4224 } 4224 }
4225 else 4225 else
4226 { 4226 {
4227 /* We must initialize all our variables to NULL, so that 4227 /* We must initialize all our variables to NULL, so that
4228 `FREE_VARIABLES' doesn't try to free them. */ 4228 `FREE_VARIABLES' doesn't try to free them. */
4229 regstart = regend = old_regstart = old_regend = best_regstart 4229 regstart = regend = old_regstart = old_regend = best_regstart
4230 = best_regend = reg_dummy = NULL; 4230 = best_regend = reg_dummy = NULL;
4231 reg_info = reg_info_dummy = (register_info_type *) NULL; 4231 reg_info = reg_info_dummy = (register_info_type *) NULL;
4232 } 4232 }
4233 #endif /* MATCH_MAY_ALLOCATE */ 4233 #endif /* MATCH_MAY_ALLOCATE */
4234 4234
4235 /* The starting position is bogus. */ 4235 /* The starting position is bogus. */
4243 start_memory/stop_memory has been seen for. Also initialize the 4243 start_memory/stop_memory has been seen for. Also initialize the
4244 register information struct. */ 4244 register information struct. */
4245 for (mcnt = 1; mcnt < num_regs; mcnt++) 4245 for (mcnt = 1; mcnt < num_regs; mcnt++)
4246 { 4246 {
4247 regstart[mcnt] = regend[mcnt] 4247 regstart[mcnt] = regend[mcnt]
4248 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; 4248 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4249 4249
4250 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; 4250 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4251 IS_ACTIVE (reg_info[mcnt]) = 0; 4251 IS_ACTIVE (reg_info[mcnt]) = 0;
4252 MATCHED_SOMETHING (reg_info[mcnt]) = 0; 4252 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4253 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; 4253 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4254 } 4254 }
4255 4255
4256 /* We move `string1' into `string2' if the latter's empty -- but not if 4256 /* We move `string1' into `string2' if the latter's empty -- but not if
4257 `string1' is null. */ 4257 `string1' is null. */
4258 if (size2 == 0 && string1 != NULL) 4258 if (size2 == 0 && string1 != NULL)
4259 { 4259 {
4260 string2 = string1; 4260 string2 = string1;
4261 size2 = size1; 4261 size2 = size1;
4262 string1 = 0; 4262 string1 = 0;
4298 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); 4298 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4299 DEBUG_PRINT1 ("The string to match is: `"); 4299 DEBUG_PRINT1 ("The string to match is: `");
4300 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); 4300 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4301 DEBUG_PRINT1 ("'\n"); 4301 DEBUG_PRINT1 ("'\n");
4302 4302
4303 /* This loops over pattern commands. It exits by returning from the 4303 /* This loops over pattern commands. It exits by returning from the
4304 function if the match is complete, or it drops through if the match 4304 function if the match is complete, or it drops through if the match
4305 fails at this starting point in the input data. */ 4305 fails at this starting point in the input data. */
4306 for (;;) 4306 for (;;)
4307 { 4307 {
4308 DEBUG_PRINT2 ("\n0x%x: ", p); 4308 DEBUG_PRINT2 ("\n0x%x: ", p);
4309 4309
4310 if (p == pend) 4310 if (p == pend)
4311 { /* End of pattern means we might have succeeded. */ 4311 { /* End of pattern means we might have succeeded. */
4312 DEBUG_PRINT1 ("end of pattern ... "); 4312 DEBUG_PRINT1 ("end of pattern ... ");
4313 4313
4314 /* If we haven't matched the entire string, and we want the 4314 /* If we haven't matched the entire string, and we want the
4315 longest match, try backtracking. */ 4315 longest match, try backtracking. */
4316 if (d != end_match_2) 4316 if (d != end_match_2)
4317 { 4317 {
4318 /* 1 if this match ends in the same string (string1 or string2) 4318 /* 1 if this match ends in the same string (string1 or string2)
4319 as the best previous match. */ 4319 as the best previous match. */
4320 boolean same_str_p = (FIRST_STRING_P (match_end) 4320 boolean same_str_p = (FIRST_STRING_P (match_end)
4321 == MATCHING_IN_FIRST_STRING); 4321 == MATCHING_IN_FIRST_STRING);
4322 /* 1 if this match is the best seen so far. */ 4322 /* 1 if this match is the best seen so far. */
4323 boolean best_match_p; 4323 boolean best_match_p;
4324 4324
4325 /* AIX compiler got confused when this was combined 4325 /* AIX compiler got confused when this was combined
4326 with the previous declaration. */ 4326 with the previous declaration. */
4327 if (same_str_p) 4327 if (same_str_p)
4328 best_match_p = d > match_end; 4328 best_match_p = d > match_end;
4329 else 4329 else
4330 best_match_p = !MATCHING_IN_FIRST_STRING; 4330 best_match_p = !MATCHING_IN_FIRST_STRING;
4331 4331
4332 DEBUG_PRINT1 ("backtracking.\n"); 4332 DEBUG_PRINT1 ("backtracking.\n");
4333 4333
4334 if (!FAIL_STACK_EMPTY ()) 4334 if (!FAIL_STACK_EMPTY ())
4335 { /* More failure points to try. */ 4335 { /* More failure points to try. */
4336 4336
4337 /* If exceeds best match so far, save it. */ 4337 /* If exceeds best match so far, save it. */
4338 if (!best_regs_set || best_match_p) 4338 if (!best_regs_set || best_match_p)
4339 { 4339 {
4340 best_regs_set = true; 4340 best_regs_set = true;
4341 match_end = d; 4341 match_end = d;
4342 4342
4343 DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); 4343 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4344 4344
4345 for (mcnt = 1; mcnt < num_regs; mcnt++) 4345 for (mcnt = 1; mcnt < num_regs; mcnt++)
4346 { 4346 {
4347 best_regstart[mcnt] = regstart[mcnt]; 4347 best_regstart[mcnt] = regstart[mcnt];
4348 best_regend[mcnt] = regend[mcnt]; 4348 best_regend[mcnt] = regend[mcnt];
4349 } 4349 }
4350 } 4350 }
4351 goto fail; 4351 goto fail;
4352 } 4352 }
4353 4353
4354 /* If no failure points, don't restore garbage. And if 4354 /* If no failure points, don't restore garbage. And if
4355 last match is real best match, don't restore second 4355 last match is real best match, don't restore second
4356 best one. */ 4356 best one. */
4357 else if (best_regs_set && !best_match_p) 4357 else if (best_regs_set && !best_match_p)
4358 { 4358 {
4359 restore_best_regs: 4359 restore_best_regs:
4360 /* Restore best match. It may happen that `dend == 4360 /* Restore best match. It may happen that `dend ==
4361 end_match_1' while the restored d is in string2. 4361 end_match_1' while the restored d is in string2.
4362 For example, the pattern `x.*y.*z' against the 4362 For example, the pattern `x.*y.*z' against the
4363 strings `x-' and `y-z-', if the two strings are 4363 strings `x-' and `y-z-', if the two strings are
4364 not consecutive in memory. */ 4364 not consecutive in memory. */
4365 DEBUG_PRINT1 ("Restoring best registers.\n"); 4365 DEBUG_PRINT1 ("Restoring best registers.\n");
4366 4366
4367 d = match_end; 4367 d = match_end;
4368 dend = ((d >= string1 && d <= end1) 4368 dend = ((d >= string1 && d <= end1)
4369 ? end_match_1 : end_match_2); 4369 ? end_match_1 : end_match_2);
4370 4370
4371 for (mcnt = 1; mcnt < num_regs; mcnt++) 4371 for (mcnt = 1; mcnt < num_regs; mcnt++)
4372 { 4372 {
4373 regstart[mcnt] = best_regstart[mcnt]; 4373 regstart[mcnt] = best_regstart[mcnt];
4374 regend[mcnt] = best_regend[mcnt]; 4374 regend[mcnt] = best_regend[mcnt];
4375 } 4375 }
4376 } 4376 }
4377 } /* d != end_match_2 */ 4377 } /* d != end_match_2 */
4378 4378
4379 succeed_label: 4379 succeed_label:
4380 DEBUG_PRINT1 ("Accepting match.\n"); 4380 DEBUG_PRINT1 ("Accepting match.\n");
4381 4381
4382 /* If caller wants register contents data back, do it. */ 4382 /* If caller wants register contents data back, do it. */
4383 if (regs && !bufp->no_sub) 4383 if (regs && !bufp->no_sub)
4384 { 4384 {
4385 /* Have the register data arrays been allocated? */ 4385 /* Have the register data arrays been allocated? */
4386 if (bufp->regs_allocated == REGS_UNALLOCATED) 4386 if (bufp->regs_allocated == REGS_UNALLOCATED)
4387 { /* No. So allocate them with malloc. We need one 4387 { /* No. So allocate them with malloc. We need one
4388 extra element beyond `num_regs' for the `-1' marker 4388 extra element beyond `num_regs' for the `-1' marker
4389 GNU code uses. */ 4389 GNU code uses. */
4390 regs->num_regs = MAX (RE_NREGS, num_regs + 1); 4390 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4391 regs->start = TALLOC (regs->num_regs, regoff_t); 4391 regs->start = TALLOC (regs->num_regs, regoff_t);
4392 regs->end = TALLOC (regs->num_regs, regoff_t); 4392 regs->end = TALLOC (regs->num_regs, regoff_t);
4393 if (regs->start == NULL || regs->end == NULL) 4393 if (regs->start == NULL || regs->end == NULL)
4394 { 4394 {
4395 FREE_VARIABLES (); 4395 FREE_VARIABLES ();
4396 return -2; 4396 return -2;
4397 } 4397 }
4398 bufp->regs_allocated = REGS_REALLOCATE; 4398 bufp->regs_allocated = REGS_REALLOCATE;
4399 } 4399 }
4400 else if (bufp->regs_allocated == REGS_REALLOCATE) 4400 else if (bufp->regs_allocated == REGS_REALLOCATE)
4401 { /* Yes. If we need more elements than were already 4401 { /* Yes. If we need more elements than were already
4402 allocated, reallocate them. If we need fewer, just 4402 allocated, reallocate them. If we need fewer, just
4403 leave it alone. */ 4403 leave it alone. */
4404 if (regs->num_regs < num_regs + 1) 4404 if (regs->num_regs < num_regs + 1)
4405 { 4405 {
4406 regs->num_regs = num_regs + 1; 4406 regs->num_regs = num_regs + 1;
4407 RETALLOC (regs->start, regs->num_regs, regoff_t); 4407 RETALLOC (regs->start, regs->num_regs, regoff_t);
4408 RETALLOC (regs->end, regs->num_regs, regoff_t); 4408 RETALLOC (regs->end, regs->num_regs, regoff_t);
4409 if (regs->start == NULL || regs->end == NULL) 4409 if (regs->start == NULL || regs->end == NULL)
4410 { 4410 {
4411 FREE_VARIABLES (); 4411 FREE_VARIABLES ();
4412 return -2; 4412 return -2;
4413 } 4413 }
4414 } 4414 }
4415 } 4415 }
4416 else 4416 else
4417 { 4417 {
4418 /* These braces fend off an "empty body in an else-statement" 4418 /* These braces fend off an "empty body in an else-statement"
4419 warning under GCC when assert expands to nothing. */ 4419 warning under GCC when assert expands to nothing. */
4420 assert (bufp->regs_allocated == REGS_FIXED); 4420 assert (bufp->regs_allocated == REGS_FIXED);
4421 } 4421 }
4422 4422
4423 /* Convert the pointer data in `regstart' and `regend' to 4423 /* Convert the pointer data in `regstart' and `regend' to
4424 indices. Register zero has to be set differently, 4424 indices. Register zero has to be set differently,
4425 since we haven't kept track of any info for it. */ 4425 since we haven't kept track of any info for it. */
4426 if (regs->num_regs > 0) 4426 if (regs->num_regs > 0)
4427 { 4427 {
4428 regs->start[0] = pos; 4428 regs->start[0] = pos;
4429 regs->end[0] = (MATCHING_IN_FIRST_STRING 4429 regs->end[0] = (MATCHING_IN_FIRST_STRING
4430 ? ((regoff_t) (d - string1)) 4430 ? ((regoff_t) (d - string1))
4431 : ((regoff_t) (d - string2 + size1))); 4431 : ((regoff_t) (d - string2 + size1)));
4432 } 4432 }
4433 4433
4434 /* Go through the first `min (num_regs, regs->num_regs)' 4434 /* Go through the first `min (num_regs, regs->num_regs)'
4435 registers, since that is all we initialized. */ 4435 registers, since that is all we initialized. */
4436 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) 4436 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
4437 { 4437 {
4438 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) 4438 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4439 regs->start[mcnt] = regs->end[mcnt] = -1; 4439 regs->start[mcnt] = regs->end[mcnt] = -1;
4440 else 4440 else
4441 { 4441 {
4442 regs->start[mcnt] 4442 regs->start[mcnt]
4443 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); 4443 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4444 regs->end[mcnt] 4444 regs->end[mcnt]
4445 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); 4445 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4446 } 4446 }
4447 } 4447 }
4448 4448
4449 /* If the regs structure we return has more elements than 4449 /* If the regs structure we return has more elements than
4450 were in the pattern, set the extra elements to -1. If 4450 were in the pattern, set the extra elements to -1. If
4451 we (re)allocated the registers, this is the case, 4451 we (re)allocated the registers, this is the case,
4452 because we always allocate enough to have at least one 4452 because we always allocate enough to have at least one
4453 -1 at the end. */ 4453 -1 at the end. */
4454 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) 4454 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
4455 regs->start[mcnt] = regs->end[mcnt] = -1; 4455 regs->start[mcnt] = regs->end[mcnt] = -1;
4456 } /* regs && !bufp->no_sub */ 4456 } /* regs && !bufp->no_sub */
4457 4457
4458 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", 4458 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4459 nfailure_points_pushed, nfailure_points_popped, 4459 nfailure_points_pushed, nfailure_points_popped,
4460 nfailure_points_pushed - nfailure_points_popped); 4460 nfailure_points_pushed - nfailure_points_popped);
4461 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); 4461 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4462 4462
4463 mcnt = d - pos - (MATCHING_IN_FIRST_STRING 4463 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4464 ? string1 4464 ? string1
4465 : string2 - size1); 4465 : string2 - size1);
4466 4466
4467 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); 4467 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4468 4468
4469 FREE_VARIABLES (); 4469 FREE_VARIABLES ();
4470 return mcnt; 4470 return mcnt;
4471 } 4471 }
4472 4472
4473 /* Otherwise match next pattern command. */ 4473 /* Otherwise match next pattern command. */
4474 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) 4474 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4475 { 4475 {
4476 /* Ignore these. Used to ignore the n of succeed_n's which 4476 /* Ignore these. Used to ignore the n of succeed_n's which
4477 currently have n == 0. */ 4477 currently have n == 0. */
4478 case no_op: 4478 case no_op:
4479 DEBUG_PRINT1 ("EXECUTING no_op.\n"); 4479 DEBUG_PRINT1 ("EXECUTING no_op.\n");
4480 break; 4480 break;
4481 4481
4482 case succeed: 4482 case succeed:
4483 DEBUG_PRINT1 ("EXECUTING succeed.\n"); 4483 DEBUG_PRINT1 ("EXECUTING succeed.\n");
4484 goto succeed_label; 4484 goto succeed_label;
4485 4485
4486 /* Match the next n pattern characters exactly. The following 4486 /* Match the next n pattern characters exactly. The following
4487 byte in the pattern defines n, and the n bytes after that 4487 byte in the pattern defines n, and the n bytes after that
4488 are the characters to match. */ 4488 are the characters to match. */
4489 case exactn: 4489 case exactn:
4490 mcnt = *p++; 4490 mcnt = *p++;
4491 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt); 4491 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4492 4492
4493 /* This is written out as an if-else so we don't waste time 4493 /* This is written out as an if-else so we don't waste time
4494 testing `translate' inside the loop. */ 4494 testing `translate' inside the loop. */
4495 if (translate) 4495 if (translate)
4496 { 4496 {
4497 do 4497 do
4498 { 4498 {
4499 PREFETCH (); 4499 PREFETCH ();
4500 if ((unsigned char) translate[(unsigned char) *d++] 4500 if ((unsigned char) translate[(unsigned char) *d++]
4501 != (unsigned char) *p++) 4501 != (unsigned char) *p++)
4502 goto fail; 4502 goto fail;
4503 } 4503 }
4504 while (--mcnt); 4504 while (--mcnt);
4505 } 4505 }
4506 else 4506 else
4507 { 4507 {
4511 if (*d++ != (char) *p++) goto fail; 4511 if (*d++ != (char) *p++) goto fail;
4512 } 4512 }
4513 while (--mcnt); 4513 while (--mcnt);
4514 } 4514 }
4515 SET_REGS_MATCHED (); 4515 SET_REGS_MATCHED ();
4516 break; 4516 break;
4517 4517
4518 4518
4519 /* Match any character except possibly a newline or a null. */ 4519 /* Match any character except possibly a newline or a null. */
4520 case anychar: 4520 case anychar:
4521 DEBUG_PRINT1 ("EXECUTING anychar.\n"); 4521 DEBUG_PRINT1 ("EXECUTING anychar.\n");
4522 4522
4523 PREFETCH (); 4523 PREFETCH ();
4524 4524
4525 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n') 4525 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4526 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000')) 4526 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4527 goto fail; 4527 goto fail;
4528 4528
4529 SET_REGS_MATCHED (); 4529 SET_REGS_MATCHED ();
4530 DEBUG_PRINT2 (" Matched `%d'.\n", *d); 4530 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
4531 d += multibyte ? MULTIBYTE_FORM_LENGTH (d, dend - d) : 1; 4531 d += multibyte ? MULTIBYTE_FORM_LENGTH (d, dend - d) : 1;
4532 break; 4532 break;
4533 4533
4534 4534
4535 case charset: 4535 case charset:
4541 4541
4542 /* Start of actual range_table, or end of bitmap if there is no 4542 /* Start of actual range_table, or end of bitmap if there is no
4543 range table. */ 4543 range table. */
4544 unsigned char *range_table; 4544 unsigned char *range_table;
4545 4545
4546 /* Nonzero if there is range table. */ 4546 /* Nonzero if there is range table. */
4547 int range_table_exists; 4547 int range_table_exists;
4548 4548
4549 /* Number of ranges of range table. Not in bytes. */ 4549 /* Number of ranges of range table. Not in bytes. */
4550 int count; 4550 int count;
4551 4551
4552 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : ""); 4552 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4553 4553
4554 PREFETCH (); 4554 PREFETCH ();
4555 c = (unsigned char) *d; 4555 c = (unsigned char) *d;
4556 4556
4557 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */ 4557 range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap. */
4586 d += len; 4586 d += len;
4587 break; 4587 break;
4588 } 4588 }
4589 4589
4590 4590
4591 /* The beginning of a group is represented by start_memory. 4591 /* The beginning of a group is represented by start_memory.
4592 The arguments are the register number in the next byte, and the 4592 The arguments are the register number in the next byte, and the
4593 number of groups inner to this one in the next. The text 4593 number of groups inner to this one in the next. The text
4594 matched within the group is recorded (in the internal 4594 matched within the group is recorded (in the internal
4595 registers data structure) under the register number. */ 4595 registers data structure) under the register number. */
4596 case start_memory: 4596 case start_memory:
4597 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]); 4597 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4598 4598
4599 /* Find out if this group can match the empty string. */ 4599 /* Find out if this group can match the empty string. */
4600 p1 = p; /* To send to group_match_null_string_p. */ 4600 p1 = p; /* To send to group_match_null_string_p. */
4601 4601
4602 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) 4602 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4603 REG_MATCH_NULL_STRING_P (reg_info[*p]) 4603 REG_MATCH_NULL_STRING_P (reg_info[*p])
4604 = group_match_null_string_p (&p1, pend, reg_info); 4604 = group_match_null_string_p (&p1, pend, reg_info);
4605 4605
4606 /* Save the position in the string where we were the last time 4606 /* Save the position in the string where we were the last time
4607 we were at this open-group operator in case the group is 4607 we were at this open-group operator in case the group is
4608 operated upon by a repetition operator, e.g., with `(a*)*b' 4608 operated upon by a repetition operator, e.g., with `(a*)*b'
4609 against `ab'; then we want to ignore where we are now in 4609 against `ab'; then we want to ignore where we are now in
4610 the string in case this attempt to match fails. */ 4610 the string in case this attempt to match fails. */
4611 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4611 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4612 ? REG_UNSET (regstart[*p]) ? d : regstart[*p] 4612 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4613 : regstart[*p]; 4613 : regstart[*p];
4614 DEBUG_PRINT2 (" old_regstart: %d\n", 4614 DEBUG_PRINT2 (" old_regstart: %d\n",
4615 POINTER_TO_OFFSET (old_regstart[*p])); 4615 POINTER_TO_OFFSET (old_regstart[*p]));
4616 4616
4617 regstart[*p] = d; 4617 regstart[*p] = d;
4618 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p])); 4618 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4619 4619
4620 IS_ACTIVE (reg_info[*p]) = 1; 4620 IS_ACTIVE (reg_info[*p]) = 1;
4621 MATCHED_SOMETHING (reg_info[*p]) = 0; 4621 MATCHED_SOMETHING (reg_info[*p]) = 0;
4622 4622
4623 /* Clear this whenever we change the register activity status. */ 4623 /* Clear this whenever we change the register activity status. */
4624 set_regs_matched_done = 0; 4624 set_regs_matched_done = 0;
4625 4625
4626 /* This is the new highest active register. */ 4626 /* This is the new highest active register. */
4627 highest_active_reg = *p; 4627 highest_active_reg = *p;
4628 4628
4629 /* If nothing was active before, this is the new lowest active 4629 /* If nothing was active before, this is the new lowest active
4630 register. */ 4630 register. */
4631 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 4631 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4632 lowest_active_reg = *p; 4632 lowest_active_reg = *p;
4633 4633
4634 /* Move past the register number and inner group count. */ 4634 /* Move past the register number and inner group count. */
4635 p += 2; 4635 p += 2;
4636 just_past_start_mem = p; 4636 just_past_start_mem = p;
4637 4637
4638 break; 4638 break;
4639 4639
4640 4640
4641 /* The stop_memory opcode represents the end of a group. Its 4641 /* The stop_memory opcode represents the end of a group. Its
4642 arguments are the same as start_memory's: the register 4642 arguments are the same as start_memory's: the register
4643 number, and the number of inner groups. */ 4643 number, and the number of inner groups. */
4644 case stop_memory: 4644 case stop_memory:
4645 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); 4645 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4646 4646
4647 /* We need to save the string position the last time we were at 4647 /* We need to save the string position the last time we were at
4648 this close-group operator in case the group is operated 4648 this close-group operator in case the group is operated
4649 upon by a repetition operator, e.g., with `((a*)*(b*)*)*' 4649 upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4650 against `aba'; then we want to ignore where we are now in 4650 against `aba'; then we want to ignore where we are now in
4651 the string in case this attempt to match fails. */ 4651 the string in case this attempt to match fails. */
4652 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) 4652 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4653 ? REG_UNSET (regend[*p]) ? d : regend[*p] 4653 ? REG_UNSET (regend[*p]) ? d : regend[*p]
4654 : regend[*p]; 4654 : regend[*p];
4655 DEBUG_PRINT2 (" old_regend: %d\n", 4655 DEBUG_PRINT2 (" old_regend: %d\n",
4656 POINTER_TO_OFFSET (old_regend[*p])); 4656 POINTER_TO_OFFSET (old_regend[*p]));
4657 4657
4658 regend[*p] = d; 4658 regend[*p] = d;
4659 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p])); 4659 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4660 4660
4661 /* This register isn't active anymore. */ 4661 /* This register isn't active anymore. */
4662 IS_ACTIVE (reg_info[*p]) = 0; 4662 IS_ACTIVE (reg_info[*p]) = 0;
4663 4663
4664 /* Clear this whenever we change the register activity status. */ 4664 /* Clear this whenever we change the register activity status. */
4665 set_regs_matched_done = 0; 4665 set_regs_matched_done = 0;
4666 4666
4667 /* If this was the only register active, nothing is active 4667 /* If this was the only register active, nothing is active
4668 anymore. */ 4668 anymore. */
4669 if (lowest_active_reg == highest_active_reg) 4669 if (lowest_active_reg == highest_active_reg)
4670 { 4670 {
4671 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 4671 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4672 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 4672 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4673 } 4673 }
4674 else 4674 else
4675 { /* We must scan for the new highest active register, since 4675 { /* We must scan for the new highest active register, since
4676 it isn't necessarily one less than now: consider 4676 it isn't necessarily one less than now: consider
4677 (a(b)c(d(e)f)g). When group 3 ends, after the f), the 4677 (a(b)c(d(e)f)g). When group 3 ends, after the f), the
4678 new highest active register is 1. */ 4678 new highest active register is 1. */
4679 unsigned char r = *p - 1; 4679 unsigned char r = *p - 1;
4680 while (r > 0 && !IS_ACTIVE (reg_info[r])) 4680 while (r > 0 && !IS_ACTIVE (reg_info[r]))
4681 r--; 4681 r--;
4682 4682
4683 /* If we end up at register zero, that means that we saved 4683 /* If we end up at register zero, that means that we saved
4684 the registers as the result of an `on_failure_jump', not 4684 the registers as the result of an `on_failure_jump', not
4685 a `start_memory', and we jumped past the innermost 4685 a `start_memory', and we jumped past the innermost
4686 `stop_memory'. For example, in ((.)*) we save 4686 `stop_memory'. For example, in ((.)*) we save
4687 registers 1 and 2 as a result of the *, but when we pop 4687 registers 1 and 2 as a result of the *, but when we pop
4688 back to the second ), we are at the stop_memory 1. 4688 back to the second ), we are at the stop_memory 1.
4689 Thus, nothing is active. */ 4689 Thus, nothing is active. */
4690 if (r == 0) 4690 if (r == 0)
4691 { 4691 {
4692 lowest_active_reg = NO_LOWEST_ACTIVE_REG; 4692 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4693 highest_active_reg = NO_HIGHEST_ACTIVE_REG; 4693 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4694 } 4694 }
4695 else 4695 else
4696 highest_active_reg = r; 4696 highest_active_reg = r;
4697 } 4697 }
4698 4698
4699 /* If we just failed to match something this time around with a 4699 /* If we just failed to match something this time around with a
4700 group that's operated on by a repetition operator, try to 4700 group that's operated on by a repetition operator, try to
4701 force exit from the ``loop'', and restore the register 4701 force exit from the ``loop'', and restore the register
4702 information for this group that we had before trying this 4702 information for this group that we had before trying this
4703 last match. */ 4703 last match. */
4704 if ((!MATCHED_SOMETHING (reg_info[*p]) 4704 if ((!MATCHED_SOMETHING (reg_info[*p])
4705 || just_past_start_mem == p - 1) 4705 || just_past_start_mem == p - 1)
4706 && (p + 2) < pend) 4706 && (p + 2) < pend)
4707 { 4707 {
4708 boolean is_a_jump_n = false; 4708 boolean is_a_jump_n = false;
4709 4709
4710 p1 = p + 2; 4710 p1 = p + 2;
4711 mcnt = 0; 4711 mcnt = 0;
4712 switch ((re_opcode_t) *p1++) 4712 switch ((re_opcode_t) *p1++)
4713 { 4713 {
4714 case jump_n: 4714 case jump_n:
4715 is_a_jump_n = true; 4715 is_a_jump_n = true;
4716 case pop_failure_jump: 4716 case pop_failure_jump:
4717 case maybe_pop_jump: 4717 case maybe_pop_jump:
4718 case jump: 4718 case jump:
4719 case dummy_failure_jump: 4719 case dummy_failure_jump:
4720 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4720 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4721 if (is_a_jump_n) 4721 if (is_a_jump_n)
4722 p1 += 2; 4722 p1 += 2;
4723 break; 4723 break;
4724 4724
4725 default: 4725 default:
4726 /* do nothing */ ; 4726 /* do nothing */ ;
4727 } 4727 }
4728 p1 += mcnt; 4728 p1 += mcnt;
4729 4729
4730 /* If the next operation is a jump backwards in the pattern 4730 /* If the next operation is a jump backwards in the pattern
4731 to an on_failure_jump right before the start_memory 4731 to an on_failure_jump right before the start_memory
4732 corresponding to this stop_memory, exit from the loop 4732 corresponding to this stop_memory, exit from the loop
4733 by forcing a failure after pushing on the stack the 4733 by forcing a failure after pushing on the stack the
4734 on_failure_jump's jump in the pattern, and d. */ 4734 on_failure_jump's jump in the pattern, and d. */
4735 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump 4735 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4736 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) 4736 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4737 { 4737 {
4738 /* If this group ever matched anything, then restore 4738 /* If this group ever matched anything, then restore
4739 what its registers were before trying this last 4739 what its registers were before trying this last
4740 failed match, e.g., with `(a*)*b' against `ab' for 4740 failed match, e.g., with `(a*)*b' against `ab' for
4741 regstart[1], and, e.g., with `((a*)*(b*)*)*' 4741 regstart[1], and, e.g., with `((a*)*(b*)*)*'
4742 against `aba' for regend[3]. 4742 against `aba' for regend[3].
4743 4743
4744 Also restore the registers for inner groups for, 4744 Also restore the registers for inner groups for,
4745 e.g., `((a*)(b*))*' against `aba' (register 3 would 4745 e.g., `((a*)(b*))*' against `aba' (register 3 would
4746 otherwise get trashed). */ 4746 otherwise get trashed). */
4747 4747
4748 if (EVER_MATCHED_SOMETHING (reg_info[*p])) 4748 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4749 { 4749 {
4750 unsigned r; 4750 unsigned r;
4751 4751
4752 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; 4752 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4753 4753
4754 /* Restore this and inner groups' (if any) registers. */ 4754 /* Restore this and inner groups' (if any) registers. */
4755 for (r = *p; r < *p + *(p + 1); r++) 4755 for (r = *p; r < *p + *(p + 1); r++)
4756 { 4756 {
4757 regstart[r] = old_regstart[r]; 4757 regstart[r] = old_regstart[r];
4758 4758
4759 /* xx why this test? */ 4759 /* xx why this test? */
4760 if (old_regend[r] >= regstart[r]) 4760 if (old_regend[r] >= regstart[r])
4761 regend[r] = old_regend[r]; 4761 regend[r] = old_regend[r];
4762 } 4762 }
4763 } 4763 }
4764 p1++; 4764 p1++;
4765 EXTRACT_NUMBER_AND_INCR (mcnt, p1); 4765 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4766 PUSH_FAILURE_POINT (p1 + mcnt, d, -2); 4766 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4767 4767
4768 goto fail; 4768 goto fail;
4769 } 4769 }
4770 } 4770 }
4771 4771
4772 /* Move past the register number and the inner group count. */ 4772 /* Move past the register number and the inner group count. */
4773 p += 2; 4773 p += 2;
4774 break; 4774 break;
4775 4775
4776 4776
4777 /* \<digit> has been turned into a `duplicate' command which is 4777 /* \<digit> has been turned into a `duplicate' command which is
4778 followed by the numeric value of <digit> as the register number. */ 4778 followed by the numeric value of <digit> as the register number. */
4779 case duplicate: 4779 case duplicate:
4780 { 4780 {
4781 register const char *d2, *dend2; 4781 register const char *d2, *dend2;
4782 int regno = *p++; /* Get which register to match against. */ 4782 int regno = *p++; /* Get which register to match against. */
4783 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno); 4783 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4784 4784
4785 /* Can't back reference a group which we've never matched. */ 4785 /* Can't back reference a group which we've never matched. */
4786 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) 4786 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4787 goto fail; 4787 goto fail;
4788 4788
4789 /* Where in input to try to start matching. */ 4789 /* Where in input to try to start matching. */
4790 d2 = regstart[regno]; 4790 d2 = regstart[regno];
4791 4791
4792 /* Where to stop matching; if both the place to start and 4792 /* Where to stop matching; if both the place to start and
4793 the place to stop matching are in the same string, then 4793 the place to stop matching are in the same string, then
4794 set to the place to stop, otherwise, for now have to use 4794 set to the place to stop, otherwise, for now have to use
4795 the end of the first string. */ 4795 the end of the first string. */
4796 4796
4797 dend2 = ((FIRST_STRING_P (regstart[regno]) 4797 dend2 = ((FIRST_STRING_P (regstart[regno])
4798 == FIRST_STRING_P (regend[regno])) 4798 == FIRST_STRING_P (regend[regno]))
4799 ? regend[regno] : end_match_1); 4799 ? regend[regno] : end_match_1);
4800 for (;;) 4800 for (;;)
4801 { 4801 {
4802 /* If necessary, advance to next segment in register 4802 /* If necessary, advance to next segment in register
4803 contents. */ 4803 contents. */
4804 while (d2 == dend2) 4804 while (d2 == dend2)
4805 { 4805 {
4806 if (dend2 == end_match_2) break; 4806 if (dend2 == end_match_2) break;
4807 if (dend2 == regend[regno]) break; 4807 if (dend2 == regend[regno]) break;
4808 4808
4809 /* End of string1 => advance to string2. */ 4809 /* End of string1 => advance to string2. */
4810 d2 = string2; 4810 d2 = string2;
4811 dend2 = regend[regno]; 4811 dend2 = regend[regno];
4812 } 4812 }
4813 /* At end of register contents => success */ 4813 /* At end of register contents => success */
4814 if (d2 == dend2) break; 4814 if (d2 == dend2) break;
4815 4815
4816 /* If necessary, advance to next segment in data. */ 4816 /* If necessary, advance to next segment in data. */
4818 4818
4819 /* How many characters left in this segment to match. */ 4819 /* How many characters left in this segment to match. */
4820 mcnt = dend - d; 4820 mcnt = dend - d;
4821 4821
4822 /* Want how many consecutive characters we can match in 4822 /* Want how many consecutive characters we can match in
4823 one shot, so, if necessary, adjust the count. */ 4823 one shot, so, if necessary, adjust the count. */
4824 if (mcnt > dend2 - d2) 4824 if (mcnt > dend2 - d2)
4825 mcnt = dend2 - d2; 4825 mcnt = dend2 - d2;
4826 4826
4827 /* Compare that many; failure if mismatch, else move 4827 /* Compare that many; failure if mismatch, else move
4828 past them. */ 4828 past them. */
4829 if (translate 4829 if (translate
4830 ? bcmp_translate (d, d2, mcnt, translate) 4830 ? bcmp_translate (d, d2, mcnt, translate)
4831 : bcmp (d, d2, mcnt)) 4831 : bcmp (d, d2, mcnt))
4832 goto fail; 4832 goto fail;
4833 d += mcnt, d2 += mcnt; 4833 d += mcnt, d2 += mcnt;
4834 4834
4835 /* Do this because we've matched some characters. */ 4835 /* Do this because we've matched some characters. */
4836 SET_REGS_MATCHED (); 4836 SET_REGS_MATCHED ();
4837 } 4837 }
4838 } 4838 }
4839 break; 4839 break;
4840 4840
4841 4841
4842 /* begline matches the empty string at the beginning of the string 4842 /* begline matches the empty string at the beginning of the string
4843 (unless `not_bol' is set in `bufp'), and, if 4843 (unless `not_bol' is set in `bufp'), and, if
4844 `newline_anchor' is set, after newlines. */ 4844 `newline_anchor' is set, after newlines. */
4845 case begline: 4845 case begline:
4846 DEBUG_PRINT1 ("EXECUTING begline.\n"); 4846 DEBUG_PRINT1 ("EXECUTING begline.\n");
4847 4847
4848 if (AT_STRINGS_BEG (d)) 4848 if (AT_STRINGS_BEG (d))
4849 { 4849 {
4850 if (!bufp->not_bol) break; 4850 if (!bufp->not_bol) break;
4851 } 4851 }
4852 else if (d[-1] == '\n' && bufp->newline_anchor) 4852 else if (d[-1] == '\n' && bufp->newline_anchor)
4853 { 4853 {
4854 break; 4854 break;
4855 } 4855 }
4856 /* In all other cases, we fail. */ 4856 /* In all other cases, we fail. */
4857 goto fail; 4857 goto fail;
4858 4858
4859 4859
4860 /* endline is the dual of begline. */ 4860 /* endline is the dual of begline. */
4861 case endline: 4861 case endline:
4862 DEBUG_PRINT1 ("EXECUTING endline.\n"); 4862 DEBUG_PRINT1 ("EXECUTING endline.\n");
4863 4863
4864 if (AT_STRINGS_END (d)) 4864 if (AT_STRINGS_END (d))
4865 { 4865 {
4866 if (!bufp->not_eol) break; 4866 if (!bufp->not_eol) break;
4867 } 4867 }
4868 4868
4869 /* We have to ``prefetch'' the next character. */ 4869 /* We have to ``prefetch'' the next character. */
4870 else if ((d == end1 ? *string2 : *d) == '\n' 4870 else if ((d == end1 ? *string2 : *d) == '\n'
4871 && bufp->newline_anchor) 4871 && bufp->newline_anchor)
4872 { 4872 {
4873 break; 4873 break;
4874 } 4874 }
4875 goto fail; 4875 goto fail;
4876 4876
4877 4877
4878 /* Match at the very beginning of the data. */ 4878 /* Match at the very beginning of the data. */
4879 case begbuf: 4879 case begbuf:
4880 DEBUG_PRINT1 ("EXECUTING begbuf.\n"); 4880 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4881 if (AT_STRINGS_BEG (d)) 4881 if (AT_STRINGS_BEG (d))
4882 break; 4882 break;
4883 goto fail; 4883 goto fail;
4884 4884
4885 4885
4886 /* Match at the very end of the data. */ 4886 /* Match at the very end of the data. */
4887 case endbuf: 4887 case endbuf:
4888 DEBUG_PRINT1 ("EXECUTING endbuf.\n"); 4888 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4889 if (AT_STRINGS_END (d)) 4889 if (AT_STRINGS_END (d))
4890 break; 4890 break;
4891 goto fail; 4891 goto fail;
4892 4892
4893 4893
4894 /* on_failure_keep_string_jump is used to optimize `.*\n'. It 4894 /* on_failure_keep_string_jump is used to optimize `.*\n'. It
4895 pushes NULL as the value for the string on the stack. Then 4895 pushes NULL as the value for the string on the stack. Then
4896 `pop_failure_point' will keep the current value for the 4896 `pop_failure_point' will keep the current value for the
4897 string, instead of restoring it. To see why, consider 4897 string, instead of restoring it. To see why, consider
4898 matching `foo\nbar' against `.*\n'. The .* matches the foo; 4898 matching `foo\nbar' against `.*\n'. The .* matches the foo;
4899 then the . fails against the \n. But the next thing we want 4899 then the . fails against the \n. But the next thing we want
4900 to do is match the \n against the \n; if we restored the 4900 to do is match the \n against the \n; if we restored the
4901 string value, we would be back at the foo. 4901 string value, we would be back at the foo.
4902 4902
4903 Because this is used only in specific cases, we don't need to 4903 Because this is used only in specific cases, we don't need to
4904 check all the things that `on_failure_jump' does, to make 4904 check all the things that `on_failure_jump' does, to make
4905 sure the right things get saved on the stack. Hence we don't 4905 sure the right things get saved on the stack. Hence we don't
4906 share its code. The only reason to push anything on the 4906 share its code. The only reason to push anything on the
4907 stack at all is that otherwise we would have to change 4907 stack at all is that otherwise we would have to change
4908 `anychar's code to do something besides goto fail in this 4908 `anychar's code to do something besides goto fail in this
4909 case; that seems worse than this. */ 4909 case; that seems worse than this. */
4910 case on_failure_keep_string_jump: 4910 case on_failure_keep_string_jump:
4911 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); 4911 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4912 4912
4913 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4913 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4914 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); 4914 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4915 4915
4916 PUSH_FAILURE_POINT (p + mcnt, NULL, -2); 4916 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4917 break; 4917 break;
4918 4918
4919 4919
4920 /* Uses of on_failure_jump: 4920 /* Uses of on_failure_jump:
4921 4921
4922 Each alternative starts with an on_failure_jump that points 4922 Each alternative starts with an on_failure_jump that points
4923 to the beginning of the next alternative. Each alternative 4923 to the beginning of the next alternative. Each alternative
4924 except the last ends with a jump that in effect jumps past 4924 except the last ends with a jump that in effect jumps past
4925 the rest of the alternatives. (They really jump to the 4925 the rest of the alternatives. (They really jump to the
4926 ending jump of the following alternative, because tensioning 4926 ending jump of the following alternative, because tensioning
4927 these jumps is a hassle.) 4927 these jumps is a hassle.)
4928 4928
4929 Repeats start with an on_failure_jump that points past both 4929 Repeats start with an on_failure_jump that points past both
4930 the repetition text and either the following jump or 4930 the repetition text and either the following jump or
4931 pop_failure_jump back to this on_failure_jump. */ 4931 pop_failure_jump back to this on_failure_jump. */
4932 case on_failure_jump: 4932 case on_failure_jump:
4933 on_failure: 4933 on_failure:
4934 DEBUG_PRINT1 ("EXECUTING on_failure_jump"); 4934 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4935 4935
4936 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4936 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4937 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); 4937 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4938 4938
4939 /* If this on_failure_jump comes right before a group (i.e., 4939 /* If this on_failure_jump comes right before a group (i.e.,
4940 the original * applied to a group), save the information 4940 the original * applied to a group), save the information
4941 for that group and all inner ones, so that if we fail back 4941 for that group and all inner ones, so that if we fail back
4942 to this point, the group's information will be correct. 4942 to this point, the group's information will be correct.
4943 For example, in \(a*\)*\1, we need the preceding group, 4943 For example, in \(a*\)*\1, we need the preceding group,
4944 and in \(zz\(a*\)b*\)\2, we need the inner group. */ 4944 and in \(zz\(a*\)b*\)\2, we need the inner group. */
4945 4945
4946 /* We can't use `p' to check ahead because we push 4946 /* We can't use `p' to check ahead because we push
4947 a failure point to `p + mcnt' after we do this. */ 4947 a failure point to `p + mcnt' after we do this. */
4948 p1 = p; 4948 p1 = p;
4949 4949
4950 /* We need to skip no_op's before we look for the 4950 /* We need to skip no_op's before we look for the
4951 start_memory in case this on_failure_jump is happening as 4951 start_memory in case this on_failure_jump is happening as
4952 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1 4952 the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4953 against aba. */ 4953 against aba. */
4954 while (p1 < pend && (re_opcode_t) *p1 == no_op) 4954 while (p1 < pend && (re_opcode_t) *p1 == no_op)
4955 p1++; 4955 p1++;
4956 4956
4957 if (p1 < pend && (re_opcode_t) *p1 == start_memory) 4957 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4958 { 4958 {
4959 /* We have a new highest active register now. This will 4959 /* We have a new highest active register now. This will
4960 get reset at the start_memory we are about to get to, 4960 get reset at the start_memory we are about to get to,
4961 but we will have saved all the registers relevant to 4961 but we will have saved all the registers relevant to
4962 this repetition op, as described above. */ 4962 this repetition op, as described above. */
4963 highest_active_reg = *(p1 + 1) + *(p1 + 2); 4963 highest_active_reg = *(p1 + 1) + *(p1 + 2);
4964 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) 4964 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4965 lowest_active_reg = *(p1 + 1); 4965 lowest_active_reg = *(p1 + 1);
4966 } 4966 }
4967 4967
4968 DEBUG_PRINT1 (":\n"); 4968 DEBUG_PRINT1 (":\n");
4969 PUSH_FAILURE_POINT (p + mcnt, d, -2); 4969 PUSH_FAILURE_POINT (p + mcnt, d, -2);
4970 break; 4970 break;
4971 4971
4972 4972
4973 /* A smart repeat ends with `maybe_pop_jump'. 4973 /* A smart repeat ends with `maybe_pop_jump'.
4974 We change it to either `pop_failure_jump' or `jump'. */ 4974 We change it to either `pop_failure_jump' or `jump'. */
4975 case maybe_pop_jump: 4975 case maybe_pop_jump:
4976 EXTRACT_NUMBER_AND_INCR (mcnt, p); 4976 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4977 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt); 4977 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4978 { 4978 {
4979 register unsigned char *p2 = p; 4979 register unsigned char *p2 = p;
4980 4980
4981 /* Compare the beginning of the repeat with what in the 4981 /* Compare the beginning of the repeat with what in the
4982 pattern follows its end. If we can establish that there 4982 pattern follows its end. If we can establish that there
4983 is nothing that they would both match, i.e., nothing that 4983 is nothing that they would both match, i.e., nothing that
4984 would force us to backtrack (as in, e.g., `a*a'), 4984 would force us to backtrack (as in, e.g., `a*a'),
4985 then we can change to pop_failure_jump, because we'll 4985 then we can change to pop_failure_jump, because we'll
4986 never have to backtrack. 4986 never have to backtrack.
4987 4987
4988 This is not true in the case of alternatives: in 4988 This is not true in the case of alternatives: in
4989 `(a|ab)*' we do need to backtrack to the `ab' alternative 4989 `(a|ab)*' we do need to backtrack to the `ab' alternative
4990 (e.g., if the string was `ab'). But instead of trying to 4990 (e.g., if the string was `ab'). But instead of trying to
4991 detect that here, the alternative has put on a dummy 4991 detect that here, the alternative has put on a dummy
4992 failure point which is what we will end up popping. */ 4992 failure point which is what we will end up popping. */
4993 4993
4994 /* Skip over open/close-group commands. 4994 /* Skip over open/close-group commands.
4995 If what follows this loop is a ...+ construct, 4995 If what follows this loop is a ...+ construct,
4996 look at what begins its body, since we will have to 4996 look at what begins its body, since we will have to
4997 match at least one of that. */ 4997 match at least one of that. */
5009 } 5009 }
5010 5010
5011 p1 = p + mcnt; 5011 p1 = p + mcnt;
5012 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding 5012 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5013 to the `maybe_pop_jump' of this case. Examine what 5013 to the `maybe_pop_jump' of this case. Examine what
5014 follows. */ 5014 follows. */
5015 5015
5016 /* If we're at the end of the pattern, we can change. */ 5016 /* If we're at the end of the pattern, we can change. */
5017 if (p2 == pend) 5017 if (p2 == pend)
5018 { 5018 {
5019 /* Consider what happens when matching ":\(.*\)" 5019 /* Consider what happens when matching ":\(.*\)"
5020 against ":/". I don't really understand this code 5020 against ":/". I don't really understand this code
5021 yet. */ 5021 yet. */
5022 p[-3] = (unsigned char) pop_failure_jump; 5022 p[-3] = (unsigned char) pop_failure_jump;
5023 DEBUG_PRINT1 5023 DEBUG_PRINT1
5024 (" End of pattern: change to `pop_failure_jump'.\n"); 5024 (" End of pattern: change to `pop_failure_jump'.\n");
5025 } 5025 }
5026 5026
5027 else if ((re_opcode_t) *p2 == exactn 5027 else if ((re_opcode_t) *p2 == exactn
5028 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline)) 5028 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5029 { 5029 {
5030 register unsigned int c 5030 register unsigned int c
5031 = *p2 == (unsigned char) endline ? '\n' : p2[2]; 5031 = *p2 == (unsigned char) endline ? '\n' : p2[2];
5032 5032
5033 if ((re_opcode_t) p1[3] == exactn) 5033 if ((re_opcode_t) p1[3] == exactn)
5034 { 5034 {
5035 if (!(multibyte /* && (c != '\n') */ 5035 if (!(multibyte /* && (c != '\n') */
5036 && BASE_LEADING_CODE_P (c)) 5036 && BASE_LEADING_CODE_P (c))
5037 ? c != p1[5] 5037 ? c != p1[5]
5038 : (STRING_CHAR (&p2[2], pend - &p2[2]) 5038 : (STRING_CHAR (&p2[2], pend - &p2[2])
5039 != STRING_CHAR (&p1[5], pend - &p1[5]))) 5039 != STRING_CHAR (&p1[5], pend - &p1[5])))
5040 { 5040 {
5041 p[-3] = (unsigned char) pop_failure_jump; 5041 p[-3] = (unsigned char) pop_failure_jump;
5042 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", 5042 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
5043 c, p1[5]); 5043 c, p1[5]);
5044 } 5044 }
5045 } 5045 }
5046 5046
5047 else if ((re_opcode_t) p1[3] == charset 5047 else if ((re_opcode_t) p1[3] == charset
5048 || (re_opcode_t) p1[3] == charset_not) 5048 || (re_opcode_t) p1[3] == charset_not)
5049 { 5049 {
5062 not = !not; 5062 not = !not;
5063 } 5063 }
5064 else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3])) 5064 else if (CHARSET_RANGE_TABLE_EXISTS_P (&p1[3]))
5065 CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]); 5065 CHARSET_LOOKUP_RANGE_TABLE (not, c, &p1[3]);
5066 5066
5067 /* `not' is equal to 1 if c would match, which means 5067 /* `not' is equal to 1 if c would match, which means
5068 that we can't change to pop_failure_jump. */ 5068 that we can't change to pop_failure_jump. */
5069 if (!not) 5069 if (!not)
5070 { 5070 {
5071 p[-3] = (unsigned char) pop_failure_jump; 5071 p[-3] = (unsigned char) pop_failure_jump;
5072 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 5072 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5073 } 5073 }
5074 } 5074 }
5075 } 5075 }
5076 else if ((re_opcode_t) *p2 == charset) 5076 else if ((re_opcode_t) *p2 == charset)
5077 { 5077 {
5078 if ((re_opcode_t) p1[3] == exactn) 5078 if ((re_opcode_t) p1[3] == exactn)
5079 { 5079 {
5080 register unsigned int c = p1[5]; 5080 register unsigned int c = p1[5];
5081 int not = 0; 5081 int not = 0;
5082 5082
5083 if (multibyte && BASE_LEADING_CODE_P (c)) 5083 if (multibyte && BASE_LEADING_CODE_P (c))
5084 c = STRING_CHAR (&p1[5], pend - &p1[5]); 5084 c = STRING_CHAR (&p1[5], pend - &p1[5]);
5085 5085
5086 /* Test if C is listed in charset at `p2'. */ 5086 /* Test if C is listed in charset at `p2'. */
5087 if (SINGLE_BYTE_CHAR_P (c)) 5087 if (SINGLE_BYTE_CHAR_P (c))
5088 { 5088 {
5089 if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH 5089 if (c < CHARSET_BITMAP_SIZE (p2) * BYTEWIDTH
5090 && (p2[2 + c / BYTEWIDTH] 5090 && (p2[2 + c / BYTEWIDTH]
5091 & (1 << (c % BYTEWIDTH)))) 5091 & (1 << (c % BYTEWIDTH))))
5093 } 5093 }
5094 else if (CHARSET_RANGE_TABLE_EXISTS_P (p2)) 5094 else if (CHARSET_RANGE_TABLE_EXISTS_P (p2))
5095 CHARSET_LOOKUP_RANGE_TABLE (not, c, p2); 5095 CHARSET_LOOKUP_RANGE_TABLE (not, c, p2);
5096 5096
5097 if (!not) 5097 if (!not)
5098 { 5098 {
5099 p[-3] = (unsigned char) pop_failure_jump; 5099 p[-3] = (unsigned char) pop_failure_jump;
5100 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 5100 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5101 } 5101 }
5102 } 5102 }
5103 5103
5104 /* It is hard to enumerate all the characters in charset 5104 /* It is hard to enumerate all the characters in charset
5105 P2 if it includes multibyte characters. Give up in 5105 P2 if it includes multibyte characters. Give up in
5106 such a case. */ 5106 such a case. */
5107 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) 5107 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
5108 { 5108 {
5109 /* Now, we are sure that P2 has no range table. 5109 /* Now, we are sure that P2 has no range table.
5110 So, for the size of bitmap in P2, `p2[1]' is 5110 So, for the size of bitmap in P2, `p2[1]' is
5111 enough. But P1 may have range table, so the 5111 enough. But P1 may have range table, so the
5112 size of bitmap table of P1 is extracted by 5112 size of bitmap table of P1 is extracted by
5113 using macro `CHARSET_BITMAP_SIZE'. 5113 using macro `CHARSET_BITMAP_SIZE'.
5114 5114
5115 Since we know that all the characters listed in 5115 Since we know that all the characters listed in
5116 P2 are ASCII, it is enough to test only the bitmap 5116 P2 are ASCII, it is enough to test only the bitmap
5117 table of P1. */ 5117 table of P1. */
5118 5118
5119 if ((re_opcode_t) p1[3] == charset_not) 5119 if ((re_opcode_t) p1[3] == charset_not)
5120 { 5120 {
5121 int idx; 5121 int idx;
5122 /* We win if the charset_not inside the loop lists 5122 /* We win if the charset_not inside the loop lists
5123 every character listed in the charset after. */ 5123 every character listed in the charset after. */
5124 for (idx = 0; idx < (int) p2[1]; idx++) 5124 for (idx = 0; idx < (int) p2[1]; idx++)
5125 if (! (p2[2 + idx] == 0 5125 if (! (p2[2 + idx] == 0
5126 || (idx < CHARSET_BITMAP_SIZE (&p1[3]) 5126 || (idx < CHARSET_BITMAP_SIZE (&p1[3])
5127 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) 5127 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5128 break; 5128 break;
5129 5129
5130 if (idx == p2[1]) 5130 if (idx == p2[1])
5131 { 5131 {
5132 p[-3] = (unsigned char) pop_failure_jump; 5132 p[-3] = (unsigned char) pop_failure_jump;
5133 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 5133 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5134 } 5134 }
5135 } 5135 }
5136 else if ((re_opcode_t) p1[3] == charset) 5136 else if ((re_opcode_t) p1[3] == charset)
5137 { 5137 {
5138 int idx; 5138 int idx;
5139 /* We win if the charset inside the loop 5139 /* We win if the charset inside the loop
5145 if ((p2[2 + idx] & p1[5 + idx]) != 0) 5145 if ((p2[2 + idx] & p1[5 + idx]) != 0)
5146 break; 5146 break;
5147 5147
5148 if (idx == p2[1] 5148 if (idx == p2[1]
5149 || idx == CHARSET_BITMAP_SIZE (&p1[3])) 5149 || idx == CHARSET_BITMAP_SIZE (&p1[3]))
5150 { 5150 {
5151 p[-3] = (unsigned char) pop_failure_jump; 5151 p[-3] = (unsigned char) pop_failure_jump;
5152 DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); 5152 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
5153 } 5153 }
5154 } 5154 }
5155 } 5155 }
5156 } 5156 }
5157 } 5157 }
5158 p -= 2; /* Point at relative address again. */ 5158 p -= 2; /* Point at relative address again. */
5159 if ((re_opcode_t) p[-1] != pop_failure_jump) 5159 if ((re_opcode_t) p[-1] != pop_failure_jump)
5160 { 5160 {
5161 p[-1] = (unsigned char) jump; 5161 p[-1] = (unsigned char) jump;
5162 DEBUG_PRINT1 (" Match => jump.\n"); 5162 DEBUG_PRINT1 (" Match => jump.\n");
5163 goto unconditional_jump; 5163 goto unconditional_jump;
5164 } 5164 }
5165 /* Note fall through. */ 5165 /* Note fall through. */
5166 5166
5167 5167
5168 /* The end of a simple repeat has a pop_failure_jump back to 5168 /* The end of a simple repeat has a pop_failure_jump back to
5169 its matching on_failure_jump, where the latter will push a 5169 its matching on_failure_jump, where the latter will push a
5170 failure point. The pop_failure_jump takes off failure 5170 failure point. The pop_failure_jump takes off failure
5171 points put on by this pop_failure_jump's matching 5171 points put on by this pop_failure_jump's matching
5172 on_failure_jump; we got through the pattern to here from the 5172 on_failure_jump; we got through the pattern to here from the
5173 matching on_failure_jump, so didn't fail. */ 5173 matching on_failure_jump, so didn't fail. */
5174 case pop_failure_jump: 5174 case pop_failure_jump:
5175 { 5175 {
5176 /* We need to pass separate storage for the lowest and 5176 /* We need to pass separate storage for the lowest and
5177 highest registers, even though we don't care about the 5177 highest registers, even though we don't care about the
5178 actual values. Otherwise, we will restore only one 5178 actual values. Otherwise, we will restore only one
5179 register from the stack, since lowest will == highest in 5179 register from the stack, since lowest will == highest in
5180 `pop_failure_point'. */ 5180 `pop_failure_point'. */
5181 unsigned dummy_low_reg, dummy_high_reg; 5181 unsigned dummy_low_reg, dummy_high_reg;
5182 unsigned char *pdummy; 5182 unsigned char *pdummy;
5183 const char *sdummy; 5183 const char *sdummy;
5184 5184
5185 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n"); 5185 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5186 POP_FAILURE_POINT (sdummy, pdummy, 5186 POP_FAILURE_POINT (sdummy, pdummy,
5187 dummy_low_reg, dummy_high_reg, 5187 dummy_low_reg, dummy_high_reg,
5188 reg_dummy, reg_dummy, reg_info_dummy); 5188 reg_dummy, reg_dummy, reg_info_dummy);
5189 } 5189 }
5190 /* Note fall through. */ 5190 /* Note fall through. */
5191 5191
5192 5192
5193 /* Unconditionally jump (without popping any failure points). */ 5193 /* Unconditionally jump (without popping any failure points). */
5194 case jump: 5194 case jump:
5195 unconditional_jump: 5195 unconditional_jump:
5196 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ 5196 EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */
5197 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); 5197 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5198 p += mcnt; /* Do the jump. */ 5198 p += mcnt; /* Do the jump. */
5199 DEBUG_PRINT2 ("(to 0x%x).\n", p); 5199 DEBUG_PRINT2 ("(to 0x%x).\n", p);
5200 break; 5200 break;
5201 5201
5202 5202
5203 /* We need this opcode so we can detect where alternatives end 5203 /* We need this opcode so we can detect where alternatives end
5204 in `group_match_null_string_p' et al. */ 5204 in `group_match_null_string_p' et al. */
5205 case jump_past_alt: 5205 case jump_past_alt:
5206 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n"); 5206 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5207 goto unconditional_jump; 5207 goto unconditional_jump;
5208 5208
5209 5209
5210 /* Normally, the on_failure_jump pushes a failure point, which 5210 /* Normally, the on_failure_jump pushes a failure point, which
5211 then gets popped at pop_failure_jump. We will end up at 5211 then gets popped at pop_failure_jump. We will end up at
5212 pop_failure_jump, also, and with a pattern of, say, `a+', we 5212 pop_failure_jump, also, and with a pattern of, say, `a+', we
5213 are skipping over the on_failure_jump, so we have to push 5213 are skipping over the on_failure_jump, so we have to push
5214 something meaningless for pop_failure_jump to pop. */ 5214 something meaningless for pop_failure_jump to pop. */
5215 case dummy_failure_jump: 5215 case dummy_failure_jump:
5216 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); 5216 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5217 /* It doesn't matter what we push for the string here. What 5217 /* It doesn't matter what we push for the string here. What
5218 the code at `fail' tests is the value for the pattern. */ 5218 the code at `fail' tests is the value for the pattern. */
5219 PUSH_FAILURE_POINT (0, 0, -2); 5219 PUSH_FAILURE_POINT (0, 0, -2);
5220 goto unconditional_jump; 5220 goto unconditional_jump;
5221 5221
5222 5222
5223 /* At the end of an alternative, we need to push a dummy failure 5223 /* At the end of an alternative, we need to push a dummy failure
5224 point in case we are followed by a `pop_failure_jump', because 5224 point in case we are followed by a `pop_failure_jump', because
5225 we don't want the failure point for the alternative to be 5225 we don't want the failure point for the alternative to be
5226 popped. For example, matching `(a|ab)*' against `aab' 5226 popped. For example, matching `(a|ab)*' against `aab'
5227 requires that we match the `ab' alternative. */ 5227 requires that we match the `ab' alternative. */
5228 case push_dummy_failure: 5228 case push_dummy_failure:
5229 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); 5229 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5230 /* See comments just above at `dummy_failure_jump' about the 5230 /* See comments just above at `dummy_failure_jump' about the
5231 two zeroes. */ 5231 two zeroes. */
5232 PUSH_FAILURE_POINT (0, 0, -2); 5232 PUSH_FAILURE_POINT (0, 0, -2);
5233 break; 5233 break;
5234 5234
5235 /* Have to succeed matching what follows at least n times. 5235 /* Have to succeed matching what follows at least n times.
5236 After that, handle like `on_failure_jump'. */ 5236 After that, handle like `on_failure_jump'. */
5237 case succeed_n: 5237 case succeed_n:
5238 EXTRACT_NUMBER (mcnt, p + 2); 5238 EXTRACT_NUMBER (mcnt, p + 2);
5239 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); 5239 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5240 5240
5241 assert (mcnt >= 0); 5241 assert (mcnt >= 0);
5242 /* Originally, this is how many times we HAVE to succeed. */ 5242 /* Originally, this is how many times we HAVE to succeed. */
5243 if (mcnt > 0) 5243 if (mcnt > 0)
5244 { 5244 {
5245 mcnt--; 5245 mcnt--;
5246 p += 2; 5246 p += 2;
5247 STORE_NUMBER_AND_INCR (p, mcnt); 5247 STORE_NUMBER_AND_INCR (p, mcnt);
5248 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); 5248 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt);
5249 } 5249 }
5250 else if (mcnt == 0) 5250 else if (mcnt == 0)
5251 { 5251 {
5252 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); 5252 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
5253 p[2] = (unsigned char) no_op; 5253 p[2] = (unsigned char) no_op;
5254 p[3] = (unsigned char) no_op; 5254 p[3] = (unsigned char) no_op;
5255 goto on_failure; 5255 goto on_failure;
5256 } 5256 }
5257 break; 5257 break;
5258 5258
5259 case jump_n: 5259 case jump_n:
5260 EXTRACT_NUMBER (mcnt, p + 2); 5260 EXTRACT_NUMBER (mcnt, p + 2);
5261 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); 5261 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5262 5262
5263 /* Originally, this is how many times we CAN jump. */ 5263 /* Originally, this is how many times we CAN jump. */
5264 if (mcnt) 5264 if (mcnt)
5265 { 5265 {
5266 mcnt--; 5266 mcnt--;
5267 STORE_NUMBER (p + 2, mcnt); 5267 STORE_NUMBER (p + 2, mcnt);
5268 goto unconditional_jump; 5268 goto unconditional_jump;
5269 } 5269 }
5270 /* If we don't have to jump any more, skip over the rest of the command. */ 5270 /* If we don't have to jump any more, skip over the rest of the command. */
5271 else 5271 else
5272 p += 4; 5272 p += 4;
5273 break; 5273 break;
5274 5274
5275 case set_number_at: 5275 case set_number_at:
5276 { 5276 {
5277 DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); 5277 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5278 5278
5279 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5279 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5280 p1 = p + mcnt; 5280 p1 = p + mcnt;
5281 EXTRACT_NUMBER_AND_INCR (mcnt, p); 5281 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5282 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); 5282 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
5283 STORE_NUMBER (p1, mcnt); 5283 STORE_NUMBER (p1, mcnt);
5284 break; 5284 break;
5285 } 5285 }
5286 5286
5287 case wordbound: 5287 case wordbound:
5288 DEBUG_PRINT1 ("EXECUTING wordbound.\n"); 5288 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5289 5289
5290 /* We SUCCEED in one of the following cases: */ 5290 /* We SUCCEED in one of the following cases: */
5301 5301
5302 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5302 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5303 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 5303 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5304 #ifdef emacs 5304 #ifdef emacs
5305 UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1); 5305 UPDATE_SYNTAX_TABLE (pos1 ? pos1 : 1);
5306 #endif 5306 #endif
5307 s1 = SYNTAX (c1); 5307 s1 = SYNTAX (c1);
5308 #ifdef emacs 5308 #ifdef emacs
5309 UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1); 5309 UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
5310 #endif 5310 #endif
5311 s2 = SYNTAX (c2); 5311 s2 = SYNTAX (c2);
5312 5312
5313 if (/* Case 2: Only one of S1 and S2 is Sword. */ 5313 if (/* Case 2: Only one of S1 and S2 is Sword. */
5314 ((s1 == Sword) != (s2 == Sword)) 5314 ((s1 == Sword) != (s2 == Sword))
5315 /* Case 3: Both of S1 and S2 are Sword, and macro 5315 /* Case 3: Both of S1 and S2 are Sword, and macro
5316 WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 5316 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5317 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 5317 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5318 break; 5318 break;
5319 } 5319 }
5320 goto fail; 5320 goto fail;
5321 5321
5336 5336
5337 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5337 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5338 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 5338 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5339 #ifdef emacs 5339 #ifdef emacs
5340 UPDATE_SYNTAX_TABLE (pos1); 5340 UPDATE_SYNTAX_TABLE (pos1);
5341 #endif 5341 #endif
5342 s1 = SYNTAX (c1); 5342 s1 = SYNTAX (c1);
5343 #ifdef emacs 5343 #ifdef emacs
5344 UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1); 5344 UPDATE_SYNTAX_TABLE_FORWARD (pos1 + 1);
5345 #endif 5345 #endif
5346 s2 = SYNTAX (c2); 5346 s2 = SYNTAX (c2);
5347 5347
5348 if (/* Case 2: Only one of S1 and S2 is Sword. */ 5348 if (/* Case 2: Only one of S1 and S2 is Sword. */
5349 ((s1 == Sword) != (s2 == Sword)) 5349 ((s1 == Sword) != (s2 == Sword))
5350 /* Case 3: Both of S1 and S2 are Sword, and macro 5350 /* Case 3: Both of S1 and S2 are Sword, and macro
5351 WORD_BOUNDARY_P (C1, C2) returns nonzero. */ 5351 WORD_BOUNDARY_P (C1, C2) returns nonzero. */
5352 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2))) 5352 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5353 goto fail; 5353 goto fail;
5354 } 5354 }
5355 break; 5355 break;
5356 5356
5357 case wordbeg: 5357 case wordbeg:
5358 DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); 5358 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5359 5359
5360 /* We FAIL in one of the following cases: */ 5360 /* We FAIL in one of the following cases: */
5361 5361
5362 /* Case 1: D is at the end of string. */ 5362 /* Case 1: D is at the end of string. */
5363 if (AT_STRINGS_END (d)) 5363 if (AT_STRINGS_END (d))
5364 goto fail; 5364 goto fail;
5365 else 5365 else
5366 { 5366 {
5367 /* C1 is the character before D, S1 is the syntax of C1, C2 5367 /* C1 is the character before D, S1 is the syntax of C1, C2
5368 is the character at D, and S2 is the syntax of C2. */ 5368 is the character at D, and S2 is the syntax of C2. */
5369 int c1, c2, s1, s2; 5369 int c1, c2, s1, s2;
5370 int pos1 = PTR_TO_OFFSET (d); 5370 int pos1 = PTR_TO_OFFSET (d);
5371 5371
5372 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 5372 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5373 #ifdef emacs 5373 #ifdef emacs
5374 UPDATE_SYNTAX_TABLE (pos1); 5374 UPDATE_SYNTAX_TABLE (pos1);
5375 #endif 5375 #endif
5376 s2 = SYNTAX (c2); 5376 s2 = SYNTAX (c2);
5377 5377
5378 /* Case 2: S2 is not Sword. */ 5378 /* Case 2: S2 is not Sword. */
5379 if (s2 != Sword) 5379 if (s2 != Sword)
5380 goto fail; 5380 goto fail;
5381 5381
5382 /* Case 3: D is not at the beginning of string ... */ 5382 /* Case 3: D is not at the beginning of string ... */
5383 if (!AT_STRINGS_BEG (d)) 5383 if (!AT_STRINGS_BEG (d))
5384 { 5384 {
5385 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2); 5385 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5386 #ifdef emacs 5386 #ifdef emacs
5387 UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1); 5387 UPDATE_SYNTAX_TABLE_BACKWARD (pos1 - 1);
5388 #endif 5388 #endif
5389 s1 = SYNTAX (c1); 5389 s1 = SYNTAX (c1);
5390 5390
5391 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2) 5391 /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5392 returns 0. */ 5392 returns 0. */
5393 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2)) 5393 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5394 goto fail; 5394 goto fail;
5395 } 5395 }
5396 } 5396 }
5397 break; 5397 break;
5398 5398
5399 case wordend: 5399 case wordend:
5400 DEBUG_PRINT1 ("EXECUTING wordend.\n"); 5400 DEBUG_PRINT1 ("EXECUTING wordend.\n");
5401 5401
5402 /* We FAIL in one of the following cases: */ 5402 /* We FAIL in one of the following cases: */
5403 5403
5404 /* Case 1: D is at the beginning of string. */ 5404 /* Case 1: D is at the beginning of string. */
5405 if (AT_STRINGS_BEG (d)) 5405 if (AT_STRINGS_BEG (d))
5422 { 5422 {
5423 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2); 5423 GET_CHAR_AFTER_2 (c2, d, string1, end1, string2, end2);
5424 s2 = SYNTAX (c2); 5424 s2 = SYNTAX (c2);
5425 5425
5426 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2) 5426 /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5427 returns 0. */ 5427 returns 0. */
5428 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2)) 5428 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5429 goto fail; 5429 goto fail;
5430 } 5430 }
5431 } 5431 }
5432 break; 5432 break;
5433 5433
5434 #ifdef emacs 5434 #ifdef emacs
5435 case before_dot: 5435 case before_dot:
5436 DEBUG_PRINT1 ("EXECUTING before_dot.\n"); 5436 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5437 if (PTR_CHAR_POS ((unsigned char *) d) >= PT) 5437 if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
5438 goto fail; 5438 goto fail;
5439 break; 5439 break;
5440 5440
5441 case at_dot: 5441 case at_dot:
5442 DEBUG_PRINT1 ("EXECUTING at_dot.\n"); 5442 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5443 if (PTR_CHAR_POS ((unsigned char *) d) != PT) 5443 if (PTR_CHAR_POS ((unsigned char *) d) != PT)
5444 goto fail; 5444 goto fail;
5445 break; 5445 break;
5446 5446
5447 case after_dot: 5447 case after_dot:
5448 DEBUG_PRINT1 ("EXECUTING after_dot.\n"); 5448 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5449 if (PTR_CHAR_POS ((unsigned char *) d) <= PT) 5449 if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
5450 goto fail; 5450 goto fail;
5451 break; 5451 break;
5452 5452
5453 case syntaxspec: 5453 case syntaxspec:
5454 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); 5454 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5455 mcnt = *p++; 5455 mcnt = *p++;
5456 goto matchsyntax; 5456 goto matchsyntax;
5457 5457
5458 case wordchar: 5458 case wordchar:
5459 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n"); 5459 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5460 mcnt = (int) Sword; 5460 mcnt = (int) Sword;
5461 matchsyntax: 5461 matchsyntax:
5462 PREFETCH (); 5462 PREFETCH ();
5463 #ifdef emacs 5463 #ifdef emacs
5464 { 5464 {
5465 int pos1 = PTR_TO_OFFSET (d); 5465 int pos1 = PTR_TO_OFFSET (d);
5466 UPDATE_SYNTAX_TABLE (pos1); 5466 UPDATE_SYNTAX_TABLE (pos1);
5467 } 5467 }
5468 #endif 5468 #endif
5469 { 5469 {
5470 int c, len; 5470 int c, len;
5471 5471
5472 if (multibyte) 5472 if (multibyte)
5473 /* we must concern about multibyte form, ... */ 5473 /* we must concern about multibyte form, ... */
5479 5479
5480 if (SYNTAX (c) != (enum syntaxcode) mcnt) 5480 if (SYNTAX (c) != (enum syntaxcode) mcnt)
5481 goto fail; 5481 goto fail;
5482 d += len; 5482 d += len;
5483 } 5483 }
5484 SET_REGS_MATCHED (); 5484 SET_REGS_MATCHED ();
5485 break; 5485 break;
5486 5486
5487 case notsyntaxspec: 5487 case notsyntaxspec:
5488 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt); 5488 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5489 mcnt = *p++; 5489 mcnt = *p++;
5490 goto matchnotsyntax; 5490 goto matchnotsyntax;
5491 5491
5492 case notwordchar: 5492 case notwordchar:
5493 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n"); 5493 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5494 mcnt = (int) Sword; 5494 mcnt = (int) Sword;
5495 matchnotsyntax: 5495 matchnotsyntax:
5496 PREFETCH (); 5496 PREFETCH ();
5497 #ifdef emacs 5497 #ifdef emacs
5498 { 5498 {
5499 int pos1 = PTR_TO_OFFSET (d); 5499 int pos1 = PTR_TO_OFFSET (d);
5500 UPDATE_SYNTAX_TABLE (pos1); 5500 UPDATE_SYNTAX_TABLE (pos1);
5501 } 5501 }
5502 #endif 5502 #endif
5503 { 5503 {
5504 int c, len; 5504 int c, len;
5505 5505
5506 if (multibyte) 5506 if (multibyte)
5507 c = STRING_CHAR_AND_LENGTH (d, dend - d, len); 5507 c = STRING_CHAR_AND_LENGTH (d, dend - d, len);