comparison src/regex.c @ 90054:f2ebccfa87d4

Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-74
Merge from emacs--cvs-trunk--0
Patches applied:
 * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-709
   Update from CVS: src/indent.c (Fvertical_motion): Fix last change.
 * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-710
   - miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-715
   Update from CVS
 * miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-716
   Merge from gnus--rel--5.10
 * miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-74
   Update from CVS
author Miles Bader <miles@gnu.org>
date Wed, 08 Dec 2004 05:02:30 +0000
parents 0fe073a08cef 5f1c0193e984
children eac554634bfa
comparison
Legend: equal, deleted, inserted, replaced
90053:fff5f1a61d92 90054:f2ebccfa87d4
1273 The argument SYNTAX is a bit mask comprised of the various bits 1273 The argument SYNTAX is a bit mask comprised of the various bits
1274 defined in regex.h. We return the old syntax. */ 1274 defined in regex.h. We return the old syntax. */
1275 1275
1276 reg_syntax_t 1276 reg_syntax_t
1277 re_set_syntax (syntax) 1277 re_set_syntax (syntax)
1278 reg_syntax_t syntax; 1278 reg_syntax_t syntax;
1279 { 1279 {
1280 reg_syntax_t ret = re_syntax_options; 1280 reg_syntax_t ret = re_syntax_options;
1281 1281
1282 re_syntax_options = syntax; 1282 re_syntax_options = syntax;
1283 return ret; 1283 return ret;
1284 }
1285 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1286
1287 /* Regexp to use to replace spaces, or NULL meaning don't. */
1288 static re_char *whitespace_regexp;
1289
1290 void
1291 re_set_whitespace_regexp (regexp)
1292 re_char *regexp;
1293 {
1294 whitespace_regexp = regexp;
1284 } 1295 }
1285 WEAK_ALIAS (__re_set_syntax, re_set_syntax) 1296 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1286 1297
1287 /* This table gives an error message for each of the error codes listed 1298 /* This table gives an error message for each of the error codes listed
1288 in regex.h. Obviously the order here has to be same as there. 1299 in regex.h. Obviously the order here has to be same as there.
1991 2002
1992 #endif /* emacs */ 2003 #endif /* emacs */
1993 2004
1994 /* Get the next unsigned number in the uncompiled pattern. */ 2005 /* Get the next unsigned number in the uncompiled pattern. */
1995 #define GET_UNSIGNED_NUMBER(num) \ 2006 #define GET_UNSIGNED_NUMBER(num) \
1996 do { if (p != pend) \ 2007 do { \
1997 { \ 2008 if (p == pend) \
1998 PATFETCH (c); \ 2009 FREE_STACK_RETURN (REG_EBRACE); \
1999 if (c == ' ') \ 2010 else \
2000 FREE_STACK_RETURN (REG_BADBR); \ 2011 { \
2001 while ('0' <= c && c <= '9') \ 2012 PATFETCH (c); \
2002 { \ 2013 while ('0' <= c && c <= '9') \
2003 int prev; \ 2014 { \
2004 if (num < 0) \ 2015 int prev; \
2005 num = 0; \ 2016 if (num < 0) \
2006 prev = num; \ 2017 num = 0; \
2007 num = num * 10 + c - '0'; \ 2018 prev = num; \
2008 if (num / 10 != prev) \ 2019 num = num * 10 + c - '0'; \
2009 FREE_STACK_RETURN (REG_BADBR); \ 2020 if (num / 10 != prev) \
2010 if (p == pend) \ 2021 FREE_STACK_RETURN (REG_BADBR); \
2011 break; \ 2022 if (p == pend) \
2012 PATFETCH (c); \ 2023 FREE_STACK_RETURN (REG_EBRACE); \
2013 } \ 2024 PATFETCH (c); \
2014 if (c == ' ') \ 2025 } \
2015 FREE_STACK_RETURN (REG_BADBR); \ 2026 } \
2016 } \ 2027 } while (0)
2017 } while (0)
2018 2028
2019 #if ! WIDE_CHAR_SUPPORT 2029 #if ! WIDE_CHAR_SUPPORT
2020 2030
2021 /* Map a string to the char class it names (if any). */ 2031 /* Map a string to the char class it names (if any). */
2022 re_wctype_t 2032 re_wctype_t
2493 const boolean multibyte = RE_MULTIBYTE_P (bufp); 2503 const boolean multibyte = RE_MULTIBYTE_P (bufp);
2494 2504
2495 /* If a target of matching can contain multibyte characters. */ 2505 /* If a target of matching can contain multibyte characters. */
2496 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); 2506 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
2497 2507
2508 /* Nonzero if we have pushed down into a subpattern. */
2509 int in_subpattern = 0;
2510
2511 /* These hold the values of p, pattern, and pend from the main
2512 pattern when we have pushed into a subpattern. */
2513 re_char *main_p;
2514 re_char *main_pattern;
2515 re_char *main_pend;
2516
2498 #ifdef DEBUG 2517 #ifdef DEBUG
2499 debug++; 2518 debug++;
2500 DEBUG_PRINT1 ("\nCompiling pattern: "); 2519 DEBUG_PRINT1 ("\nCompiling pattern: ");
2501 if (debug > 0) 2520 if (debug > 0)
2502 { 2521 {
2555 } 2574 }
2556 2575
2557 begalt = b = bufp->buffer; 2576 begalt = b = bufp->buffer;
2558 2577
2559 /* Loop through the uncompiled pattern until we're at the end. */ 2578 /* Loop through the uncompiled pattern until we're at the end. */
2560 while (p != pend) 2579 while (1)
2561 { 2580 {
2581 if (p == pend)
2582 {
2583 /* If this is the end of an included regexp,
2584 pop back to the main regexp and try again. */
2585 if (in_subpattern)
2586 {
2587 in_subpattern = 0;
2588 pattern = main_pattern;
2589 p = main_p;
2590 pend = main_pend;
2591 continue;
2592 }
2593 /* If this is the end of the main regexp, we are done. */
2594 break;
2595 }
2596
2562 PATFETCH (c); 2597 PATFETCH (c);
2563 2598
2564 switch (c) 2599 switch (c)
2565 { 2600 {
2601 case ' ':
2602 {
2603 re_char *p1 = p;
2604
2605 /* If there's no special whitespace regexp, treat
2606 spaces normally. And don't try to do this recursively. */
2607 if (!whitespace_regexp || in_subpattern)
2608 goto normal_char;
2609
2610 /* Peek past following spaces. */
2611 while (p1 != pend)
2612 {
2613 if (*p1 != ' ')
2614 break;
2615 p1++;
2616 }
2617 /* If the spaces are followed by a repetition op,
2618 treat them normally. */
2619 if (p1 != pend
2620 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
2621 || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
2622 goto normal_char;
2623
2624 /* Replace the spaces with the whitespace regexp. */
2625 in_subpattern = 1;
2626 main_p = p1;
2627 main_pend = pend;
2628 main_pattern = pattern;
2629 p = pattern = whitespace_regexp;
2630 pend = p + strlen (p);
2631 break;
2632 }
2633
2566 case '^': 2634 case '^':
2567 { 2635 {
2568 if ( /* If at start of pattern, it's an operator. */ 2636 if ( /* If at start of pattern, it's an operator. */
2569 p == pattern + 1 2637 p == pattern + 1
2570 /* If context independent, it's an operator. */ 2638 /* If context independent, it's an operator. */
3229 /* At least (most) this many matches must be made. */ 3297 /* At least (most) this many matches must be made. */
3230 int lower_bound = 0, upper_bound = -1; 3298 int lower_bound = 0, upper_bound = -1;
3231 3299
3232 beg_interval = p; 3300 beg_interval = p;
3233 3301
3234 if (p == pend)
3235 FREE_STACK_RETURN (REG_EBRACE);
3236
3237 GET_UNSIGNED_NUMBER (lower_bound); 3302 GET_UNSIGNED_NUMBER (lower_bound);
3238 3303
3239 if (c == ',') 3304 if (c == ',')
3240 GET_UNSIGNED_NUMBER (upper_bound); 3305 GET_UNSIGNED_NUMBER (upper_bound);
3241 else 3306 else
3248 3313
3249 if (!(syntax & RE_NO_BK_BRACES)) 3314 if (!(syntax & RE_NO_BK_BRACES))
3250 { 3315 {
3251 if (c != '\\') 3316 if (c != '\\')
3252 FREE_STACK_RETURN (REG_BADBR); 3317 FREE_STACK_RETURN (REG_BADBR);
3253 3318 if (p == pend)
3319 FREE_STACK_RETURN (REG_EESCAPE);
3254 PATFETCH (c); 3320 PATFETCH (c);
3255 } 3321 }
3256 3322
3257 if (c != '}') 3323 if (c != '}')
3258 FREE_STACK_RETURN (REG_BADBR); 3324 FREE_STACK_RETURN (REG_BADBR);