Mercurial > emacs
comparison src/regex.c @ 90054:f2ebccfa87d4
Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-74
Merge from emacs--cvs-trunk--0
Patches applied:
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-709
Update from CVS: src/indent.c (Fvertical_motion): Fix last change.
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-710
- miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-715
Update from CVS
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-716
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-74
Update from CVS
author | Miles Bader <miles@gnu.org> |
---|---|
date | Wed, 08 Dec 2004 05:02:30 +0000 |
parents | 0fe073a08cef 5f1c0193e984 |
children | eac554634bfa |
comparison
equal
deleted
inserted
replaced
90053:fff5f1a61d92 | 90054:f2ebccfa87d4 |
---|---|
1273 The argument SYNTAX is a bit mask comprised of the various bits | 1273 The argument SYNTAX is a bit mask comprised of the various bits |
1274 defined in regex.h. We return the old syntax. */ | 1274 defined in regex.h. We return the old syntax. */ |
1275 | 1275 |
1276 reg_syntax_t | 1276 reg_syntax_t |
1277 re_set_syntax (syntax) | 1277 re_set_syntax (syntax) |
1278 reg_syntax_t syntax; | 1278 reg_syntax_t syntax; |
1279 { | 1279 { |
1280 reg_syntax_t ret = re_syntax_options; | 1280 reg_syntax_t ret = re_syntax_options; |
1281 | 1281 |
1282 re_syntax_options = syntax; | 1282 re_syntax_options = syntax; |
1283 return ret; | 1283 return ret; |
| | 1284 } |
| | 1285 WEAK_ALIAS (__re_set_syntax, re_set_syntax) |
| | 1286 |
| | 1287 /* Regexp to use to replace spaces, or NULL meaning don't. */ |
| | 1288 static re_char *whitespace_regexp; |
| | 1289 |
| | 1290 void |
| | 1291 re_set_whitespace_regexp (regexp) |
| | 1292 re_char *regexp; |
| | 1293 { |
| | 1294 whitespace_regexp = regexp; |
1284 } | 1295 } |
1285 WEAK_ALIAS (__re_set_syntax, re_set_syntax) | 1296 WEAK_ALIAS (__re_set_syntax, re_set_syntax) |
1286 | 1297 |
1287 /* This table gives an error message for each of the error codes listed | 1298 /* This table gives an error message for each of the error codes listed |
1288 in regex.h. Obviously the order here has to be same as there. | 1299 in regex.h. Obviously the order here has to be same as there. |
1991 | 2002 |
1992 #endif /* emacs */ | 2003 #endif /* emacs */ |
1993 | 2004 |
1994 /* Get the next unsigned number in the uncompiled pattern. */ | 2005 /* Get the next unsigned number in the uncompiled pattern. */ |
1995 #define GET_UNSIGNED_NUMBER(num) \ | 2006 #define GET_UNSIGNED_NUMBER(num) \ |
1996 do { if (p != pend) \ | 2007 do { \ |
1997 { \ | 2008 if (p == pend) \ |
1998 PATFETCH (c); \ | 2009 FREE_STACK_RETURN (REG_EBRACE); \ |
1999 if (c == ' ') \ | 2010 else \ |
2000 FREE_STACK_RETURN (REG_BADBR); \ | 2011 { \ |
2001 while ('0' <= c && c <= '9') \ | 2012 PATFETCH (c); \ |
2002 { \ | 2013 while ('0' <= c && c <= '9') \ |
2003 int prev; \ | 2014 { \ |
2004 if (num < 0) \ | 2015 int prev; \ |
2005 num = 0; \ | 2016 if (num < 0) \ |
2006 prev = num; \ | 2017 num = 0; \ |
2007 num = num * 10 + c - '0'; \ | 2018 prev = num; \ |
2008 if (num / 10 != prev) \ | 2019 num = num * 10 + c - '0'; \ |
2009 FREE_STACK_RETURN (REG_BADBR); \ | 2020 if (num / 10 != prev) \ |
2010 if (p == pend) \ | 2021 FREE_STACK_RETURN (REG_BADBR); \ |
2011 break; \ | 2022 if (p == pend) \ |
2012 PATFETCH (c); \ | 2023 FREE_STACK_RETURN (REG_EBRACE); \ |
2013 } \ | 2024 PATFETCH (c); \ |
2014 if (c == ' ') \ | 2025 } \ |
2015 FREE_STACK_RETURN (REG_BADBR); \ | 2026 } \ |
2016 } \ | 2027 } while (0) |
2017 } while (0) | |
2018 | 2028 |
2019 #if ! WIDE_CHAR_SUPPORT | 2029 #if ! WIDE_CHAR_SUPPORT |
2020 | 2030 |
2021 /* Map a string to the char class it names (if any). */ | 2031 /* Map a string to the char class it names (if any). */ |
2022 re_wctype_t | 2032 re_wctype_t |
2493 const boolean multibyte = RE_MULTIBYTE_P (bufp); | 2503 const boolean multibyte = RE_MULTIBYTE_P (bufp); |
2494 | 2504 |
2495 /* If a target of matching can contain multibyte characters. */ | 2505 /* If a target of matching can contain multibyte characters. */ |
2496 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); | 2506 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp); |
2497 | 2507 |
| | 2508 /* Nonzero if we have pushed down into a subpattern. */ |
| | 2509 int in_subpattern = 0; |
| | 2510 |
| | 2511 /* These hold the values of p, pattern, and pend from the main |
| | 2512 pattern when we have pushed into a subpattern. */ |
| | 2513 re_char *main_p; |
| | 2514 re_char *main_pattern; |
| | 2515 re_char *main_pend; |
| | 2516 |
2498 #ifdef DEBUG | 2517 #ifdef DEBUG |
2499 debug++; | 2518 debug++; |
2500 DEBUG_PRINT1 ("\nCompiling pattern: "); | 2519 DEBUG_PRINT1 ("\nCompiling pattern: "); |
2501 if (debug > 0) | 2520 if (debug > 0) |
2502 { | 2521 { |
2555 } | 2574 } |
2556 | 2575 |
2557 begalt = b = bufp->buffer; | 2576 begalt = b = bufp->buffer; |
2558 | 2577 |
2559 /* Loop through the uncompiled pattern until we're at the end. */ | 2578 /* Loop through the uncompiled pattern until we're at the end. */ |
2560 while (p != pend) | 2579 while (1) |
2561 { | 2580 { |
| | 2581 if (p == pend) |
| | 2582 { |
| | 2583 /* If this is the end of an included regexp, |
| | 2584 pop back to the main regexp and try again. */ |
| | 2585 if (in_subpattern) |
| | 2586 { |
| | 2587 in_subpattern = 0; |
| | 2588 pattern = main_pattern; |
| | 2589 p = main_p; |
| | 2590 pend = main_pend; |
| | 2591 continue; |
| | 2592 } |
| | 2593 /* If this is the end of the main regexp, we are done. */ |
| | 2594 break; |
| | 2595 } |
| | 2596 |
2562 PATFETCH (c); | 2597 PATFETCH (c); |
2563 | 2598 |
2564 switch (c) | 2599 switch (c) |
2565 { | 2600 { |
| | 2601 case ' ': |
| | 2602 { |
| | 2603 re_char *p1 = p; |
| | 2604 |
| | 2605 /* If there's no special whitespace regexp, treat |
| | 2606 spaces normally. And don't try to do this recursively. */ |
| | 2607 if (!whitespace_regexp || in_subpattern) |
| | 2608 goto normal_char; |
| | 2609 |
| | 2610 /* Peek past following spaces. */ |
| | 2611 while (p1 != pend) |
| | 2612 { |
| | 2613 if (*p1 != ' ') |
| | 2614 break; |
| | 2615 p1++; |
| | 2616 } |
| | 2617 /* If the spaces are followed by a repetition op, |
| | 2618 treat them normally. */ |
| | 2619 if (p1 != pend |
| | 2620 && (*p1 == '*' || *p1 == '+' || *p1 == '?' |
| | 2621 || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{'))) |
| | 2622 goto normal_char; |
| | 2623 |
| | 2624 /* Replace the spaces with the whitespace regexp. */ |
| | 2625 in_subpattern = 1; |
| | 2626 main_p = p1; |
| | 2627 main_pend = pend; |
| | 2628 main_pattern = pattern; |
| | 2629 p = pattern = whitespace_regexp; |
| | 2630 pend = p + strlen (p); |
| | 2631 break; |
| | 2632 } |
| | 2633 |
2566 case '^': | 2634 case '^': |
2567 { | 2635 { |
2568 if ( /* If at start of pattern, it's an operator. */ | 2636 if ( /* If at start of pattern, it's an operator. */ |
2569 p == pattern + 1 | 2637 p == pattern + 1 |
2570 /* If context independent, it's an operator. */ | 2638 /* If context independent, it's an operator. */ |
3229 /* At least (most) this many matches must be made. */ | 3297 /* At least (most) this many matches must be made. */ |
3230 int lower_bound = 0, upper_bound = -1; | 3298 int lower_bound = 0, upper_bound = -1; |
3231 | 3299 |
3232 beg_interval = p; | 3300 beg_interval = p; |
3233 | 3301 |
3234 if (p == pend) | |
3235 FREE_STACK_RETURN (REG_EBRACE); | |
3236 | |
3237 GET_UNSIGNED_NUMBER (lower_bound); | 3302 GET_UNSIGNED_NUMBER (lower_bound); |
3238 | 3303 |
3239 if (c == ',') | 3304 if (c == ',') |
3240 GET_UNSIGNED_NUMBER (upper_bound); | 3305 GET_UNSIGNED_NUMBER (upper_bound); |
3241 else | 3306 else |
3248 | 3313 |
3249 if (!(syntax & RE_NO_BK_BRACES)) | 3314 if (!(syntax & RE_NO_BK_BRACES)) |
3250 { | 3315 { |
3251 if (c != '\\') | 3316 if (c != '\\') |
3252 FREE_STACK_RETURN (REG_BADBR); | 3317 FREE_STACK_RETURN (REG_BADBR); |
3253 | 3318 if (p == pend) |
| | 3319 FREE_STACK_RETURN (REG_EESCAPE); |
3254 PATFETCH (c); | 3320 PATFETCH (c); |
3255 } | 3321 } |
3256 | 3322 |
3257 if (c != '}') | 3323 if (c != '}') |
3258 FREE_STACK_RETURN (REG_BADBR); | 3324 FREE_STACK_RETURN (REG_BADBR); |