Mercurial > emacs
comparison src/regex.c @ 81329:465e2d55267d
(regex_compile): Remove the `regnum' counter.
Use bufp->re_nsub instead. Add support for \(?N:RE\).
author | Stefan Monnier <monnier@iro.umontreal.ca> |
---|---|
date | Tue, 12 Jun 2007 18:40:18 +0000 |
parents | 3d45362f1d38 |
children | ce1b74b859a9 3619e7770f2e |
comparison legend: equal | deleted | inserted | replaced
81328:99942c448a6f | 81329:465e2d55267d |
---|---|
2480 /* Address of the place where a forward jump should go to the end of | 2480 /* Address of the place where a forward jump should go to the end of |
2481 the containing expression. Each alternative of an `or' -- except the | 2481 the containing expression. Each alternative of an `or' -- except the |
2482 last -- ends with a forward jump of this sort. */ | 2482 last -- ends with a forward jump of this sort. */ |
2483 unsigned char *fixup_alt_jump = 0; | 2483 unsigned char *fixup_alt_jump = 0; |
2484 | 2484 |
2485 /* Counts open-groups as they are encountered. Remembered for the | |
2486 matching close-group on the compile stack, so the same register | |
2487 number is put in the stop_memory as the start_memory. */ | |
2488 regnum_t regnum = 0; | |
2489 | |
2490 /* Work area for range table of charset. */ | 2485 /* Work area for range table of charset. */ |
2491 struct range_table_work_area range_table_work; | 2486 struct range_table_work_area range_table_work; |
2492 | 2487 |
2493 /* If the object matched can contain multibyte characters. */ | 2488 /* If the object matched can contain multibyte characters. */ |
2494 const boolean multibyte = RE_MULTIBYTE_P (bufp); | 2489 const boolean multibyte = RE_MULTIBYTE_P (bufp); |
3121 goto normal_backslash; | 3116 goto normal_backslash; |
3122 | 3117 |
3123 handle_open: | 3118 handle_open: |
3124 { | 3119 { |
3125 int shy = 0; | 3120 int shy = 0; |
3121 regnum_t regnum = 0; | |
3126 if (p+1 < pend) | 3122 if (p+1 < pend) |
3127 { | 3123 { |
3128 /* Look for a special (?...) construct */ | 3124 /* Look for a special (?...) construct */ |
3129 if ((syntax & RE_SHY_GROUPS) && *p == '?') | 3125 if ((syntax & RE_SHY_GROUPS) && *p == '?') |
3130 { | 3126 { |
3131 PATFETCH (c); /* Gobble up the '?'. */ | 3127 PATFETCH (c); /* Gobble up the '?'. */ |
3132 PATFETCH (c); | 3128 while (!shy) |
3133 switch (c) | |
3134 { | 3129 { |
3135 case ':': shy = 1; break; | 3130 PATFETCH (c); |
3136 default: | 3131 switch (c) |
3137 /* Only (?:...) is supported right now. */ | 3132 { |
3138 FREE_STACK_RETURN (REG_BADPAT); | 3133 case ':': shy = 1; break; |
3134 case '0': | |
3135 /* An explicitly specified regnum must start | |
3136 with non-0. */ | |
3137 if (regnum == 0) | |
3138 FREE_STACK_RETURN (REG_BADPAT); | |
3139 case '1': case '2': case '3': case '4': | |
3140 case '5': case '6': case '7': case '8': case '9': | |
3141 regnum = 10*regnum + (c - '0'); break; | |
3142 default: | |
3143 /* Only (?:...) is supported right now. */ | |
3144 FREE_STACK_RETURN (REG_BADPAT); | |
3145 } | |
3139 } | 3146 } |
3140 } | 3147 } |
3141 } | 3148 } |
3142 | 3149 |
3143 if (!shy) | 3150 if (!shy) |
3144 { | 3151 regnum = ++bufp->re_nsub; |
3145 bufp->re_nsub++; | 3152 else if (regnum) |
3146 regnum++; | 3153 { /* It's actually not shy, but explicitly numbered. */ |
3154 shy = 0; | |
3155 if (regnum > bufp->re_nsub) | |
3156 bufp->re_nsub = regnum; | |
3157 else if (regnum > bufp->re_nsub | |
3158 /* Ideally, we'd want to check that the specified | |
3159 group can't have matched (i.e. all subgroups | |
3160 using the same regnum are in other branches of | |
3161 OR patterns), but we don't currently keep track | |
3162 of enough info to do that easily. */ | |
3163 || group_in_compile_stack (compile_stack, regnum)) | |
3164 FREE_STACK_RETURN (REG_BADPAT); | |
3147 } | 3165 } |
3166 else | |
3167 /* It's really shy. */ | |
3168 regnum = - bufp->re_nsub; | |
3148 | 3169 |
3149 if (COMPILE_STACK_FULL) | 3170 if (COMPILE_STACK_FULL) |
3150 { | 3171 { |
3151 RETALLOC (compile_stack.stack, compile_stack.size << 1, | 3172 RETALLOC (compile_stack.stack, compile_stack.size << 1, |
3152 compile_stack_elt_t); | 3173 compile_stack_elt_t); |
3161 be valid. */ | 3182 be valid. */ |
3162 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; | 3183 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; |
3163 COMPILE_STACK_TOP.fixup_alt_jump | 3184 COMPILE_STACK_TOP.fixup_alt_jump |
3164 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; | 3185 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; |
3165 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; | 3186 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; |
3166 COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum; | 3187 COMPILE_STACK_TOP.regnum = regnum; |
3167 | 3188 |
3168 /* Do not push a | 3189 /* Do not push a start_memory for groups beyond the last one |
3169 start_memory for groups beyond the last one we can | 3190 we can represent in the compiled pattern. */ |
3170 represent in the compiled pattern. */ | 3191 if (regnum <= MAX_REGNUM && regnum > 0) |
3171 if (regnum <= MAX_REGNUM && !shy) | |
3172 BUF_PUSH_2 (start_memory, regnum); | 3192 BUF_PUSH_2 (start_memory, regnum); |
3173 | 3193 |
3174 compile_stack.avail++; | 3194 compile_stack.avail++; |
3175 | 3195 |
3176 fixup_alt_jump = 0; | 3196 fixup_alt_jump = 0; |
3211 assert (compile_stack.avail != 0); | 3231 assert (compile_stack.avail != 0); |
3212 { | 3232 { |
3213 /* We don't just want to restore into `regnum', because | 3233 /* We don't just want to restore into `regnum', because |
3214 later groups should continue to be numbered higher, | 3234 later groups should continue to be numbered higher, |
3215 as in `(ab)c(de)' -- the second group is #2. */ | 3235 as in `(ab)c(de)' -- the second group is #2. */ |
3216 regnum_t this_group_regnum; | 3236 regnum_t regnum; |
3217 | 3237 |
3218 compile_stack.avail--; | 3238 compile_stack.avail--; |
3219 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; | 3239 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; |
3220 fixup_alt_jump | 3240 fixup_alt_jump |
3221 = COMPILE_STACK_TOP.fixup_alt_jump | 3241 = COMPILE_STACK_TOP.fixup_alt_jump |
3222 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 | 3242 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 |
3223 : 0; | 3243 : 0; |
3224 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; | 3244 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; |
3225 this_group_regnum = COMPILE_STACK_TOP.regnum; | 3245 regnum = COMPILE_STACK_TOP.regnum; |
3226 /* If we've reached MAX_REGNUM groups, then this open | 3246 /* If we've reached MAX_REGNUM groups, then this open |
3227 won't actually generate any code, so we'll have to | 3247 won't actually generate any code, so we'll have to |
3228 clear pending_exact explicitly. */ | 3248 clear pending_exact explicitly. */ |
3229 pending_exact = 0; | 3249 pending_exact = 0; |
3230 | 3250 |
3231 /* We're at the end of the group, so now we know how many | 3251 /* We're at the end of the group, so now we know how many |
3232 groups were inside this one. */ | 3252 groups were inside this one. */ |
3233 if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0) | 3253 if (regnum <= MAX_REGNUM && regnum > 0) |
3234 BUF_PUSH_2 (stop_memory, this_group_regnum); | 3254 BUF_PUSH_2 (stop_memory, regnum); |
3235 } | 3255 } |
3236 break; | 3256 break; |
3237 | 3257 |
3238 | 3258 |
3239 case '|': /* `\|'. */ | 3259 case '|': /* `\|'. */ |
3555 if (syntax & RE_NO_BK_REFS) | 3575 if (syntax & RE_NO_BK_REFS) |
3556 goto normal_backslash; | 3576 goto normal_backslash; |
3557 | 3577 |
3558 reg = c - '0'; | 3578 reg = c - '0'; |
3559 | 3579 |
3560 /* Can't back reference to a subexpression before its end. */ | 3580 if (reg > bufp->re_nsub || reg < 1 |
3561 if (reg > regnum || group_in_compile_stack (compile_stack, reg)) | 3581 /* Can't back reference to a subexp before its end. */ |
3582 || group_in_compile_stack (compile_stack, reg)) | |
3562 FREE_STACK_RETURN (REG_ESUBREG); | 3583 FREE_STACK_RETURN (REG_ESUBREG); |
3563 | 3584 |
3564 laststart = b; | 3585 laststart = b; |
3565 BUF_PUSH_2 (duplicate, reg); | 3586 BUF_PUSH_2 (duplicate, reg); |
3566 } | 3587 } |