comparison src/regex.c @ 81329:465e2d55267d

(regex_compile): Remove the `regnum' counter. Use bufp->re_nsub instead. Add support for \(?N:RE\).
author Stefan Monnier <monnier@iro.umontreal.ca>
date Tue, 12 Jun 2007 18:40:18 +0000
parents 3d45362f1d38
children ce1b74b859a9 3619e7770f2e
comparison
equal deleted inserted replaced
81328:99942c448a6f 81329:465e2d55267d
2480 /* Address of the place where a forward jump should go to the end of 2480 /* Address of the place where a forward jump should go to the end of
2481 the containing expression. Each alternative of an `or' -- except the 2481 the containing expression. Each alternative of an `or' -- except the
2482 last -- ends with a forward jump of this sort. */ 2482 last -- ends with a forward jump of this sort. */
2483 unsigned char *fixup_alt_jump = 0; 2483 unsigned char *fixup_alt_jump = 0;
2484 2484
2485 /* Counts open-groups as they are encountered. Remembered for the
2486 matching close-group on the compile stack, so the same register
2487 number is put in the stop_memory as the start_memory. */
2488 regnum_t regnum = 0;
2489
2490 /* Work area for range table of charset. */ 2485 /* Work area for range table of charset. */
2491 struct range_table_work_area range_table_work; 2486 struct range_table_work_area range_table_work;
2492 2487
2493 /* If the object matched can contain multibyte characters. */ 2488 /* If the object matched can contain multibyte characters. */
2494 const boolean multibyte = RE_MULTIBYTE_P (bufp); 2489 const boolean multibyte = RE_MULTIBYTE_P (bufp);
3121 goto normal_backslash; 3116 goto normal_backslash;
3122 3117
3123 handle_open: 3118 handle_open:
3124 { 3119 {
3125 int shy = 0; 3120 int shy = 0;
3121 regnum_t regnum = 0;
3126 if (p+1 < pend) 3122 if (p+1 < pend)
3127 { 3123 {
3128 /* Look for a special (?...) construct */ 3124 /* Look for a special (?...) construct */
3129 if ((syntax & RE_SHY_GROUPS) && *p == '?') 3125 if ((syntax & RE_SHY_GROUPS) && *p == '?')
3130 { 3126 {
3131 PATFETCH (c); /* Gobble up the '?'. */ 3127 PATFETCH (c); /* Gobble up the '?'. */
3132 PATFETCH (c); 3128 while (!shy)
3133 switch (c)
3134 { 3129 {
3135 case ':': shy = 1; break; 3130 PATFETCH (c);
3136 default: 3131 switch (c)
3137 /* Only (?:...) is supported right now. */ 3132 {
3138 FREE_STACK_RETURN (REG_BADPAT); 3133 case ':': shy = 1; break;
3134 case '0':
3135 /* An explicitly specified regnum must start
3136 with non-0. */
3137 if (regnum == 0)
3138 FREE_STACK_RETURN (REG_BADPAT);
3139 case '1': case '2': case '3': case '4':
3140 case '5': case '6': case '7': case '8': case '9':
3141 regnum = 10*regnum + (c - '0'); break;
3142 default:
3143 /* Only (?:...) and (?N:...) are supported right now. */
3144 FREE_STACK_RETURN (REG_BADPAT);
3145 }
3139 } 3146 }
3140 } 3147 }
3141 } 3148 }
3142 3149
3143 if (!shy) 3150 if (!shy)
3144 { 3151 regnum = ++bufp->re_nsub;
3145 bufp->re_nsub++; 3152 else if (regnum)
3146 regnum++; 3153 { /* It's actually not shy, but explicitly numbered. */
3154 shy = 0;
3155 if (regnum > bufp->re_nsub)
3156 bufp->re_nsub = regnum;
3157 else if (regnum > bufp->re_nsub
3158 /* Ideally, we'd want to check that the specified
3159 group can't have matched (i.e. all subgroups
3160 using the same regnum are in other branches of
3161 OR patterns), but we don't currently keep track
3162 of enough info to do that easily. */
3163 || group_in_compile_stack (compile_stack, regnum))
3164 FREE_STACK_RETURN (REG_BADPAT);
3147 } 3165 }
3166 else
3167 /* It's really shy. */
3168 regnum = - bufp->re_nsub;
3148 3169
3149 if (COMPILE_STACK_FULL) 3170 if (COMPILE_STACK_FULL)
3150 { 3171 {
3151 RETALLOC (compile_stack.stack, compile_stack.size << 1, 3172 RETALLOC (compile_stack.stack, compile_stack.size << 1,
3152 compile_stack_elt_t); 3173 compile_stack_elt_t);
3161 be valid. */ 3182 be valid. */
3162 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; 3183 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3163 COMPILE_STACK_TOP.fixup_alt_jump 3184 COMPILE_STACK_TOP.fixup_alt_jump
3164 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0; 3185 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3165 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; 3186 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
3166 COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum; 3187 COMPILE_STACK_TOP.regnum = regnum;
3167 3188
3168 /* Do not push a 3189 /* Do not push a start_memory for groups beyond the last one
3169 start_memory for groups beyond the last one we can 3190 we can represent in the compiled pattern. */
3170 represent in the compiled pattern. */ 3191 if (regnum <= MAX_REGNUM && regnum > 0)
3171 if (regnum <= MAX_REGNUM && !shy)
3172 BUF_PUSH_2 (start_memory, regnum); 3192 BUF_PUSH_2 (start_memory, regnum);
3173 3193
3174 compile_stack.avail++; 3194 compile_stack.avail++;
3175 3195
3176 fixup_alt_jump = 0; 3196 fixup_alt_jump = 0;
3211 assert (compile_stack.avail != 0); 3231 assert (compile_stack.avail != 0);
3212 { 3232 {
3213 /* We don't just want to restore into `regnum', because 3233 /* We don't just want to restore into `regnum', because
3214 later groups should continue to be numbered higher, 3234 later groups should continue to be numbered higher,
3215 as in `(ab)c(de)' -- the second group is #2. */ 3235 as in `(ab)c(de)' -- the second group is #2. */
3216 regnum_t this_group_regnum; 3236 regnum_t regnum;
3217 3237
3218 compile_stack.avail--; 3238 compile_stack.avail--;
3219 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; 3239 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3220 fixup_alt_jump 3240 fixup_alt_jump
3221 = COMPILE_STACK_TOP.fixup_alt_jump 3241 = COMPILE_STACK_TOP.fixup_alt_jump
3222 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 3242 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
3223 : 0; 3243 : 0;
3224 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; 3244 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
3225 this_group_regnum = COMPILE_STACK_TOP.regnum; 3245 regnum = COMPILE_STACK_TOP.regnum;
3226 /* If we've reached MAX_REGNUM groups, then this open 3246 /* If we've reached MAX_REGNUM groups, then this open
3227 won't actually generate any code, so we'll have to 3247 won't actually generate any code, so we'll have to
3228 clear pending_exact explicitly. */ 3248 clear pending_exact explicitly. */
3229 pending_exact = 0; 3249 pending_exact = 0;
3230 3250
3231 /* We're at the end of the group, so now we know how many 3251 /* We're at the end of the group, so now we know how many
3232 groups were inside this one. */ 3252 groups were inside this one. */
3233 if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0) 3253 if (regnum <= MAX_REGNUM && regnum > 0)
3234 BUF_PUSH_2 (stop_memory, this_group_regnum); 3254 BUF_PUSH_2 (stop_memory, regnum);
3235 } 3255 }
3236 break; 3256 break;
3237 3257
3238 3258
3239 case '|': /* `\|'. */ 3259 case '|': /* `\|'. */
3555 if (syntax & RE_NO_BK_REFS) 3575 if (syntax & RE_NO_BK_REFS)
3556 goto normal_backslash; 3576 goto normal_backslash;
3557 3577
3558 reg = c - '0'; 3578 reg = c - '0';
3559 3579
3560 /* Can't back reference to a subexpression before its end. */ 3580 if (reg > bufp->re_nsub || reg < 1
3561 if (reg > regnum || group_in_compile_stack (compile_stack, reg)) 3581 /* Can't back reference to a subexp before its end. */
3582 || group_in_compile_stack (compile_stack, reg))
3562 FREE_STACK_RETURN (REG_ESUBREG); 3583 FREE_STACK_RETURN (REG_ESUBREG);
3563 3584
3564 laststart = b; 3585 laststart = b;
3565 BUF_PUSH_2 (duplicate, reg); 3586 BUF_PUSH_2 (duplicate, reg);
3566 } 3587 }