comparison src/regex.c @ 35525:95c2eedea354

(mutually_exclusive_p): Don't blindly handle `charset_not' as if it were a `charset'.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Wed, 24 Jan 2001 23:11:40 +0000
parents c782e22760e0
children 6bd789b0ccc3
comparison
equal deleted inserted replaced
35524:91c357a883aa 35525:95c2eedea354
4261 case endline: 4261 case endline:
4262 case exactn: 4262 case exactn:
4263 { 4263 {
4264 register re_wchar_t c 4264 register re_wchar_t c
4265 = (re_opcode_t) *p2 == endline ? '\n' 4265 = (re_opcode_t) *p2 == endline ? '\n'
4266 : RE_STRING_CHAR(p2 + 2, pend - p2 - 2); 4266 : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
4267 4267
4268 if ((re_opcode_t) *p1 == exactn) 4268 if ((re_opcode_t) *p1 == exactn)
4269 { 4269 {
4270 if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2)) 4270 if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2))
4271 { 4271 {
4306 } 4306 }
4307 } 4307 }
4308 break; 4308 break;
4309 4309
4310 case charset: 4310 case charset:
4311 case charset_not:
4312 { 4311 {
4313 if ((re_opcode_t) *p1 == exactn) 4312 if ((re_opcode_t) *p1 == exactn)
4314 /* Reuse the code above. */ 4313 /* Reuse the code above. */
4315 return mutually_exclusive_p (bufp, p2, p1); 4314 return mutually_exclusive_p (bufp, p2, p1);
4316
4317 4315
4318 /* It is hard to list up all the character in charset 4316 /* It is hard to list up all the character in charset
4319 P2 if it includes multibyte character. Give up in 4317 P2 if it includes multibyte character. Give up in
4320 such case. */ 4318 such case. */
4321 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2)) 4319 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4328 4326
4329 Since we know that all the character listed in 4327 Since we know that all the character listed in
4330 P2 is ASCII, it is enough to test only bitmap 4328 P2 is ASCII, it is enough to test only bitmap
4331 table of P1. */ 4329 table of P1. */
4332 4330
4333 if (*p1 == *p2) 4331 if ((re_opcode_t) *p1 == charset)
4334 { 4332 {
4335 int idx; 4333 int idx;
4336 /* We win if the charset inside the loop 4334 /* We win if the charset inside the loop
4337 has no overlap with the one after the loop. */ 4335 has no overlap with the one after the loop. */
4338 for (idx = 0; 4336 for (idx = 0;
4347 { 4345 {
4348 DEBUG_PRINT1 (" No match => fast loop.\n"); 4346 DEBUG_PRINT1 (" No match => fast loop.\n");
4349 return 1; 4347 return 1;
4350 } 4348 }
4351 } 4349 }
4352 else if ((re_opcode_t) *p1 == charset 4350 else if ((re_opcode_t) *p1 == charset_not)
4353 || (re_opcode_t) *p1 == charset_not)
4354 { 4351 {
4355 int idx; 4352 int idx;
4356 /* We win if the charset_not inside the loop lists 4353 /* We win if the charset_not inside the loop lists
4357 every character listed in the charset after. */ 4354 every character listed in the charset after. */
4358 for (idx = 0; idx < (int) p2[1]; idx++) 4355 for (idx = 0; idx < (int) p2[1]; idx++)
4368 } 4365 }
4369 } 4366 }
4370 } 4367 }
4371 } 4368 }
4372 4369
4370 case charset_not:
4371 switch (SWITCH_ENUM_CAST (*p1))
4372 {
4373 case exactn:
4374 case charset:
4375 /* Reuse the code above. */
4376 return mutually_exclusive_p (bufp, p2, p1);
4377 case charset_not:
4378 /* When we have two charset_not, it's very unlikely that
4379 they don't overlap. For them to be disjoint, the union of the
4380 two sets of excluded chars would have to cover all possible chars,
4381 which, as a matter of fact, is virtually impossible in multibyte buffers. */
4382 ;
4383 }
4384 break;
4385
4373 case wordend: 4386 case wordend:
4374 case notsyntaxspec: 4387 case notsyntaxspec:
4375 return ((re_opcode_t) *p1 == syntaxspec 4388 return ((re_opcode_t) *p1 == syntaxspec
4376 && p1[1] == (op2 == wordend ? Sword : p2[1])); 4389 && p1[1] == (op2 == wordend ? Sword : p2[1]));
4377 4390