changeset 95856:f13a77e0e34f

* character.h (CHAR_TO_BYTE_SAFE): New macro. * character.c (Fmultibyte_char_to_unibyte): Obey the docstring. * regex.c (RE_CHAR_TO_UNIBYTE): Use the new macro. (WEAK_ALIAS): Simplify. * syntax.c (skip_chars): Don't mark non-byte chars in the fastmap when searching a unibyte buffer.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Thu, 12 Jun 2008 20:25:47 +0000
parents 5bef95d2d506
children 5d1e0dab59fb
files src/ChangeLog src/character.c src/character.h src/regex.c src/syntax.c
diffstat 5 files changed, 28 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Thu Jun 12 17:28:00 2008 +0000
+++ b/src/ChangeLog	Thu Jun 12 20:25:47 2008 +0000
@@ -1,3 +1,12 @@
+2008-06-12  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+	* character.h (CHAR_TO_BYTE_SAFE): New macro.
+	* character.c (Fmultibyte_char_to_unibyte): Obey the docstring.
+	* regex.c (RE_CHAR_TO_UNIBYTE): Use the new macro.
+	(WEAK_ALIAS): Simplify.
+	* syntax.c (skip_chars): Don't mark non-byte chars in the fastmap
+	when searching a unibyte buffer.
+
 2008-06-12  Chong Yidong  <cyd@stupidchicken.com>
 
 	* xfns.c (Fx_select_font): Rename from x-font-dialog.
@@ -8,10 +17,10 @@
 
 2008-06-11  Jason Rumney  <jasonr@gnu.org>
 
-        * w32font.c (w32font_encode_char): Detect missing glyphs that are
-        misreported as space.
-        (add_font_entity_to_list): Support unicode-bmp and unicode-sip
-        as aliases for registry iso10646-1.
+	* w32font.c (w32font_encode_char): Detect missing glyphs that are
+	misreported as space.
+	(add_font_entity_to_list): Support unicode-bmp and unicode-sip
+	as aliases for registry iso10646-1.
 
 2008-06-11  Stefan Monnier  <monnier@iro.umontreal.ca>
 
--- a/src/character.c	Thu Jun 12 17:28:00 2008 +0000
+++ b/src/character.c	Thu Jun 12 20:25:47 2008 +0000
@@ -359,7 +359,7 @@
     return ch;
   else
     {
-      int cu = CHAR_TO_BYTE8 (cm);
+      int cu = CHAR_TO_BYTE_SAFE (cm);
       return make_number (cu);
     }
 }
--- a/src/character.h	Thu Jun 12 17:28:00 2008 +0000
+++ b/src/character.h	Thu Jun 12 20:25:47 2008 +0000
@@ -68,6 +68,13 @@
    ? (c) - 0x3FFF00		\
    : multibyte_char_to_unibyte (c, Qnil))
 
+/* Return the raw 8-bit byte for character C, or -1 if C doesn't
+   correspond to a byte.  C may be evaluated more than once.  */
+#define CHAR_TO_BYTE_SAFE(c)	\
+  (CHAR_BYTE8_P (c)		\
+   ? (c) - 0x3FFF00		\
+   : multibyte_char_to_unibyte_safe (c))
+
 /* Nonzero iff BYTE is the 1st byte of a multibyte form of a character
    that corresponds to a raw 8-bit byte.  */
 #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
--- a/src/regex.c	Thu Jun 12 17:28:00 2008 +0000
+++ b/src/regex.c	Thu Jun 12 20:25:47 2008 +0000
@@ -153,10 +153,7 @@
 
 # define RE_CHAR_TO_MULTIBYTE(c) unibyte_to_multibyte_table[(c)]
 
-# define RE_CHAR_TO_UNIBYTE(c)			\
-  (ASCII_CHAR_P (c) ? (c)			\
-   : CHAR_BYTE8_P (c) ? CHAR_TO_BYTE8 (c)	\
-   : multibyte_char_to_unibyte_safe (c))
+# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
 
 /* Set C a (possibly converted to multibyte) character before P.  P
    points into a string which is the virtual concatenation of STR1
@@ -5574,10 +5571,7 @@
 		if (multibyte)
 		  {
 		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
-		    if (CHAR_BYTE8_P (pat_ch))
-		      pat_ch = CHAR_TO_BYTE8 (pat_ch);
-		    else
-		      pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
+		    pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
 		  }
 		else
 		  {
--- a/src/syntax.c	Thu Jun 12 17:28:00 2008 +0000
+++ b/src/syntax.c	Thu Jun 12 20:25:47 2008 +0000
@@ -1711,7 +1711,11 @@
 	      int c2 = char_ranges[i + 1];
 
 	      for (; c1 <= c2; c1++)
-		fastmap[CHAR_TO_BYTE8 (c1)] = 1;
+		{
+		  int b = CHAR_TO_BYTE_SAFE (c1);
+		  if (b >= 0)
+		    fastmap[b] = 1;
+		}
 	    }
 	}
     }