changeset 93179:139d0b227fdc

(casify_object): Avoid pathological N^2 worst case if every char is changed and has a different byte-length. (Fupcase_word, Fdowncase_word, Fcapitalize_word, operate_on_word): Fix int -> EMACS_INT.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Mon, 24 Mar 2008 19:42:12 +0000
parents 6c44d22f4d83
children 9add13a0e597
files src/ChangeLog src/casefiddle.c
diffstat 2 files changed, 61 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog	Mon Mar 24 18:43:04 2008 +0000
+++ b/src/ChangeLog	Mon Mar 24 19:42:12 2008 +0000
@@ -1,6 +1,13 @@
+2008-03-24  Stefan Monnier  <monnier@iro.umontreal.ca>
+
+	* casefiddle.c (casify_object): Avoid pathological N^2 worst case if
+	every char is changed and has a different byte-length.
+	(Fupcase_word, Fdowncase_word, Fcapitalize_word, operate_on_word):
+	Fix int -> EMACS_INT.
+
 2008-03-23  David Hansen  <david.hansen@gmx.net>  (tiny change)
 
-	* dbusbind.c (xd_read_message): Removed extra copying of message
+	* dbusbind.c (xd_read_message): Remove extra copying of message
 	strings.  Check for NULL `interface' or `member'.
 
 2008-03-22  Eli Zaretskii  <eliz@gnu.org>
--- a/src/casefiddle.c	Mon Mar 24 18:43:04 2008 +0000
+++ b/src/casefiddle.c	Mon Mar 24 19:42:12 2008 +0000
@@ -75,23 +75,18 @@
       return obj;
     }
 
-  if (STRINGP (obj))
+  if (!STRINGP (obj))
+    wrong_type_argument (Qchar_or_string_p, obj);
+  else if (!STRING_MULTIBYTE (obj))
     {
-      int multibyte = STRING_MULTIBYTE (obj);
-      int i, i_byte, len;
-      int size = SCHARS (obj);
+      EMACS_INT i;
+      EMACS_INT size = SCHARS (obj);
 
       obj = Fcopy_sequence (obj);
-      for (i = i_byte = 0; i < size; i++, i_byte += len)
+      for (i = 0; i < size; i++)
 	{
-	  if (multibyte)
-	    c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
-	  else
-	    {
-	      c = SREF (obj, i_byte);
-	      len = 1;
+	  c = SREF (obj, i);
 	      MAKE_CHAR_MULTIBYTE (c);
-	    }
 	  c1 = c;
 	  if (inword && flag != CASE_CAPITALIZE_UP)
 	    c = DOWNCASE (c);
@@ -102,24 +97,52 @@
 	    inword = (SYNTAX (c) == Sword);
 	  if (c != c1)
 	    {
-	      if (! multibyte)
-		{
 		  MAKE_CHAR_UNIBYTE (c);
-		  SSET (obj, i_byte, c);
-		}
-	      else if (ASCII_CHAR_P (c1) && ASCII_CHAR_P (c))
-		SSET (obj, i_byte,  c);
-	      else
-		{
-		  Faset (obj, make_number (i), make_number (c));
-		  i_byte += CHAR_BYTES (c) - len;
-		}
+	      /* If the char can't be converted to a valid byte, just don't
+		 change it.  */
+	      if (c >= 0 && c < 256)
+		SSET (obj, i, c);
 	    }
 	}
       return obj;
-    }
+		}
+	      else
+		{
+      EMACS_INT i, i_byte, len;
+      EMACS_INT size = SCHARS (obj);
+      USE_SAFE_ALLOCA;
+      unsigned char *dst, *o;
+      /* Over-allocate by 12%: this is a minor overhead, but should be
+	 sufficient in 99.999% of the cases to avoid a reallocation.  */
+      EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
+      SAFE_ALLOCA (dst, void *, o_size);
+      o = dst;
 
-  wrong_type_argument (Qchar_or_string_p, obj);
+      for (i = i_byte = 0; i < size; i++, i_byte += len)
+	{
+	  if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
+	    { /* Not enough space for the next char: grow the destination.  */
+	      unsigned char *old_dst = dst;
+	      o_size += o_size;	/* Probably overkill, but extremely rare.  */
+	      SAFE_ALLOCA (dst, void *, o_size);
+	      bcopy (old_dst, dst, o - old_dst);
+	      o = dst + (o - old_dst);
+	    }
+	  c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, 0, len);
+	  if (inword && flag != CASE_CAPITALIZE_UP)
+	    c = DOWNCASE (c);
+	  else if (!UPPERCASEP (c)
+		   && (!inword || flag != CASE_CAPITALIZE_UP))
+	    c = UPCASE1 (c);
+	  if ((int) flag >= (int) CASE_CAPITALIZE)
+	    inword = (SYNTAX (c) == Sword);
+	  o += CHAR_STRING (c, o);
+	}
+      eassert (o - dst <= o_size);
+      obj = make_multibyte_string (dst, size, o - dst);
+      SAFE_FREE ();
+      return obj;
+    }
 }
 
 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
@@ -329,10 +352,10 @@
   return Qnil;
 }
 
-Lisp_Object
+static Lisp_Object
 operate_on_word (arg, newpoint)
      Lisp_Object arg;
-     int *newpoint;
+     EMACS_INT *newpoint;
 {
   Lisp_Object val;
   int farend;
@@ -358,7 +381,7 @@
      Lisp_Object arg;
 {
   Lisp_Object beg, end;
-  int newpoint;
+  EMACS_INT newpoint;
   XSETFASTINT (beg, PT);
   end = operate_on_word (arg, &newpoint);
   casify_region (CASE_UP, beg, end);
@@ -373,7 +396,7 @@
      Lisp_Object arg;
 {
   Lisp_Object beg, end;
-  int newpoint;
+  EMACS_INT newpoint;
   XSETFASTINT (beg, PT);
   end = operate_on_word (arg, &newpoint);
   casify_region (CASE_DOWN, beg, end);
@@ -390,7 +413,7 @@
      Lisp_Object arg;
 {
   Lisp_Object beg, end;
-  int newpoint;
+  EMACS_INT newpoint;
   XSETFASTINT (beg, PT);
   end = operate_on_word (arg, &newpoint);
   casify_region (CASE_CAPITALIZE, beg, end);