changeset 29010:f62cfa81b0c4

(concat): Handle 8-bit characters correctly. (Fstring_as_unibyte): Be sure to make all 8-bit characters unibyte in the result. (Fstring_as_multibyte): Be sure to make all 8-bit characters in valid multibyte form in the result. (map_char_table): Use MAKE_CHAR instead of MAKE_NON_ASCII_CHAR. (Fbase64_encode_region, Fbase64_encode_string): If base64_encode_1 returns -1, signal an error. (base64_encode_1): New arg MULTIBYTE. Get each character by CHAR_STRING_AND_LENGTH if MULTIBYTE is nonzero. If a multibyte character is found, return -1. (Fbase64_decode_region): Delete code for handling byte-combining. Treat each decoded byte as a unibyte character. (Fbase64_decode_string): Return unibyte string. (Fcompare_strings, concat, string_byte_to_char): Use FETCH_STRING_CHAR_ADVANCE_NO_CHECK instead of FETCH_STRING_CHAR_ADVANCE. (Fstring_lessp): Use FETCH_STRING_CHAR_ADVANCE unconditionally. (mapcar1): If SEQ is string, always use FETCH_STRING_CHAR_ADVANCE.
author Kenichi Handa <handa@m17n.org>
date Fri, 19 May 2000 23:59:50 +0000
parents 85822da9ece9
children b60861d6c1e0
files src/fns.c
diffstat 1 files changed, 104 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/src/fns.c	Fri May 19 23:59:27 2000 +0000
+++ b/src/fns.c	Fri May 19 23:59:50 2000 +0000
@@ -290,7 +290,7 @@
       int c1, c2;
 
       if (STRING_MULTIBYTE (str1))
-	FETCH_STRING_CHAR_ADVANCE (c1, str1, i1, i1_byte);
+	FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c1, str1, i1, i1_byte);
       else
 	{
 	  c1 = XSTRING (str1)->data[i1++];
@@ -298,7 +298,7 @@
 	}
 
       if (STRING_MULTIBYTE (str2))
-	FETCH_STRING_CHAR_ADVANCE (c2, str2, i2, i2_byte);
+	FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c2, str2, i2, i2_byte);
       else
 	{
 	  c2 = XSTRING (str2)->data[i2++];
@@ -367,15 +367,8 @@
 	 characters, not just the bytes.  */
       int c1, c2;
 
-      if (STRING_MULTIBYTE (s1))
-	FETCH_STRING_CHAR_ADVANCE (c1, s1, i1, i1_byte);
-      else
-	c1 = XSTRING (s1)->data[i1++];
-
-      if (STRING_MULTIBYTE (s2))
-	FETCH_STRING_CHAR_ADVANCE (c2, s2, i2, i2_byte);
-      else
-	c2 = XSTRING (s2)->data[i2++];
+      FETCH_STRING_CHAR_ADVANCE (c1, s1, i1, i1_byte);
+      FETCH_STRING_CHAR_ADVANCE (c2, s2, i2, i2_byte);
 
       if (c1 != c2)
 	return c1 < c2 ? Qt : Qnil;
@@ -625,7 +618,7 @@
 		  wrong_type_argument (Qintegerp, ch);
 		this_len_byte = CHAR_BYTES (XINT (ch));
 		result_len_byte += this_len_byte;
-		if (this_len_byte > 1)
+		if (!SINGLE_BYTE_CHAR_P (XINT (ch)))
 		  some_multibyte = 1;
 	      }
 	  else if (BOOL_VECTOR_P (this) && XBOOL_VECTOR (this)->size > 0)
@@ -638,7 +631,7 @@
 		  wrong_type_argument (Qintegerp, ch);
 		this_len_byte = CHAR_BYTES (XINT (ch));
 		result_len_byte += this_len_byte;
-		if (this_len_byte > 1)
+		if (!SINGLE_BYTE_CHAR_P (XINT (ch)))
 		  some_multibyte = 1;
 	      }
 	  else if (STRINGP (this))
@@ -753,9 +746,9 @@
 		int c;
 		if (STRING_MULTIBYTE (this))
 		  {
-		    FETCH_STRING_CHAR_ADVANCE (c, this,
-					       thisindex,
-					       thisindex_byte);
+		    FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, this,
+							thisindex,
+							thisindex_byte);
 		    XSETFASTINT (elt, c);
 		  }
 		else
@@ -799,7 +792,12 @@
 		CHECK_NUMBER (elt, 0);
 		if (SINGLE_BYTE_CHAR_P (XINT (elt)))
 		  {
-		    XSTRING (val)->data[toindex_byte++] = XINT (elt);
+		    if (some_multibyte)
+		      toindex_byte
+			+= CHAR_STRING (XINT (elt),
+					XSTRING (val)->data + toindex_byte);
+		    else
+		      XSTRING (val)->data[toindex_byte++] = XINT (elt);
 		    if (some_multibyte
 			&& toindex_byte > 0
 			&& count_combining (XSTRING (val)->data,
@@ -886,7 +884,8 @@
       while (best_below < char_index)
 	{
 	  int c;
-	  FETCH_STRING_CHAR_ADVANCE (c, string, best_below, best_below_byte);
+	  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string,
+					      best_below, best_below_byte);
 	}
       i = best_below;
       i_byte = best_below_byte;
@@ -958,7 +957,8 @@
       while (best_below_byte < byte_index)
 	{
 	  int c;
-	  FETCH_STRING_CHAR_ADVANCE (c, string, best_below, best_below_byte);
+	  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, string,
+					      best_below, best_below_byte);
 	}
       i = best_below;
       i_byte = best_below_byte;
@@ -1070,7 +1070,9 @@
        1, 1, 0,
   "Return a unibyte string with the same individual bytes as STRING.\n\
 If STRING is unibyte, the result is STRING itself.\n\
-Otherwise it is a newly created string, with no text properties.")
+Otherwise it is a newly created string, with no text properties.\n\
+If STRING is multibyte and contains a character of charset `binary',\n\
+it is converted to the corresponding single byte.")
   (string)
      Lisp_Object string;
 {
@@ -1078,10 +1080,13 @@
 
   if (STRING_MULTIBYTE (string))
     {
-      string = Fcopy_sequence (string);
-      XSTRING (string)->size = STRING_BYTES (XSTRING (string));
-      XSTRING (string)->intervals = NULL_INTERVAL;
-      SET_STRING_BYTES (XSTRING (string), -1);
+      int bytes = STRING_BYTES (XSTRING (string));
+      unsigned char *str = (unsigned char *) xmalloc (bytes);
+
+      bcopy (XSTRING (string)->data, str, bytes);
+      bytes = str_as_unibyte (str, bytes);
+      string = make_unibyte_string (str, bytes);
+      xfree (str);
     }
   return string;
 }
@@ -1090,7 +1095,10 @@
        1, 1, 0,
   "Return a multibyte string with the same individual bytes as STRING.\n\
 If STRING is multibyte, the result is STRING itself.\n\
-Otherwise it is a newly created string, with no text properties.")
+Otherwise it is a newly created string, with no text properties.\n\
+If STRING is unibyte and contains an individual 8-bit byte (i.e. not\n\
+part of multibyte form), it is converted to the corresponding\n\
+multibyte character of charset `binary'.")
   (string)
      Lisp_Object string;
 {
@@ -1098,12 +1106,19 @@
 
   if (! STRING_MULTIBYTE (string))
     {
-      int nbytes = STRING_BYTES (XSTRING (string));
-      int newlen = multibyte_chars_in_text (XSTRING (string)->data, nbytes);
-
-      string = Fcopy_sequence (string);
-      XSTRING (string)->size = newlen;
-      XSTRING (string)->size_byte = nbytes;
+      Lisp_Object new_string;
+      int nchars, nbytes;
+
+      parse_str_as_multibyte (XSTRING (string)->data,
+			      STRING_BYTES (XSTRING (string)),
+			      &nchars, &nbytes);
+      new_string = make_uninit_multibyte_string (nchars, nbytes);
+      bcopy (XSTRING (string)->data, XSTRING (new_string)->data,
+	     STRING_BYTES (XSTRING (string)));
+      if (nbytes != STRING_BYTES (XSTRING (string)))
+	str_as_multibyte (XSTRING (new_string)->data, nbytes,
+			  STRING_BYTES (XSTRING (string)), NULL);
+      string = new_string;
       XSTRING (string)->intervals = NULL_INTERVAL;
     }
   return string;
@@ -2374,7 +2389,7 @@
 	    elt = XCHAR_TABLE (subtable)->defalt;
 	  c1 = depth >= 1 ? XFASTINT (indices[1]) : 0;
 	  c2 = depth >= 2 ? XFASTINT (indices[2]) : 0;
-	  c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
+	  c = MAKE_CHAR (charset, c1, c2);
 	  if (c_function)
 	    (*c_function) (arg, make_number (c), elt);
 	  else
@@ -2513,20 +2528,8 @@
 	    vals[i] = dummy;
 	}
     }
-  else if (STRINGP (seq) && ! STRING_MULTIBYTE (seq))
-    {
-      /* Single-byte string.  */
-      for (i = 0; i < leni; i++)
-	{
-	  XSETFASTINT (dummy, XSTRING (seq)->data[i]);
-	  dummy = call1 (fn, dummy);
-	  if (vals)
-	    vals[i] = dummy;
-	}
-    }
   else if (STRINGP (seq))
     {
-      /* Multi-byte string.  */
       int i_byte;
 
       for (i = 0, i_byte = 0; i < leni;)
@@ -3100,7 +3103,7 @@
    base64 characters.  */
 
 
-static int base64_encode_1 P_ ((const char *, char *, int, int));
+static int base64_encode_1 P_ ((const char *, char *, int, int, int));
 static int base64_decode_1 P_ ((const char *, char *, int));
 
 DEFUN ("base64-encode-region", Fbase64_encode_region, Sbase64_encode_region,
@@ -3135,10 +3138,19 @@
   else
     encoded = (char *) xmalloc (allength);
   encoded_length = base64_encode_1 (BYTE_POS_ADDR (ibeg), encoded, length,
-				    NILP (no_line_break));
+				    NILP (no_line_break),
+				    !NILP (current_buffer->enable_multibyte_characters));
   if (encoded_length > allength)
     abort ();
 
+  if (encoded_length < 0)
+    {
+      /* The encoding wasn't possible. */
+      if (length > MAX_ALLOCA)
+	xfree (encoded);
+      error ("Base64 encoding failed");
+    }
+
   /* Now we have encoded the region, so we insert the new contents
      and delete the old.  (Insert first in order to preserve markers.)  */
   SET_PT_BOTH (XFASTINT (beg), ibeg);
@@ -3187,10 +3199,19 @@
     encoded = (char *) xmalloc (allength);
 
   encoded_length = base64_encode_1 (XSTRING (string)->data,
-				    encoded, length, NILP (no_line_break));
+				    encoded, length, NILP (no_line_break),
+				    STRING_MULTIBYTE (string));
   if (encoded_length > allength)
     abort ();
 
+  if (encoded_length < 0)
+    {
+      /* The encoding wasn't possible. */
+      if (length > MAX_ALLOCA)
+	xfree (encoded);
+      error ("Base64 encoding failed");
+    }
+
   encoded_string = make_unibyte_string (encoded, encoded_length);
   if (allength > MAX_ALLOCA)
     xfree (encoded);
@@ -3199,20 +3220,30 @@
 }
 
 static int
-base64_encode_1 (from, to, length, line_break)
+base64_encode_1 (from, to, length, line_break, multibyte)
      const char *from;
      char *to;
      int length;
      int line_break;
+     int multibyte;
 {
   int counter = 0, i = 0;
   char *e = to;
   unsigned char c;
   unsigned int value;
+  int bytes;
 
   while (i < length)
     {
-      c = from[i++];
+      if (multibyte)
+	{
+	  c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
+	  if (!SINGLE_BYTE_CHAR_P (c))
+	    return -1;
+	  i += bytes;
+	}
+      else
+	c = from[i++];
 
       /* Wrap line every 76 characters.  */
 
@@ -3242,7 +3273,13 @@
 	  break;
 	}
 
-      c = from[i++];
+      if (multibyte)
+	{
+	  c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
+	  i += bytes;
+	}
+      else
+	c = from[i++];
 
       *e++ = base64_value_to_char[value | (0x0f & c >> 4)];
       value = (0x0f & c) << 2;
@@ -3256,7 +3293,13 @@
 	  break;
 	}
 
-      c = from[i++];
+      if (multibyte)
+	{
+	  c = STRING_CHAR_AND_LENGTH (from + i, length - i, bytes);
+	  i += bytes;
+	}
+      else
+	c = from[i++];
 
       *e++ = base64_value_to_char[value | (0x03 & c >> 6)];
       *e++ = base64_value_to_char[0x3f & c];
@@ -3305,27 +3348,19 @@
       error ("Base64 decoding failed");
     }
 
+  inserted_chars = decoded_length;
+  if (!NILP (current_buffer->enable_multibyte_characters))
+    decoded_length = str_to_multibyte (decoded, length, decoded_length);
+
   /* Now we have decoded the region, so we insert the new contents
      and delete the old.  (Insert first in order to preserve markers.)  */
-  /* We insert two spaces, then insert the decoded text in between
-     them, at last, delete those extra two spaces.  This is to avoid
-     byte combining while inserting.  */
-  TEMP_SET_PT_BOTH (XFASTINT (beg), ibeg);
-  insert_1_both ("  ", 2, 2, 0, 1, 0);
-  TEMP_SET_PT_BOTH (XFASTINT (beg) + 1, ibeg + 1);  
-  insert (decoded, decoded_length);
-  inserted_chars = PT - (XFASTINT (beg) + 1);
+  TEMP_SET_PT_BOTH (XFASTINT (beg), ibeg);  
+  insert_1_both (decoded, inserted_chars, decoded_length, 0, 1, 0);
   if (length > MAX_ALLOCA)
     xfree (decoded);
-  /* At first delete the original text.  This never causes byte
-     combining.  */
-  del_range_both (PT + 1, PT_BYTE + 1, XFASTINT (end) + inserted_chars + 2,
-		  iend + decoded_length + 2, 1);
-  /* Next delete the extra spaces.  This will cause byte combining
-     error.  */
-  del_range_both (PT, PT_BYTE, PT + 1, PT_BYTE + 1, 0);
-  del_range_both (XFASTINT (beg), ibeg, XFASTINT (beg) + 1, ibeg + 1, 0);
-  inserted_chars = PT - XFASTINT (beg);
+  /* Delete the original text.  */
+  del_range_both (PT, PT_BYTE, XFASTINT (end) + inserted_chars,
+		  iend + decoded_length, 1);
 
   /* If point was outside of the region, restore it exactly; else just
      move to the beginning of the region.  */
@@ -3361,7 +3396,7 @@
   if (decoded_length > length)
     abort ();
   else if (decoded_length >= 0)
-    decoded_string = make_string (decoded, decoded_length);
+    decoded_string = make_unibyte_string (decoded, decoded_length);
   else
     decoded_string = Qnil;