changeset 23325:bbd06336cd0c

(check_composing_code): If the current composing sequence doesn't end properly, return -1. (DECODE_CHARACTER_ASCII): Update coding->composed_chars. (DECODE_CHARACTER_DIMENSION1): Likewise. (decode_coding_iso2022): Check validity of a composing sequence. (code_convert_string): If the length of text to be converted is shrunk to zero, don't perform code conversion. (shrink_decoding_region): Fix previous change.
author Kenichi Handa <handa@m17n.org>
date Sat, 26 Sep 1998 04:20:48 +0000
parents 4c5f12c6041c
children df3f641c9ca1
files src/coding.c
diffstat 1 files changed, 82 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/src/coding.c	Sat Sep 26 04:20:48 1998 +0000
+++ b/src/coding.c	Sat Sep 26 04:20:48 1998 +0000
@@ -213,15 +213,18 @@
 
 /* Decode one ASCII character C.  */
 
-#define DECODE_CHARACTER_ASCII(c)				\
-  do {								\
-    if (COMPOSING_P (coding->composing))			\
-      *dst++ = 0xA0, *dst++ = (c) | 0x80;			\
-    else							\
-      {								\
-	*dst++ = (c);						\
-	coding->produced_char++;				\
-      }								\
+#define DECODE_CHARACTER_ASCII(c)		\
+  do {						\
+    if (COMPOSING_P (coding->composing))	\
+      {						\
+	*dst++ = 0xA0, *dst++ = (c) | 0x80;	\
+	coding->composed_chars++;		\
+      }						\
+    else					\
+      {						\
+	*dst++ = (c);				\
+	coding->produced_char++;		\
+      }						\
   } while (0)
 
 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@
   do {									\
     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);	\
     if (COMPOSING_P (coding->composing))				\
-      *dst++ = leading_code + 0x20;					\
+      {									\
+	*dst++ = leading_code + 0x20;					\
+	coding->composed_chars++;					\
+      }									\
     else								\
       {									\
 	*dst++ = leading_code;						\
@@ -997,9 +1003,7 @@
 	    invalid_code_found = 1;
 	}
     }
-  return (invalid_code_found
-	  ? src - src_start
-	  : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
+  return (invalid_code_found ? src - src_start : -1);
 }
 
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
@@ -1030,6 +1034,7 @@
     translation_table = Vstandard_translation_table_for_decode;
 
   coding->produced_char = 0;
+  coding->composed_chars = 0;
   coding->fake_multibyte = 0;
   while (src < src_end && (dst_bytes
 			   ? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@
 		    coding->composing = (c1 == '0'
 					 ? COMPOSING_NO_RULE_HEAD
 					 : COMPOSING_WITH_RULE_HEAD);
-		    coding->produced_char++;
+		    coding->composed_chars = 0;
 		  }
 		else if (result1 > 0)
 		  {
@@ -1253,6 +1258,7 @@
 			src += result1;
 			dst += result1 + 2;
 			coding->produced_char += result1 + 2;
+			coding->fake_multibyte = 1;
 		      }
 		    else
 		      {
@@ -1266,6 +1272,28 @@
 	      break;
 
 	    case '1':		/* end composing */
+	      if (coding->composed_chars > 0)
+		{
+		  if (coding->composed_chars == 1)
+		    {
+		      unsigned char *this_char_start = dst;
+		      int this_bytes;
+
+		      /* Only one character is in the composing
+			 sequence.  Make it a normal character.  */
+		      while (*--this_char_start != LEADING_CODE_COMPOSITION);
+		      dst = (this_char_start
+			     + (coding->composing == COMPOSING_NO_RULE_TAIL
+				? 1 : 2));
+		      *dst -= 0x20;
+		      if (*dst == 0x80)
+			*++dst &= 0x7F;
+		      this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+		      while (this_bytes--) *this_char_start++ = *dst++;
+		      dst = this_char_start;
+		    }
+		  coding->produced_char++;
+		}
 	      coding->composing = COMPOSING_NO;
 	      break;
 
@@ -3938,30 +3966,45 @@
 
 	case CODING_CATEGORY_IDX_ISO_7:
 	case CODING_CATEGORY_IDX_ISO_7_TIGHT:
-	  /* We can skip all charactes at the tail except for ESC and
-             the following 2-byte at the tail.  */
-	  if (eol_conversion)
-	    while (begp < endp
-		   && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-	      endp--;
-	  else
-	    while (begp < endp
-		   && (c = endp[-1]) != ISO_CODE_ESC)
-	      endp--;
-	  /* Do not consider LF as ascii if preceded by CR, since that
-             confuses eol decoding. */
-	  if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
-	    endp++;
-	  if (begp < endp && endp[-1] == ISO_CODE_ESC)
-	    {
-	      if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
-		/* This is an ASCII designation sequence.  We can
-                    surely skip the tail.  */
-		endp += 2;
-	      else
-		/* Hmmm, we can't skip the tail.  */
-		endp = endp_orig;
-	    }
+	  {
+	    /* We can skip all characters at the tail except for 8-bit
+	       codes and ESC and the following 2-byte at the tail.  */
+	    unsigned char *eight_bit = NULL;
+
+	    if (eol_conversion)
+	      while (begp < endp
+		     && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
+		{
+		  if (!eight_bit && c & 0x80) eight_bit = endp;
+		  endp--;
+		}
+	    else
+	      while (begp < endp
+		     && (c = endp[-1]) != ISO_CODE_ESC)
+		{
+		  if (!eight_bit && c & 0x80) eight_bit = endp;
+		  endp--;
+		}
+	    /* Do not consider LF as ascii if preceded by CR, since that
+	       confuses eol decoding. */
+	    if (begp < endp && endp < endp_orig
+		&& endp[-1] == '\r' && endp[0] == '\n')
+	      endp++;
+	    if (begp < endp && endp[-1] == ISO_CODE_ESC)
+	      {
+		if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+		  /* This is an ASCII designation sequence.  We can
+		     surely skip the tail.  But, if we have
+		     encountered an 8-bit code, skip only the codes
+		     after that.  */
+		  endp = eight_bit ? eight_bit : endp + 2;
+		else
+		  /* Hmmm, we can't skip the tail.  */
+		  endp = endp_orig;
+	      }
+	    else if (eight_bit)
+	      endp = eight_bit;
+	  }
 	}
     }
   *beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@
       else
 	shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
     }
-  if (from == to_byte
-      && ! (coding->mode & CODING_MODE_LAST_BLOCK
-	    && CODING_REQUIRE_FLUSHING (coding)))
+  if (from == to_byte)
     return (nocopy ? str : Fcopy_sequence (str));
 
   if (encodep)