changeset 89852:b636ae1109c6

(MAX_ANNOTATION_LENGTH): Adjusted for the change of annotation data format. (ADD_ANNOTATION_DATA, ADD_COMPOSITION_DATA, ADD_CHARSET_DATA): Change arguments FROM and TO to single argument NCHARS. Caller changed. (decode_coding_utf_8): Pay attention to coding->charbuf_used. (decode_coding_utf_16, decode_coding_emacs_mule) (decode_coding_iso_2022, decode_coding_sjis, decode_coding_big5) (decode_coding_ccl, decode_coding_charset): Likewise. (get_translation): New function. (produce_chars): New arguments translation_table and last_block. Translate characters here. Return number of carryover chars. Caller changed. (produce_composition): New argument pos. Caller changed. Adjusted for the change of annotation data format. (produce_charset, produce_annotation): Likewise. (decode_coding): Don't call translate_chars.
author Kenichi Handa <handa@m17n.org>
date Wed, 10 Mar 2004 23:11:18 +0000
parents b2d1259417e3
children e13ef0e2d2b0
files src/coding.c
diffstat 1 files changed, 169 insertions(+), 108 deletions(-) [+]
line wrap: on
line diff
--- a/src/coding.c	Tue Mar 09 02:30:13 2004 +0000
+++ b/src/coding.c	Wed Mar 10 23:11:18 2004 +0000
@@ -212,8 +212,8 @@
      when there's no room in CHARBUF for a decoded character.  */
   unsigned char *src_base;
   /* A buffer to produce decoded characters.  */
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end = coding->charbuf + coding->charbuf_size;
   int multibytep = coding->src_multibyte;
 
   while (1)
@@ -1025,15 +1025,14 @@
 
 /* Maximum length of annotation data (sum of annotations for
    composition and charset).  */
-#define MAX_ANNOTATION_LENGTH (5 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 5)
+#define MAX_ANNOTATION_LENGTH (4 + (MAX_COMPOSITION_COMPONENTS * 2) - 1 + 4)
 
 /* An annotation data is stored in the array coding->charbuf in this
    format:
-     [ -LENGTH ANNOTATION_MASK FROM TO ... ]
+     [ -LENGTH ANNOTATION_MASK NCHARS ... ]
    LENGTH is the number of elements in the annotation.
    ANNOTATION_MASK is one of CODING_ANNOTATE_XXX_MASK.
-   FROM and TO specify the range of text annotated.  They are relative
-   to coding->src_pos (on encoding) or coding->dst_pos (on decoding).
+   NCHARS is the number of characters in the text annotated.
 
    The format of the following elements depend on ANNOTATION_MASK.
 
@@ -1047,26 +1046,25 @@
    In the case of CODING_ANNOTATE_CHARSET_MASK, one element CHARSET-ID
    follows.  */
 
-#define ADD_ANNOTATION_DATA(buf, len, mask, from, to)	\
+#define ADD_ANNOTATION_DATA(buf, len, mask, nchars)	\
   do {							\
     *(buf)++ = -(len);					\
     *(buf)++ = (mask);					\
-    *(buf)++ = (from);					\
-    *(buf)++ = (to);					\
+    *(buf)++ = (nchars);				\
     coding->annotated = 1;				\
   } while (0);
 
-#define ADD_COMPOSITION_DATA(buf, from, to, method)			      \
-  do {									      \
-    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_COMPOSITION_MASK, from, to); \
-    *buf++ = method;							      \
+#define ADD_COMPOSITION_DATA(buf, nchars, method)			    \
+  do {									    \
+    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_COMPOSITION_MASK, nchars); \
+    *buf++ = method;							    \
   } while (0)
 
 
-#define ADD_CHARSET_DATA(buf, from, to, id)				  \
-  do {									  \
-    ADD_ANNOTATION_DATA (buf, 5, CODING_ANNOTATE_CHARSET_MASK, from, to); \
-    *buf++ = id;							  \
+#define ADD_CHARSET_DATA(buf, nchars, id)				\
+  do {									\
+    ADD_ANNOTATION_DATA (buf, 4, CODING_ANNOTATE_CHARSET_MASK, nchars);	\
+    *buf++ = id;							\
   } while (0)
 
 
@@ -1166,8 +1164,8 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end = coding->charbuf + coding->charbuf_size;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   Lisp_Object attr, charset_list;
@@ -1413,8 +1411,8 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end = coding->charbuf + coding->charbuf_size;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
@@ -1921,7 +1919,6 @@
        number of characters composed by this composition.  */		\
     enum composition_method method = c - 0xF2;				\
     int *charbuf_base = charbuf;					\
-    int from, to;							\
     int consumed_chars_limit;						\
     int nbytes, nchars;							\
 									\
@@ -1935,9 +1932,7 @@
     if (c < 0)								\
       goto invalid_code;						\
     nchars = c - 0xA0;							\
-    from = coding->produced + char_offset;				\
-    to = from + nchars;							\
-    ADD_COMPOSITION_DATA (charbuf, from, to, method);			\
+    ADD_COMPOSITION_DATA (charbuf, nchars, method);			\
     consumed_chars_limit = consumed_chars_base + nbytes;		\
     if (method != COMPOSITION_RELATIVE)					\
       {									\
@@ -1965,7 +1960,6 @@
     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];		\
     int *buf = components;					\
     int i, j;							\
-    int from, to;						\
 								\
     src = src_base;						\
     ONE_MORE_BYTE (c);		/* skip 0x80 */			\
@@ -1973,9 +1967,7 @@
       DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);			\
     if (i < 2)							\
       goto invalid_code;					\
-    from = coding->produced_char + char_offset;			\
-    to = from + i;						\
-    ADD_COMPOSITION_DATA (charbuf, from, to, method);		\
+    ADD_COMPOSITION_DATA (charbuf, i, method);			\
     for (j = 0; j < i; j++)					\
       *charbuf++ = components[j];				\
   } while (0)
@@ -1989,7 +1981,6 @@
     int components[MAX_COMPOSITION_COMPONENTS * 2 - 1];		\
     int *buf = components;					\
     int i, j;							\
-    int from, to;						\
 								\
     DECODE_EMACS_MULE_COMPOSITION_CHAR (buf);			\
     for (i = 0; i < MAX_COMPOSITION_COMPONENTS; i++)		\
@@ -2001,9 +1992,7 @@
       goto invalid_code;					\
     if (charbuf + i + (i / 2) + 1 < charbuf_end)		\
       goto no_more_source;					\
-    from = coding->produced_char + char_offset;			\
-    to = from + i;						\
-    ADD_COMPOSITION_DATA (buf, from, to, method);		\
+    ADD_COMPOSITION_DATA (buf, i, method);			\
     for (j = 0; j < i; j++)					\
       *charbuf++ = components[j];				\
     for (j = 0; j < i; j += 2)					\
@@ -2018,8 +2007,9 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end
+    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   Lisp_Object attrs, charset_list;
@@ -2082,7 +2072,7 @@
 	  if (last_id != id)
 	    {
 	      if (last_id != charset_ascii)
-		ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+		ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 	      last_id = id;
 	      last_offset = char_offset;
 	    }
@@ -2104,7 +2094,7 @@
 
  no_more_source:
   if (last_id != charset_ascii)
-    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
   coding->consumed_char += consumed_chars_base;
   coding->consumed = src_base - coding->source;
   coding->charbuf_used = charbuf - coding->charbuf;
@@ -2810,10 +2800,8 @@
 		  : (component_idx + 1) / 2);				\
     int i;								\
     int *saved_charbuf = charbuf;					\
-    int from = char_offset;						\
-    int to = from + nchars;						\
 									\
-    ADD_COMPOSITION_DATA (charbuf, from, to, method);			\
+    ADD_COMPOSITION_DATA (charbuf, nchars, method);			\
     if (method != COMPOSITION_RELATIVE)					\
       {									\
 	if (component_len == 0)						\
@@ -2869,9 +2857,9 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
   int *charbuf_end
-    = charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
+    = coding->charbuf + coding->charbuf_size - 4 - MAX_ANNOTATION_LENGTH;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
@@ -3224,7 +3212,7 @@
 	  && last_id != charset->id)
 	{
 	  if (last_id != charset_ascii)
-	    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+	    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 	  last_id = charset->id;
 	  last_offset = char_offset;
 	}
@@ -3293,7 +3281,7 @@
 
  no_more_source:
   if (last_id != charset_ascii)
-    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
   coding->consumed_char += consumed_chars_base;
   coding->consumed = src_base - coding->source;
   coding->charbuf_used = charbuf - coding->charbuf;
@@ -3995,8 +3983,9 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end
+    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   struct charset *charset_roman, *charset_kanji, *charset_kana;
@@ -4064,7 +4053,7 @@
 	  && last_id != charset->id)
 	{
 	  if (last_id != charset_ascii)
-	    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+	    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 	  last_id = charset->id;
 	  last_offset = char_offset;
 	}
@@ -4084,7 +4073,7 @@
 
  no_more_source:
   if (last_id != charset_ascii)
-    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
   coding->consumed_char += consumed_chars_base;
   coding->consumed = src_base - coding->source;
   coding->charbuf_used = charbuf - coding->charbuf;
@@ -4097,8 +4086,9 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end
+    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   struct charset *charset_roman, *charset_big5;
@@ -4144,7 +4134,7 @@
 	  && last_id != charset->id)
 	{
 	  if (last_id != charset_ascii)
-	    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+	    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 	  last_id = charset->id;
 	  last_offset = char_offset;
 	}
@@ -4164,7 +4154,7 @@
 
  no_more_source:
   if (last_id != charset_ascii)
-    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
   coding->consumed_char += consumed_chars_base;
   coding->consumed = src_base - coding->source;
   coding->charbuf_used = charbuf - coding->charbuf;
@@ -4396,8 +4386,8 @@
 {
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end = coding->charbuf + coding->charbuf_size;
   int consumed_chars = 0;
   int multibytep = coding->src_multibyte;
   struct ccl_program ccl;
@@ -4683,8 +4673,9 @@
   const unsigned char *src = coding->source + coding->consumed;
   const unsigned char *src_end = coding->source + coding->src_bytes;
   const unsigned char *src_base;
-  int *charbuf = coding->charbuf;
-  int *charbuf_end = charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
+  int *charbuf = coding->charbuf + coding->charbuf_used;
+  int *charbuf_end
+    = coding->charbuf + coding->charbuf_size - MAX_ANNOTATION_LENGTH;
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   Lisp_Object attrs, charset_list, valids;
@@ -4759,7 +4750,7 @@
 	  && last_id != charset->id)
 	{
 	  if (last_id != charset_ascii)
-	    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+	    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
 	  last_id = charset->id;
 	  last_offset = char_offset;
 	}
@@ -4779,7 +4770,7 @@
 
  no_more_source:
   if (last_id != charset_ascii)
-    ADD_CHARSET_DATA (charbuf, last_offset, char_offset, last_id);
+    ADD_CHARSET_DATA (charbuf, char_offset - last_offset, last_id);
   coding->consumed_char += consumed_chars_base;
   coding->consumed = src_base - coding->source;
   coding->charbuf_used = charbuf - coding->charbuf;
@@ -5573,53 +5564,108 @@
     }
 }
 
+static Lisp_Object
+get_translation (val, buf, buf_end, last_block, from_nchars, to_nchars)
+     Lisp_Object val;
+     int *buf, *buf_end;
+     int last_block;
+     int *from_nchars, *to_nchars;
+{
+  /* Interpret the translation VAL for the characters that start at
+     BUF, where BUF_END points just past the last available
+     character.
+
+     VAL is TO-CHAR, [TO-CHAR ...], ([FROM-CHAR ...] .  TO-CHAR), or
+     ([FROM-CHAR ...] . [TO-CHAR ...]).
+
+     When VAL is a cons, its car is a vector of FROM-CHARs that must
+     match BUF[0], BUF[1], ... one by one:
+       - If BUF_END is reached before every FROM-CHAR was compared,
+	 return Qt when LAST_BLOCK is zero and Qnil when it is
+	 nonzero.
+       - If some FROM-CHAR differs from the buffer, return Qnil.
+       - Otherwise store the length of the FROM-CHAR vector in
+	 *FROM_NCHARS and go on with the cdr as the TO part.
+     Neither BUF nor the counters are modified on the early returns
+     above.
+
+     The TO part is then handled as follows: if it is a vector, its
+     first element is stored in *BUF and its size in *TO_NCHARS;
+     otherwise it is taken as a single character and stored in *BUF.
+     *FROM_NCHARS and *TO_NCHARS are written only on the paths
+     described, so the caller has to initialize them beforehand.
+
+     The return value in the successful case is the TO part (a
+     character or a vector of characters).  The first element of a
+     vector TO part is read unconditionally.  */
+  if (CONSP (val))
+    {
+      Lisp_Object from;
+      int i, len;
+
+      from = XCAR (val);
+      val = XCDR (val);
+      len = ASIZE (from);
+      for (i = 0; i < len; i++)
+	{
+	  if (buf + i == buf_end)
+	    return (last_block ? Qnil : Qt);
+	  if (XINT (AREF (from, i)) != buf[i])
+	    return Qnil;
+	}
+      *from_nchars = len;
+    }
+  if (VECTORP (val))
+    *buf = XINT (AREF (val, 0)), *to_nchars = ASIZE (val);
+  else
+    *buf = XINT (val);
+  return val;
+}
+
+
 static int
-produce_chars (coding)
+produce_chars (coding, translation_table, last_block)
      struct coding_system *coding;
+     Lisp_Object translation_table;
+     int last_block;
 {
   unsigned char *dst = coding->destination + coding->produced;
   unsigned char *dst_end = coding->destination + coding->dst_bytes;
   int produced;
   int produced_chars = 0;
+  int carryover = 0;
 
   if (! coding->chars_at_source)
     {
       /* Characters are in coding->charbuf.  */
       int *buf = coding->charbuf;
       int *buf_end = buf + coding->charbuf_used;
-      unsigned char *adjusted_dst_end;
 
       if (BUFFERP (coding->src_object)
 	  && EQ (coding->src_object, coding->dst_object))
 	dst_end = ((unsigned char *) coding->source) + coding->consumed;
-      adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
 
       while (buf < buf_end)
 	{
-	  int c = *buf++;
-
-	  if (dst >= adjusted_dst_end)
-	    {
-	      dst = alloc_destination (coding,
-				       buf_end - buf + MAX_MULTIBYTE_LENGTH,
-				       dst);
-	      dst_end = coding->destination + coding->dst_bytes;
-	      adjusted_dst_end = dst_end - MAX_MULTIBYTE_LENGTH;
-	    }
+	  int c = *buf, i;
+
 	  if (c >= 0)
 	    {
-	      if (coding->dst_multibyte
-		  || ! CHAR_BYTE8_P (c))
-		CHAR_STRING_ADVANCE (c, dst);
-	      else
-		*dst++ = CHAR_TO_BYTE8 (c);
-	      produced_chars++;
+	      int from_nchars = 1, to_nchars = 1;
+	      Lisp_Object trans = Qnil;
+
+	      if (! NILP (translation_table)
+		  && ! NILP (trans = CHAR_TABLE_REF (translation_table, c)))
+		{
+		  trans = get_translation (trans, buf, buf_end, last_block,
+					   &from_nchars, &to_nchars);
+		  if (EQ (trans, Qt))
+		    break;
+		  c = *buf;
+		}
+
+	      if (dst + MAX_MULTIBYTE_LENGTH * to_nchars > dst_end)
+		{
+		  dst = alloc_destination (coding,
+					   buf_end - buf
+					   + MAX_MULTIBYTE_LENGTH * to_nchars,
+					   dst);
+		  dst_end = coding->destination + coding->dst_bytes;
+		}
+
+	      for (i = 0; i < to_nchars; i++, c = XINT (AREF (trans, i)))
+		{
+		  if (coding->dst_multibyte
+		      || ! CHAR_BYTE8_P (c))
+		    CHAR_STRING_ADVANCE (c, dst);
+		  else
+		    *dst++ = CHAR_TO_BYTE8 (c);
+		}
+	      produced_chars += to_nchars;
+	      *buf++ = to_nchars;
+	      while (--from_nchars > 0)
+		*buf++ = 0;
 	    }
 	  else
-	    /* This is an annotation datum.  (-C) is the length of
-	       it.  */
-	    buf += -c - 1;
-	}
+	    /* This is an annotation datum.  (-C) is the length.  */
+	    buf += -c;
+	}
+      carryover = buf_end - buf;
     }
   else
     {
@@ -5761,7 +5807,7 @@
     insert_from_gap (produced_chars, produced);
   coding->produced += produced;
   coding->produced_char += produced_chars;
-  return produced_chars;
+  return carryover;
 }
 
 /* Compose text in CODING->object according to the annotation data at
@@ -5770,19 +5816,19 @@
  */
 
 static INLINE void
-produce_composition (coding, charbuf)
+produce_composition (coding, charbuf, pos)
      struct coding_system *coding;
      int *charbuf;
+     EMACS_INT pos;
 {
   int len;
-  EMACS_INT from, to;
+  EMACS_INT to;
   enum composition_method method;
   Lisp_Object components;
 
   len = -charbuf[0];
-  from = coding->dst_pos + charbuf[2];
-  to = coding->dst_pos + charbuf[3];
-  method = (enum composition_method) (charbuf[4]);
+  to = pos + charbuf[2];
+  method = (enum composition_method) (charbuf[3]);
 
   if (method == COMPOSITION_RELATIVE)
     components = Qnil;
@@ -5791,32 +5837,32 @@
       Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1];
       int i;
 
-      len -= 5;
-      charbuf += 5;
+      len -= 4;
+      charbuf += 4;
       for (i = 0; i < len; i++)
 	args[i] = make_number (charbuf[i]);
       components = (method == COMPOSITION_WITH_ALTCHARS
 		    ? Fstring (len, args) : Fvector (len, args));
     }
-  compose_text (from, to, components, Qnil, coding->dst_object);
+  compose_text (pos, to, components, Qnil, coding->dst_object);
 }
 
 
 /* Put `charset' property on text in CODING->object according to
    the annotation data at CHARBUF.  CHARBUF is an array:
-     [ -LENGTH ANNOTATION_MASK FROM TO CHARSET-ID ]
+     [ -LENGTH ANNOTATION_MASK NCHARS CHARSET-ID ]
  */
 
 static INLINE void
-produce_charset (coding, charbuf)
+produce_charset (coding, charbuf, pos)
      struct coding_system *coding;
      int *charbuf;
-{
-  EMACS_INT from = coding->dst_pos + charbuf[2];
-  EMACS_INT to = coding->dst_pos + charbuf[3];
-  struct charset *charset = CHARSET_FROM_ID (charbuf[4]);
-
-  Fput_text_property (make_number (from), make_number (to),
+     EMACS_INT pos;
+{
+  EMACS_INT from = pos - charbuf[2];
+  struct charset *charset = CHARSET_FROM_ID (charbuf[3]);
+
+  Fput_text_property (make_number (from), make_number (pos),
 		      Qcharset, CHARSET_NAME (charset),
 		      coding->dst_object);
 }
@@ -5846,8 +5892,9 @@
 
 
 static void
-produce_annotation (coding)
+produce_annotation (coding, pos)
      struct coding_system *coding;
+     EMACS_INT pos;
 {
   int *charbuf = coding->charbuf;
   int *charbuf_end = charbuf + coding->charbuf_used;
@@ -5858,17 +5905,17 @@
   while (charbuf < charbuf_end)
     {
       if (*charbuf >= 0)
-	charbuf++;
+	pos += *charbuf++;
       else
 	{
 	  int len = -*charbuf;
 	  switch (charbuf[1])
 	    {
 	    case CODING_ANNOTATE_COMPOSITION_MASK:
-	      produce_composition (coding, charbuf);
+	      produce_composition (coding, charbuf, pos);
 	      break;
 	    case CODING_ANNOTATE_CHARSET_MASK:
-	      produce_charset (coding, charbuf);
+	      produce_charset (coding, charbuf, pos);
 	      break;
 	    default:
 	      abort ();
@@ -5908,6 +5955,8 @@
   Lisp_Object attrs;
   Lisp_Object undo_list;
   Lisp_Object translation_table;
+  int carryover;
+  int i;
 
   if (BUFFERP (coding->src_object)
       && coding->src_pos > 0
@@ -5937,21 +5986,33 @@
   attrs = CODING_ID_ATTRS (coding->id);
   translation_table = get_translation_table (attrs, 0);
 
+  carryover = 0;
   do
     {
+      EMACS_INT pos = coding->dst_pos + coding->produced_char;
+
       coding_set_source (coding);
       coding->annotated = 0;
+      coding->charbuf_used = carryover;
       (*(coding->decoder)) (coding);
-      if (!NILP (translation_table))
-	translate_chars (coding, translation_table);
       coding_set_destination (coding);
-      produce_chars (coding);
+      carryover = produce_chars (coding, translation_table, 0);
       if (coding->annotated)
-	produce_annotation (coding);
+	produce_annotation (coding, pos);
+      for (i = 0; i < carryover; i++)
+	coding->charbuf[i]
+	  = coding->charbuf[coding->charbuf_used - carryover + i];
     }
   while (coding->consumed < coding->src_bytes
 	 && ! coding->result);
 
+  if (carryover > 0)
+    {
+      coding_set_destination (coding);
+      coding->charbuf_used = carryover;
+      produce_chars (coding, translation_table, 1);
+    }
+
   coding->carryover_bytes = 0;
   if (coding->consumed < coding->src_bytes)
     {
@@ -6036,7 +6097,7 @@
 	  enum composition_method method = COMPOSITION_METHOD (prop);
 	  int nchars = COMPOSITION_LENGTH (prop);
 
-	  ADD_COMPOSITION_DATA (buf, 0, nchars, method);
+	  ADD_COMPOSITION_DATA (buf, nchars, method);
 	  if (method != COMPOSITION_RELATIVE)
 	    {
 	      Lisp_Object components;
@@ -6111,7 +6172,7 @@
     id = XINT (CHARSET_SYMBOL_ID (val));
   else
     id = -1;
-  ADD_CHARSET_DATA (buf, 0, 0, id);
+  ADD_CHARSET_DATA (buf, 0, id);
   next = Fnext_single_property_change (make_number (pos), Qcharset,
 				       coding->src_object,
 				       make_number (limit));