emacs: src/coding.c comparison

comparison src/coding.c @ 23339:2da87b489590

(check_composing_code): Fix previous change. Now it always returns 0 or -1. (decode_coding_iso2022): Adjusted for the above change. (encode_coding_iso2022): When encoding the last block, flush out trailing garbage bytes. (setup_coding_system): Delete unnecessary code. (shrink_decoding_region): Check translation table. If ASCII should be translated, give up shrinking. (shrink_encoding_region): Likewise. (SHRINK_CONVERSION_REGION_THRESHHOLD): New macro. (SHRINK_CONVERSION_REGION): New macro. (code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text properties here. (code_convert_region): In the case of encoding, always calculate correct character number. (code_convert_string): Call SHRINK_CONVERSION_REGION. (code_convert_region1): Don't delete text properties here. (check_composing_code): Fix previous change. Now it always returns 0 or -1. (decode_coding_iso2022): Adjusted for the above change. (encode_coding_iso2022): When encoding the last block, flush out trailing garbage bytes. (setup_coding_system): Delete unnecessary code. (shrink_decoding_region): Check translation table. If ASCII should be translated, give up shrinking. (shrink_encoding_region): Likewise. (SHRINK_CONVERSION_REGION_THRESHHOLD): New macro. (SHRINK_CONVERSION_REGION): New macro. (code_convert_region): Call SHRINK_CONVERSION_REGION. Delete text properties here. (code_convert_region): In the case of encoding, always calculate correct character number. (code_convert_string): Call SHRINK_CONVERSION_REGION. (code_convert_region1): Don't delete text properties here.

author	Kenichi Handa <handa@m17n.org>
date	Mon, 28 Sep 1998 11:52:53 +0000
parents	bbd06336cd0c
children	6905813a49c6

comparison

equal deleted inserted replaced

-:0a2b76b09162
+:2da87b489590
 	coding->spec.iso2022.last_invalid_designation_register = reg;	   \
 	goto label_invalid_code;					   \
 }									   \
 } while (0)
-/* Check if the current composing sequence contains only valid codes.
+/* Return 0 if there's a valid composing sequence starting at SRC and
-If the composing sequence doesn't end before SRC_END, return -1.
+ending before SRC_END, else return -1.  */
-Else, if it contains only valid codes, return 0.
-Else return the length of the composing sequence.  */
 int
 check_composing_code (coding, src, src_end)
 struct coding_system *coding;
 unsigned char *src, *src_end;
 {
-unsigned char *src_start = src;
-int invalid_code_found = 0;
 int charset, c, c1, dim;
 while (src < src_end)
 {
-if (*src++ != ISO_CODE_ESC) continue;
+c = *src++;
-if (src >= src_end) break;
+if (c >= 0x20)
-if ((c = *src++) == '1') /* end of compsition */
+	continue;
-	return (invalid_code_found ? src - src_start : 0);
+if (c != ISO_CODE_ESC || src >= src_end)
-if (src + 2 >= src_end) break;
+	return -1;
-if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
+c = *src++;
-	invalid_code_found = 1;
+if (c == '1') /* end of compsition */
+	return 0;
+if (src + 2 >= src_end
+	  || !coding->flags & CODING_FLAG_ISO_DESIGNATION)
+	return -1;
+dim = (c == '$');
+if (dim == 1)
+	c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
+if (c >= '(' && c <= '/')
+	{
+	  c1 = *src++;
+	  if ((c1 < ' ' || c1 >= 0x80)
+	      || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
+	      || ! coding->safe_charsets[charset]
+	      || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+		  == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
+	    return -1;
+	}
 else
-	{
+	return -1;
-	  dim = 0;
+}
-	  if (c == '$')
-	    {
+/* We have not found the sequence "ESC 1".  */
-	      dim = 1;
+return -1;
-	      c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
-	    }
-	  if (c >= '(' && c <= '/')
-	    {
-	      c1 = *src++;
-	      if ((c1 < ' ' || c1 >= 0x80)
-		  || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
-		  || ! coding->safe_charsets[charset]
-		  || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
-		      == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
-		invalid_code_found = 1;
-	    }
-	  else
-	    invalid_code_found = 1;
-	}
-}
-return (invalid_code_found ? src - src_start : -1);
 }
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 int
 	      if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
 		goto label_invalid_code;
 	      ONE_MORE_BYTE (c1);
 	      if (c1 >= '@' && c1 <= 'B')
 		{	/* designation of JISX0208.1978, GB2312.1980,
-				   or JISX0208.1980 */
+			   or JISX0208.1980 */
 		  DECODE_DESIGNATION (0, 2, 94, c1);
 		}
 	      else if (c1 >= 0x28 && c1 <= 0x2B)
 		{	/* designation of DIMENSION2_CHARS94 character set */
 		  ONE_MORE_BYTE (c2);
 	      break;
 	    case '0': case '2':	/* start composing */
 	      /* Before processing composing, we must be sure that all
 		 characters being composed are supported by CODING.
-		 If not, we must give up composing and insert the
+		 If not, we must give up composing.  */
-		 bunch of codes for composing as is without decoding.  */
+	      if (check_composing_code (coding, src, src_end) == 0)
-	      {
+		{
-		int result1;
+		  /* We are looking at a valid composition sequence.  */
+		  coding->composing = (c1 == '0'
-		result1 = check_composing_code (coding, src, src_end);
+				       ? COMPOSING_NO_RULE_HEAD
-		if (result1 == 0)
+				       : COMPOSING_WITH_RULE_HEAD);
-		  {
+		  coding->composed_chars = 0;
-		    coding->composing = (c1 == '0'
+		}
-					 ? COMPOSING_NO_RULE_HEAD
+	      else
-					 : COMPOSING_WITH_RULE_HEAD);
+		{
-		    coding->composed_chars = 0;
+		  *dst++ = ISO_CODE_ESC;
-		  }
+		  *dst++ = c1;
-		else if (result1 > 0)
+		  coding->produced_char += 2;
-		  {
+		}
-		    if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
-		      {
-			bcopy (src_base, dst, result1 + 2);
-			src += result1;
-			dst += result1 + 2;
-			coding->produced_char += result1 + 2;
-			coding->fake_multibyte = 1;
-		      }
-		    else
-		      {
-			result = CODING_FINISH_INSUFFICIENT_DST;
-			goto label_end_of_loop_2;
-		      }
-		  }
-		else
-		  goto label_end_of_loop;
-	      }
 	      break;
 	    case '1':		/* end composing */
+	      if (!coding->composing)
+		{
+		  *dst++ = ISO_CODE_ESC;
+		  *dst++ = c1;
+		  coding->produced_char += 2;
+		  break;
+		}
 	      if (coding->composed_chars > 0)
 		{
 		  if (coding->composed_chars == 1)
 		    {
 		      unsigned char *this_char_start = dst;
 if (coding->mode & CODING_MODE_LAST_BLOCK)
 {
 ENCODE_RESET_PLANE_AND_REGISTER;
 if (COMPOSING_P (coding->composing))
 	ENCODE_COMPOSITION_END;
+if (result == CODING_FINISH_INSUFFICIENT_SRC)
+	{
+	  while (src < src_end && dst < dst_end)
+	    *dst++ = *src++;
+	}
 }
 coding->consumed = src - source;
 coding->produced = coding->produced_char = dst - destination;
 return result;
 }
 return 0;
 }
 /* Initialize remaining fields.  */
 coding->composing = 0;
-coding->translation_table_for_decode = Qnil;
-coding->translation_table_for_encode = Qnil;
 /* Get values of coding system properties:
 `post-read-conversion', `pre-write-conversion',
 `translation-table-for-decode', `translation-table-for-encode'.  */
 plist = XVECTOR (coding_spec)->contents[3];
 struct coding_system *coding;
 unsigned char *str;
 {
 unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
 int eol_conversion;
+Lisp_Object translation_table;
 if (coding->type == coding_type_ccl
 || coding->type == coding_type_undecided
 || !NILP (coding->post_read_conversion))
 {
 else if (coding->type == coding_type_no_conversion)
 {
 /* We need no conversion, but don't have to skip any data here.
 Decoding routine handles them effectively anyway.  */
 return;
+}
+translation_table = coding->translation_table_for_decode;
+if (NILP (translation_table) && !NILP (Venable_character_translation))
+translation_table = Vstandard_translation_table_for_decode;
+if (CHAR_TABLE_P (translation_table))
+{
+int i;
+for (i = 0; i < 128; i++)
+	if (!NILP (CHAR_TABLE_REF (translation_table, i)))
+	  break;
+if (i < 128)
+	/* Some ASCII character should be translated.  We give up
+	   shrinking.  */
+	return;
 }
 eol_conversion = (coding->eol_type != CODING_EOL_LF);
 if ((! eol_conversion) && (coding->heading_ascii >= 0))
 struct coding_system *coding;
 unsigned char *str;
 {
 unsigned char *begp_orig, *begp, *endp_orig, *endp;
 int eol_conversion;
+Lisp_Object translation_table;
 if (coding->type == coding_type_ccl)
 /* We can't skip any data.  */
 return;
 else if (coding->type == coding_type_no_conversion)
 {
 /* We need no conversion.  */
 *beg = *end;
 return;
+}
+translation_table = coding->translation_table_for_encode;
+if (NILP (translation_table) && !NILP (Venable_character_translation))
+translation_table = Vstandard_translation_table_for_encode;
+if (CHAR_TABLE_P (translation_table))
+{
+int i;
+for (i = 0; i < 128; i++)
+	if (!NILP (CHAR_TABLE_REF (translation_table, i)))
+	  break;
+if (i < 128)
+	/* Some ASCII character should be translated.  We give up
+	   shrinking.  */
+	return;
 }
 if (str)
 {
 begp_orig = begp = str + *beg;
 *beg += begp - begp_orig;
 *end += endp - endp_orig;
 return;
 }
+/* As shrinking conversion region requires some overhead, we don't try
+shrinking if the length of conversion region is less than this
+value.  */
+static int shrink_conversion_region_threshhold = 1024;
+#define SHRINK_CONVERSION_REGION(beg, end, coding, str, encodep)	\
+do {									\
+if (*(end) - *(beg) > shrink_conversion_region_threshhold)		\
+{									\
+if (encodep) shrink_encoding_region (beg, end, coding, str);	\
+else shrink_decoding_region (beg, end, coding, str);		\
+}									\
+} while (0)
 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
 text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
 coding system CODING, and return the status code of code conversion
 (currently, this value has no meaning).
 {
 int from_byte_orig = from_byte, to_byte_orig = to_byte;
 if (from < GPT && GPT < to)
 move_gap_both (from, from_byte);
-if (encodep)
+SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
-shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
-else
-shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
 if (from_byte == to_byte
 	&& ! (coding->mode & CODING_MODE_LAST_BLOCK
 	      && CODING_REQUIRE_FLUSHING (coding)))
 {
 	coding->produced = len_byte;
 total_skip = head_skip + tail_skip;
 from += head_skip;
 to -= tail_skip;
 len -= total_skip; len_byte -= total_skip;
 }
+/* The code conversion routine can not preserve text properties for
+now.  So, we must remove all text properties in the region.  */
+if (replace)
+Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
 /* For conversion, we must put the gap before the text in addition to
 making the gap larger for efficient decoding.  The required gap
 size starts from 2000 which is the magic number used in make_gap.
 But, after one batch of conversion, it will be incremented if we
 	}
 }
 if (src - dst > 0) *dst = 0; /* Put an anchor.  */
 if (multibyte
-&& (fake_multibyte
+&& (encodep
-	  || !encodep && (to - from) != (to_byte - from_byte)))
+	  || fake_multibyte
+	  || (to - from) != (to_byte - from_byte)))
 inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
 /* If we have shrunk the conversion area, adjust it now.  */
 if (total_skip > 0)
 {
 : ! CODING_REQUIRE_DECODING (coding))
 from = to_byte;
 else
 {
 /* Try to skip the leading and trailing ASCII characters.  */
-if (encodep)
+SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
-	shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
+				encodep);
-else
-	shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
 }
 if (from == to_byte)
 return (nocopy ? str : Fcopy_sequence (str));
 if (encodep)
 if (NILP (coding_system))
 return make_number (to - from);
 if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
 error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
-/* The code conversion routine can not preserve text properties for
-now.  So, we must remove all text properties in the region.  */
-Fset_text_properties (start, end, Qnil, Qnil);
 coding.mode |= CODING_MODE_LAST_BLOCK;
 code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
 		       &coding, encodep, 1);
 Vlast_coding_system_used = coding.symbol;

Mercurial > emacs

comparison src/coding.c @ 23339:2da87b489590