Mercurial > emacs
changeset 23325:bbd06336cd0c
(check_composing_code): If the current composing
sequence doesn't end properly, return -1.
(DECODE_CHARACTER_ASCII): Update coding->composed_chars.
(DECODE_CHARACTER_DIMENSION1): Likewise.
(decode_coding_iso2022): Check validity of a composing sequence.
(code_convert_string): If the length of text to be converted is
shrunk to zero, don't perform code conversion.
(shrink_decoding_region): Fix previous change.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Sat, 26 Sep 1998 04:20:48 +0000 |
parents | 4c5f12c6041c |
children | df3f641c9ca1 |
files | src/coding.c |
diffstat | 1 files changed, 82 insertions(+), 41 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/src/coding.c Sat Sep 26 04:20:48 1998 +0000
+++ b/src/coding.c Sat Sep 26 04:20:48 1998 +0000
@@ -213,15 +213,18 @@
 
 /* Decode one ASCII character C. */
 
-#define DECODE_CHARACTER_ASCII(c) \
-  do { \
-    if (COMPOSING_P (coding->composing)) \
-      *dst++ = 0xA0, *dst++ = (c) | 0x80; \
-    else \
-      { \
-        *dst++ = (c); \
-        coding->produced_char++; \
-      } \
+#define DECODE_CHARACTER_ASCII(c) \
+  do { \
+    if (COMPOSING_P (coding->composing)) \
+      { \
+        *dst++ = 0xA0, *dst++ = (c) | 0x80; \
+        coding->composed_chars++; \
+      } \
+    else \
+      { \
+        *dst++ = (c); \
+        coding->produced_char++; \
+      } \
   } while (0)
 
 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
@@ -231,7 +234,10 @@
   do { \
     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset); \
     if (COMPOSING_P (coding->composing)) \
-      *dst++ = leading_code + 0x20; \
+      { \
+        *dst++ = leading_code + 0x20; \
+        coding->composed_chars++; \
+      } \
     else \
       { \
         *dst++ = leading_code; \
@@ -997,9 +1003,7 @@
             invalid_code_found = 1;
         }
     }
-  return (invalid_code_found
-          ? src - src_start
-          : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
+  return (invalid_code_found ? src - src_start : -1);
 }
 
 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions". */
@@ -1030,6 +1034,7 @@
     translation_table = Vstandard_translation_table_for_decode;
 
   coding->produced_char = 0;
+  coding->composed_chars = 0;
   coding->fake_multibyte = 0;
   while (src < src_end && (dst_bytes
                            ? (dst < adjusted_dst_end)
@@ -1243,7 +1248,7 @@
                       coding->composing = (c1 == '0'
                                            ? COMPOSING_NO_RULE_HEAD
                                            : COMPOSING_WITH_RULE_HEAD);
-                      coding->produced_char++;
+                      coding->composed_chars = 0;
                     }
                   else if (result1 > 0)
                     {
@@ -1253,6 +1258,7 @@
                       src += result1;
                       dst += result1 + 2;
                       coding->produced_char += result1 + 2;
+                      coding->fake_multibyte = 1;
                     }
                   else
                     {
@@ -1266,6 +1272,28 @@
                   break;
 
                 case '1': /* end composing */
+                  if (coding->composed_chars > 0)
+                    {
+                      if (coding->composed_chars == 1)
+                        {
+                          unsigned char *this_char_start = dst;
+                          int this_bytes;
+
+                          /* Only one character is in the composing
+                             sequence. Make it a normal character. */
+                          while (*--this_char_start != LEADING_CODE_COMPOSITION);
+                          dst = (this_char_start
+                                 + (coding->composing == COMPOSING_NO_RULE_TAIL
+                                    ? 1 : 2));
+                          *dst -= 0x20;
+                          if (*dst == 0x80)
+                            *++dst &= 0x7F;
+                          this_bytes = BYTES_BY_CHAR_HEAD (*dst);
+                          while (this_bytes--) *this_char_start++ = *dst++;
+                          dst = this_char_start;
+                        }
+                      coding->produced_char++;
+                    }
                   coding->composing = COMPOSING_NO;
                   break;
 
@@ -3938,30 +3966,45 @@
 
         case CODING_CATEGORY_IDX_ISO_7:
         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
-          /* We can skip all charactes at the tail except for ESC and
-             the following 2-byte at the tail. */
-          if (eol_conversion)
-            while (begp < endp
-                   && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
-              endp--;
-          else
-            while (begp < endp
-                   && (c = endp[-1]) != ISO_CODE_ESC)
-              endp--;
-          /* Do not consider LF as ascii if preceded by CR, since that
-             confuses eol decoding. */
-          if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
-            endp++;
-          if (begp < endp && endp[-1] == ISO_CODE_ESC)
-            {
-              if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
-                /* This is an ASCII designation sequence. We can
-                   surely skip the tail. */
-                endp += 2;
-              else
-                /* Hmmm, we can't skip the tail. */
-                endp = endp_orig;
-            }
+          {
+            /* We can skip all charactes at the tail except for 8-bit
+               codes and ESC and the following 2-byte at the tail. */
+            unsigned char *eight_bit = NULL;
+
+            if (eol_conversion)
+              while (begp < endp
+                     && (c = endp[-1]) != ISO_CODE_ESC && c != '\r')
+                {
+                  if (!eight_bit && c & 0x80) eight_bit = endp;
+                  endp--;
+                }
+            else
+              while (begp < endp
+                     && (c = endp[-1]) != ISO_CODE_ESC)
+                {
+                  if (!eight_bit && c & 0x80) eight_bit = endp;
+                  endp--;
+                }
+            /* Do not consider LF as ascii if preceded by CR, since that
+               confuses eol decoding. */
+            if (begp < endp && endp < endp_orig
+                && endp[-1] == '\r' && endp[0] == '\n')
+              endp++;
+            if (begp < endp && endp[-1] == ISO_CODE_ESC)
+              {
+                if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
+                  /* This is an ASCII designation sequence. We can
+                     surely skip the tail. But, if we have
+                     encountered an 8-bit code, skip only the codes
+                     after that. */
+                  endp = eight_bit ? eight_bit : endp + 2;
+                else
+                  /* Hmmm, we can't skip the tail. */
+                  endp = endp_orig;
+              }
+            else if (eight_bit)
+              endp = eight_bit;
+          }
         }
     }
   *beg += begp - begp_orig;
@@ -4524,9 +4567,7 @@
       else
         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
     }
-  if (from == to_byte
-      && ! (coding->mode & CODING_MODE_LAST_BLOCK
-            && CODING_REQUIRE_FLUSHING (coding)))
+  if (from == to_byte)
     return (nocopy ? str : Fcopy_sequence (str));
 
   if (encodep)
```