Mercurial > emacs
changeset 29725:2bc397e9b09a
(setup_coding_system) <4>: Reset member `cr_carryover'.
(ccl_coding_driver): On encoding, initialize ccl->eol_type.
(decode_eol_post_ccl): New function.
(decode_coding): Don't detect EOL format here for CCL based coding
systems.
(decode_coding) <coding_type_ccl>: Handle carryovered CR. Call
decode_eol_post_ccl after running the CCL program.
(code_convert_region): Don't detect EOL format here for CCL based
coding systems.
(decode_coding_string): Likewise.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 19 Jun 2000 05:18:09 +0000 |
parents | caf7f927357c |
children | c9430bc1c824 |
files | src/coding.c |
diffstat | 1 files changed, 152 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
--- a/src/coding.c Mon Jun 19 05:16:57 2000 +0000 +++ b/src/coding.c Mon Jun 19 05:18:09 2000 +0000 @@ -3202,6 +3202,7 @@ } } coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK; + coding->spec.ccl.cr_carryover = 0; break; case 5: @@ -3883,7 +3884,8 @@ int result; ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK; - + if (encodep) + ccl->eol_type = coding->eol_type; coding->produced = ccl_driver (ccl, source, destination, src_bytes, dst_bytes, &(coding->consumed)); if (encodep) @@ -3916,6 +3918,136 @@ return result; } +/* Decode EOL format of the text at PTR of BYTES length destructively + according to CODING->eol_type. This is called after the CCL + program produced a decoded text at PTR. If we do CRLF->LF + conversion, update CODING->produced and CODING->produced_char. */ + +static void +decode_eol_post_ccl (coding, ptr, bytes) + struct coding_system *coding; + unsigned char *ptr; + int bytes; +{ + Lisp_Object val, saved_coding_symbol; + unsigned char *pend = ptr + bytes; + int dummy; + + /* Remember the current coding system symbol. We set it back when + an inconsistent EOL is found so that `last-coding-system-used' is + set to the coding system that doesn't specify EOL conversion. */ + saved_coding_symbol = coding->symbol; + + coding->spec.ccl.cr_carryover = 0; + if (coding->eol_type == CODING_EOL_UNDECIDED) + { + /* Here, to avoid the call of setup_coding_system, we directly + call detect_eol_type. */ + coding->eol_type = detect_eol_type (ptr, bytes, &dummy); + val = Fget (coding->symbol, Qeol_type); + if (VECTORP (val) && XVECTOR (val)->size == 3) + coding->symbol = XVECTOR (val)->contents[coding->eol_type]; + coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL; + } + + if (coding->eol_type == CODING_EOL_LF) + { + /* We have nothing to do. */ + ptr = pend; + } + else if (coding->eol_type == CODING_EOL_CRLF) + { + unsigned char *pstart = ptr, *p = ptr; + + if (! (coding->mode & CODING_MODE_LAST_BLOCK) + && *(pend - 1) == '\r') + { + /* If the last character is CR, we can't handle it here + because LF will be in the not-yet-decoded source text. + Recorded that the CR is not yet processed. */ + coding->spec.ccl.cr_carryover = 1; + coding->produced--; + coding->produced_char--; + pend--; + } + while (ptr < pend) + { + if (*ptr == '\r') + { + if (ptr + 1 < pend && *(ptr + 1) == '\n') + { + *p++ = '\n'; + ptr += 2; + } + else + { + if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) + goto undo_eol_conversion; + *p++ = *ptr++; + } + } + else if (*ptr == '\n' + && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) + goto undo_eol_conversion; + else + *p++ = *ptr++; + continue; + + undo_eol_conversion: + /* We have faced with inconsistent EOL format at PTR. + Convert all LFs before PTR back to CRLFs. */ + for (p--, ptr--; p >= pstart; p--) + { + if (*p == '\n') + *ptr-- = '\n', *ptr-- = '\r'; + else + *ptr-- = *p; + } + /* If carryover is recorded, cancel it because we don't + convert CRLF anymore. */ + if (coding->spec.ccl.cr_carryover) + { + coding->spec.ccl.cr_carryover = 0; + coding->produced++; + coding->produced_char++; + pend++; + } + p = ptr = pend; + coding->eol_type = CODING_EOL_LF; + coding->symbol = saved_coding_symbol; + } + if (p < pend) + { + /* As each two-byte sequence CRLF was converted to LF, (PEND + - P) is the number of deleted characters. */ + coding->produced -= pend - p; + coding->produced_char -= pend - p; + } + } + else /* i.e. coding->eol_type == CODING_EOL_CR */ + { + unsigned char *p = ptr; + + for (; ptr < pend; ptr++) + { + if (*ptr == '\r') + *ptr = '\n'; + else if (*ptr == '\n' + && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL) + { + for (; p < ptr; p++) + { + if (*p == '\n') + *p = '\r'; + } + ptr = pend; + coding->eol_type = CODING_EOL_LF; + coding->symbol = saved_coding_symbol; + } + } + } +} + /* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before decoding, it may detect coding system and format of end-of-line if those are not yet decided. The source should be unibyte, the @@ -3931,7 +4063,8 @@ if (coding->type == coding_type_undecided) detect_coding (coding, source, src_bytes); - if (coding->eol_type == CODING_EOL_UNDECIDED) + if (coding->eol_type == CODING_EOL_UNDECIDED + && coding->type != coding_type_ccl) detect_eol (coding, source, src_bytes); coding->produced = coding->produced_char = 0; @@ -3962,8 +4095,20 @@ break; case coding_type_ccl: - ccl_coding_driver (coding, source, destination, + if (coding->spec.ccl.cr_carryover) + { + /* Set the CR which is not processed by the previous call of + decode_eol_post_ccl in DESTINATION. */ + *destination = '\r'; + coding->produced++; + coding->produced_char++; + dst_bytes--; + } + ccl_coding_driver (coding, source, + destination + coding->spec.ccl.cr_carryover, src_bytes, dst_bytes, 0); + if (coding->eol_type != CODING_EOL_LF) + decode_eol_post_ccl (coding, destination, coding->produced); break; default: @@ -4580,7 +4725,8 @@ encodings again in vain. */ coding->type = coding_type_emacs_mule; } - if (coding->eol_type == CODING_EOL_UNDECIDED) + if (coding->eol_type == CODING_EOL_UNDECIDED + && coding->type != coding_type_ccl) { saved_coding_symbol = coding->symbol; detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte); @@ -5038,7 +5184,8 @@ if (coding->type == coding_type_undecided) coding->type = coding_type_emacs_mule; } - if (coding->eol_type == CODING_EOL_UNDECIDED) + if (coding->eol_type == CODING_EOL_UNDECIDED + && coding->type != coding_type_ccl) { saved_coding_symbol = coding->symbol; detect_eol (coding, XSTRING (str)->data, to_byte);