Mercurial > emacs
changeset 17062:a9f1f08212ec
Include charset.h and coding.h.
(READ_BUF_SIZE): New macro.
(Finsert_file_contents): Refer to a coding system in the docstring.
Perform character code conversion of a text read in.
(Fwrite_region): Refer to a coding system in the docstring.
Setup a coding system for character code conversion.
Pass a new arg `pre_write_conversion' (Lisp function) to
build_annotations.
Pass a new arg `coding' to a_write.
(build_annotations): Handle the new arg.
(a_write): Handle the new arg `coding' by passing it to e_write.
(WRITE_BUF_SIZE): New macro.
(e_write): Perform character code conversion of a text to write
out according to the new arg `coding'.
author | Karl Heuer <kwzh@gnu.org> |
---|---|
date | Sat, 22 Feb 1997 19:31:32 +0000 |
parents | bc9a4db95edb |
children | 647b28ba4d1b |
files | src/fileio.c |
diffstat | 1 files changed, 235 insertions(+), 55 deletions(-) [+] |
line wrap: on
line diff
--- a/src/fileio.c Sat Feb 22 19:31:13 1997 +0000 +++ b/src/fileio.c Sat Feb 22 19:31:32 1997 +0000 @@ -92,6 +92,8 @@ #include "lisp.h" #include "intervals.h" #include "buffer.h" +#include "charset.h" +#include "coding.h" #include "window.h" #ifdef WINDOWSNT @@ -2987,6 +2989,10 @@ Lisp_Object Qfind_buffer_file_type; #endif /* DOS_NT */ +#ifndef READ_BUF_SIZE +#define READ_BUF_SIZE (64 << 10) +#endif + DEFUN ("insert-file-contents", Finsert_file_contents, Sinsert_file_contents, 1, 5, 0, "Insert contents of file FILENAME after point.\n\ @@ -2994,7 +3000,7 @@ If second argument VISIT is non-nil, the buffer's visited filename\n\ and last save file modtime are set, and it is marked unmodified.\n\ If visiting and the file does not exist, visiting is completed\n\ -before the error is signaled.\n\n\ +before the error is signaled.\n\ The optional third and fourth arguments BEG and END\n\ specify what portion of the file to insert.\n\ If VISIT is non-nil, BEG and END must be nil.\n\ @@ -3005,7 +3011,10 @@ the whole thing because (1) it preserves some marker positions\n\ and (2) it puts less data in the undo list.\n\ When REPLACE is non-nil, the value is the number of characters actually read,\n\ -which is often less than the number of characters to be read.") +which is often less than the number of characters to be read.\n\ +This does code conversion according to the value of\n\ + `coding-system-for-read' or `coding-system-alist', and sets the variable\n\ + `last-coding-system-used' to the coding system actually used.") (filename, visit, beg, end, replace) Lisp_Object filename, visit, beg, end, replace; { @@ -3013,12 +3022,15 @@ register int fd; register int inserted = 0; register int how_much; + register int unprocessed; int count = specpdl_ptr - specpdl; struct gcpro gcpro1, gcpro2, gcpro3; Lisp_Object handler, val, insval; Lisp_Object p; int total; int not_regular = 0; + char read_buf[READ_BUF_SIZE]; + struct coding_system coding; if (current_buffer->base_buffer && ! NILP (visit)) error ("Cannot do file visiting in an indirect buffer"); @@ -3044,6 +3056,21 @@ goto handled; } + /* Decide the coding-system of the file. */ + { + Lisp_Object val = Vcoding_system_for_read; + if (NILP (val)) + { + Lisp_Object args[6], coding_systems; + + args[0] = Qinsert_file_contents, args[1] = filename, args[2] = visit, + args[3] = beg, args[4] = end, args[5] = replace; + coding_systems = Ffind_coding_system (6, args); + val = CONSP (coding_systems) ? XCONS (coding_systems)->car : Qnil; + } + setup_coding_system (Fcheck_coding_system (val), &coding); + } + fd = -1; #ifndef APOLLO @@ -3114,21 +3141,23 @@ with the file contents. Avoid replacing text at the beginning or end of the buffer that matches the file contents; that preserves markers pointing to the unchanged parts. */ -#ifdef DOS_NT - /* On MSDOS, replace mode doesn't really work, except for binary files, - and it's not worth supporting just for them. */ - if (!NILP (replace)) + if (!NILP (replace) && CODING_REQUIRE_CONVERSION (&coding)) { + /* We have to decode the input, which means replace mode is + quite difficult. We give it up for the moment. */ replace = Qnil; del_range_1 (BEGV, ZV, 0); } -#else /* not DOS_NT */ if (!NILP (replace)) { unsigned char buffer[1 << 14]; int same_at_start = BEGV; int same_at_end = ZV; int overlap; + /* There is still a possibility we will find the need to do code + conversion. If that happens, we set this variable to 1 to + give up on the REPLACE feature. */ + int giveup_match_end = 0; if (XINT (beg) != 0) { @@ -3151,9 +3180,30 @@ XSTRING (filename)->data, strerror (errno)); else if (nread == 0) break; + + if (coding.type == coding_type_automatic) + detect_coding (&coding, buffer, nread); + if (CODING_REQUIRE_TEXT_CONVERSION (&coding)) + /* We found that the file should be decoded somehow. + Let's give up here. */ + { + giveup_match_end = 1; + break; + } + + if (coding.eol_type == CODING_EOL_AUTOMATIC) + detect_eol (&coding, buffer, nread); + if (CODING_REQUIRE_EOL_CONVERSION (&coding)) + /* We found that the format of eol should be decoded. + Let's give up here. */ + { + giveup_match_end = 1; + break; + } + bufpos = 0; while (bufpos < nread && same_at_start < ZV - && FETCH_CHAR (same_at_start) == buffer[bufpos]) + && FETCH_BYTE (same_at_start) == buffer[bufpos]) same_at_start++, bufpos++; /* If we found a discrepancy, stop the scan. Otherwise loop around and scan the next bufferful. */ @@ -3174,8 +3224,9 @@ immediate_quit = 1; QUIT; /* Count how many chars at the end of the file - match the text at the end of the buffer. */ - while (1) + match the text at the end of the buffer. But, if we have + already found that decoding is necessary, don't waste time. */ + while (!giveup_match_end) { int total_read, nread, bufpos, curpos, trial; @@ -3205,7 +3256,7 @@ /* Compare with same_at_start to avoid counting some buffer text as matching both at the file's beginning and at the end. */ while (bufpos > 0 && same_at_end > same_at_start - && FETCH_CHAR (same_at_end - 1) == buffer[bufpos - 1]) + && FETCH_BYTE (same_at_end - 1) == buffer[bufpos - 1]) same_at_end--, bufpos--; /* If we found a discrepancy, stop the scan. Otherwise loop around and scan the preceding bufferful. */ @@ -3231,7 +3282,6 @@ /* Insert from the file at the proper position. */ SET_PT (same_at_start); } -#endif /* not DOS_NT */ total = XINT (end) - XINT (beg); @@ -3257,32 +3307,81 @@ report_file_error ("Setting file position", Fcons (filename, Qnil)); } + /* In the following loop, HOW_MUCH contains the total bytes read so + far. Before exiting the loop, it is set to -1 if I/O error + occurs, set to -2 if the maximum buffer size is exceeded. */ how_much = 0; - while (inserted < total) + /* Total bytes inserted. */ + inserted = 0; + /* Bytes not processed in the previous loop because short gap size. */ + unprocessed = 0; + while (how_much < total) { /* try is reserved in some compilers (Microsoft C) */ - int trytry = min (total - inserted, 64 << 10); + int trytry = min (total - how_much, READ_BUF_SIZE - unprocessed); + char *destination = (CODING_REQUIRE_CONVERSION (&coding) + ? read_buf + unprocessed + : (char *) (POS_ADDR (PT + inserted - 1) + 1)); int this; /* Allow quitting out of the actual I/O. */ immediate_quit = 1; QUIT; - this = read (fd, &FETCH_CHAR (PT + inserted - 1) + 1, trytry); + this = read (fd, destination, trytry); immediate_quit = 0; - if (this <= 0) + if (this < 0 || this + unprocessed == 0) { how_much = this; break; } + how_much += this; + + if (CODING_REQUIRE_CONVERSION (&coding)) + { + int require, produced, consumed; + + this += unprocessed; + /* Make sure that the gap is large enough. */ + require = decoding_buffer_size (&coding, this); + if (GAP_SIZE < require) + make_gap (require - GAP_SIZE); + if (how_much >= total) /* This is the last block. */ + coding.last_block = 1; + produced = decode_coding (&coding, read_buf, + POS_ADDR (PT + inserted - 1) + 1, + this, GAP_SIZE, &consumed); + if (produced > 0) + { + Lisp_Object temp; + + XSET (temp, Lisp_Int, Z + produced); + if (Z + produced != XINT (temp)) + { + how_much = -2; + break; + } + } + unprocessed = this - consumed; + bcopy (read_buf + consumed, read_buf, unprocessed); + this = produced; + } + GPT += this; GAP_SIZE -= this; ZV += this; Z += this; + if (GAP_SIZE > 0) + /* Put an anchor to ensure multi-byte form ends at gap. */ + *GPT_ADDR = 0; inserted += this; } + /* We don't have to consider file type of MSDOS because all files + are read as binary and end-of-line format has already been + decoded appropriately. */ +#if 0 #ifdef DOS_NT /* Demacs 1.1.1 91/10/16 HIRANO Satoshi, MW July 1993 */ /* Determine file type from name and remove LFs from CR-LFs if the file @@ -3293,7 +3392,7 @@ if (NILP (current_buffer->buffer_file_type)) { int reduced_size - = inserted - crlf_to_lf (inserted, &FETCH_CHAR (PT - 1) + 1); + = inserted - crlf_to_lf (inserted, POS_ADDR (PT - 1) + 1); ZV -= reduced_size; Z -= reduced_size; GPT -= reduced_size; @@ -3302,6 +3401,7 @@ } } #endif /* DOS_NT */ +#endif /* 0 */ if (inserted > 0) { @@ -3317,9 +3417,11 @@ /* Discard the unwind protect for closing the file. */ specpdl_ptr--; - if (how_much < 0) + if (how_much == -1) error ("IO error reading %s: %s", XSTRING (filename)->data, strerror (errno)); + else if (how_much == -2) + error ("maximum buffer size exceeded"); notfound: handled: @@ -3374,6 +3476,9 @@ if (inserted > 0) { p = Vafter_insert_file_functions; + if (!NILP (coding.post_read_conversion)) + p = Fcons (coding.post_read_conversion, p); + while (!NILP (p)) { insval = call1 (Fcar (p), make_number (inserted)); @@ -3398,7 +3503,11 @@ static Lisp_Object build_annotations (); /* If build_annotations switched buffers, switch back to BUF. - Kill the temporary buffer that was selected in the meantime. */ + Kill the temporary buffer that was selected in the meantime. + + Since this kill only the last temporary buffer, some buffers remain + not killed if build_annotations switched buffers more than once. + -- K.Handa */ static Lisp_Object build_annotations_unwind (buf) @@ -3432,7 +3541,10 @@ The optional sixth arg LOCKNAME, if non-nil, specifies the name to\n\ use for locking and unlocking, overriding FILENAME and VISIT.\n\ Kludgy feature: if START is a string, then that string is written\n\ -to the file, instead of any buffer contents, and END is ignored.") +to the file, instead of any buffer contents, and END is ignored.\n\ +This does code conversion according to the value of\n\ + `coding-system-for-write' or `coding-system-alist', and sets the variable\n\ + `last-coding-system-used' to the coding system actually used.") (start, end, filename, append, visit, lockname) Lisp_Object start, end, filename, append, visit, lockname; { @@ -3457,6 +3569,7 @@ int buffer_file_type = NILP (current_buffer->buffer_file_type) ? O_TEXT : O_BINARY; #endif /* DOS_NT */ + struct coding_system coding; if (current_buffer->base_buffer && ! NILP (visit)) error ("Cannot do file visiting in an indirect buffer"); @@ -3505,6 +3618,38 @@ return val; } + /* Decide the coding-system to be encoded to. */ + { + Lisp_Object val; + + if (auto_saving) + val = Qnil; + else if (!NILP (Vcoding_system_for_write)) + val = Vcoding_system_for_write; + else if (!NILP (Flocal_variable_if_set_p (Qbuffer_file_coding_system, + Qnil))) + val = Fsymbol_value (Qbuffer_file_coding_system); + else + { + Lisp_Object args[7], coding_systems; + + args[0] = Qwrite_region, args[1] = start, args[2] = end, + args[3] = filename, args[4] = append, args[5] = visit, + args[6] = lockname; + coding_systems = Ffind_coding_system (7, args); + val = (CONSP (coding_systems) + ? XCONS (coding_systems)->cdr + : Fsymbol_value (Qbuffer_file_coding_system)); + } + setup_coding_system (Fcheck_coding_system (val), &coding); + if (!STRINGP (start) && !NILP (current_buffer->selective_display)) + coding.selective = 1; +#ifdef DOS_NT + if (!NILP (current_buffer->buffer_file_type)) + coding.eol_type = CODING_EOL_LF; +#endif /* DOS_NT */ + } + /* Special kludge to simplify auto-saving. */ if (NILP (start)) { @@ -3516,7 +3661,7 @@ count1 = specpdl_ptr - specpdl; given_buffer = current_buffer; - annotations = build_annotations (start, end); + annotations = build_annotations (start, end, coding.pre_write_conversion); if (current_buffer != given_buffer) { start = BEGV; @@ -3649,7 +3794,7 @@ if (STRINGP (start)) { failure = 0 > a_write (desc, XSTRING (start)->data, - XSTRING (start)->size, 0, &annotations); + XSTRING (start)->size, 0, &annotations, &coding); save_errno = errno; } else if (XINT (start) != XINT (end)) @@ -3659,8 +3804,9 @@ { register int end1 = XINT (end); tem = XINT (start); - failure = 0 > a_write (desc, &FETCH_CHAR (tem), - min (GPT, end1) - tem, tem, &annotations); + failure = 0 > a_write (desc, POS_ADDR (tem), + min (GPT, end1) - tem, tem, &annotations, + &coding); nwritten += min (GPT, end1) - tem; save_errno = errno; } @@ -3669,8 +3815,8 @@ { tem = XINT (start); tem = max (tem, GPT); - failure = 0 > a_write (desc, &FETCH_CHAR (tem), XINT (end) - tem, - tem, &annotations); + failure = 0 > a_write (desc, POS_ADDR (tem), XINT (end) - tem, + tem, &annotations, &coding); nwritten += XINT (end) - tem; save_errno = errno; } @@ -3678,7 +3824,15 @@ else { /* If file was empty, still need to write the annotations */ - failure = 0 > a_write (desc, "", 0, XINT (start), &annotations); + failure = 0 > a_write (desc, "", 0, XINT (start), &annotations, &coding); + save_errno = errno; + } + + if (coding.require_flushing) + { + /* We have to flush out a data. */ + coding.last_block = 1; + failure = 0 > e_write (desc, "", 0, &coding); save_errno = errno; } @@ -3787,8 +3941,8 @@ as save-excursion would do. */ static Lisp_Object -build_annotations (start, end) - Lisp_Object start, end; +build_annotations (start, end, pre_write_conversion) + Lisp_Object start, end, pre_write_conversion; { Lisp_Object annotations; Lisp_Object p, res; @@ -3842,6 +3996,24 @@ annotations = merge (annotations, res, Qcar_less_than_car); p = Fcdr (p); } + + /* At last, do the same for the function PRE_WRITE_CONVERSION + implied by the current coding-system. */ + if (!NILP (pre_write_conversion)) + { + struct buffer *given_buffer = current_buffer; + Vwrite_region_annotations_so_far = annotations; + res = call2 (pre_write_conversion, start, end); + if (current_buffer != given_buffer) + { + start = BEGV; + end = ZV; + annotations = Qnil; + } + Flength (res); + annotations = merge (annotations, res, Qcar_less_than_car); + } + UNGCPRO; return annotations; } @@ -3856,12 +4028,13 @@ The return value is negative in case of system call failure. */ int -a_write (desc, addr, len, pos, annot) +a_write (desc, addr, len, pos, annot, coding) int desc; register char *addr; register int len; int pos; Lisp_Object *annot; + struct coding_system *coding; { Lisp_Object tem; int nextpos; @@ -3873,10 +4046,10 @@ if (INTEGERP (tem) && XINT (tem) >= pos && XFASTINT (tem) <= lastpos) nextpos = XFASTINT (tem); else - return e_write (desc, addr, lastpos - pos); + return e_write (desc, addr, lastpos - pos, coding); if (nextpos > pos) { - if (0 > e_write (desc, addr, nextpos - pos)) + if (0 > e_write (desc, addr, nextpos - pos, coding)) return -1; addr += nextpos - pos; pos = nextpos; @@ -3884,43 +4057,50 @@ tem = Fcdr (Fcar (*annot)); if (STRINGP (tem)) { - if (0 > e_write (desc, XSTRING (tem)->data, XSTRING (tem)->size)) + if (0 > e_write (desc, XSTRING (tem)->data, XSTRING (tem)->size, + coding)) return -1; } *annot = Fcdr (*annot); } } +#ifndef WRITE_BUF_SIZE +#define WRITE_BUF_SIZE (16 * 1024) +#endif + int -e_write (desc, addr, len) +e_write (desc, addr, len, coding) int desc; register char *addr; register int len; + struct coding_system *coding; { - char buf[16 * 1024]; - register char *p, *end; - - if (!EQ (current_buffer->selective_display, Qt)) - return write (desc, addr, len) - len; - else + char buf[WRITE_BUF_SIZE]; + int produced, consumed; + + /* We used to have a code for handling selective display here. But, + now it is handled within encode_coding. */ + while (1) { - p = buf; - end = p + sizeof buf; - while (len--) + produced = encode_coding (coding, addr, buf, len, WRITE_BUF_SIZE, + &consumed); + len -= consumed, addr += consumed; + if (produced == 0 && len > 0) { - if (p == end) - { - if (write (desc, buf, sizeof buf) != sizeof buf) - return -1; - p = buf; - } - *p = *addr++; - if (*p++ == '\015') - p[-1] = '\n'; + /* There was a carry over because of invalid codes in the source. + We just write out them as is. */ + bcopy (addr, buf, len); + produced = len; + len = 0; } - if (p != buf) - if (write (desc, buf, p - buf) != p - buf) - return -1; + if (produced > 0) + { + produced -= write (desc, buf, produced); + if (produced) return -1; + } + if (len <= 0) + break; } return 0; }