Mercurial > emacs
changeset 17016:ded89d7e1575
(current_column_bol_cache): New variable. This makes
move_to_column faster.
(current_column): Set current_column_bol_cache.
(current_column_1): Likewise.
(Fmove_to_column): Use current_column_bol_cache.
(current_column): When the buffer may have multi-byte
characters, call current_column_1.
(compute_motion): Handle multi-byte characters.
(current_column_1): Likewise.
(Fmove_to_column): Likewise.
Include "charset.h".
(compute_motion): Remove the variable PREV_VPOS.
Introduce new local variables (multibyte, wide_column, prev_pos,
contin_hpos, prev_tab_offset) to handle multi-byte characters and
wide-column characters.
(pos_tab_offset): Specify "- (1 << (BITS_PER_SHORT - 1))"
to express a big negative number (for consistency with xdisp.c).
(vmotion): Specify "1 << (BITS_PER_SHORT - 1)" to
express "Don't care".
author | Karl Heuer <kwzh@gnu.org> |
---|---|
date | Thu, 20 Feb 1997 06:39:27 +0000 |
parents | fe0b3661b584 |
children | 667a3686a447 |
files | src/indent.c |
diffstat | 1 files changed, 310 insertions(+), 40 deletions(-) [+] |
line wrap: on
line diff
--- a/src/indent.c Thu Feb 20 06:35:00 1997 +0000 +++ b/src/indent.c Thu Feb 20 06:39:27 1997 +0000 @@ -22,6 +22,7 @@ #include <config.h> #include "lisp.h" #include "buffer.h" +#include "charset.h" #include "indent.h" #include "frame.h" #include "window.h" @@ -52,6 +53,10 @@ static int current_column_1 (); +/* Cache of beginning of line found by the last call of + current_column. */ +int current_column_bol_cache; + /* Get the display table to use for the current buffer. */ struct Lisp_Char_Table * @@ -148,7 +153,10 @@ static void width_run_cache_on_off () { - if (NILP (current_buffer->cache_long_line_scans)) + if (NILP (current_buffer->cache_long_line_scans) + /* And, for the moment, this feature doesn't work on multibyte + characters. */ + || !NILP (current_buffer->enable_multibyte_characters)) { /* It should be off. */ if (current_buffer->width_run_cache) @@ -233,6 +241,13 @@ proplimit = overlay_limit; end = Fnext_single_property_change (position, Qinvisible, buffer, proplimit); + /* Don't put the boundary in the middle of multibyte form if + there is no actual property change. */ + if (end == pos + 100 + && !NILP (current_buffer->enable_multibyte_characters) + && end < ZV) + while (pos < end && !CHAR_HEAD_P (POS_ADDR (end))) + end--; *next_boundary_p = XFASTINT (end); } /* if the `invisible' property is set, we can skip to @@ -287,18 +302,19 @@ && MODIFF == last_known_column_modified) return last_known_column; - /* If the buffer has overlays or text properties, + /* If the buffer has overlays, text properties, or multibyte, use a more general algorithm. */ if (BUF_INTERVALS (current_buffer) || !NILP (current_buffer->overlays_before) - || !NILP (current_buffer->overlays_after)) + || !NILP (current_buffer->overlays_after) + || !NILP (current_buffer->enable_multibyte_characters)) return current_column_1 (PT); /* Scan backwards from point to the previous newline, counting width. Tab characters are the only complicated case. 
*/ /* Make a pointer for decrementing through the chars before point. */ - ptr = &FETCH_CHAR (PT - 1) + 1; + ptr = POS_ADDR (PT - 1) + 1; /* Make a pointer to where consecutive chars leave off, going backwards from point. */ if (PT == BEGV) @@ -355,6 +371,10 @@ col += post_tab; } + if (ptr == BEGV_ADDR) + current_column_bol_cache = BEGV; + else + current_column_bol_cache = PTR_CHAR_POS ((ptr+1)); last_known_column = col; last_known_column_point = PT; last_known_column_modified = MODIFF; @@ -377,8 +397,9 @@ /* Start the scan at the beginning of this line with column number 0. */ register int col = 0; - int scan = find_next_newline (pos, -1); + int scan = current_column_bol_cache = find_next_newline (pos, -1); int next_boundary = scan; + int multibyte = !NILP (current_buffer->enable_multibyte_characters); if (tab_width <= 0 || tab_width > 1000) tab_width = 8; @@ -397,7 +418,7 @@ goto endloop; } - c = FETCH_CHAR (scan); + c = FETCH_BYTE (scan); if (dp != 0 && VECTORP (DISP_CHAR_VECTOR (dp, c))) { col += XVECTOR (DISP_CHAR_VECTOR (dp, c))->size; @@ -415,6 +436,42 @@ col += tab_width; col = col / tab_width * tab_width; } + else if (multibyte && BASE_LEADING_CODE_P (c)) + { + scan--; + /* Start of multi-byte form. */ + if (c == LEADING_CODE_COMPOSITION) + { + unsigned char *ptr = POS_ADDR (scan); + + int cmpchar_id = str_cmpchar_id (ptr, next_boundary - scan); + if (cmpchar_id >= 0) + { + scan += cmpchar_table[cmpchar_id]->len, + col += cmpchar_table[cmpchar_id]->width; + } + else + { /* invalid composite character */ + scan++; + col += 4; + } + } + else + { + /* Here, we check that the following bytes are valid + constituents of multi-byte form. */ + int len = BYTES_BY_CHAR_HEAD (c), i; + + for (i = 1, scan++; i < len; i++, scan++) + /* We don't need range checking for PTR because there + are anchors (`\0') at GAP and Z. 
*/ + if (CHAR_HEAD_P (POS_ADDR (scan))) break; + if (i < len) + col += 4, scan -= i - 1; + else + col += WIDTH_BY_CHAR_HEAD (c); + } + } else if (ctl_arrow && (c < 040 || c == 0177)) col += 2; else if (c < 040 || c >= 0177) @@ -584,7 +641,7 @@ if (tab_width <= 0 || tab_width > 1000) tab_width = 8; - p = &FETCH_CHAR (pos); + p = POS_ADDR (pos); /* STOP records the value of P at which we will need to think about the gap, or about invisible text, or about the end of the buffer. */ @@ -615,8 +672,8 @@ (if STOP_POS is the position of the gap) rather than at the data after the gap. */ - stop = &FETCH_CHAR (stop_pos - 1) + 1; - p = &FETCH_CHAR (pos); + stop = POS_ADDR (stop_pos - 1) + 1; + p = POS_ADDR (pos); } switch (*p++) { @@ -639,7 +696,7 @@ indented_beyond_p (pos, column) int pos, column; { - while (pos > BEGV && FETCH_CHAR (pos) == '\n') + while (pos > BEGV && FETCH_BYTE (pos) == '\n') pos = find_next_newline_no_quit (pos - 1, -1); return (position_indentation (pos) >= column); } @@ -669,6 +726,7 @@ register int tab_width = XINT (current_buffer->tab_width); register int ctl_arrow = !NILP (current_buffer->ctl_arrow); register struct Lisp_Char_Table *dp = buffer_display_table (); + register int multibyte = !NILP (current_buffer->enable_multibyte_characters); Lisp_Object val; int prev_col; @@ -689,7 +747,7 @@ if (col > goal) { end = pos; - pos = find_next_newline (pos, -1); + pos = current_column_bol_cache; col = 0; } @@ -708,7 +766,7 @@ if (col >= goal) break; - c = FETCH_CHAR (pos); + c = FETCH_BYTE (pos); if (dp != 0 && VECTORP (DISP_CHAR_VECTOR (dp, c))) { col += XVECTOR (DISP_CHAR_VECTOR (dp, c))->size; @@ -728,10 +786,50 @@ } else if (ctl_arrow && (c < 040 || c == 0177)) col += 2; - else if (c < 040 || c >= 0177) + else if (c < 040 || c == 0177) col += 4; + else if (c < 0177) + col++; + else if (multibyte && BASE_LEADING_CODE_P (c)) + { + /* Start of multi-byte form. 
*/ + unsigned char *ptr; + + pos--; /* rewind to the character head */ + ptr = POS_ADDR (pos); + if (c == LEADING_CODE_COMPOSITION) + { + int cmpchar_id = str_cmpchar_id (ptr, end - pos); + + if (cmpchar_id >= 0) + { + col += cmpchar_table[cmpchar_id]->width; + pos += cmpchar_table[cmpchar_id]->len; + } + else + { /* invalid composite character */ + col += 4; + pos++; + } + } + else + { + /* Here, we check that the following bytes are valid + constituents of multi-byte form. */ + int len = BYTES_BY_CHAR_HEAD (c), i; + + for (i = 1, ptr++; i < len; i++, ptr++) + /* We don't need range checking for PTR because there + are anchors (`\0') both at GPT and Z. */ + if (CHAR_HEAD_P (ptr)) break; + if (i < len) + col += 4, pos++; + else + col += WIDTH_BY_CHAR_HEAD (c), pos += i; + } + } else - col++; + col += 4; } endloop: @@ -848,7 +946,7 @@ = (INTEGERP (current_buffer->selective_display) ? XINT (current_buffer->selective_display) : !NILP (current_buffer->selective_display) ? -1 : 0); - int prev_vpos = vpos, prev_hpos = 0; + int prev_hpos = 0; int selective_rlen = (selective && dp && VECTORP (DISP_INVIS_VECTOR (dp)) ? XVECTOR (DISP_INVIS_VECTOR (dp))->size : 0); @@ -870,6 +968,13 @@ int next_width_run = from; Lisp_Object window; + int multibyte = !NILP (current_buffer->enable_multibyte_characters); + int wide_column = 0; /* Set to 1 when a previous character + is wide-colomn. */ + int prev_pos; /* Previous buffer position. */ + int contin_hpos; /* HPOS of last column of continued line. */ + int prev_tab_offset; /* Previous tab offset. */ + XSETBUFFER (buffer, current_buffer); XSETWINDOW (window, win); @@ -885,7 +990,9 @@ if (tab_width <= 0 || tab_width > 1000) tab_width = 8; - pos = from; + pos = prev_pos = from; + contin_hpos = 0; + prev_tab_offset = tab_offset; while (1) { while (pos == next_boundary) @@ -896,10 +1003,14 @@ through, so clear the flag after testing it. */ if (!did_motion) /* We need to skip past the overlay strings. 
Currently those - strings must contain single-column printing characters; + strings must not contain TAB; if we want to relax that restriction, something will have to be changed here. */ - hpos += overlay_strings (pos, win, (char **)0); + { + unsigned char *ovstr; + int ovlen = overlay_strings (pos, win, &ovstr); + hpos += (multibyte ? strwidth (ovstr, ovlen) : ovlen); + } did_motion = 0; if (pos >= to) @@ -913,9 +1024,50 @@ } /* Handle right margin. */ - if (hpos >= width - && (hpos > width - || (pos < ZV && FETCH_CHAR (pos) != '\n'))) + /* Note on a wide-column character. + + Characters are classified into the following three categories + according to the width (columns occupied on screen). + + (1) single-column character: ex. `a' + (2) multi-column character: ex. `^A', TAB, `\033' + (3) wide-column character: ex. Japanese character, Chinese character + (In the following example, `W_' stands for them.) + + Multi-column characters can be divided around the right margin, + but wide-column characters cannot. + + NOTE: + + (*) The cursor is placed on the next character after the point. + + ---------- + abcdefghi\ + j ^---- next after the point + ^--- next char. after the point. + ---------- + In case of sigle-column character + + ---------- + abcdefgh\\ + 033 ^---- next after the point, next char. after the point. + ---------- + In case of multi-column character + + ---------- + abcdefgh\\ + W_ ^---- next after the point + ^---- next char. after the point. + ---------- + In case of wide-column character + + The problem here is continuation at a wide-column character. + In this case, the line may shorter less than WIDTH. + And we find the continuation AFTER it occurs. + + */ + + if (hpos > width) { if (hscroll || (truncate_partial_width_windows @@ -923,31 +1075,94 @@ || !NILP (current_buffer->truncate_lines)) { /* Truncating: skip to newline. 
*/ - pos = find_before_next_newline (pos, to, 1); + if (pos <= to) /* This IF is needed because we may past TO */ + pos = find_before_next_newline (pos, to, 1); hpos = width; /* If we just skipped next_boundary, loop around in the main while and handle it. */ if (pos >= next_boundary) next_boundary = pos + 1; + prev_hpos = width; + prev_tab_offset = tab_offset; } else { /* Continuing. */ - vpos += hpos / width; - tab_offset += hpos - hpos % width; - hpos %= width; + /* Remember the previous value. */ + prev_tab_offset = tab_offset; + + if (wide_column) + { + hpos -= prev_hpos; + tab_offset += prev_hpos; + } + else + { + tab_offset += width; + hpos -= width; + } + vpos++; + contin_hpos = prev_hpos; + prev_hpos = 0; } } /* Stop if past the target buffer position or screen position. */ - if (pos >= to) - break; - if (vpos > tovpos || (vpos == tovpos && hpos >= tohpos)) + if (pos > to) + { + /* Go back to the previous position. */ + pos = prev_pos; + hpos = prev_hpos; + tab_offset = prev_tab_offset; + + /* NOTE on contin_hpos, hpos, and prev_hpos. + + ---------- + abcdefgh\\ + W_ ^---- contin_hpos + | ^----- hpos + \---- prev_hpos + ---------- + */ + + if (contin_hpos && prev_hpos == 0 + && contin_hpos < width && !wide_column) + { + /* Line breaking occurs in the middle of multi-column + character. Go back to previous line. */ + hpos = contin_hpos; + vpos = vpos - 1; + } + else if (c == '\n') + /* If previous character is NEWLINE, + set VPOS back to previous line */ + vpos = vpos - 1; + break; + } + + if (vpos > tovpos || vpos == tovpos && hpos >= tohpos) + { + if (contin_hpos && prev_hpos == 0 + && (contin_hpos == width || wide_column)) + { /* Line breaks because we can't put the character at the + previous line any more. It is not the multi-column + character continued in middle. Go back to previous + buffer position, screen position, and set tab offset + to previous value. It's the beginning of the + line. 
*/ + pos = prev_pos; + hpos = prev_hpos; + tab_offset = prev_tab_offset; + } + break; + } + if (pos == ZV) /* We cannot go beyond ZV. Stop here. */ break; - prev_vpos = vpos; prev_hpos = hpos; + prev_pos = pos; + wide_column = 0; /* Consult the width run cache to see if we can avoid inspecting the text character-by-character. */ @@ -1000,7 +1215,7 @@ /* We have to scan the text character-by-character. */ else { - c = FETCH_CHAR (pos); + c = FETCH_BYTE (pos); pos++; /* Perhaps add some info to the width_run_cache. */ @@ -1073,6 +1288,7 @@ hpos++; tab_offset = 0; } + contin_hpos = 0; } else if (c == CR && selective < 0) { @@ -1093,6 +1309,47 @@ hpos = width; } } + else if (multibyte && BASE_LEADING_CODE_P (c)) + { + /* Start of multi-byte form. */ + unsigned char *ptr; + + pos--; /* rewind POS */ + ptr = POS_ADDR (pos); + + if (c == LEADING_CODE_COMPOSITION) + { + int cmpchar_id = str_cmpchar_id (ptr, next_boundary - pos); + + if (cmpchar_id >= 0) + { + if (cmpchar_table[cmpchar_id]->width >= 2) + wide_column = 1; + hpos += cmpchar_table[cmpchar_id]->width; + pos += cmpchar_table[cmpchar_id]->len; + } + else + { /* invalid composite character */ + hpos += 4; + pos ++; + } + } + else + { + /* Here, we check that the following bytes are valid + constituents of multi-byte form. */ + int len = BYTES_BY_CHAR_HEAD (c), i; + + for (i = 1, ptr++; i < len; i++, ptr++) + /* We don't need range checking for PTR because + there are anchors ('\0') both at GPT and Z. */ + if (CHAR_HEAD_P (ptr)) break; + if (i < len) + hpos += 4, pos++; + else + hpos += WIDTH_BY_CHAR_HEAD (c), pos += i, wide_column = 1; + } + } else hpos += (ctl_arrow && c < 0200) ? 
2 : 4; } @@ -1113,10 +1370,7 @@ val_compute_motion.ovstring_chars_done = 0; /* Nonzero if have just continued a line */ - val_compute_motion.contin - = (pos != from - && (val_compute_motion.vpos != prev_vpos) - && c != '\n'); + val_compute_motion.contin = (contin_hpos && prev_hpos == 0); return &val_compute_motion; } @@ -1237,17 +1491,23 @@ if (pos == BEGV) return MINI_WINDOW_P (w) ? -minibuf_prompt_width : 0; - if (FETCH_CHAR (pos - 1) == '\n') + if (FETCH_BYTE (pos - 1) == '\n') return 0; TEMP_SET_PT (pos); col = current_column (); TEMP_SET_PT (opoint); + /* Modulo is no longer valid, as a line may get shorter than WIDTH + columns by continuation of a wide-column character. Just return + COL here. */ +#if 0 /* In the continuation of the first line in a minibuffer we must take the width of the prompt into account. */ if (MINI_WINDOW_P (w) && col >= width - minibuf_prompt_width && find_next_newline_no_quit (pos, -1) == BEGV) return col - (col + minibuf_prompt_width) % width; return col - (col % width); +#endif + return col; } @@ -1320,7 +1580,11 @@ lmargin + (XFASTINT (prevline) == BEG ? start_hpos : 0), 0, - from, 1 << (BITS_PER_INT - 2), 0, + from, + /* Don't care for VPOS... */ + 1 << (BITS_PER_SHORT - 1), + /* ... nor HPOS. */ + 1 << (BITS_PER_SHORT - 1), width, hscroll, /* This compensates for start_hpos so that a tab as first character @@ -1344,6 +1608,7 @@ val_vmotion.contin = 0; val_vmotion.prevhpos = 0; val_vmotion.ovstring_chars_done = 0; + val_vmotion.tab_offset = 0; /* For accumulating tab offset. */ return &val_vmotion; } @@ -1351,7 +1616,7 @@ } /* Moving downward is simple, but must calculate from beg of line to determine hpos of starting point */ - if (from > BEGV && FETCH_CHAR (from - 1) != '\n') + if (from > BEGV && FETCH_BYTE (from - 1) != '\n') { Lisp_Object propval; @@ -1373,7 +1638,11 @@ lmargin + (XFASTINT (prevline) == BEG ? start_hpos : 0), 0, - from, 1 << (BITS_PER_INT - 2), 0, + from, + /* Don't care for VPOS... 
*/ + 1 << (BITS_PER_SHORT - 1), + /* ... nor HPOS. */ + 1 << (BITS_PER_SHORT - 1), width, hscroll, (XFASTINT (prevline) == BEG ? -start_hpos : 0), w); @@ -1383,12 +1652,13 @@ { pos.hpos = lmargin + (from == BEG ? start_hpos : 0); pos.vpos = 0; + pos.tab_offset = 0; did_motion = 0; } return compute_motion (from, vpos, pos.hpos, did_motion, - ZV, vtarget, - (1 << (BITS_PER_INT - 2)), + ZV, vtarget, - (1 << (BITS_PER_SHORT - 1)), width, hscroll, - pos.vpos * width - (from == BEG ? start_hpos : 0), + pos.tab_offset - (from == BEG ? start_hpos : 0), w); }