# HG changeset patch # User Kenichi Handa <handa@etlken> # Date 1274747544 -32400 # Node ID 0bb727f1d5479b0db60c7e9658ad70e09c6391f9 # Parent 452cd1d4f5ad551901ed7f675b58c2eb2f9199dc Fix bidi-composition interaction in backward scanning.. diff -r 452cd1d4f5ad -r 0bb727f1d547 src/ChangeLog --- a/src/ChangeLog Wed May 19 10:16:01 2010 +0900 +++ b/src/ChangeLog Tue May 25 09:32:24 2010 +0900 @@ -1,3 +1,26 @@ +2010-05-25 Kenichi Handa <handa@m17n.org> + + * dispextern.h (struct composition_it): New members rule_idx and + charpos. + + * xdisp.c (set_iterator_to_next): While scanning backward, assume + that the character positions of IT point the last character of the + current grapheme cluster. + (next_element_from_composition): Don't change character positions + of IT. + (append_composite_glyph): Set glyph->charpos to + it->cmp_it.charpos. + + * composite.c (autocmp_chars): Change the first argument to RULE, + and try composition with RULE only. + (composition_compute_stop_pos): Record the index number of the + composition rule in CMP_IT->rule_idx. + (composition_reseat_it): Call autocmp_chars repeatedly until the + correct rule of the composition is found. + (composition_update_it): Set CMP_IT->charpos. Assume the CHARPOS + is at the last character of the current grapheme cluster when + CMP_IT->reversed_p is nonzero. + 2010-05-18 Chong Yidong <cyd@stupidchicken.com> * character.c (Fstring, Funibyte_string): Use SAFE_ALLOCA to diff -r 452cd1d4f5ad -r 0bb727f1d547 src/composite.c --- a/src/composite.c Wed May 19 10:16:01 2010 +0900 +++ b/src/composite.c Tue May 25 09:32:24 2010 +0900 @@ -915,15 +915,16 @@ } -/* Try to compose the characters at CHARPOS according to CFT_ELEMENT - which is an element of composition-function-table (which see). - LIMIT limits the characters to compose. STRING, if not nil, is a - target string. WIN is a window where the characters are being - displayed. */ +/* Try to compose the characters at CHARPOS according to composition + rule RULE ([PATTERN PREV-CHARS FUNC]). LIMIT limits the characters + to compose. STRING, if not nil, is a target string. WIN is a + window where the characters are being displayed. If characters are + successfully composed, return the composition as a glyph-string + object. Otherwise return nil. */ static Lisp_Object -autocmp_chars (cft_element, charpos, bytepos, limit, win, face, string) - Lisp_Object cft_element; +autocmp_chars (rule, charpos, bytepos, limit, win, face, string) + Lisp_Object rule; EMACS_INT charpos, bytepos, limit; struct window *win; struct face *face; @@ -932,90 +933,66 @@ int count = SPECPDL_INDEX (); FRAME_PTR f = XFRAME (win->frame); Lisp_Object pos = make_number (charpos); + EMACS_INT to; EMACS_INT pt = PT, pt_byte = PT_BYTE; - int lookback; + Lisp_Object re, font_object, lgstring; + int len; record_unwind_save_match_data (); - for (lookback = -1; CONSP (cft_element); cft_element = XCDR (cft_element)) + re = AREF (rule, 0); + if (NILP (re)) + len = 1; + else if (! STRINGP (re)) + return unbind_to (count, Qnil); + else if ((len = fast_looking_at (re, charpos, bytepos, limit, -1, string)) + > 0) { - Lisp_Object elt = XCAR (cft_element); - Lisp_Object re; - Lisp_Object font_object = Qnil, gstring; - EMACS_INT len, to; - - if (! VECTORP (elt) || ASIZE (elt) != 3) - continue; - if (lookback < 0) - { - lookback = XFASTINT (AREF (elt, 1)); - if (limit > charpos + MAX_COMPOSITION_COMPONENTS) - limit = charpos + MAX_COMPOSITION_COMPONENTS; - } - else if (lookback != XFASTINT (AREF (elt, 1))) - break; - re = AREF (elt, 0); - if (NILP (re)) - len = 1; - else if ((len = fast_looking_at (re, charpos, bytepos, limit, -1, string)) - > 0) - { - if (NILP (string)) - len = BYTE_TO_CHAR (bytepos + len) - charpos; - else - len = string_byte_to_char (string, bytepos + len) - charpos; - } - if (len > 0) - { - limit = to = charpos + len; + if (NILP (string)) + len = BYTE_TO_CHAR (bytepos + len) - charpos; + else + len = string_byte_to_char (string, bytepos + len) - charpos; + } + if (len <= 0) + return unbind_to (count, Qnil); + to = limit = charpos + len; #ifdef HAVE_WINDOW_SYSTEM - if (FRAME_WINDOW_P (f)) - { - font_object = font_range (charpos, &to, win, face, string); - if (! FONT_OBJECT_P (font_object) - || (! NILP (re) - && to < limit - && (fast_looking_at (re, charpos, bytepos, to, -1, string) <= 0))) - { - if (NILP (string)) - TEMP_SET_PT_BOTH (pt, pt_byte); - return unbind_to (count, Qnil); - } - } - else + if (FRAME_WINDOW_P (f)) + { + font_object = font_range (charpos, &to, win, face, string); + if (! FONT_OBJECT_P (font_object) + || (! NILP (re) + && to < limit + && (fast_looking_at (re, charpos, bytepos, to, -1, string) <= 0))) + return unbind_to (count, Qnil); + } + else #endif /* not HAVE_WINDOW_SYSTEM */ - font_object = win->frame; - gstring = Fcomposition_get_gstring (pos, make_number (to), - font_object, string); - if (NILP (LGSTRING_ID (gstring))) - { - Lisp_Object args[6]; + font_object = win->frame; + lgstring = Fcomposition_get_gstring (pos, make_number (to), font_object, + string); + if (NILP (LGSTRING_ID (lgstring))) + { + Lisp_Object args[6]; - /* Save point as marker before calling out to lisp. */ - if (NILP (string)) - { - Lisp_Object m = Fmake_marker (); - set_marker_both (m, Qnil, pt, pt_byte); - record_unwind_protect (restore_point_unwind, m); - } + /* Save point as marker before calling out to lisp. */ + if (NILP (string)) + { + Lisp_Object m = Fmake_marker (); + set_marker_both (m, Qnil, pt, pt_byte); + record_unwind_protect (restore_point_unwind, m); + } - args[0] = Vauto_composition_function; - args[1] = AREF (elt, 2); - args[2] = pos; - args[3] = make_number (to); - args[4] = font_object; - args[5] = string; - gstring = safe_call (6, args); - } - else if (NILP (string)) - { - TEMP_SET_PT_BOTH (pt, pt_byte); - } - return unbind_to (count, gstring); - } + args[0] = Vauto_composition_function; + args[1] = AREF (rule, 2); + args[2] = pos; + args[3] = make_number (to); + args[4] = font_object; + args[5] = string; + lgstring = safe_call (6, args); + if (NILP (string)) + TEMP_SET_PT_BOTH (pt, pt_byte); } - if (NILP (string)) - TEMP_SET_PT_BOTH (pt, pt_byte); - return unbind_to (count, Qnil); + return unbind_to (count, lgstring); } static Lisp_Object _work_val; @@ -1126,8 +1103,9 @@ if (! NILP (val)) { Lisp_Object elt; + int ridx; - for (; CONSP (val); val = XCDR (val)) + for (ridx = 0; CONSP (val); val = XCDR (val), ridx++) { elt = XCAR (val); if (VECTORP (elt) && ASIZE (elt) == 3 @@ -1137,6 +1115,7 @@ } if (CONSP (val)) { + cmp_it->rule_idx = ridx; cmp_it->lookback = XFASTINT (AREF (elt, 1)); cmp_it->stop_pos = charpos - 1 - cmp_it->lookback; cmp_it->ch = c; @@ -1167,45 +1146,54 @@ limit = bytepos + len; while (CHAR_COMPOSABLE_P (c)) { - for (val = CHAR_TABLE_REF (Vcomposition_function_table, c); - CONSP (val); val = XCDR (val)) + val = CHAR_TABLE_REF (Vcomposition_function_table, c); + if (! NILP (val)) { - Lisp_Object elt = XCAR (val); - int back, len; + Lisp_Object elt; + int ridx, back, len; - if (VECTORP (elt) && ASIZE (elt) == 3 - && NATNUMP (AREF (elt, 1)) - && charpos - (back = XFASTINT (AREF (elt, 1))) > endpos) + for (ridx = 0; CONSP (val); val = XCDR (val), ridx++) { - EMACS_INT cpos = charpos - back, bpos; + elt = XCAR (val); + if (VECTORP (elt) && ASIZE (elt) == 3 + && NATNUMP (AREF (elt, 1)) + && charpos - (back = XFASTINT (AREF (elt, 1))) > endpos) + { + EMACS_INT cpos = charpos - back, bpos; - if (back == 0) - bpos = bytepos; - else - bpos = (NILP (string) ? CHAR_TO_BYTE (cpos) - : string_char_to_byte (string, cpos)); - if (STRINGP (AREF (elt, 0))) - len = fast_looking_at (AREF (elt, 0), cpos, bpos, - start + 1, limit, string); - else - len = 1; - if (len > 0) - { - /* Make CPOS point to the last character of match. - Note that LEN is byte-length. */ - bpos += len; - if (NILP (string)) - cpos = BYTE_TO_CHAR (bpos) - 1; + if (back == 0) + bpos = bytepos; + else + bpos = (NILP (string) ? CHAR_TO_BYTE (cpos) + : string_char_to_byte (string, cpos)); + if (STRINGP (AREF (elt, 0))) + len = fast_looking_at (AREF (elt, 0), cpos, bpos, + start + 1, limit, string); else - cpos = string_byte_to_char (string, bpos) - 1; - back = cpos - (charpos - back); - if (cmp_it->stop_pos < cpos - || (cmp_it->stop_pos == cpos - && cmp_it->lookback < back)) + len = 1; + if (len > 0) { - cmp_it->stop_pos = cpos; - cmp_it->ch = c; - cmp_it->lookback = back; + /* Make CPOS point to the last character of + match. Note that LEN is byte-length. */ + if (len > 1) + { + bpos += len; + if (NILP (string)) + cpos = BYTE_TO_CHAR (bpos) - 1; + else + cpos = string_byte_to_char (string, bpos) - 1; + } + back = cpos - (charpos - back); + if (cmp_it->stop_pos < cpos + || (cmp_it->stop_pos == cpos + && cmp_it->lookback < back)) + { + cmp_it->rule_idx = ridx; + cmp_it->stop_pos = cpos; + cmp_it->ch = c; + cmp_it->lookback = back; + cmp_it->nchars = back + 1; + } } } } @@ -1293,7 +1281,7 @@ if (cmp_it->ch == -2) { composition_compute_stop_pos (cmp_it, charpos, bytepos, endpos, string); - if (cmp_it->stop_pos != charpos) + if (cmp_it->ch == -2 || cmp_it->stop_pos != charpos) /* The current position is not composed. */ return 0; } @@ -1314,55 +1302,75 @@ } else if (w) { - Lisp_Object val, elt; - int i; + Lisp_Object lgstring = Qnil; + Lisp_Object val, elt, re; + int len, i; val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch); + for (i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val)); if (charpos < endpos) { for (; CONSP (val); val = XCDR (val)) { elt = XCAR (val); - if (cmp_it->lookback == XFASTINT (AREF (elt, 1))) + if (! VECTORP (elt) || ASIZE (elt) != 3 + || ! INTEGERP (AREF (elt, 1))) + continue; + if (XFASTINT (AREF (elt, 1)) != cmp_it->lookback) + goto no_composition; + lgstring = autocmp_chars (elt, charpos, bytepos, endpos, + w, face, string); + if (composition_gstring_p (lgstring)) break; + lgstring = Qnil; + /* Composition failed perhaps because the font doesn't + support sufficient range of characters. Try the + other composition rules if any. */ } - if (NILP (val)) - goto no_composition; - - val = autocmp_chars (val, charpos, bytepos, endpos, w, face, string); - if (! composition_gstring_p (val)) - goto no_composition; cmp_it->reversed_p = 0; } else { - EMACS_INT saved_charpos = charpos, saved_bytepos = bytepos; + EMACS_INT cpos = charpos, bpos = bytepos; - if (cmp_it->lookback > 0) + while (1) { - charpos -= cmp_it->lookback; - if (charpos < endpos) + elt = XCAR (val); + if (cmp_it->lookback > 0) + { + cpos -= cmp_it->lookback; + if (STRINGP (string)) + bpos = string_char_to_byte (string, cpos); + else + bpos = CHAR_TO_BYTE (cpos); + } + lgstring = autocmp_chars (elt, cpos, bpos, charpos + 1, w, face, + string); + if (composition_gstring_p (lgstring) + && cpos + LGSTRING_CHAR_LEN (lgstring) - 1 == charpos) + break; + /* Composition failed or didn't cover the current + character. */ + if (cmp_it->lookback == 0) goto no_composition; - if (STRINGP (string)) - bytepos = string_char_to_byte (string, charpos); - else - bytepos = CHAR_TO_BYTE (charpos); - } - val = autocmp_chars (val, charpos, bytepos, saved_charpos + 1, - w, face, string); - if (! composition_gstring_p (val) - || charpos + LGSTRING_CHAR_LEN (val) <= saved_charpos) - { - charpos = saved_charpos, bytepos = saved_bytepos; - goto no_composition; + lgstring = Qnil; + /* Try to find a shorter compostion that starts after CPOS. */ + composition_compute_stop_pos (cmp_it, charpos, bytepos, cpos, + string); + if (cmp_it->ch == -2 || cmp_it->stop_pos < charpos) + goto no_composition; + val = CHAR_TABLE_REF (Vcomposition_function_table, cmp_it->ch); + for (i = 0; i < cmp_it->rule_idx; i++, val = XCDR (val)); } cmp_it->reversed_p = 1; } - if (NILP (LGSTRING_ID (val))) - val = composition_gstring_put_cache (val, -1); - cmp_it->id = XINT (LGSTRING_ID (val)); - for (i = 0; i < LGSTRING_GLYPH_LEN (val); i++) - if (NILP (LGSTRING_GLYPH (val, i))) + if (NILP (lgstring)) + goto no_composition; + if (NILP (LGSTRING_ID (lgstring))) + lgstring = composition_gstring_put_cache (lgstring, -1); + cmp_it->id = XINT (LGSTRING_ID (lgstring)); + for (i = 0; i < LGSTRING_GLYPH_LEN (lgstring); i++) + if (NILP (LGSTRING_GLYPH (lgstring, i))) break; cmp_it->nglyphs = i; cmp_it->from = 0; @@ -1378,10 +1386,10 @@ if (charpos < endpos) { charpos++; - if (STRINGP (string)) - bytepos += MULTIBYTE_LENGTH_NO_CHECK (SDATA (string) + bytepos); + if (NILP (string)) + INC_POS (bytepos); else - INC_POS (bytepos); + bytepos += MULTIBYTE_FORM_LENGTH (SDATA (string) + bytepos, 0); } else { @@ -1393,11 +1401,20 @@ return 0; } -/* Update nchars, nbytes, and width of the current grapheme cluster - which is identified by CMP_IT->from (if the composition is static - or automatic in l2r context) or CMPT_IT->to (if the composition is - automatic in r2l context). In addition, in the former case, update - CMP_IT->to, and in the latter case, update CMP_IT->from. */ +/* Update charpos, nchars, nbytes, and width of the current grapheme + cluster. + + If the composition is static or automatic in L2R context, the + cluster is identified by CMP_IT->from, and CHARPOS is the position + of the first character of the cluster. In this case, update + CMP_IT->to too. + + If the composition is automatic in R2L context, the cluster is + identified by CMP_IT->to, and CHARPOS is the position of the last + character of the cluster. In this case, update CMP_IT->from too. + + The return value is the character code of the first character of + the cluster, or -1 if the composition is somehow broken. */ int composition_update_it (cmp_it, charpos, bytepos, string) @@ -1409,8 +1426,10 @@ if (cmp_it->ch < 0) { + /* static composition */ struct composition *cmp = composition_table[cmp_it->id]; + cmp_it->charpos = charpos; cmp_it->to = cmp_it->nglyphs; if (cmp_it->nglyphs == 0) c = -1; @@ -1423,70 +1442,64 @@ c = ' '; } cmp_it->width = cmp->width; + charpos += cmp_it->nchars; + if (STRINGP (string)) + cmp_it->nbytes = string_char_to_byte (string, charpos) - bytepos; + else + cmp_it->nbytes = CHAR_TO_BYTE (charpos) - bytepos; } else { + /* automatic composition */ Lisp_Object gstring = composition_gstring_from_id (cmp_it->id); + Lisp_Object glyph; + int from, to; if (cmp_it->nglyphs == 0) { - c = -1; cmp_it->nchars = LGSTRING_CHAR_LEN (gstring); cmp_it->width = 0; + cmp_it->from = cmp_it->to = 0; + return -1; } - else if (! cmp_it->reversed_p) + if (! cmp_it->reversed_p) { - Lisp_Object glyph = LGSTRING_GLYPH (gstring, cmp_it->from); - int from = LGLYPH_FROM (glyph); - - c = XINT (LGSTRING_CHAR (gstring, from)); - cmp_it->nchars = LGLYPH_TO (glyph) - from + 1; - cmp_it->width = (LGLYPH_WIDTH (glyph) > 0 - ? CHAR_WIDTH (LGLYPH_CHAR (glyph)) : 0); + glyph = LGSTRING_GLYPH (gstring, cmp_it->from); + from = LGLYPH_FROM (glyph); for (cmp_it->to = cmp_it->from + 1; cmp_it->to < cmp_it->nglyphs; cmp_it->to++) { glyph = LGSTRING_GLYPH (gstring, cmp_it->to); if (LGLYPH_FROM (glyph) != from) break; - if (LGLYPH_WIDTH (glyph) > 0) - cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph)); } + cmp_it->charpos = charpos; } else { - int from_idx = cmp_it->to - 1; - Lisp_Object glyph = LGSTRING_GLYPH (gstring, from_idx); - int from = LGLYPH_FROM (glyph); - - c = XINT (LGSTRING_CHAR (gstring, from)); - cmp_it->nchars = LGLYPH_TO (glyph) - from + 1; - cmp_it->width = (LGLYPH_WIDTH (glyph) > 0 - ? CHAR_WIDTH (LGLYPH_CHAR (glyph)) : 0); - for (from_idx--; from_idx >= 0; from_idx--) + glyph = LGSTRING_GLYPH (gstring, cmp_it->to - 1); + from = LGLYPH_FROM (glyph); + cmp_it->charpos = charpos - (LGLYPH_TO (glyph) - from); + for (cmp_it->from = cmp_it->to - 1; cmp_it->from > 0; + cmp_it->from--) { - glyph = LGSTRING_GLYPH (gstring, from_idx); + glyph = LGSTRING_GLYPH (gstring, cmp_it->from - 1); if (LGLYPH_FROM (glyph) != from) break; - if (LGLYPH_WIDTH (glyph) > 0) - cmp_it->width += CHAR_WIDTH (LGLYPH_CHAR (glyph)); } - cmp_it->from = from_idx + 1; - charpos -= cmp_it->nchars - 1; - bytepos += CHAR_BYTES (c); - if (STRINGP (string)) - cmp_it->nbytes = bytepos - string_char_to_byte (string, charpos); - else - cmp_it->nbytes = bytepos - CHAR_TO_BYTE (charpos); - return c; + } + glyph = LGSTRING_GLYPH (gstring, cmp_it->from); + cmp_it->nchars = LGLYPH_TO (glyph) + 1 - from; + cmp_it->nbytes = 0; + cmp_it->width = 0; + for (i = cmp_it->nchars - 1; i >= 0; i--) + { + c = XINT (LGSTRING_CHAR (gstring, i)); + cmp_it->nbytes += CHAR_BYTES (c); + cmp_it->width = (LGLYPH_WIDTH (glyph) > 0 + ? CHAR_WIDTH (LGLYPH_CHAR (glyph)) : 0); } } - - charpos += cmp_it->nchars; - if (STRINGP (string)) - cmp_it->nbytes = string_char_to_byte (string, charpos) - bytepos; - else - cmp_it->nbytes = CHAR_TO_BYTE (charpos) - bytepos; return c; } @@ -1655,7 +1668,7 @@ check.pos_byte = cur.pos_byte; else check.pos_byte = CHAR_TO_BYTE (check.pos); - val = autocmp_chars (check_val, check.pos, check.pos_byte, + val = autocmp_chars (elt, check.pos, check.pos_byte, tail, w, NULL, string); need_adjustment = 1; if (! NILP (val)) @@ -2059,7 +2072,7 @@ a function to call to compose that character. The element at index C in the table, if non-nil, is a list of -this form: ([PATTERN PREV-CHARS FUNC] ...) +composition rules of this form: ([PATTERN PREV-CHARS FUNC] ...) PATTERN is a regular expression which C and the surrounding characters must match. diff -r 452cd1d4f5ad -r 0bb727f1d547 src/dispextern.h --- a/src/dispextern.h Wed May 19 10:16:01 2010 +0900 +++ b/src/dispextern.h Tue May 25 09:32:24 2010 +0900 @@ -1968,17 +1968,31 @@ are not iterating over a composition now. */ int id; /* If non-negative, character that triggers the automatic - composition at `stop_pos', and this is an automatic compositoin. + composition at `stop_pos', and this is an automatic composition. If negative, this is a static composition. This is set to -2 temporarily if searching of composition reach a limit or a newline. */ int ch; - /* If this an automatic composition, how many characters to look back - from the position where a character triggering the composition - exists. */ + /* If this is an automatic composition, index of a rule for making + the automatic composition. Provided that ELT is an element of + Vcomposition_function_table for CH, (nth ELT RULE_IDX) is the + rule for the composition. */ + int rule_idx; + /* If this is an automatic composition, how many characters to look + back from the position where a character triggering the + composition exists. */ int lookback; /* If non-negative, number of glyphs of the glyph-string. */ int nglyphs; + /* Nonzero iff the composition is created while buffer is scanned in + reverse order, and thus the grapheme clusters must be rendered + from the last to the first. */ + int reversed_p; + + /** The following members contain information about the current + grapheme cluster. */ + /* Position of the first character of the current grapheme cluster. */ + EMACS_INT charpos; /* Number of characters and bytes of the current grapheme cluster. */ int nchars, nbytes; /* Indices of the glyphs for the current grapheme cluster. */ @@ -1987,10 +2001,6 @@ graphic display and in units of canonical characters on a terminal display. */ int width; - /* Nonzero iff the composition is created while buffer is scanned in - reverse order, and thus the grapheme clusters must be rendered - from the last to the first. */ - int reversed_p; }; struct it diff -r 452cd1d4f5ad -r 0bb727f1d547 src/xdisp.c --- a/src/xdisp.c Wed May 19 10:16:01 2010 +0900 +++ b/src/xdisp.c Tue May 25 09:32:24 2010 +0900 @@ -6339,10 +6339,10 @@ /* Update IT's char/byte positions to point to the last character of the previous grapheme cluster, or the character visually after the current composition. */ - bidi_move_to_visually_next (&it->bidi_it); + for (i = 0; i < it->cmp_it.nchars; i++) + bidi_move_to_visually_next (&it->bidi_it); IT_BYTEPOS (*it) = it->bidi_it.bytepos; IT_CHARPOS (*it) = it->bidi_it.charpos; - if (it->cmp_it.from > 0) { /* Proceed to the previous grapheme cluster. */ @@ -7108,19 +7108,6 @@ it->object = it->w->buffer; it->c = composition_update_it (&it->cmp_it, IT_CHARPOS (*it), IT_BYTEPOS (*it), Qnil); - if (it->cmp_it.reversed_p) - { - /* Now it->position points the last character of the current - grapheme cluster. Adjust it to point the first one. We - have to do it here so that append_composite_glyph sets - correct (struct glyph)->charpos. */ - int i; - for (i = 0; i < it->cmp_it.nchars - 1; i++) - bidi_move_to_visually_next (&it->bidi_it); - IT_CHARPOS (*it) = it->bidi_it.charpos; - IT_BYTEPOS (*it) = it->bidi_it.bytepos; - it->position = it->current.pos; - } } return 1; } @@ -21929,7 +21916,7 @@ g[1] = *g; glyph = it->glyph_row->glyphs[it->area]; } - glyph->charpos = CHARPOS (it->position); + glyph->charpos = it->cmp_it.charpos; glyph->object = it->object; glyph->pixel_width = it->pixel_width; glyph->ascent = it->ascent;