# HG changeset patch # User Eli Zaretskii # Date 1262344633 18000 # Node ID 40b49fa464cfeb7a1d31ea57d52f69c8417741d6 # Parent a551e4109c043b02cfe66108c83c170dcec74b06 Retrospective commit from 2009-10-04. Continue working on determining paragraph's base direction. bidi.c (bidi_at_paragraph_end): Check for paragraph-start if paragraph-separate failed to match. Return the length of the matched separator. (bidi_line_init): New function. (bidi_paragraph_init): Use bidi_line_init. Do nothing if in the middle of a paragraph-separate sequence. Don't override existing paragraph direction if no strong characters found in this paragraph. Set separator_limit according to what bidi_at_paragraph_end returns. Reset new_paragraph flag when a new paragraph is found. (bidi_init_it): Reset separator_limit. dispextern.h (struct bidi_it): New member separator_limit. bidi.c (bidi_find_paragraph_start): Return the byte position of the paragraph beginning. xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the new_paragraph flag is set in the bidi iterator. bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use the buffer-local value of paragraph-start and paragraph-separate. diff -r a551e4109c04 -r 40b49fa464cf src/ChangeLog.bidi --- a/src/ChangeLog.bidi Fri Jan 01 06:06:48 2010 -0500 +++ b/src/ChangeLog.bidi Fri Jan 01 06:17:13 2010 -0500 @@ -1,3 +1,28 @@ +2009-10-04 Eli Zaretskii + + * bidi.c (bidi_at_paragraph_end): Check for paragraph-start if + paragraph-separate failed to match. Return the length of the + matched separator. + (bidi_line_init): New function. + (bidi_paragraph_init): Use bidi_line_init. Do nothing if in the + middle of a paragraph-separate sequence. Don't override existing + paragraph direction if no strong characters found in this + paragraph. Set separator_limit according to what + bidi_at_paragraph_end returns. Reset new_paragraph flag when a + new paragraph is found. + (bidi_init_it): Reset separator_limit. 
+ + * dispextern.h (struct bidi_it): New member separator_limit. + + * bidi.c (bidi_find_paragraph_start): Return the byte position of + the paragraph beginning. + + * xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the + new_paragraph flag is set in the bidi iterator. + + * bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use + the buffer-local value of paragraph-start and paragraph-separate. + 2009-10-03 Eli Zaretskii * bidi.c (bidi_set_paragraph_end): Don't set the new_paragraph diff -r a551e4109c04 -r 40b49fa464cf src/bidi.c --- a/src/bidi.c Fri Jan 01 06:06:48 2010 -0500 +++ b/src/bidi.c Fri Jan 01 06:17:13 2010 -0500 @@ -733,17 +733,35 @@ return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; } -/* Return non-zero if buffer's byte position POS is the end of a - paragraph. */ -int +/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. + Value is the non-negative length of the paragraph separator + following the buffer position, -1 if position is at the beginning + of a new paragraph, or -2 if position is neither at beginning nor + at end of a paragraph. 
*/ +EMACS_INT bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) { - Lisp_Object re = XSYMBOL (Qparagraph_separate)->value; + Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate, + Fcurrent_buffer ()); + Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start, + Fcurrent_buffer ()); + EMACS_INT val; + + if (!STRINGP (sep_re)) + sep_re = fallback_paragraph_separate_re; + if (!STRINGP (start_re)) + start_re = fallback_paragraph_start_re; - if (!STRINGP (re)) - re = fallback_paragraph_separate_re; + val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); + if (val < 0) + { + if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0) + val = -1; + else + val = -2; + } - return fast_looking_at (re, charpos, bytepos, ZV, ZV_BYTE, Qnil) > 0; + return val; } /* Determine the start-of-run (sor) directional type given the two @@ -779,12 +797,28 @@ bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ } -/* Find the beginning of this paragraph by looking back in the - buffer. */ static void +bidi_line_init (struct bidi_it *bidi_it) +{ + bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ + bidi_it->resolved_level = bidi_it->level_stack[0].level; + bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ + bidi_it->invalid_levels = 0; + bidi_it->invalid_rl_levels = -1; + bidi_it->next_en_pos = -1; + bidi_it->next_for_ws.type = UNKNOWN_BT; + bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir, + bidi_it->level_stack[0].level); /* X10 */ + + bidi_cache_reset (); +} + +/* Find the beginning of this paragraph by looking back in the buffer. + Value is the byte position of the paragraph's beginning. 
*/ +static EMACS_INT bidi_find_paragraph_start (struct bidi_it *bidi_it) { - Lisp_Object re = XSYMBOL (Qparagraph_start)->value; + Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ()); EMACS_INT pos = bidi_it->charpos; EMACS_INT pos_byte = bidi_it->bytepos; EMACS_INT limit = ZV, limit_byte = ZV_BYTE; @@ -794,10 +828,14 @@ while (pos_byte > BEGV_BYTE && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) { - find_next_newline_no_quit (pos, -1); + pos = find_next_newline_no_quit (pos - 1, -1); + pos_byte = CHAR_TO_BYTE (pos); } + return pos_byte; } +/* Determine the direction, a.k.a. base embedding level, of the + paragraph we are about to iterate through. */ void bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) { @@ -807,18 +845,41 @@ if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) abort (); - bidi_it->level_stack[0].level = 0; /* default for L2R */ - bidi_it->paragraph_dir = L2R; - if (dir == R2L) - bidi_it->level_stack[0].level = 1; + if (dir == L2R) + { + bidi_it->paragraph_dir = L2R; + bidi_it->new_paragraph = 0; + } + else if (dir == R2L) + { + bidi_it->paragraph_dir = R2L; + bidi_it->new_paragraph = 0; + } else if (dir == NEUTRAL_DIR) /* P2 */ { int ch, ch_len; EMACS_INT pos; bidi_type_t type; + EMACS_INT sep_len; - /* Search back to where this paragraph starts. */ - bidi_find_paragraph_start (bidi_it); + /* If we are inside a paragraph separator, we are just waiting + for the separator to be exhausted; use the previous paragraph + direction. */ + if (bidi_it->charpos < bidi_it->separator_limit) + return; + + /* If we are before another paragraph separator, continue + through that with the previous paragraph direction. */ + sep_len = bidi_at_paragraph_end (bidi_it->charpos, bytepos); + if (sep_len >= 0) + { + bidi_it->separator_limit += sep_len + 1; + return; + } + else if (sep_len == -2) + /* We are in the middle of a paragraph. Search back to where + this paragraph starts. 
*/ + bytepos = bidi_find_paragraph_start (bidi_it); /* We should always be at the beginning of a new line at this point. */ @@ -827,9 +888,11 @@ || FETCH_CHAR (bytepos - 1) == '\n')) abort (); + bidi_it->separator_limit = -1; + bidi_it->new_paragraph = 0; ch = FETCH_CHAR (bytepos); ch_len = CHAR_BYTES (ch); - pos = bidi_it->charpos; + pos = BYTE_TO_CHAR (bytepos); type = bidi_get_type (ch, NEUTRAL_DIR); for (pos++, bytepos += ch_len; @@ -843,27 +906,28 @@ || type == LRE || type == LRO)); type = bidi_get_type (ch, NEUTRAL_DIR)) { - if (type == NEUTRAL_B || bidi_at_paragraph_end (pos, bytepos)) + if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1) break; FETCH_CHAR_ADVANCE (ch, pos, bytepos); } if (type == STRONG_R || type == STRONG_AL) /* P3 */ - bidi_it->level_stack[0].level = 1; + bidi_it->paragraph_dir = R2L; + else if (type == STRONG_L) + bidi_it->paragraph_dir = L2R; } - if (bidi_it->level_stack[0].level == 1) - bidi_it->paragraph_dir = R2L; - bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ - bidi_it->resolved_level = bidi_it->level_stack[0].level; - bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ - bidi_it->invalid_levels = 0; - bidi_it->invalid_rl_levels = -1; - bidi_it->new_paragraph = 0; - bidi_it->next_en_pos = -1; - bidi_it->next_for_ws.type = UNKNOWN_BT; - bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir, - bidi_it->level_stack[0].level); /* X10 */ + else + abort (); - bidi_cache_reset (); + /* Contrary to UAX#9 clause P3, we only default to L2R if we have no + previous usable paragraph direction. 
*/ + if (bidi_it->paragraph_dir == NEUTRAL_DIR) + bidi_it->paragraph_dir = L2R; /* P3 */ + if (bidi_it->paragraph_dir == R2L) + bidi_it->level_stack[0].level = 1; /* R2L base level */ + else + bidi_it->level_stack[0].level = 0; /* L2R base level */ + + bidi_line_init (bidi_it); } /* Do whatever UAX#9 clause X8 says should be done at paragraph's @@ -888,6 +952,7 @@ bidi_it->first_elt = 1; bidi_set_paragraph_end (bidi_it); bidi_it->new_paragraph = 1; + bidi_it->separator_limit = -1; bidi_it->type = NEUTRAL_B; bidi_it->type_after_w1 = UNKNOWN_BT; bidi_it->orig_type = UNKNOWN_BT; @@ -1802,6 +1867,10 @@ bidi_it->scan_dir = 1; /* default to logical order */ } + /* If we just passed a newline, initialize for the next line. */ + if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B) + bidi_line_init (bidi_it); + /* Prepare the sentinel iterator state. */ if (bidi_cache_idx == 0) { @@ -1875,14 +1944,23 @@ } /* Take note when we are at the end of the paragraph. The next time - we are about to be called, next_element_from_buffer will + we are about to be called, set_iterator_to_next will automatically reinit the paragraph direction, if needed. */ if (bidi_it->scan_dir == 1 - && bidi_it->type == NEUTRAL_B - && bidi_it->bytepos < ZV_BYTE - && bidi_at_paragraph_end (bidi_it->charpos + 1, - bidi_it->bytepos + bidi_it->ch_len)) - bidi_it->new_paragraph = 1; + && bidi_it->orig_type == NEUTRAL_B + && bidi_it->bytepos < ZV_BYTE) + { + EMACS_INT sep_len = + bidi_at_paragraph_end (bidi_it->charpos + 1, + bidi_it->bytepos + bidi_it->ch_len); + if (sep_len >= 0) + { + bidi_it->new_paragraph = 1; + /* Record the buffer position of the first character after + the paragraph separator. 
*/ + bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len + 1; + } + } if (bidi_it->scan_dir == 1 && bidi_cache_idx) { diff -r a551e4109c04 -r 40b49fa464cf src/dispextern.h --- a/src/dispextern.h Fri Jan 01 06:06:48 2010 -0500 +++ b/src/dispextern.h Fri Jan 01 06:17:13 2010 -0500 @@ -1772,7 +1772,8 @@ int resolved_level; /* final resolved level of this character */ int invalid_levels; /* how many PDFs to ignore */ int invalid_rl_levels; /* how many PDFs from RLE/RLO to ignore */ - int new_paragraph; /* if non-zero, a new paragraph begins here */ + int new_paragraph; /* if non-zero, we expect a new paragraph */ + EMACS_INT separator_limit; /* where paragraph separator should end */ bidi_dir_t paragraph_dir; /* current paragraph direction */ int prev_was_pdf; /* if non-zero, previous char was PDF */ struct bidi_saved_info prev; /* info about previous character */ diff -r a551e4109c04 -r 40b49fa464cf src/xdisp.c --- a/src/xdisp.c Fri Jan 01 06:06:48 2010 -0500 +++ b/src/xdisp.c Fri Jan 01 06:17:13 2010 -0500 @@ -6103,6 +6103,10 @@ } else { + /* If this is a new paragraph, determine its base + direction (a.k.a. its base embedding level). */ + if (it->bidi_it.new_paragraph) + bidi_paragraph_init (NEUTRAL_DIR, &it->bidi_it); bidi_get_next_char_visually (&it->bidi_it); IT_BYTEPOS (*it) = it->bidi_it.bytepos; IT_CHARPOS (*it) = it->bidi_it.charpos; @@ -6508,8 +6512,10 @@ xassert (IT_CHARPOS (*it) >= BEGV); - /* With bidi reordering, the character to display might not be - the character at IT_CHARPOS. */ + /* With bidi reordering, the character to display might not be the + character at IT_CHARPOS. BIDI_IT.FIRST_ELT non-zero means that + we were reseat()ed to a new buffer position, which is potentially + a different paragraph. 
*/ if (it->bidi_p && it->bidi_it.first_elt) { it->bidi_it.charpos = IT_CHARPOS (*it); @@ -6521,13 +6527,9 @@ || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n' || FETCH_CHAR (it->bidi_it.bytepos) == '\n') { - /* FIXME: L2R below is just for easyness of testing, as we - currently support only left-to-right paragraphs. The - value should be user-definable and/or come from some - ``higher protocol''. In the absence of any other - guidance, the default for this initialization should be - NEUTRAL_DIR. */ - bidi_paragraph_init (L2R, &it->bidi_it); + /* FIXME: NEUTRAL_DIR below should be user-definable and/or + come from some ``higher protocol''. */ + bidi_paragraph_init (NEUTRAL_DIR, &it->bidi_it); bidi_get_next_char_visually (&it->bidi_it); } else @@ -6541,7 +6543,7 @@ IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it)); it->bidi_it.charpos = IT_CHARPOS (*it); it->bidi_it.bytepos = IT_BYTEPOS (*it); - bidi_paragraph_init (L2R, &it->bidi_it); + bidi_paragraph_init (NEUTRAL_DIR, &it->bidi_it); do { /* Now return to buffer position where we were asked to get the next display element, and produce that. */ @@ -16314,6 +16316,7 @@ Lisp_Object saved_object; enum display_element_type saved_what = it->what; int saved_face_id = it->face_id; + int text_len = it->glyph_row->used[TEXT_AREA]; saved_object = it->object; saved_pos = it->position; @@ -16330,6 +16333,23 @@ while (it->current_x <= it->last_visible_x) PRODUCE_GLYPHS (it); + /* If the paragraph base direction is right to left, reverse the + glyphs of non-empty line. */ + if (it->bidi_p && it->bidi_it.level_stack[0].level == 1 + && text_len > 0) + { + struct glyph *gleft = it->glyph_row->glyphs[TEXT_AREA]; + struct glyph *gright = gleft + it->glyph_row->used[TEXT_AREA] - 1; + struct glyph tem; + + for ( ; gleft < gright; gleft++, gright--) + { + tem = *gleft; + *gleft = *gright; + *gright = tem; + } + } + /* Don't count these blanks really. 
It would let us insert a left truncation glyph below and make us set the cursor on them, maybe. */ it->current_x = saved_x;