changeset 107594:40b49fa464cf

Retrospective commit from 2009-10-04. Continue working on determining paragraph's base direction. bidi.c (bidi_at_paragraph_end): Check for paragraph-start if paragraph-separate failed to match. Return the length of the matched separator. (bidi_line_init): New function. (bidi_paragraph_init): Use bidi_line_init. Do nothing if in the middle of a paragraph-separate sequence. Don't override existing paragraph direction if no strong characters found in this paragraph. Set separator_limit according to what bidi_at_paragraph_end returns. Reset new_paragraph flag when a new paragraph is found. (bidi_init_it): Reset separator_limit. dispextern.h (struct bidi_it): New member separator_limit. bidi.c (bidi_find_paragraph_start): Return the byte position of the paragraph beginning. xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the new_paragraph flag is set in the bidi iterator. bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use the buffer-local value of paragraph-start and paragraph-separate.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 01 Jan 2010 06:17:13 -0500
parents a551e4109c04
children 69c12db7031d
files src/ChangeLog.bidi src/bidi.c src/dispextern.h src/xdisp.c
diffstat 4 files changed, 174 insertions(+), 50 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog.bidi	Fri Jan 01 06:06:48 2010 -0500
+++ b/src/ChangeLog.bidi	Fri Jan 01 06:17:13 2010 -0500
@@ -1,3 +1,28 @@
+2009-10-04  Eli Zaretskii  <eliz@gnu.org>
+
+	* bidi.c (bidi_at_paragraph_end): Check for paragraph-start if
+	paragraph-separate failed to match.  Return the length of the
+	matched separator.
+	(bidi_line_init): New function.
+	(bidi_paragraph_init): Use bidi_line_init.  Do nothing if in the
+	middle of a paragraph-separate sequence.  Don't override existing
+	paragraph direction if no strong characters found in this
+	paragraph.  Set separator_limit according to what
+	bidi_at_paragraph_end returns.  Reset new_paragraph flag when a
+	new paragraph is found.
+	(bidi_init_it): Reset separator_limit.
+
+	* dispextern.h (struct bidi_it): New member separator_limit.
+
+	* bidi.c (bidi_find_paragraph_start): Return the byte position of
+	the paragraph beginning.
+
+	* xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the
+	new_paragraph flag is set in the bidi iterator.
+
+	* bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use
+	the buffer-local value of paragraph-start and paragraph-separate.
+
 2009-10-03  Eli Zaretskii  <eliz@gnu.org>
 
 	* bidi.c (bidi_set_paragraph_end): Don't set the new_paragraph
--- a/src/bidi.c	Fri Jan 01 06:06:48 2010 -0500
+++ b/src/bidi.c	Fri Jan 01 06:17:13 2010 -0500
@@ -733,17 +733,35 @@
   return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
 }
 
-/* Return non-zero if buffer's byte position POS is the end of a
-   paragraph.  */
-int
+/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
+   Value is the non-negative length of the paragraph separator
+   following the buffer position, -1 if position is at the beginning
+   of a new paragraph, or -2 if position is neither at beginning nor
+   at end of a paragraph.  */
+EMACS_INT
 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
 {
-  Lisp_Object re = XSYMBOL (Qparagraph_separate)->value;
+  Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate,
+					    Fcurrent_buffer ());
+  Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start,
+					      Fcurrent_buffer ());
+  EMACS_INT val;
+
+  if (!STRINGP (sep_re))
+    sep_re = fallback_paragraph_separate_re;
+  if (!STRINGP (start_re))
+    start_re = fallback_paragraph_start_re;
 
-  if (!STRINGP (re))
-    re = fallback_paragraph_separate_re;
+  val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
+  if (val < 0)
+    {
+      if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
+	val = -1;
+      else
+	val = -2;
+    }
 
-  return fast_looking_at (re, charpos, bytepos, ZV, ZV_BYTE, Qnil) > 0;
+  return val;
 }
 
 /* Determine the start-of-run (sor) directional type given the two
@@ -779,12 +797,28 @@
   bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
 }
 
-/* Find the beginning of this paragraph by looking back in the
-   buffer.  */
 static void
+bidi_line_init (struct bidi_it *bidi_it)
+{
+  bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
+  bidi_it->resolved_level = bidi_it->level_stack[0].level;
+  bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
+  bidi_it->invalid_levels = 0;
+  bidi_it->invalid_rl_levels = -1;
+  bidi_it->next_en_pos = -1;
+  bidi_it->next_for_ws.type = UNKNOWN_BT;
+  bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir,
+		     bidi_it->level_stack[0].level); /* X10 */
+
+  bidi_cache_reset ();
+}
+
+/* Find the beginning of this paragraph by looking back in the buffer.
+   Value is the byte position of the paragraph's beginning.  */
+static EMACS_INT
 bidi_find_paragraph_start (struct bidi_it *bidi_it)
 {
-  Lisp_Object re = XSYMBOL (Qparagraph_start)->value;
+  Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ());
   EMACS_INT pos = bidi_it->charpos;
   EMACS_INT pos_byte = bidi_it->bytepos;
   EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
@@ -794,10 +828,14 @@
   while (pos_byte > BEGV_BYTE
 	 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
     {
-      find_next_newline_no_quit (pos, -1);
+      pos = find_next_newline_no_quit (pos - 1, -1);
+      pos_byte = CHAR_TO_BYTE (pos);
     }
+  return pos_byte;
 }
 
+/* Determine the direction, a.k.a. base embedding level, of the
+   paragraph we are about to iterate through.  */
 void
 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
 {
@@ -807,18 +845,41 @@
   if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
     abort ();
 
-  bidi_it->level_stack[0].level = 0; /* default for L2R */
-  bidi_it->paragraph_dir = L2R;
-  if (dir == R2L)
-    bidi_it->level_stack[0].level = 1;
+  if (dir == L2R)
+    {
+      bidi_it->paragraph_dir = L2R;
+      bidi_it->new_paragraph = 0;
+    }
+  else if (dir == R2L)
+    {
+      bidi_it->paragraph_dir = R2L;
+      bidi_it->new_paragraph = 0;
+    }
   else if (dir == NEUTRAL_DIR)	/* P2 */
     {
       int ch, ch_len;
       EMACS_INT pos;
       bidi_type_t type;
+      EMACS_INT sep_len;
 
-      /* Search back to where this paragraph starts.  */
-      bidi_find_paragraph_start (bidi_it);
+      /* If we are inside a paragraph separator, we are just waiting
+	 for the separator to be exhausted; use the previous paragraph
+	 direction.  */
+      if (bidi_it->charpos < bidi_it->separator_limit)
+	return;
+
+      /* If we are before another paragraph separator, continue
+	 through that with the previous paragraph direction.  */
+      sep_len = bidi_at_paragraph_end (bidi_it->charpos, bytepos);
+      if (sep_len >= 0)
+	{
+	  bidi_it->separator_limit += sep_len + 1;
+	  return;
+	}
+      else if (sep_len == -2)
+	/* We are in the middle of a paragraph.  Search back to where
+	   this paragraph starts.  */
+	bytepos = bidi_find_paragraph_start (bidi_it);
 
       /* We should always be at the beginning of a new line at this
 	 point.  */
@@ -827,9 +888,11 @@
 	    || FETCH_CHAR (bytepos - 1) == '\n'))
 	abort ();
 
+      bidi_it->separator_limit = -1;
+      bidi_it->new_paragraph = 0;
       ch = FETCH_CHAR (bytepos);
       ch_len = CHAR_BYTES (ch);
-      pos = bidi_it->charpos;
+      pos = BYTE_TO_CHAR (bytepos);
       type = bidi_get_type (ch, NEUTRAL_DIR);
 
       for (pos++, bytepos += ch_len;
@@ -843,27 +906,28 @@
 		     || type == LRE || type == LRO));
 	   type = bidi_get_type (ch, NEUTRAL_DIR))
 	{
-	  if (type == NEUTRAL_B || bidi_at_paragraph_end (pos, bytepos))
+	  if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
 	    break;
 	  FETCH_CHAR_ADVANCE (ch, pos, bytepos);
 	}
       if (type == STRONG_R || type == STRONG_AL) /* P3 */
-	bidi_it->level_stack[0].level = 1;
+	bidi_it->paragraph_dir = R2L;
+      else if (type == STRONG_L)
+	bidi_it->paragraph_dir = L2R;
     }
-  if (bidi_it->level_stack[0].level == 1)
-    bidi_it->paragraph_dir = R2L;
-  bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
-  bidi_it->resolved_level = bidi_it->level_stack[0].level;
-  bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
-  bidi_it->invalid_levels = 0;
-  bidi_it->invalid_rl_levels = -1;
-  bidi_it->new_paragraph = 0;
-  bidi_it->next_en_pos = -1;
-  bidi_it->next_for_ws.type = UNKNOWN_BT;
-  bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir,
-		     bidi_it->level_stack[0].level); /* X10 */
+  else
+    abort ();
 
-  bidi_cache_reset ();
+  /* Contrary to UAX#9 clause P3, we only default to L2R if we have no
+     previous usable paragraph direction.  */
+  if (bidi_it->paragraph_dir == NEUTRAL_DIR)
+    bidi_it->paragraph_dir = L2R; /* P3 */
+  if (bidi_it->paragraph_dir == R2L)
+    bidi_it->level_stack[0].level = 1;	/* base embedding level for R2L */
+  else
+    bidi_it->level_stack[0].level = 0;	/* base embedding level for L2R */
+
+  bidi_line_init (bidi_it);
 }
 
 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
@@ -888,6 +952,7 @@
   bidi_it->first_elt = 1;
   bidi_set_paragraph_end (bidi_it);
   bidi_it->new_paragraph = 1;
+  bidi_it->separator_limit = -1;
   bidi_it->type = NEUTRAL_B;
   bidi_it->type_after_w1 = UNKNOWN_BT;
   bidi_it->orig_type = UNKNOWN_BT;
@@ -1802,6 +1867,10 @@
       bidi_it->scan_dir = 1;	/* default to logical order */
     }
 
+  /* If we just passed a newline, initialize for the next line.  */
+  if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
+    bidi_line_init (bidi_it);
+
   /* Prepare the sentinel iterator state.  */
   if (bidi_cache_idx == 0)
     {
@@ -1875,14 +1944,23 @@
     }
 
   /* Take note when we are at the end of the paragraph.  The next time
-     we are about to be called, next_element_from_buffer will
+     we are about to be called, set_iterator_to_next will
      automatically reinit the paragraph direction, if needed.  */
   if (bidi_it->scan_dir == 1
-      && bidi_it->type == NEUTRAL_B
-      && bidi_it->bytepos < ZV_BYTE
-      && bidi_at_paragraph_end (bidi_it->charpos + 1,
-				bidi_it->bytepos + bidi_it->ch_len))
-    bidi_it->new_paragraph = 1;
+      && bidi_it->orig_type == NEUTRAL_B
+      && bidi_it->bytepos < ZV_BYTE)
+    {
+      EMACS_INT sep_len =
+	bidi_at_paragraph_end (bidi_it->charpos + 1,
+			       bidi_it->bytepos + bidi_it->ch_len);
+      if (sep_len >= 0)
+	{
+	  bidi_it->new_paragraph = 1;
+	  /* Record the buffer position of the first character after
+	     the paragraph separator.  */
+	  bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len + 1;
+	}
+    }
 
   if (bidi_it->scan_dir == 1 && bidi_cache_idx)
     {
--- a/src/dispextern.h	Fri Jan 01 06:06:48 2010 -0500
+++ b/src/dispextern.h	Fri Jan 01 06:17:13 2010 -0500
@@ -1772,7 +1772,8 @@
   int resolved_level;		/* final resolved level of this character */
   int invalid_levels;		/* how many PDFs to ignore */
   int invalid_rl_levels;	/* how many PDFs from RLE/RLO to ignore */
-  int new_paragraph;		/* if non-zero, a new paragraph begins here */
+  int new_paragraph;		/* if non-zero, we expect a new paragraph */
+  EMACS_INT separator_limit;	/* where paragraph separator should end */
   bidi_dir_t paragraph_dir;	/* current paragraph direction */
   int prev_was_pdf;		/* if non-zero, previous char was PDF */
   struct bidi_saved_info prev;	/* info about previous character */
--- a/src/xdisp.c	Fri Jan 01 06:06:48 2010 -0500
+++ b/src/xdisp.c	Fri Jan 01 06:17:13 2010 -0500
@@ -6103,6 +6103,10 @@
 	    }
 	  else
 	    {
+	      /* If this is a new paragraph, determine its base
+		 direction (a.k.a. its base embedding level).  */
+	      if (it->bidi_it.new_paragraph)
+		bidi_paragraph_init (NEUTRAL_DIR, &it->bidi_it);
 	      bidi_get_next_char_visually (&it->bidi_it);
 	      IT_BYTEPOS (*it) = it->bidi_it.bytepos;
 	      IT_CHARPOS (*it) = it->bidi_it.charpos;
@@ -6508,8 +6512,10 @@
 
   xassert (IT_CHARPOS (*it) >= BEGV);
 
-  /* With bidi reordering, the character to display might not be
-     the character at IT_CHARPOS.  */
+  /* With bidi reordering, the character to display might not be the
+     character at IT_CHARPOS.  BIDI_IT.FIRST_ELT non-zero means that
+     we were reseat()ed to a new buffer position, which is potentially
+     a different paragraph.  */
   if (it->bidi_p && it->bidi_it.first_elt)
     {
       it->bidi_it.charpos = IT_CHARPOS (*it);
@@ -6521,13 +6527,9 @@
 	  || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n'
 	  || FETCH_CHAR (it->bidi_it.bytepos) == '\n')
 	{
-	  /* FIXME: L2R below is just for easyness of testing, as we
-	     currently support only left-to-right paragraphs.  The
-	     value should be user-definable and/or come from some
-	     ``higher protocol''. In the absence of any other
-	     guidance, the default for this initialization should be
-	     NEUTRAL_DIR.  */
-	  bidi_paragraph_init (L2R, &it->bidi_it);
+	  /* FIXME: NEUTRAL_DIR below should be user-definable and/or
+	     come from some ``higher protocol''.  */
+	  bidi_paragraph_init (NEUTRAL_DIR, &it->bidi_it);
 	  bidi_get_next_char_visually (&it->bidi_it);
 	}
       else
@@ -6541,7 +6543,7 @@
 	  IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it));
 	  it->bidi_it.charpos = IT_CHARPOS (*it);
 	  it->bidi_it.bytepos = IT_BYTEPOS (*it);
-	  bidi_paragraph_init (L2R, &it->bidi_it);
+	  bidi_paragraph_init (NEUTRAL_DIR, &it->bidi_it);
 	  do {
 	    /* Now return to buffer position where we were asked to
 	       get the next display element, and produce that.  */
@@ -16314,6 +16316,7 @@
       Lisp_Object saved_object;
       enum display_element_type saved_what = it->what;
       int saved_face_id = it->face_id;
+      int text_len = it->glyph_row->used[TEXT_AREA];
 
       saved_object = it->object;
       saved_pos = it->position;
@@ -16330,6 +16333,23 @@
       while (it->current_x <= it->last_visible_x)
 	PRODUCE_GLYPHS (it);
 
+      /* If the paragraph base direction is right to left, reverse the
+	 glyphs of non-empty line.  */
+      if (it->bidi_p && it->bidi_it.level_stack[0].level == 1
+	  && text_len > 0)
+	{
+	  struct glyph *gleft = it->glyph_row->glyphs[TEXT_AREA];
+	  struct glyph *gright = gleft + it->glyph_row->used[TEXT_AREA] - 1;
+	  struct glyph tem;
+
+	  for ( ; gleft < gright; gleft++, gright--)
+	    {
+	      tem = *gleft;
+	      *gleft = *gright;
+	      *gright = tem;
+	    }
+	}
+
       /* Don't count these blanks really.  It would let us insert a left
 	 truncation glyph below and make us set the cursor on them, maybe.  */
       it->current_x = saved_x;