changeset 107590:ebdfe4d01cff

Retrospective commit from 2009-09-20. Handle positions at BEGV and ZV. More initialization cleanups. xdisp.c (reseat_1): Handle position < BEGV. bidi.c (bidi_paragraph_init): Set bidi_it->ch_len. Handle ZV. (bidi_init_it): Don't initialize bidi_it->ch_len. (bidi_resolve_explicit_1): Abort if bidi_it->ch_len was not initialized. (bidi_at_paragraph_end, bidi_resolve_explicit_1) (bidi_resolve_weak, bidi_level_of_next_char): Handle bytepos at ZV_BYTE. (bidi_resolve_explicit_1): Handle position < BEGV.
author Eli Zaretskii <eliz@gnu.org>
date Thu, 31 Dec 2009 16:31:54 -0500
parents 84267baa779b
children 86eec24bee2c
files src/ChangeLog.bidi src/bidi.c src/xdisp.c
diffstat 3 files changed, 71 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog.bidi	Thu Dec 31 16:20:15 2009 -0500
+++ b/src/ChangeLog.bidi	Thu Dec 31 16:31:54 2009 -0500
@@ -1,3 +1,16 @@
+2009-09-20  Eli Zaretskii  <eliz@gnu.org>
+
+	* xdisp.c (reseat_1): Handle position < BEGV.
+
+	* bidi.c (bidi_paragraph_init): Set bidi_it->ch_len.  Handle ZV.
+	(bidi_init_it): Don't initialize bidi_it->ch_len.
+	(bidi_resolve_explicit_1): Abort if bidi_it->ch_len was not
+	initialized.
+	(bidi_at_paragraph_end, bidi_resolve_explicit_1)
+	(bidi_resolve_weak, bidi_level_of_next_char): Handle bytepos at
+	ZV_BYTE.
+	(bidi_resolve_explicit_1): Handle position < BEGV.
+
 2009-09-19  Eli Zaretskii  <eliz@gnu.org>
 
 	* xdisp.c (init_iterator): Call bidi_init_it.  Set
--- a/src/bidi.c	Thu Dec 31 16:20:15 2009 -0500
+++ b/src/bidi.c	Thu Dec 31 16:31:54 2009 -0500
@@ -64,6 +64,7 @@
 
 static Lisp_Object bidi_type_table;
 
+/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table.  */
 #define LRM_CHAR   0x200E
 #define RLM_CHAR   0x200F
 #define LRE_CHAR   0x202A
@@ -72,15 +73,8 @@
 #define LRO_CHAR   0x202D
 #define RLO_CHAR   0x202E
 
-#define CHARSET_HEBREW   0x88
-#define CHARSET_ARABIC	 0x87
-#define CHARSET_SYRIAC	 -1	/* these are undefined yet, -1 is invalid */
-#define CHARSET_THAANA	 -1
-
-/* FIXME: need to define wrappers for FETCH_CHAR etc. that return
-   BIDI_EOB when they hit ZV.  */
 #define BIDI_EOB   -1
-#define BIDI_BOB   -2
+#define BIDI_BOB   -2		/* FIXME: Is this needed? */
 
 /* Local data structures.  (Look in dispextern.h for the rest.)  */
 
@@ -416,6 +410,8 @@
 bidi_type_t
 bidi_get_type (int ch)
 {
+  if (ch == BIDI_EOB)
+    return NEUTRAL_B;
   return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 }
 
@@ -695,11 +691,15 @@
 int
 bidi_at_paragraph_end (int this_ch, int pos)
 {
-  int next_ch = FETCH_CHAR (pos);
+  int next_ch;
 
+  if (pos >= ZV_BYTE)
+    return 1;
+
+  next_ch = FETCH_CHAR (pos);
   /* FIXME: This should support all Unicode characters that can end a
      paragraph.  */
-  return (this_ch == '\n' && next_ch == '\n') || this_ch == BIDI_EOB;
+  return (this_ch == '\n' && next_ch == '\n');
 }
 
 /* Determine the start-of-run (sor) directional type given the two
@@ -738,24 +738,27 @@
 void
 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
 {
-  bidi_it->level_stack[0].level = 0;
+  int pos = bidi_it->charpos, bytepos = bidi_it->bytepos;
+  int ch;
+
+  /* We should never be called at EOB.  */
+  if (bytepos >= ZV_BYTE)
+    abort ();
+
+  ch = FETCH_CHAR (bytepos);
+  bidi_it->ch_len = CHAR_BYTES (ch);
+  bidi_it->level_stack[0].level = 0; /* default for L2R */
   if (dir == R2L)
     bidi_it->level_stack[0].level = 1;
   else if (dir == NEUTRAL_DIR)	/* P2 */
     {
       bidi_type_t type;
-      int pos = bidi_it->charpos, bytepos = bidi_it->bytepos;
-      int ch;
-
-      ch = FETCH_CHAR (bytepos);
-      pos++;
-      bytepos += CHAR_BYTES (ch);
 
       /* FIXME: should actually go to where the paragraph begins and
 	 start the loop below from there, since UAX#9 says to find the
 	 first strong directional character in the paragraph.  */
 
-      for (type = bidi_get_type (ch);
+      for (type = bidi_get_type (ch), pos++, bytepos += bidi_it->ch_len;
 	   /* NOTE: UAX#9 says to search only for L, AL, or R types of
 	      characters, and ignore RLE, RLO, LRE, and LRO.  However,
 	      I'm not sure it makes sense to omit those 4; should try
@@ -807,7 +810,6 @@
   bidi_set_paragraph_end (bidi_it);
   bidi_it->charpos = charpos;
   bidi_it->bytepos = bytepos;
-  bidi_it->ch_len = 1;
   bidi_it->type = NEUTRAL_B;
   bidi_it->type_after_w1 = UNKNOWN_BT;
   bidi_it->orig_type = UNKNOWN_BT;
@@ -906,11 +908,16 @@
   int new_level;
   bidi_dir_t override;
 
-  if (bidi_it->charpos < 0)
-    bidi_it->charpos = bidi_it->bytepos = 0;
-  else
+  if (bidi_it->bytepos < BEGV_BYTE) /* after reseat to BEGV */
+    {
+      bidi_it->charpos = BEGV;
+      bidi_it->bytepos = BEGV_BYTE;
+    }
+  else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */
     {
       bidi_it->charpos++;
+      if (bidi_it->ch_len == 0)
+	abort ();
       bidi_it->bytepos += bidi_it->ch_len;
     }
 
@@ -920,9 +927,17 @@
 
   /* in case it is a unibyte character (not yet implemented) */
   /* _fetch_multibyte_char_len = 1; */
-  curchar = FETCH_CHAR (bidi_it->bytepos);
+  if (bidi_it->bytepos >= ZV_BYTE)
+    {
+      curchar = BIDI_EOB;
+      bidi_it->ch_len = 1;
+    }
+  else
+    {
+      curchar = FETCH_CHAR (bidi_it->bytepos);
+      bidi_it->ch_len = CHAR_BYTES (curchar);
+    }
   bidi_it->ch = curchar;
-  bidi_it->ch_len = CHAR_BYTES (curchar);
 
   type = bidi_get_type (curchar);
   bidi_it->orig_type = type;
@@ -1055,6 +1070,7 @@
   if (prev_level < new_level
       && bidi_it->type == WEAK_BN
       && bidi_it->ignore_bn_limit == 0 /* only if not already known */
+      && bidi_it->ch != BIDI_EOB       /* not already at EOB */
       && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos
 					     + bidi_it->ch_len)))
     {
@@ -1194,7 +1210,9 @@
 			|| bidi_it->prev.orig_type == WEAK_NSM)) /* a/W1 */
 		   || bidi_it->prev.type_after_w1 == WEAK_AN)))  /* W4 */
     {
-      next_char = FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+      next_char =
+	bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
+	? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
       type_of_next = bidi_get_type (next_char);
 
       if (type_of_next == WEAK_BN
@@ -1245,7 +1263,9 @@
 	{
 	  int en_pos = bidi_it->charpos + 1;
 
-	  next_char = FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
+	  next_char =
+	    bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE
+	    ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len);
 	  type_of_next = bidi_get_type (next_char);
 
 	  if (type_of_next == WEAK_ET
@@ -1568,11 +1588,11 @@
 
       do {
 	/*_fetch_multibyte_char_len = 1;*/
-	ch = FETCH_CHAR (bpos + clen);
+	ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen);
 	bpos += clen;
 	cpos++;
-	clen = CHAR_BYTES (ch);
-	if (ch == '\n' /* || ch == LINESEP_CHAR */)
+	clen = (ch == BIDI_EOB ? 1 : CHAR_BYTES (ch));
+	if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */)
 	  chtype = NEUTRAL_B;
 	else
 	  chtype = bidi_get_type (ch);
@@ -1615,13 +1635,14 @@
 	 ugly side effect does not happen.
 
 	 (This is, of course, only important if the formatting codes
-	 are actually displayed, but Emacs does display them if the
-	 user wants to.)     */
+	 are actually displayed, but Emacs does need to display them
+	 if the user wants to.)  */
       level = prev_level;
     }
   else if (bidi_it->orig_type == NEUTRAL_B /* L1 */
 	   || bidi_it->orig_type == NEUTRAL_S
-	   || bidi_it->ch == '\n' /* || bidi_it->ch == LINESEP_CHAR */
+	   || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB
+	   /* || bidi_it->ch == LINESEP_CHAR */
 	   || (bidi_it->orig_type == NEUTRAL_WS
 	       && (bidi_it->next_for_ws.type == NEUTRAL_B
 		   || bidi_it->next_for_ws.type == NEUTRAL_S)))
--- a/src/xdisp.c	Thu Dec 31 16:20:15 2009 -0500
+++ b/src/xdisp.c	Thu Dec 31 16:31:54 2009 -0500
@@ -5541,13 +5541,13 @@
 	 not be the character at POS.  We need to find the next
 	 character in visual order starting from the preceding
 	 character.  */
-      if ((it->bidi_it.charpos = CHARPOS (pos) - 1) > 1)
+      if ((it->bidi_it.charpos = CHARPOS (pos) - 1) >= BEGV)
 	{
 	  it->bidi_it.bytepos = CHAR_TO_BYTE (CHARPOS (pos) - 1);
-	  it->bidi_it.ch_len = CHAR_BYTES (CHARPOS (pos) - 1);
-	}
-      else
-	it->bidi_it.bytepos = 0;
+	  it->bidi_it.ch_len = CHAR_BYTES (FETCH_CHAR (it->bidi_it.bytepos));
+	}
+      else
+	it->bidi_it.bytepos = 0; /* signal bidi.c not to move */
       bidi_get_next_char_visually (&it->bidi_it);
       SET_TEXT_POS (pos, it->bidi_it.charpos, it->bidi_it.bytepos);
       it->current.pos = it->position = pos;