changeset 107592:e6df672626c1

Retrospective commit from 2009-09-27. Support character mirroring. Support iteration that starts in the middle of a line. Misc cleanups. xdisp.c (next_element_from_buffer): If called not at line beginning, start bidi iteration from line beginning. bidi.c (bidi_paragraph_init): Use bidi_overriding_paragraph_direction instead of a literal zero. (bidi_initialize): Fix some character types, per Unicode 5.x. (bidi_get_type): Abort if called with invalid character code. dispextern.h: Add prototype of bidi_mirror_char. xdisp.c (get_next_display_element): Mirror characters whose resolved type is STRONG_R.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 01 Jan 2010 06:01:34 -0500
parents 86eec24bee2c
children a551e4109c04
files src/ChangeLog.bidi src/bidi.c src/dispextern.h src/xdisp.c
diffstat 4 files changed, 84 insertions(+), 41 deletions(-) [+]
line wrap: on
line diff
--- a/src/ChangeLog.bidi	Fri Jan 01 05:57:17 2010 -0500
+++ b/src/ChangeLog.bidi	Fri Jan 01 06:01:34 2010 -0500
@@ -1,3 +1,18 @@
+2009-09-27  Eli Zaretskii  <eliz@gnu.org>
+
+	* xdisp.c (next_element_from_buffer): If called not at line
+	beginning, start bidi iteration from line beginning.
+
+	* bidi.c (bidi_paragraph_init): Use
+	bidi_overriding_paragraph_direction instead of a literal zero.
+	(bidi_initialize): Fix some character types, per Unicode 5.x.
+	(bidi_get_type): Abort if called with invalid character code.
+
+	* dispextern.h: Add prototype of bidi_mirror_char.
+
+	* xdisp.c (get_next_display_element): Mirror characters whose
+	resolved type is STRONG_R.
+
 2009-09-26  Eli Zaretskii  <eliz@gnu.org>
 
 	* bidi.c (bidi_paragraph_init): Don't set bidi_it->ch_len.  Abort
--- a/src/bidi.c	Fri Jan 01 05:57:17 2010 -0500
+++ b/src/bidi.c	Fri Jan 01 06:01:34 2010 -0500
@@ -99,14 +99,6 @@
 /* FIXME: Should be user-definable.  */
 bidi_dir_t bidi_overriding_paragraph_direction = L2R;
 
-/* FIXME: Unused? */
-#define ASCII_BIDI_TYPE_SET(STR, TYPE)			\
-  do {							\
-    unsigned char *p;					\
-    for (p = (STR); *p; p++)				\
-      CHAR_TABLE_SET (bidi_type_table, *p, (TYPE));	\
-  } while (0)
-
 static void
 bidi_initialize ()
 {
@@ -128,11 +120,10 @@
 	{ 0x0021, 0x0022, NEUTRAL_ON },
 	{ 0x0023, 0x0025, WEAK_ET },
 	{ 0x0026, 0x002A, NEUTRAL_ON },
-	{ 0x002B, 0x0000, WEAK_ET },
+	{ 0x002B, 0x0000, WEAK_ES },
 	{ 0x002C, 0x0000, WEAK_CS },
-	{ 0x002D, 0x0000, WEAK_ET },
-	{ 0x002E, 0x0000, WEAK_CS },
-	{ 0x002F, 0x0000, WEAK_ES },
+	{ 0x002D, 0x0000, WEAK_ES },
+	{ 0x002E, 0x002F, WEAK_CS },
 	{ 0x0030, 0x0039, WEAK_EN },
 	{ 0x003A, 0x0000, WEAK_CS },
 	{ 0x003B, 0x0040, NEUTRAL_ON },
@@ -145,7 +136,9 @@
 	{ 0x00A1, 0x0000, NEUTRAL_ON },
 	{ 0x00A2, 0x00A5, WEAK_ET },
 	{ 0x00A6, 0x00A9, NEUTRAL_ON },
-	{ 0x00AB, 0x00AF, NEUTRAL_ON },
+	{ 0x00AB, 0x00AC, NEUTRAL_ON },
+	{ 0x00AD, 0x0000, WEAK_BN },
+	{ 0x00AE, 0x00Af, NEUTRAL_ON },
 	{ 0x00B0, 0x00B1, WEAK_ET },
 	{ 0x00B2, 0x00B3, WEAK_EN },
 	{ 0x00B4, 0x0000, NEUTRAL_ON },
@@ -171,7 +164,9 @@
 	{ 0x05C0, 0x0000, STRONG_R },
 	{ 0x05C1, 0x05C2, WEAK_NSM },
 	{ 0x05C3, 0x0000, STRONG_R },
-	{ 0x05C4, 0x0000, WEAK_NSM },
+	{ 0x05C4, 0x05C5, WEAK_NSM },
+	{ 0x05C6, 0x0000, STRONG_R },
+	{ 0x05C7, 0x0000, WEAK_NSM },
 	{ 0x05D0, 0x05F4, STRONG_R },
 	{ 0x060C, 0x0000, WEAK_CS },
 	{ 0x061B, 0x064A, STRONG_AL },
@@ -400,18 +395,14 @@
   bidi_initialized = 1;
 }
 
-static int
-bidi_is_arabic_number (int ch)
-{
-  return 0;	/* FIXME! */
-}
-
 /* Return the bidi type of a character CH.  */
 bidi_type_t
 bidi_get_type (int ch)
 {
   if (ch == BIDI_EOB)
     return NEUTRAL_B;
+  if (ch < 0 || ch > MAX_CHAR)
+    abort ();
   return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
 }
 
@@ -457,6 +448,10 @@
     }
 }
 
+/* Return the mirrored character of C, if any.
+
+   Note: The conditions in UAX#9 clause L4 must be tested by the
+   caller.  */
 /* FIXME: exceedingly temporary!  Should consult the Unicode database
    of character properties.  */
 int
@@ -722,7 +717,7 @@
      that we find on the two sides of the level boundary (see UAX#9,
      clause X10), and so we don't need to know the final embedding
      level to which we descend after processing all the PDFs.  */
-  if (level_before < level_after || !bidi_it->prev_was_pdf)
+  if (!bidi_it->prev_was_pdf || level_before < level_after)
     /* FIXME: should the default sor direction be user selectable?  */
     bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
   if (level_before > level_after)
@@ -742,8 +737,7 @@
 void
 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
 {
-  int pos = bidi_it->charpos, bytepos = bidi_it->bytepos;
-  int ch, ch_len;
+  int bytepos = bidi_it->bytepos;
 
   /* We should never be called at EOB or before BEGV.  */
   if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
@@ -756,20 +750,16 @@
 	|| FETCH_CHAR (bytepos - 1) == '\n'))
     abort ();
 
-  ch = FETCH_CHAR (bytepos);
-  ch_len = CHAR_BYTES (ch);
   bidi_it->level_stack[0].level = 0; /* default for L2R */
   if (dir == R2L)
     bidi_it->level_stack[0].level = 1;
   else if (dir == NEUTRAL_DIR)	/* P2 */
     {
-      bidi_type_t type;
+      int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch);
+      int pos = bidi_it->charpos;
+      bidi_type_t type = bidi_get_type (ch);
 
-      /* FIXME: should actually go to where the paragraph begins and
-	 start the loop below from there, since UAX#9 says to find the
-	 first strong directional character in the paragraph.  */
-
-      for (type = bidi_get_type (ch), pos++, bytepos += ch_len;
+      for (pos++, bytepos += ch_len;
 	   /* NOTE: UAX#9 says to search only for L, AL, or R types of
 	      characters, and ignore RLE, RLO, LRE, and LRO.  However,
 	      I'm not sure it makes sense to omit those 4; should try
@@ -795,7 +785,8 @@
   bidi_it->new_paragraph = 0;
   bidi_it->next_en_pos = -1;
   bidi_it->next_for_ws.type = UNKNOWN_BT;
-  bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */
+  bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction,
+		     bidi_it->level_stack[0].level); /* X10 */
 
   bidi_cache_reset ();
 }
--- a/src/dispextern.h	Fri Jan 01 05:57:17 2010 -0500
+++ b/src/dispextern.h	Fri Jan 01 06:01:34 2010 -0500
@@ -2801,6 +2801,7 @@
 extern void bidi_init_it P_ ((int, int, struct bidi_it *));
 extern void bidi_get_next_char_visually P_ ((struct bidi_it *));
 extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *));
+extern int  bidi_mirror_char P_ ((int));
 
 /* Defined in xdisp.c */
 
--- a/src/xdisp.c	Fri Jan 01 05:57:17 2010 -0500
+++ b/src/xdisp.c	Fri Jan 01 06:01:34 2010 -0500
@@ -5682,6 +5682,13 @@
 
   if (it->what == IT_CHARACTER)
     {
+      /* UAX#9, L4: "A character is depicted by a mirrored glyph if
+	 and only if (a) the resolved directionality of that character
+	 is R..."  */
+      /* FIXME: Do we need an exception for characters from display
+	 tables?  */
+      if (it->bidi_p && it->bidi_it.type == STRONG_R)
+	it->c = bidi_mirror_char (it->c);
       /* Map via display table or translate control characters.
 	 IT->c, IT->len etc. have been set to the next character by
 	 the function call above.  If we have a display table, and it
@@ -6505,17 +6512,46 @@
      the character at IT_CHARPOS.  */
   if (it->bidi_p && it->bidi_it.first_elt)
     {
-      /* FIXME: L2R below is just for easyness of testing, as we
-	 currently support only left-to-right paragraphs.  The value
-	 should be user-definable and/or come from some ``higher
-	 protocol''. In the absence of any other guidance, the default
-	 for this initialization should be NEUTRAL_DIR.  */
       it->bidi_it.charpos = IT_CHARPOS (*it);
       it->bidi_it.bytepos = IT_BYTEPOS (*it);
-      bidi_paragraph_init (L2R, &it->bidi_it);
-      bidi_get_next_char_visually (&it->bidi_it);
-      it->bidi_it.first_elt = 0;
-      /*  Adjust IT's position information to where we moved.  */
+      /* If we are at the beginning of a line, we can produce the next
+	 element right away.  */
+      if (it->bidi_it.bytepos == BEGV_BYTE
+	  /* FIXME: Should support all Unicode line separators.  */
+	  || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n'
+	  || FETCH_CHAR (it->bidi_it.bytepos) == '\n')
+	{
+	  /* FIXME: L2R below is just for ease of testing, as we
+	     currently support only left-to-right paragraphs.  The
+	     value should be user-definable and/or come from some
+	     ``higher protocol''. In the absence of any other
+	     guidance, the default for this initialization should be
+	     NEUTRAL_DIR.  */
+	  bidi_paragraph_init (L2R, &it->bidi_it);
+	  bidi_get_next_char_visually (&it->bidi_it);
+	}
+      else
+	{
+	  int orig_bytepos = IT_BYTEPOS (*it);
+
+	  /* We need to prime the bidi iterator starting at the line's
+	     beginning, before we will be able to produce the next
+	     element.  */
+	  IT_CHARPOS (*it) = find_next_newline_no_quit (IT_CHARPOS (*it), -1);
+	  IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it));
+	  it->bidi_it.charpos = IT_CHARPOS (*it);
+	  it->bidi_it.bytepos = IT_BYTEPOS (*it);
+	  bidi_paragraph_init (L2R, &it->bidi_it);
+	  do {
+	    /* Now return to buffer position where we were asked to
+	       get the next display element, and produce that.  */
+	    bidi_get_next_char_visually (&it->bidi_it);
+	  } while (it->bidi_it.bytepos != orig_bytepos
+		   && it->bidi_it.bytepos < ZV_BYTE);
+	}
+
+      it->bidi_it.first_elt = 0; /* paranoia: bidi.c does this */
+      /*  Adjust IT's position information to where we ended up.  */
       IT_CHARPOS (*it) = it->bidi_it.charpos;
       IT_BYTEPOS (*it) = it->bidi_it.bytepos;
       SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it));