comparison src/bidi.c @ 107592:e6df672626c1

Retrospective commit from 2009-09-27.
Support character mirroring.
Support iteration that starts in the middle of a line.
Misc cleanups.
 xdisp.c (next_element_from_buffer): If called not at line beginning, start bidi iteration from line beginning.
 bidi.c (bidi_paragraph_init): Use bidi_overriding_paragraph_direction instead of a literal zero.
 (bidi_initialize): Fix some character types, per Unicode 5.x.
 (bidi_get_type): Abort if called with invalid character code.
 dispextern.h: Add prototype of bidi_mirror_char.
 xdisp.c (get_next_display_element): Mirror characters whose resolved type is STRONG_R.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 01 Jan 2010 06:01:34 -0500
parents 86eec24bee2c
children a551e4109c04
comparison
equal deleted inserted replaced
107591:86eec24bee2c 107592:e6df672626c1
96 96
97 int bidi_ignore_explicit_marks_for_paragraph_level = 1; 97 int bidi_ignore_explicit_marks_for_paragraph_level = 1;
98 98
99 /* FIXME: Should be user-definable. */ 99 /* FIXME: Should be user-definable. */
100 bidi_dir_t bidi_overriding_paragraph_direction = L2R; 100 bidi_dir_t bidi_overriding_paragraph_direction = L2R;
101
102 /* FIXME: Unused? */
103 #define ASCII_BIDI_TYPE_SET(STR, TYPE) \
104 do { \
105 unsigned char *p; \
106 for (p = (STR); *p; p++) \
107 CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \
108 } while (0)
109 101
110 static void 102 static void
111 bidi_initialize () 103 bidi_initialize ()
112 { 104 {
113 /* FIXME: This should come from the Unicode Database. */ 105 /* FIXME: This should come from the Unicode Database. */
126 { 0x001F, 0x0000, NEUTRAL_S }, 118 { 0x001F, 0x0000, NEUTRAL_S },
127 { 0x0020, 0x0000, NEUTRAL_WS }, 119 { 0x0020, 0x0000, NEUTRAL_WS },
128 { 0x0021, 0x0022, NEUTRAL_ON }, 120 { 0x0021, 0x0022, NEUTRAL_ON },
129 { 0x0023, 0x0025, WEAK_ET }, 121 { 0x0023, 0x0025, WEAK_ET },
130 { 0x0026, 0x002A, NEUTRAL_ON }, 122 { 0x0026, 0x002A, NEUTRAL_ON },
131 { 0x002B, 0x0000, WEAK_ET }, 123 { 0x002B, 0x0000, WEAK_ES },
132 { 0x002C, 0x0000, WEAK_CS }, 124 { 0x002C, 0x0000, WEAK_CS },
133 { 0x002D, 0x0000, WEAK_ET }, 125 { 0x002D, 0x0000, WEAK_ES },
134 { 0x002E, 0x0000, WEAK_CS }, 126 { 0x002E, 0x002F, WEAK_CS },
135 { 0x002F, 0x0000, WEAK_ES },
136 { 0x0030, 0x0039, WEAK_EN }, 127 { 0x0030, 0x0039, WEAK_EN },
137 { 0x003A, 0x0000, WEAK_CS }, 128 { 0x003A, 0x0000, WEAK_CS },
138 { 0x003B, 0x0040, NEUTRAL_ON }, 129 { 0x003B, 0x0040, NEUTRAL_ON },
139 { 0x005B, 0x0060, NEUTRAL_ON }, 130 { 0x005B, 0x0060, NEUTRAL_ON },
140 { 0x007B, 0x007E, NEUTRAL_ON }, 131 { 0x007B, 0x007E, NEUTRAL_ON },
143 { 0x0086, 0x009F, WEAK_BN }, 134 { 0x0086, 0x009F, WEAK_BN },
144 { 0x00A0, 0x0000, WEAK_CS }, 135 { 0x00A0, 0x0000, WEAK_CS },
145 { 0x00A1, 0x0000, NEUTRAL_ON }, 136 { 0x00A1, 0x0000, NEUTRAL_ON },
146 { 0x00A2, 0x00A5, WEAK_ET }, 137 { 0x00A2, 0x00A5, WEAK_ET },
147 { 0x00A6, 0x00A9, NEUTRAL_ON }, 138 { 0x00A6, 0x00A9, NEUTRAL_ON },
148 { 0x00AB, 0x00AF, NEUTRAL_ON }, 139 { 0x00AB, 0x00AC, NEUTRAL_ON },
140 { 0x00AD, 0x0000, WEAK_BN },
141 { 0x00AE, 0x00Af, NEUTRAL_ON },
149 { 0x00B0, 0x00B1, WEAK_ET }, 142 { 0x00B0, 0x00B1, WEAK_ET },
150 { 0x00B2, 0x00B3, WEAK_EN }, 143 { 0x00B2, 0x00B3, WEAK_EN },
151 { 0x00B4, 0x0000, NEUTRAL_ON }, 144 { 0x00B4, 0x0000, NEUTRAL_ON },
152 { 0x00B6, 0x00B8, NEUTRAL_ON }, 145 { 0x00B6, 0x00B8, NEUTRAL_ON },
153 { 0x00B9, 0x0000, WEAK_EN }, 146 { 0x00B9, 0x0000, WEAK_EN },
169 { 0x05BE, 0x0000, STRONG_R }, 162 { 0x05BE, 0x0000, STRONG_R },
170 { 0x05BF, 0x0000, WEAK_NSM }, 163 { 0x05BF, 0x0000, WEAK_NSM },
171 { 0x05C0, 0x0000, STRONG_R }, 164 { 0x05C0, 0x0000, STRONG_R },
172 { 0x05C1, 0x05C2, WEAK_NSM }, 165 { 0x05C1, 0x05C2, WEAK_NSM },
173 { 0x05C3, 0x0000, STRONG_R }, 166 { 0x05C3, 0x0000, STRONG_R },
174 { 0x05C4, 0x0000, WEAK_NSM }, 167 { 0x05C4, 0x05C5, WEAK_NSM },
168 { 0x05C6, 0x0000, STRONG_R },
169 { 0x05C7, 0x0000, WEAK_NSM },
175 { 0x05D0, 0x05F4, STRONG_R }, 170 { 0x05D0, 0x05F4, STRONG_R },
176 { 0x060C, 0x0000, WEAK_CS }, 171 { 0x060C, 0x0000, WEAK_CS },
177 { 0x061B, 0x064A, STRONG_AL }, 172 { 0x061B, 0x064A, STRONG_AL },
178 { 0x064B, 0x0655, WEAK_NSM }, 173 { 0x064B, 0x0655, WEAK_NSM },
179 { 0x0660, 0x0669, WEAK_AN }, 174 { 0x0660, 0x0669, WEAK_AN },
398 bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from, 393 bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from,
399 make_number (bidi_type[i].type)); 394 make_number (bidi_type[i].type));
400 bidi_initialized = 1; 395 bidi_initialized = 1;
401 } 396 }
402 397
403 static int
404 bidi_is_arabic_number (int ch)
405 {
406 return 0; /* FIXME! */
407 }
408
409 /* Return the bidi type of a character CH. */ 398 /* Return the bidi type of a character CH. */
410 bidi_type_t 399 bidi_type_t
411 bidi_get_type (int ch) 400 bidi_get_type (int ch)
412 { 401 {
413 if (ch == BIDI_EOB) 402 if (ch == BIDI_EOB)
414 return NEUTRAL_B; 403 return NEUTRAL_B;
404 if (ch < 0 || ch > MAX_CHAR)
405 abort ();
415 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); 406 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch));
416 } 407 }
417 408
418 void 409 void
419 bidi_check_type (bidi_type_t type) 410 bidi_check_type (bidi_type_t type)
455 default: 446 default:
456 abort (); 447 abort ();
457 } 448 }
458 } 449 }
459 450
451 /* Return the mirrored character of C, if any.
452
453 Note: The conditions in UAX#9 clause L4 must be tested by the
454 caller. */
460 /* FIXME: exceedingly temporary! Should consult the Unicode database 455 /* FIXME: exceedingly temporary! Should consult the Unicode database
461 of character properties. */ 456 of character properties. */
462 int 457 int
463 bidi_mirror_char (int c) 458 bidi_mirror_char (int c)
464 { 459 {
720 next level run only once: when we see the first PDF. That's 715 next level run only once: when we see the first PDF. That's
721 because the sor type depends only on the higher of the two levels 716 because the sor type depends only on the higher of the two levels
722 that we find on the two sides of the level boundary (see UAX#9, 717 that we find on the two sides of the level boundary (see UAX#9,
723 clause X10), and so we don't need to know the final embedding 718 clause X10), and so we don't need to know the final embedding
724 level to which we descend after processing all the PDFs. */ 719 level to which we descend after processing all the PDFs. */
725 if (level_before < level_after || !bidi_it->prev_was_pdf) 720 if (!bidi_it->prev_was_pdf || level_before < level_after)
726 /* FIXME: should the default sor direction be user selectable? */ 721 /* FIXME: should the default sor direction be user selectable? */
727 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; 722 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R;
728 if (level_before > level_after) 723 if (level_before > level_after)
729 bidi_it->prev_was_pdf = 1; 724 bidi_it->prev_was_pdf = 1;
730 725
740 } 735 }
741 736
742 void 737 void
743 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) 738 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
744 { 739 {
745 int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; 740 int bytepos = bidi_it->bytepos;
746 int ch, ch_len;
747 741
748 /* We should never be called at EOB or before BEGV. */ 742 /* We should never be called at EOB or before BEGV. */
749 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) 743 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
750 abort (); 744 abort ();
751 745
754 if (!(bytepos == BEGV_BYTE 748 if (!(bytepos == BEGV_BYTE
755 || FETCH_CHAR (bytepos) == '\n' 749 || FETCH_CHAR (bytepos) == '\n'
756 || FETCH_CHAR (bytepos - 1) == '\n')) 750 || FETCH_CHAR (bytepos - 1) == '\n'))
757 abort (); 751 abort ();
758 752
759 ch = FETCH_CHAR (bytepos);
760 ch_len = CHAR_BYTES (ch);
761 bidi_it->level_stack[0].level = 0; /* default for L2R */ 753 bidi_it->level_stack[0].level = 0; /* default for L2R */
762 if (dir == R2L) 754 if (dir == R2L)
763 bidi_it->level_stack[0].level = 1; 755 bidi_it->level_stack[0].level = 1;
764 else if (dir == NEUTRAL_DIR) /* P2 */ 756 else if (dir == NEUTRAL_DIR) /* P2 */
765 { 757 {
766 bidi_type_t type; 758 int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch);
767 759 int pos = bidi_it->charpos;
768 /* FIXME: should actually go to where the paragraph begins and 760 bidi_type_t type = bidi_get_type (ch);
769 start the loop below from there, since UAX#9 says to find the 761
770 first strong directional character in the paragraph. */ 762 for (pos++, bytepos += ch_len;
771
772 for (type = bidi_get_type (ch), pos++, bytepos += ch_len;
773 /* NOTE: UAX#9 says to search only for L, AL, or R types of 763 /* NOTE: UAX#9 says to search only for L, AL, or R types of
774 characters, and ignore RLE, RLO, LRE, and LRO. However, 764 characters, and ignore RLE, RLO, LRE, and LRO. However,
775 I'm not sure it makes sense to omit those 4; should try 765 I'm not sure it makes sense to omit those 4; should try
776 with and without that to see the effect. */ 766 with and without that to see the effect. */
777 (bidi_get_category (type) != STRONG) 767 (bidi_get_category (type) != STRONG)
793 bidi_it->invalid_levels = 0; 783 bidi_it->invalid_levels = 0;
794 bidi_it->invalid_rl_levels = -1; 784 bidi_it->invalid_rl_levels = -1;
795 bidi_it->new_paragraph = 0; 785 bidi_it->new_paragraph = 0;
796 bidi_it->next_en_pos = -1; 786 bidi_it->next_en_pos = -1;
797 bidi_it->next_for_ws.type = UNKNOWN_BT; 787 bidi_it->next_for_ws.type = UNKNOWN_BT;
798 bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */ 788 bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction,
789 bidi_it->level_stack[0].level); /* X10 */
799 790
800 bidi_cache_reset (); 791 bidi_cache_reset ();
801 } 792 }
802 793
803 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end, 794 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end,