Mercurial > emacs
comparison src/bidi.c @ 107592:e6df672626c1
Retrospective commit from 2009-09-27.
Support character mirroring.
Support iteration that starts in the middle of a line.
Misc cleanups.
xdisp.c (next_element_from_buffer): If not called at the beginning
of a line, start bidi iteration from the beginning of that line.
bidi.c (bidi_paragraph_init): Use
bidi_overriding_paragraph_direction instead of a literal zero.
(bidi_initialize): Fix some character types, per Unicode 5.x.
(bidi_get_type): Abort if called with invalid character code.
dispextern.h: Add prototype of bidi_mirror_char.
xdisp.c (get_next_display_element): Mirror characters whose
resolved type is STRONG_R.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Fri, 01 Jan 2010 06:01:34 -0500 |
parents | 86eec24bee2c |
children | a551e4109c04 |
Comparison legend: equal, deleted, inserted, replaced
107591:86eec24bee2c | 107592:e6df672626c1 |
---|---|
96 | 96 |
97 int bidi_ignore_explicit_marks_for_paragraph_level = 1; | 97 int bidi_ignore_explicit_marks_for_paragraph_level = 1; |
98 | 98 |
99 /* FIXME: Should be user-definable. */ | 99 /* FIXME: Should be user-definable. */ |
100 bidi_dir_t bidi_overriding_paragraph_direction = L2R; | 100 bidi_dir_t bidi_overriding_paragraph_direction = L2R; |
101 | |
102 /* FIXME: Unused? */ | |
103 #define ASCII_BIDI_TYPE_SET(STR, TYPE) \ | |
104 do { \ | |
105 unsigned char *p; \ | |
106 for (p = (STR); *p; p++) \ | |
107 CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \ | |
108 } while (0) | |
109 | 101 |
110 static void | 102 static void |
111 bidi_initialize () | 103 bidi_initialize () |
112 { | 104 { |
113 /* FIXME: This should come from the Unicode Database. */ | 105 /* FIXME: This should come from the Unicode Database. */ |
126 { 0x001F, 0x0000, NEUTRAL_S }, | 118 { 0x001F, 0x0000, NEUTRAL_S }, |
127 { 0x0020, 0x0000, NEUTRAL_WS }, | 119 { 0x0020, 0x0000, NEUTRAL_WS }, |
128 { 0x0021, 0x0022, NEUTRAL_ON }, | 120 { 0x0021, 0x0022, NEUTRAL_ON }, |
129 { 0x0023, 0x0025, WEAK_ET }, | 121 { 0x0023, 0x0025, WEAK_ET }, |
130 { 0x0026, 0x002A, NEUTRAL_ON }, | 122 { 0x0026, 0x002A, NEUTRAL_ON }, |
131 { 0x002B, 0x0000, WEAK_ET }, | 123 { 0x002B, 0x0000, WEAK_ES }, |
132 { 0x002C, 0x0000, WEAK_CS }, | 124 { 0x002C, 0x0000, WEAK_CS }, |
133 { 0x002D, 0x0000, WEAK_ET }, | 125 { 0x002D, 0x0000, WEAK_ES }, |
134 { 0x002E, 0x0000, WEAK_CS }, | 126 { 0x002E, 0x002F, WEAK_CS }, |
135 { 0x002F, 0x0000, WEAK_ES }, | |
136 { 0x0030, 0x0039, WEAK_EN }, | 127 { 0x0030, 0x0039, WEAK_EN }, |
137 { 0x003A, 0x0000, WEAK_CS }, | 128 { 0x003A, 0x0000, WEAK_CS }, |
138 { 0x003B, 0x0040, NEUTRAL_ON }, | 129 { 0x003B, 0x0040, NEUTRAL_ON }, |
139 { 0x005B, 0x0060, NEUTRAL_ON }, | 130 { 0x005B, 0x0060, NEUTRAL_ON }, |
140 { 0x007B, 0x007E, NEUTRAL_ON }, | 131 { 0x007B, 0x007E, NEUTRAL_ON }, |
143 { 0x0086, 0x009F, WEAK_BN }, | 134 { 0x0086, 0x009F, WEAK_BN }, |
144 { 0x00A0, 0x0000, WEAK_CS }, | 135 { 0x00A0, 0x0000, WEAK_CS }, |
145 { 0x00A1, 0x0000, NEUTRAL_ON }, | 136 { 0x00A1, 0x0000, NEUTRAL_ON }, |
146 { 0x00A2, 0x00A5, WEAK_ET }, | 137 { 0x00A2, 0x00A5, WEAK_ET }, |
147 { 0x00A6, 0x00A9, NEUTRAL_ON }, | 138 { 0x00A6, 0x00A9, NEUTRAL_ON }, |
148 { 0x00AB, 0x00AF, NEUTRAL_ON }, | 139 { 0x00AB, 0x00AC, NEUTRAL_ON }, |
140 { 0x00AD, 0x0000, WEAK_BN }, | |
141 { 0x00AE, 0x00Af, NEUTRAL_ON }, | |
149 { 0x00B0, 0x00B1, WEAK_ET }, | 142 { 0x00B0, 0x00B1, WEAK_ET }, |
150 { 0x00B2, 0x00B3, WEAK_EN }, | 143 { 0x00B2, 0x00B3, WEAK_EN }, |
151 { 0x00B4, 0x0000, NEUTRAL_ON }, | 144 { 0x00B4, 0x0000, NEUTRAL_ON }, |
152 { 0x00B6, 0x00B8, NEUTRAL_ON }, | 145 { 0x00B6, 0x00B8, NEUTRAL_ON }, |
153 { 0x00B9, 0x0000, WEAK_EN }, | 146 { 0x00B9, 0x0000, WEAK_EN }, |
169 { 0x05BE, 0x0000, STRONG_R }, | 162 { 0x05BE, 0x0000, STRONG_R }, |
170 { 0x05BF, 0x0000, WEAK_NSM }, | 163 { 0x05BF, 0x0000, WEAK_NSM }, |
171 { 0x05C0, 0x0000, STRONG_R }, | 164 { 0x05C0, 0x0000, STRONG_R }, |
172 { 0x05C1, 0x05C2, WEAK_NSM }, | 165 { 0x05C1, 0x05C2, WEAK_NSM }, |
173 { 0x05C3, 0x0000, STRONG_R }, | 166 { 0x05C3, 0x0000, STRONG_R }, |
174 { 0x05C4, 0x0000, WEAK_NSM }, | 167 { 0x05C4, 0x05C5, WEAK_NSM }, |
168 { 0x05C6, 0x0000, STRONG_R }, | |
169 { 0x05C7, 0x0000, WEAK_NSM }, | |
175 { 0x05D0, 0x05F4, STRONG_R }, | 170 { 0x05D0, 0x05F4, STRONG_R }, |
176 { 0x060C, 0x0000, WEAK_CS }, | 171 { 0x060C, 0x0000, WEAK_CS }, |
177 { 0x061B, 0x064A, STRONG_AL }, | 172 { 0x061B, 0x064A, STRONG_AL }, |
178 { 0x064B, 0x0655, WEAK_NSM }, | 173 { 0x064B, 0x0655, WEAK_NSM }, |
179 { 0x0660, 0x0669, WEAK_AN }, | 174 { 0x0660, 0x0669, WEAK_AN }, |
398 bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from, | 393 bidi_type[i].to ? bidi_type[i].to : bidi_type[i].from, |
399 make_number (bidi_type[i].type)); | 394 make_number (bidi_type[i].type)); |
400 bidi_initialized = 1; | 395 bidi_initialized = 1; |
401 } | 396 } |
402 | 397 |
403 static int | |
404 bidi_is_arabic_number (int ch) | |
405 { | |
406 return 0; /* FIXME! */ | |
407 } | |
408 | |
409 /* Return the bidi type of a character CH. */ | 398 /* Return the bidi type of a character CH. */ |
410 bidi_type_t | 399 bidi_type_t |
411 bidi_get_type (int ch) | 400 bidi_get_type (int ch) |
412 { | 401 { |
413 if (ch == BIDI_EOB) | 402 if (ch == BIDI_EOB) |
414 return NEUTRAL_B; | 403 return NEUTRAL_B; |
404 if (ch < 0 || ch > MAX_CHAR) | |
405 abort (); | |
415 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); | 406 return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); |
416 } | 407 } |
417 | 408 |
418 void | 409 void |
419 bidi_check_type (bidi_type_t type) | 410 bidi_check_type (bidi_type_t type) |
455 default: | 446 default: |
456 abort (); | 447 abort (); |
457 } | 448 } |
458 } | 449 } |
459 | 450 |
451 /* Return the mirrored character of C, if any. | |
452 | |
453 Note: The conditions in UAX#9 clause L4 must be tested by the | |
454 caller. */ | |
460 /* FIXME: exceedingly temporary! Should consult the Unicode database | 455 /* FIXME: exceedingly temporary! Should consult the Unicode database |
461 of character properties. */ | 456 of character properties. */ |
462 int | 457 int |
463 bidi_mirror_char (int c) | 458 bidi_mirror_char (int c) |
464 { | 459 { |
720 next level run only once: when we see the first PDF. That's | 715 next level run only once: when we see the first PDF. That's |
721 because the sor type depends only on the higher of the two levels | 716 because the sor type depends only on the higher of the two levels |
722 that we find on the two sides of the level boundary (see UAX#9, | 717 that we find on the two sides of the level boundary (see UAX#9, |
723 clause X10), and so we don't need to know the final embedding | 718 clause X10), and so we don't need to know the final embedding |
724 level to which we descend after processing all the PDFs. */ | 719 level to which we descend after processing all the PDFs. */ |
725 if (level_before < level_after || !bidi_it->prev_was_pdf) | 720 if (!bidi_it->prev_was_pdf || level_before < level_after) |
726 /* FIXME: should the default sor direction be user selectable? */ | 721 /* FIXME: should the default sor direction be user selectable? */ |
727 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; | 722 bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; |
728 if (level_before > level_after) | 723 if (level_before > level_after) |
729 bidi_it->prev_was_pdf = 1; | 724 bidi_it->prev_was_pdf = 1; |
730 | 725 |
740 } | 735 } |
741 | 736 |
742 void | 737 void |
743 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) | 738 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) |
744 { | 739 { |
745 int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; | 740 int bytepos = bidi_it->bytepos; |
746 int ch, ch_len; | |
747 | 741 |
748 /* We should never be called at EOB or before BEGV. */ | 742 /* We should never be called at EOB or before BEGV. */ |
749 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) | 743 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) |
750 abort (); | 744 abort (); |
751 | 745 |
754 if (!(bytepos == BEGV_BYTE | 748 if (!(bytepos == BEGV_BYTE |
755 || FETCH_CHAR (bytepos) == '\n' | 749 || FETCH_CHAR (bytepos) == '\n' |
756 || FETCH_CHAR (bytepos - 1) == '\n')) | 750 || FETCH_CHAR (bytepos - 1) == '\n')) |
757 abort (); | 751 abort (); |
758 | 752 |
759 ch = FETCH_CHAR (bytepos); | |
760 ch_len = CHAR_BYTES (ch); | |
761 bidi_it->level_stack[0].level = 0; /* default for L2R */ | 753 bidi_it->level_stack[0].level = 0; /* default for L2R */ |
762 if (dir == R2L) | 754 if (dir == R2L) |
763 bidi_it->level_stack[0].level = 1; | 755 bidi_it->level_stack[0].level = 1; |
764 else if (dir == NEUTRAL_DIR) /* P2 */ | 756 else if (dir == NEUTRAL_DIR) /* P2 */ |
765 { | 757 { |
766 bidi_type_t type; | 758 int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch); |
767 | 759 int pos = bidi_it->charpos; |
768 /* FIXME: should actually go to where the paragraph begins and | 760 bidi_type_t type = bidi_get_type (ch); |
769 start the loop below from there, since UAX#9 says to find the | 761 |
770 first strong directional character in the paragraph. */ | 762 for (pos++, bytepos += ch_len; |
771 | |
772 for (type = bidi_get_type (ch), pos++, bytepos += ch_len; | |
773 /* NOTE: UAX#9 says to search only for L, AL, or R types of | 763 /* NOTE: UAX#9 says to search only for L, AL, or R types of |
774 characters, and ignore RLE, RLO, LRE, and LRO. However, | 764 characters, and ignore RLE, RLO, LRE, and LRO. However, |
775 I'm not sure it makes sense to omit those 4; should try | 765 I'm not sure it makes sense to omit those 4; should try |
776 with and without that to see the effect. */ | 766 with and without that to see the effect. */ |
777 (bidi_get_category (type) != STRONG) | 767 (bidi_get_category (type) != STRONG) |
793 bidi_it->invalid_levels = 0; | 783 bidi_it->invalid_levels = 0; |
794 bidi_it->invalid_rl_levels = -1; | 784 bidi_it->invalid_rl_levels = -1; |
795 bidi_it->new_paragraph = 0; | 785 bidi_it->new_paragraph = 0; |
796 bidi_it->next_en_pos = -1; | 786 bidi_it->next_en_pos = -1; |
797 bidi_it->next_for_ws.type = UNKNOWN_BT; | 787 bidi_it->next_for_ws.type = UNKNOWN_BT; |
798 bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */ | 788 bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction, |
789 bidi_it->level_stack[0].level); /* X10 */ | |
799 | 790 |
800 bidi_cache_reset (); | 791 bidi_cache_reset (); |
801 } | 792 } |
802 | 793 |
803 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end, | 794 /* Do whatever UAX#9 clause X8 says should be done at paragraph's end, |