Mercurial > emacs
diff src/bidi.c @ 107592:e6df672626c1
Retrospective commit from 2009-09-27.
Support character mirroring.
Support iteration that starts in the middle of a line.
Misc cleanups.
xdisp.c (next_element_from_buffer): If not called at the beginning
of a line, start bidi iteration from the beginning of that line.
bidi.c (bidi_paragraph_init): Use
bidi_overriding_paragraph_direction instead of a literal zero.
(bidi_initialize): Fix some character types, per Unicode 5.x.
(bidi_get_type): Abort if called with invalid character code.
dispextern.h: Add prototype of bidi_mirror_char.
xdisp.c (get_next_display_element): Mirror characters whose
resolved type is STRONG_R.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Fri, 01 Jan 2010 06:01:34 -0500 |
parents | 86eec24bee2c |
children | a551e4109c04 |
line wrap: on
line diff
--- a/src/bidi.c Fri Jan 01 05:57:17 2010 -0500 +++ b/src/bidi.c Fri Jan 01 06:01:34 2010 -0500 @@ -99,14 +99,6 @@ /* FIXME: Should be user-definable. */ bidi_dir_t bidi_overriding_paragraph_direction = L2R; -/* FIXME: Unused? */ -#define ASCII_BIDI_TYPE_SET(STR, TYPE) \ - do { \ - unsigned char *p; \ - for (p = (STR); *p; p++) \ - CHAR_TABLE_SET (bidi_type_table, *p, (TYPE)); \ - } while (0) - static void bidi_initialize () { @@ -128,11 +120,10 @@ { 0x0021, 0x0022, NEUTRAL_ON }, { 0x0023, 0x0025, WEAK_ET }, { 0x0026, 0x002A, NEUTRAL_ON }, - { 0x002B, 0x0000, WEAK_ET }, + { 0x002B, 0x0000, WEAK_ES }, { 0x002C, 0x0000, WEAK_CS }, - { 0x002D, 0x0000, WEAK_ET }, - { 0x002E, 0x0000, WEAK_CS }, - { 0x002F, 0x0000, WEAK_ES }, + { 0x002D, 0x0000, WEAK_ES }, + { 0x002E, 0x002F, WEAK_CS }, { 0x0030, 0x0039, WEAK_EN }, { 0x003A, 0x0000, WEAK_CS }, { 0x003B, 0x0040, NEUTRAL_ON }, @@ -145,7 +136,9 @@ { 0x00A1, 0x0000, NEUTRAL_ON }, { 0x00A2, 0x00A5, WEAK_ET }, { 0x00A6, 0x00A9, NEUTRAL_ON }, - { 0x00AB, 0x00AF, NEUTRAL_ON }, + { 0x00AB, 0x00AC, NEUTRAL_ON }, + { 0x00AD, 0x0000, WEAK_BN }, + { 0x00AE, 0x00Af, NEUTRAL_ON }, { 0x00B0, 0x00B1, WEAK_ET }, { 0x00B2, 0x00B3, WEAK_EN }, { 0x00B4, 0x0000, NEUTRAL_ON }, @@ -171,7 +164,9 @@ { 0x05C0, 0x0000, STRONG_R }, { 0x05C1, 0x05C2, WEAK_NSM }, { 0x05C3, 0x0000, STRONG_R }, - { 0x05C4, 0x0000, WEAK_NSM }, + { 0x05C4, 0x05C5, WEAK_NSM }, + { 0x05C6, 0x0000, STRONG_R }, + { 0x05C7, 0x0000, WEAK_NSM }, { 0x05D0, 0x05F4, STRONG_R }, { 0x060C, 0x0000, WEAK_CS }, { 0x061B, 0x064A, STRONG_AL }, @@ -400,18 +395,14 @@ bidi_initialized = 1; } -static int -bidi_is_arabic_number (int ch) -{ - return 0; /* FIXME! */ -} - /* Return the bidi type of a character CH. */ bidi_type_t bidi_get_type (int ch) { if (ch == BIDI_EOB) return NEUTRAL_B; + if (ch < 0 || ch > MAX_CHAR) + abort (); return (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); } @@ -457,6 +448,10 @@ } } +/* Return the mirrored character of C, if any. 
+ + Note: The conditions in UAX#9 clause L4 must be tested by the + caller. */ /* FIXME: exceedingly temporary! Should consult the Unicode database of character properties. */ int @@ -722,7 +717,7 @@ that we find on the two sides of the level boundary (see UAX#9, clause X10), and so we don't need to know the final embedding level to which we descend after processing all the PDFs. */ - if (level_before < level_after || !bidi_it->prev_was_pdf) + if (!bidi_it->prev_was_pdf || level_before < level_after) /* FIXME: should the default sor direction be user selectable? */ bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; if (level_before > level_after) @@ -742,8 +737,7 @@ void bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) { - int pos = bidi_it->charpos, bytepos = bidi_it->bytepos; - int ch, ch_len; + int bytepos = bidi_it->bytepos; /* We should never be called at EOB or before BEGV. */ if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) @@ -756,20 +750,16 @@ || FETCH_CHAR (bytepos - 1) == '\n')) abort (); - ch = FETCH_CHAR (bytepos); - ch_len = CHAR_BYTES (ch); bidi_it->level_stack[0].level = 0; /* default for L2R */ if (dir == R2L) bidi_it->level_stack[0].level = 1; else if (dir == NEUTRAL_DIR) /* P2 */ { - bidi_type_t type; + int ch = FETCH_CHAR (bytepos), ch_len = CHAR_BYTES (ch); + int pos = bidi_it->charpos; + bidi_type_t type = bidi_get_type (ch); - /* FIXME: should actually go to where the paragraph begins and - start the loop below from there, since UAX#9 says to find the - first strong directional character in the paragraph. */ - - for (type = bidi_get_type (ch), pos++, bytepos += ch_len; + for (pos++, bytepos += ch_len; /* NOTE: UAX#9 says to search only for L, AL, or R types of characters, and ignore RLE, RLO, LRE, and LRO. 
However, I'm not sure it makes sense to omit those 4; should try @@ -795,7 +785,8 @@ bidi_it->new_paragraph = 0; bidi_it->next_en_pos = -1; bidi_it->next_for_ws.type = UNKNOWN_BT; - bidi_set_sor_type (bidi_it, bidi_it->level_stack[0].level, 0); /* X10 */ + bidi_set_sor_type (bidi_it, bidi_overriding_paragraph_direction, + bidi_it->level_stack[0].level); /* X10 */ bidi_cache_reset (); }