Mercurial > emacs
changeset 107608:11c99bdae327
Merge from mainline.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Fri, 01 Jan 2010 14:30:06 -0500 |
parents | a04b9ac55bc5 (diff) ee43864593e4 (current diff) |
children | 62ebf47086e7 |
files | |
diffstat | 11 files changed, 3431 insertions(+), 215 deletions(-) [+] |
line wrap: on
line diff
--- a/src/.gdbinit Fri Jan 01 13:44:02 2010 -0500 +++ b/src/.gdbinit Fri Jan 01 14:30:06 2010 -0500 @@ -447,6 +447,36 @@ Pretty print window structure w. end +define pbiditype + if ($arg0 == 0) + printf "UNDEF" + end + if ($arg0 == 1) + printf "L" + end + if ($arg0 == 2) + printf "R" + end + if ($arg0 == 3) + printf "EN" + end + if ($arg0 == 4) + printf "AN" + end + if ($arg0 == 5) + printf "BN" + end + if ($arg0 == 6) + printf "B" + end + if ($arg0 < 0 || $arg0 > 6) + printf "%d??", $arg0 + end +end +document pbiditype +Print textual description of bidi type given as first argument. +end + define pgx set $g = $arg0 # CHAR_GLYPH @@ -475,6 +505,11 @@ else printf " pos=%d", $g->charpos end + # For characters, print their resolved level and bidi type + if ($g->type == 0) + printf " blev=%d,btyp=", $g->resolved_level + pbiditype $g->bidi_type + end printf " w=%d a+d=%d+%d", $g->pixel_width, $g->ascent, $g->descent # If not DEFAULT_FACE_ID if ($g->face_id != 0)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/ChangeLog.bidi Fri Jan 01 14:30:06 2010 -0500 @@ -0,0 +1,427 @@ +2009-12-26 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (handle_stop_backwards): Call compute_stop_pos in the + loop, instead of calling handle_stop. Call handle_stop only once, + after the loop. + (next_element_from_buffer): Don't call handle_stop_backwards if at + stop position. If base_level_stop is zero, set it to 1. + + * term.c (append_glyph): Fill resolved_level and bidi_type slots + of struct glyph for unidirectional display. + + * xdisp.c (set_cursor_from_row): Handle zero-width characters. + + * bidi.c (bidi_mirror_char): More efficient code (suggested by + Ehud Karni <ehud@unix.mvs.co.il>). Don't even try to mirror + non-ASCII characters. + +2009-12-19 Eli Zaretskii <eliz@gnu.org> + + * buffer.c (Fbuffer_swap_text): Swap the values of + bidi_display_reordering and bidi_paragraph_direction. + + * bidi.c (bidi_resolve_weak): Fix nesting of conditions for Wn + processing. Move W3 after W1 and W2. Simplify W4 because it is + now always after W1. + + * .gdbinit (pbiditype): New command. + (pgx): Use it to display bidi level and type of the glyph. + +2009-12-12 Eli Zaretskii <eliz@gnu.org> + + * dispextern.h (struct it): New members prev_stop and + base_level_stop. + + * xdisp.c (handle_stop_backwards): New function. + (next_element_from_buffer): Handle the situation where we + overstepped stop_charpos due to non-linearity of the bidi + iteration. Likewise for when we back up beyond the previous + stop_charpos. + (reseat_1, pop_it, push_it): Set prev_stop and base_level_stop. + + * dispextern.h (BIDI_AT_BASE_LEVEL): New macro. + + * bidi.c (bidi_copy_it): Fix compiler warning due to cast of a + pointer to `int'. Don't preserve the first_elt member, as it is + no longer copied, because its position in the structure was + changed, see below. 
+ + * dispextern.h (struct bidi_it): Move first_elt, new_paragraph, + separator_limit, and paragraph_dir to after bidi_stack. Add a + note that anything beyond the level stack is not preserved when + the bidi iterator state is copied/saved. + +2009-11-21 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (set_cursor_from_row): Fix cursor positioning on empty + lines when integer values of `cursor' property is used on display + strings. + +2009-11-14 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (init_iterator, text_outside_line_unchanged_p) + (try_window_id): Rename paragraph_direction to + bidi_paragraph_direction. + (set_cursor_from_row): Handle integer values of `cursor' property + on display strings. + + * buffer.c (init_buffer_once, syms_of_buffer): Rename + paragraph_direction to bidi_paragraph_direction. + + * buffer.h (struct buffer): Rename paragraph_direction to + bidi_paragraph_direction. + +2009-11-07 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_paragraph_init): Don't overstep end of buffer. + Treat end of buffer as a NEUTRAL_B character. + (bidi_resolve_explicit): Don't special-case ZV when bidi_it->type + is NEUTRAL_B, since bidi_set_paragraph_end no longer sets the + new_paragraph flag. + +2009-10-31 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (display_line): Always extend reversed_p rows to the end + of line. + (set_cursor_from_row): In R2L rows that don't display text, put + the cursor on the rightmost glyph. + +2009-10-24 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (set_cursor_from_row): Fix off-by-one error when + skipping over non-character glyphs at end of a reversed row. + + * dispextern.h (struct glyph): The `resolved_level' member needs + only 5 bits, not 6. The `bidi_type' member needs only 3 bits. + (bidi_type_t): Rearrange so that types that can appear in the + resolved type are at the beginning and have values less than 8. + +2009-10-23 Eli Zaretskii <eliz@gnu.org> + + * bidi.c: Include setjmp.h. 
+ +2009-10-17 Eli Zaretskii <eliz@gnu.org> + + * dispextern.h (struct glyph): New members resolved_level and + bidi_type. + + * xdisp.c (append_glyph, append_composite_glyph) + (produce_image_glyph, append_stretch_glyph): Set them. + + * term.c (append_glyph): Ditto. + + * xdisp.c (display_line, next_element_from_buffer): Set the glyph + row's reversed_p flag if the paragraph base direction is odd. + (extend_face_to_end_of_line): Don't reverse the glyphs here. + + * term.c (append_glyph): Reverse glyphs here. + + * bidi.c (bidi_get_next_char_visually): Don't exit early when at + ZV. + (bidi_paragraph_init): Don't step over a newline if at BEGV. + +2009-10-16 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_paragraph_init): Handle empty buffers. + +2009-10-10 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (set_cursor_from_row): Skip over glyphs near end of row + with integer OBJECT even if their CHARPOS is zero. + + * bidi.c (bidi_cache_iterator_state): Don't cache NEW_PARAGRAPH. + Abort if someone tries to add a cached state whose position is not + the immediate successor to that of the last cached state. + (bidi_paragraph_init): Don't bail out too early after a reseat. + +2009-10-09 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (text_outside_line_unchanged_p, try_window_id): Disable + optimizations if we are reordering bidirectional text and the + paragraph direction can be affected by the change. + +2009-10-08 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (string_buffer_position_lim): New function. + (string_buffer_position): Most of code moved to + string_buffer_position_lim. Last argument and return value are + now EMACS_INT; all callers changed. + (set_cursor_from_row): Rewritten to support bidirectional text and + reversed glyph rows. + + dispextern.h <string_buffer_position>: Update prototype. + +2009-10-07 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_paragraph_init): Fix initialization of POS. + + * dispextern.h (struct glyph_row): New member reversed_p. 
+ +2009-10-06 Eli Zaretskii <eliz@gnu.org> + + * buffer.c (syms_of_buffer): Remove DEFVAR_LISP_NOPRO for + default-direction-reversed, default-bidi-display-reordering, and + default-paragraph-direction. + +2009-10-05 Eli Zaretskii <eliz@gnu.org> + + * buffer.h (struct buffer): New member paragraph_direction. + * buffer.c (init_buffer_once): Initialize it. + (syms_of_buffer): Declare Lisp variables + default-paragraph-direction and paragraph-direction. + + * dispextern.h (struct it): New member paragraph_embedding. + * xdisp.c (init_iterator): Initialize it from the buffer's value + of paragraph-direction. + <Qright_to_left, Qleft_to_right>: New variables. + (syms_of_xdisp): Initialize and staticpro them. + (set_iterator_to_next, next_element_from_buffer): Use the value of + paragraph_embedding to determine the paragraph direction. + + * bidi.c (bidi_line_init): Fix second argument to + bidi_set_sor_type. + (bidi_init_it): Initialize paragraph_dir to NEUTRAL_DIR. + (bidi_get_next_char_visually): Record the last character of the + separator in separator_limit, not the character after that. + (bidi_find_paragraph_start): Accept character and byte positions + instead of the whole iterator structure. All callers changed. + +2009-10-04 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_at_paragraph_end): Check for paragraph-start if + paragraph-separate failed to match. Return the length of the + matched separator. + (bidi_line_init): New function. + (bidi_paragraph_init): Use bidi_line_init. Do nothing if in the + middle of a paragraph-separate sequence. Don't override existing + paragraph direction if no strong characters found in this + paragraph. Set separator_limit according to what + bidi_at_paragraph_end returns. Reset new_paragraph flag when a + new paragraph is found. + (bidi_init_it): Reset separator_limit. + + * dispextern.h (struct bidi_it): New member separator_limit. 
+ + * bidi.c (bidi_find_paragraph_start): Return the byte position of + the paragraph beginning. + + * xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the + new_paragraph flag is set in the bidi iterator. + + * bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use + the buffer-local value of paragraph-start and paragraph-separate. + +2009-10-03 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_set_paragraph_end): Don't set the new_paragraph + flag in the iterator. + (bidi_init_it): Set the new_paragraph flag. + (bidi_at_paragraph_end): Arguments are now character and byte + position of the next character. All callers changed. + (bidi_resolve_explicit): Don't call bidi_at_paragraph_end, and + don't behave as if at paragraph end if it returns true. + (bidi_get_next_char_visually): Don't call bidi_paragraph_init if + new_paragraph flag is set. Set new_paragraph flag when at end of + a paragraph. + <fallback_paragraph_start_re, fallback_paragraph_separate_re>: New + variables. + <Qparagraph_start, Qparagraph_separate>: New variables. + (bidi_initialize): Initialize and staticpro them. + + * dispextern.h <struct bidi_it>: New element paragraph_dir. Make + positional elements EMACS_INT. + + * bidi.c <bidi_overriding_paragraph_direction>: Delete. + +2009-09-28 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_init_it): Initialize charpos, bytepos, and + first_elt before calling bidi_set_paragraph_end. + (bidi_resolve_explicit): Don't call bidi_set_paragraph_end at + EOB. + (bidi_at_paragraph_end): Don't set new_paragraph flag at EOB. + (bidi_get_type): Accept an additional argument OVERRIDE, per UAX#9 + "Explicit Overrides". All callers changed. + +2009-09-27 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (next_element_from_buffer): If called not at line + beginning, start bidi iteration from line beginning. + + * bidi.c (bidi_paragraph_init): Use + bidi_overriding_paragraph_direction instead of a literal zero. 
+ (bidi_initialize): Fix some character types, per Unicode 5.x. + (bidi_get_type): Abort if called with invalid character code. + + * dispextern.h: Add prototype of bidi_mirror_char. + + * xdisp.c (get_next_display_element): Mirror characters whose + resolved type is STRONG_R. + +2009-09-26 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_paragraph_init): Don't set bidi_it->ch_len. Abort + if called not at beginning of a new paragraph. + (bidi_get_next_char_visually): Prepare and use a sentinel iterator + state when first_elt flag is set. + + * dispextern.h (struct bidi_it): New struct member first_elt. + + * bidi.c (bidi_init_it): Initialize bidi_it->first_elt. + (bidi_copy_it): Don't copy the first_elt flag. + + * xdisp.c (reseat_1): Initialize bidi_it.first_elt. Move bidi + scan start code from here... + (next_element_from_buffer): ...to here. Use bidi_it.first_elt + flag. + +2009-09-20 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (reseat_1): Handle position < BEGV. + + * bidi.c (bidi_paragraph_init): Set bidi_it->ch_len. Handle ZV. + (bidi_init_it): Don't initialize bidi_it->ch_len. + (bidi_resolve_explicit_1): Abort if bidi_it->ch_len was not + initialized. + (bidi_at_paragraph_end, bidi_resolve_explicit_1) + (bidi_resolve_weak, bidi_level_of_next_char): Handle bytepos at + ZV_BYTE. + (bidi_resolve_explicit_1): Handle position < BEGV. + +2009-09-19 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (init_iterator): Call bidi_init_it. Set + bidi_it->bytepos if buffer position specified. + (reseat_1): Don't call bidi_init_it. Call bidi_paragraph_init + instead. Move back to preceding character before the call to + bidi_get_next_char_visually. + + * bidi.c: Remove all STANDALONE parts. + (bidi_init_it): Init bidi_it->charpos and bidi_it->bytepos to -1. + Don't call bidi_paragraph_init. Change arguments. + (bidi_paragraph_init): Remove code for negative pos. + + * dispextern.h <bidi_it>: Rename orig_type to type_after_w1 and + pristine_type to orig_type. 
+ +2009-09-12 Eli Zaretskii <eliz@gnu.org> + + * dispnew.c (direct_output_for_insert): Give up if we are + reordering bidirectional text. + + * dispextern.h (IT_STACK_SIZE): Enlarge to 5. + + * xdisp.c (display_line): Set row->end and it->start for the next + row to the next character in logical order. If we are reordering + bidi text, push and pop the iterator before and after momentarily + iterating in logical order. + +2009-09-11 Eli Zaretskii <eliz@gnu.org> + + Note: The following changes were undone on 2009-09-12. + + * xdisp.c (set_iterator_to_next, reseat, reseat_1) + (reseat_at_next_visible_line_start): Accept additional argument + force_logical_p; all callers changed. If force_logical_p is + non-zero, force iteration in buffer's logical order even in bidi + buffers. + + * dispnew.c (direct_output_for_insert): Call set_iterator_to_next + with additional argument zero. + + * dispextern.h (set_iterator_to_next): Now accepts an additional + argument. + +2009-08-29 Eli Zaretskii <eliz@gnu.org> + + * xdisp.c (set_cursor_from_row): Don't assume glyph->charpos + increments linearly. + (try_window_reusing_current_matrix): Don't assume glyph->charpos + increments linearly. + +2009-08-28 Eli Zaretskii <eliz@gnu.org> + + * bidi.c <bidi_overriding_paragraph_direction>: Default to L2R, + for now. + +2009-08-22 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_initialize): staticpro bidi_char_table. + (bidi_check_type): New function. + (bidi_cache_iterator_state, bidi_remember_char) + (bidi_resolve_explicit_1, bidi_resolve_explicit) + (bidi_resolve_weak, bidi_resolve_neutral) + (bidi_level_of_next_char): Use it to validate the bidi type + assigned to the iterator. + +2009-08-15 Eli Zaretskii <eliz@gnu.org> + + * bidi.c (bidi_initialize): Fix initialization of bidi_type_table. + + * xdisp.c (set_iterator_to_next): Fix position setting after call + to bidi_get_next_char_visually. + +2005-12-03 Eli Zaretskii <eliz@gnu.org> + + * bidi.c: Include stdio.h unconditionally. 
Fix and elaborate + commentary. Add Copyright blurb. + +2004-03-08 Kenichi Handa <handa@m17n.org> + + * xdisp.c (reseat_1): Call bidi_init_it with a previous position. + + * bidi.c (bidi_init_it): Set bidi_it->ch_len even if POS > 0. + +2004-03-04 Kenichi Handa <handa@m17n.org> + + The following changes are to support bidirectional text display. + + * Makefile.in (obj): Include bidi.o. + (bidi.o): New target. + + * bidi.c: New file. + + * buffer.h (struct buffer): New member bidi_display_reordering. + + * buffer.c (init_buffer_once): Initialize bidi_display_reordering. + (syms_of_buffer): Declarations of Lisp variables + default-bidi-display-reordering and bidi-display-reordering. + + * dispextern.h (BIDI_MAXLEVEL): New macro. + (bidi_type_t, bidi_dir_t): New types. + (bidi_saved_info, bidi_stack, bidi_it): New structs. + (struct it): New members bidi_p and bidi_it. + (bidi_init_it): Extern it. + (bidi_get_next_char_visually): Extern it. + + * dispnew.c (direct_output_forward_char): Give up if we need bidi + processing or buffer's direction is right-to-left. + + * xdisp.c (init_iterator): Initialize it->bidi_p. + (reseat_1): Call bidi_init_it and bidi_get_next_char_visually if + necessary. + (set_iterator_to_next): Call bidi_get_next_char_visually if + necessary. + + +;; Local Variables: +;; coding: utf-8 +;; add-log-time-zone-rule: t +;; End: + + Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc. + + This file is part of GNU Emacs. + + GNU Emacs is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + GNU Emacs is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
--- a/src/Makefile.in Fri Jan 01 13:44:02 2010 -0500 +++ b/src/Makefile.in Fri Jan 01 14:30:06 2010 -0500 @@ -560,7 +560,7 @@ /* lastfile must follow all files whose initialized data areas should be dumped as pure by dump-emacs. */ obj= dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ - charset.o coding.o category.o ccl.o character.o chartab.o \ + charset.o coding.o category.o ccl.o character.o chartab.o bidi.o \ cm.o term.o terminal.o xfaces.o $(XOBJ) $(GTK_OBJ) $(DBUS_OBJ) \ emacs.o keyboard.o macros.o keymap.o sysdep.o \ buffer.o filelock.o insdel.o marker.o \ @@ -1052,6 +1052,7 @@ atimer.o: atimer.c atimer.h syssignal.h systime.h lisp.h blockinput.h \ $(config_h) +bidi.o: bidi.c buffer.h character.h dispextern.h lisp.h $(config_h) buffer.o: buffer.c buffer.h region-cache.h commands.h window.h \ $(INTERVALS_H) blockinput.h atimer.h systime.h character.h \ indent.h keyboard.h coding.h keymap.h frame.h lisp.h $(config_h)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/bidi.c Fri Jan 01 14:30:06 2010 -0500 @@ -0,0 +1,2027 @@ +/* Low-level bidirectional buffer-scanning functions for GNU Emacs. + Copyright (C) 2000, 2001, 2004, 2005, 2009 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + + +You should have received a copy of the GNU General Public License +along with GNU Emacs; see the file COPYING. If not, write to +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. */ + +/* Written by Eli Zaretskii <eliz@gnu.org>. + + A sequential implementation of the Unicode Bidirectional algorithm, + as per UAX#9, a part of the Unicode Standard. + + Unlike the reference and most other implementations, this one is + designed to be called once for every character in the buffer. + + The main entry point is bidi_get_next_char_visually. Each time it + is called, it finds the next character in the visual order, and + returns its information in a special structure. The caller is then + expected to process this character for display or any other + purposes, and call bidi_get_next_char_visually for the next + character. See the comments in bidi_get_next_char_visually for + more details about its algorithm that finds the next visual-order + character by resolving their levels on the fly. + + If you want to understand the code, you will have to read it + together with the relevant portions of UAX#9. The comments include + references to UAX#9 rules, for that very reason. 
+ + A note about references to UAX#9 rules: if the reference says + something like "X9/Retaining", it means that you need to refer to + rule X9 and to its modifications described in the "Implementation + Notes" section of UAX#9, under "Retaining Format Codes". */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#include <stdio.h> + +#ifdef HAVE_STRING_H +#include <string.h> +#endif + +#include <setjmp.h> + +#include "lisp.h" +#include "buffer.h" +#include "character.h" +#include "dispextern.h" + +static int bidi_initialized = 0; + +static Lisp_Object bidi_type_table; + +/* FIXME: Remove these when bidi_explicit_dir_char uses a lookup table. */ +#define LRM_CHAR 0x200E +#define RLM_CHAR 0x200F +#define LRE_CHAR 0x202A +#define RLE_CHAR 0x202B +#define PDF_CHAR 0x202C +#define LRO_CHAR 0x202D +#define RLO_CHAR 0x202E + +#define BIDI_EOB -1 +#define BIDI_BOB -2 /* FIXME: Is this needed? */ + +/* Local data structures. (Look in dispextern.h for the rest.) */ + +/* What we need to know about the current paragraph. */ +struct bidi_paragraph_info { + int start_bytepos; /* byte position where it begins */ + int end_bytepos; /* byte position where it ends */ + int embedding_level; /* its basic embedding level */ + bidi_dir_t base_dir; /* its base direction */ +}; + +/* Data type for describing the bidirectional character categories. */ +typedef enum { + UNKNOWN_BC, + NEUTRAL, + WEAK, + STRONG +} bidi_category_t; + +int bidi_ignore_explicit_marks_for_paragraph_level = 1; + +static Lisp_Object fallback_paragraph_start_re, fallback_paragraph_separate_re; +static Lisp_Object Qparagraph_start, Qparagraph_separate; + +static void +bidi_initialize () +{ + /* FIXME: This should come from the Unicode Database. 
*/ + struct { + int from, to; + bidi_type_t type; + } bidi_type[] = + { { 0x0000, 0x0008, WEAK_BN }, + { 0x0009, 0x0000, NEUTRAL_S }, + { 0x000A, 0x0000, NEUTRAL_B }, + { 0x000B, 0x0000, NEUTRAL_S }, + { 0x000C, 0x0000, NEUTRAL_WS }, + { 0x000D, 0x0000, NEUTRAL_B }, + { 0x000E, 0x001B, WEAK_BN }, + { 0x001C, 0x001E, NEUTRAL_B }, + { 0x001F, 0x0000, NEUTRAL_S }, + { 0x0020, 0x0000, NEUTRAL_WS }, + { 0x0021, 0x0022, NEUTRAL_ON }, + { 0x0023, 0x0025, WEAK_ET }, + { 0x0026, 0x002A, NEUTRAL_ON }, + { 0x002B, 0x0000, WEAK_ES }, + { 0x002C, 0x0000, WEAK_CS }, + { 0x002D, 0x0000, WEAK_ES }, + { 0x002E, 0x002F, WEAK_CS }, + { 0x0030, 0x0039, WEAK_EN }, + { 0x003A, 0x0000, WEAK_CS }, + { 0x003B, 0x0040, NEUTRAL_ON }, + { 0x005B, 0x0060, NEUTRAL_ON }, + { 0x007B, 0x007E, NEUTRAL_ON }, + { 0x007F, 0x0084, WEAK_BN }, + { 0x0085, 0x0000, NEUTRAL_B }, + { 0x0086, 0x009F, WEAK_BN }, + { 0x00A0, 0x0000, WEAK_CS }, + { 0x00A1, 0x0000, NEUTRAL_ON }, + { 0x00A2, 0x00A5, WEAK_ET }, + { 0x00A6, 0x00A9, NEUTRAL_ON }, + { 0x00AB, 0x00AC, NEUTRAL_ON }, + { 0x00AD, 0x0000, WEAK_BN }, + { 0x00AE, 0x00Af, NEUTRAL_ON }, + { 0x00B0, 0x00B1, WEAK_ET }, + { 0x00B2, 0x00B3, WEAK_EN }, + { 0x00B4, 0x0000, NEUTRAL_ON }, + { 0x00B6, 0x00B8, NEUTRAL_ON }, + { 0x00B9, 0x0000, WEAK_EN }, + { 0x00BB, 0x00BF, NEUTRAL_ON }, + { 0x00D7, 0x0000, NEUTRAL_ON }, + { 0x00F7, 0x0000, NEUTRAL_ON }, + { 0x02B9, 0x02BA, NEUTRAL_ON }, + { 0x02C2, 0x02CF, NEUTRAL_ON }, + { 0x02D2, 0x02DF, NEUTRAL_ON }, + { 0x02E5, 0x02ED, NEUTRAL_ON }, + { 0x0300, 0x036F, WEAK_NSM }, + { 0x0374, 0x0375, NEUTRAL_ON }, + { 0x037E, 0x0385, NEUTRAL_ON }, + { 0x0387, 0x0000, NEUTRAL_ON }, + { 0x03F6, 0x0000, NEUTRAL_ON }, + { 0x0483, 0x0489, WEAK_NSM }, + { 0x058A, 0x0000, NEUTRAL_ON }, + { 0x0591, 0x05BD, WEAK_NSM }, + { 0x05BE, 0x0000, STRONG_R }, + { 0x05BF, 0x0000, WEAK_NSM }, + { 0x05C0, 0x0000, STRONG_R }, + { 0x05C1, 0x05C2, WEAK_NSM }, + { 0x05C3, 0x0000, STRONG_R }, + { 0x05C4, 0x05C5, WEAK_NSM }, + { 0x05C6, 0x0000, STRONG_R }, + 
{ 0x05C7, 0x0000, WEAK_NSM }, + { 0x05D0, 0x05F4, STRONG_R }, + { 0x060C, 0x0000, WEAK_CS }, + { 0x061B, 0x064A, STRONG_AL }, + { 0x064B, 0x0655, WEAK_NSM }, + { 0x0660, 0x0669, WEAK_AN }, + { 0x066A, 0x0000, WEAK_ET }, + { 0x066B, 0x066C, WEAK_AN }, + { 0x066D, 0x066F, STRONG_AL }, + { 0x0670, 0x0000, WEAK_NSM }, + { 0x0671, 0x06D5, STRONG_AL }, + { 0x06D6, 0x06DC, WEAK_NSM }, + { 0x06DD, 0x0000, STRONG_AL }, + { 0x06DE, 0x06E4, WEAK_NSM }, + { 0x06E5, 0x06E6, STRONG_AL }, + { 0x06E7, 0x06E8, WEAK_NSM }, + { 0x06E9, 0x0000, NEUTRAL_ON }, + { 0x06EA, 0x06ED, WEAK_NSM }, + { 0x06F0, 0x06F9, WEAK_EN }, + { 0x06FA, 0x070D, STRONG_AL }, + { 0x070F, 0x0000, WEAK_BN }, + { 0x0710, 0x0000, STRONG_AL }, + { 0x0711, 0x0000, WEAK_NSM }, + { 0x0712, 0x072C, STRONG_AL }, + { 0x0730, 0x074A, WEAK_NSM }, + { 0x0780, 0x07A5, STRONG_AL }, + { 0x07A6, 0x07B0, WEAK_NSM }, + { 0x07B1, 0x0000, STRONG_AL }, + { 0x0901, 0x0902, WEAK_NSM }, + { 0x093C, 0x0000, WEAK_NSM }, + { 0x0941, 0x0948, WEAK_NSM }, + { 0x094D, 0x0000, WEAK_NSM }, + { 0x0951, 0x0954, WEAK_NSM }, + { 0x0962, 0x0963, WEAK_NSM }, + { 0x0981, 0x0000, WEAK_NSM }, + { 0x09BC, 0x0000, WEAK_NSM }, + { 0x09C1, 0x09C4, WEAK_NSM }, + { 0x09CD, 0x0000, WEAK_NSM }, + { 0x09E2, 0x09E3, WEAK_NSM }, + { 0x09F2, 0x09F3, WEAK_ET }, + { 0x0A02, 0x0000, WEAK_NSM }, + { 0x0A3C, 0x0000, WEAK_NSM }, + { 0x0A41, 0x0A4D, WEAK_NSM }, + { 0x0A70, 0x0A71, WEAK_NSM }, + { 0x0A81, 0x0A82, WEAK_NSM }, + { 0x0ABC, 0x0000, WEAK_NSM }, + { 0x0AC1, 0x0AC8, WEAK_NSM }, + { 0x0ACD, 0x0000, WEAK_NSM }, + { 0x0B01, 0x0000, WEAK_NSM }, + { 0x0B3C, 0x0000, WEAK_NSM }, + { 0x0B3F, 0x0000, WEAK_NSM }, + { 0x0B41, 0x0B43, WEAK_NSM }, + { 0x0B4D, 0x0B56, WEAK_NSM }, + { 0x0B82, 0x0000, WEAK_NSM }, + { 0x0BC0, 0x0000, WEAK_NSM }, + { 0x0BCD, 0x0000, WEAK_NSM }, + { 0x0C3E, 0x0C40, WEAK_NSM }, + { 0x0C46, 0x0C56, WEAK_NSM }, + { 0x0CBF, 0x0000, WEAK_NSM }, + { 0x0CC6, 0x0000, WEAK_NSM }, + { 0x0CCC, 0x0CCD, WEAK_NSM }, + { 0x0D41, 0x0D43, WEAK_NSM }, + { 0x0D4D, 
0x0000, WEAK_NSM }, + { 0x0DCA, 0x0000, WEAK_NSM }, + { 0x0DD2, 0x0DD6, WEAK_NSM }, + { 0x0E31, 0x0000, WEAK_NSM }, + { 0x0E34, 0x0E3A, WEAK_NSM }, + { 0x0E3F, 0x0000, WEAK_ET }, + { 0x0E47, 0x0E4E, WEAK_NSM }, + { 0x0EB1, 0x0000, WEAK_NSM }, + { 0x0EB4, 0x0EBC, WEAK_NSM }, + { 0x0EC8, 0x0ECD, WEAK_NSM }, + { 0x0F18, 0x0F19, WEAK_NSM }, + { 0x0F35, 0x0000, WEAK_NSM }, + { 0x0F37, 0x0000, WEAK_NSM }, + { 0x0F39, 0x0000, WEAK_NSM }, + { 0x0F3A, 0x0F3D, NEUTRAL_ON }, + { 0x0F71, 0x0F7E, WEAK_NSM }, + { 0x0F80, 0x0F84, WEAK_NSM }, + { 0x0F86, 0x0F87, WEAK_NSM }, + { 0x0F90, 0x0FBC, WEAK_NSM }, + { 0x0FC6, 0x0000, WEAK_NSM }, + { 0x102D, 0x1030, WEAK_NSM }, + { 0x1032, 0x1037, WEAK_NSM }, + { 0x1039, 0x0000, WEAK_NSM }, + { 0x1058, 0x1059, WEAK_NSM }, + { 0x1680, 0x0000, NEUTRAL_WS }, + { 0x169B, 0x169C, NEUTRAL_ON }, + { 0x1712, 0x1714, WEAK_NSM }, + { 0x1732, 0x1734, WEAK_NSM }, + { 0x1752, 0x1753, WEAK_NSM }, + { 0x1772, 0x1773, WEAK_NSM }, + { 0x17B7, 0x17BD, WEAK_NSM }, + { 0x17C6, 0x0000, WEAK_NSM }, + { 0x17C9, 0x17D3, WEAK_NSM }, + { 0x17DB, 0x0000, WEAK_ET }, + { 0x1800, 0x180A, NEUTRAL_ON }, + { 0x180B, 0x180D, WEAK_NSM }, + { 0x180E, 0x0000, WEAK_BN }, + { 0x18A9, 0x0000, WEAK_NSM }, + { 0x1FBD, 0x0000, NEUTRAL_ON }, + { 0x1FBF, 0x1FC1, NEUTRAL_ON }, + { 0x1FCD, 0x1FCF, NEUTRAL_ON }, + { 0x1FDD, 0x1FDF, NEUTRAL_ON }, + { 0x1FED, 0x1FEF, NEUTRAL_ON }, + { 0x1FFD, 0x1FFE, NEUTRAL_ON }, + { 0x2000, 0x200A, NEUTRAL_WS }, + { 0x200B, 0x200D, WEAK_BN }, + { 0x200F, 0x0000, STRONG_R }, + { 0x2010, 0x2027, NEUTRAL_ON }, + { 0x2028, 0x0000, NEUTRAL_WS }, + { 0x2029, 0x0000, NEUTRAL_B }, + { 0x202A, 0x0000, LRE }, + { 0x202B, 0x0000, RLE }, + { 0x202C, 0x0000, PDF }, + { 0x202D, 0x0000, LRO }, + { 0x202E, 0x0000, RLO }, + { 0x202F, 0x0000, NEUTRAL_WS }, + { 0x2030, 0x2034, WEAK_ET }, + { 0x2035, 0x2057, NEUTRAL_ON }, + { 0x205F, 0x0000, NEUTRAL_WS }, + { 0x2060, 0x206F, WEAK_BN }, + { 0x2070, 0x0000, WEAK_EN }, + { 0x2074, 0x2079, WEAK_EN }, + { 0x207A, 0x207B, WEAK_ET 
}, + { 0x207C, 0x207E, NEUTRAL_ON }, + { 0x2080, 0x2089, WEAK_EN }, + { 0x208A, 0x208B, WEAK_ET }, + { 0x208C, 0x208E, NEUTRAL_ON }, + { 0x20A0, 0x20B1, WEAK_ET }, + { 0x20D0, 0x20EA, WEAK_NSM }, + { 0x2100, 0x2101, NEUTRAL_ON }, + { 0x2103, 0x2106, NEUTRAL_ON }, + { 0x2108, 0x2109, NEUTRAL_ON }, + { 0x2114, 0x0000, NEUTRAL_ON }, + { 0x2116, 0x2118, NEUTRAL_ON }, + { 0x211E, 0x2123, NEUTRAL_ON }, + { 0x2125, 0x0000, NEUTRAL_ON }, + { 0x2127, 0x0000, NEUTRAL_ON }, + { 0x2129, 0x0000, NEUTRAL_ON }, + { 0x212E, 0x0000, WEAK_ET }, + { 0x2132, 0x0000, NEUTRAL_ON }, + { 0x213A, 0x0000, NEUTRAL_ON }, + { 0x2140, 0x2144, NEUTRAL_ON }, + { 0x214A, 0x215F, NEUTRAL_ON }, + { 0x2190, 0x2211, NEUTRAL_ON }, + { 0x2212, 0x2213, WEAK_ET }, + { 0x2214, 0x2335, NEUTRAL_ON }, + { 0x237B, 0x2394, NEUTRAL_ON }, + { 0x2396, 0x244A, NEUTRAL_ON }, + { 0x2460, 0x249B, WEAK_EN }, + { 0x24EA, 0x0000, WEAK_EN }, + { 0x24EB, 0x2FFB, NEUTRAL_ON }, + { 0x3000, 0x0000, NEUTRAL_WS }, + { 0x3001, 0x3004, NEUTRAL_ON }, + { 0x3008, 0x3020, NEUTRAL_ON }, + { 0x302A, 0x302F, WEAK_NSM }, + { 0x3030, 0x0000, NEUTRAL_ON }, + { 0x3036, 0x3037, NEUTRAL_ON }, + { 0x303D, 0x303F, NEUTRAL_ON }, + { 0x3099, 0x309A, WEAK_NSM }, + { 0x309B, 0x309C, NEUTRAL_ON }, + { 0x30A0, 0x0000, NEUTRAL_ON }, + { 0x30FB, 0x0000, NEUTRAL_ON }, + { 0x3251, 0x325F, NEUTRAL_ON }, + { 0x32B1, 0x32BF, NEUTRAL_ON }, + { 0xA490, 0xA4C6, NEUTRAL_ON }, + { 0xFB1D, 0x0000, STRONG_R }, + { 0xFB1E, 0x0000, WEAK_NSM }, + { 0xFB1F, 0xFB28, STRONG_R }, + { 0xFB29, 0x0000, WEAK_ET }, + { 0xFB2A, 0xFB4F, STRONG_R }, + { 0xFB50, 0xFD3D, STRONG_AL }, + { 0xFD3E, 0xFD3F, NEUTRAL_ON }, + { 0xFD50, 0xFDFC, STRONG_AL }, + { 0xFE00, 0xFE23, WEAK_NSM }, + { 0xFE30, 0xFE4F, NEUTRAL_ON }, + { 0xFE50, 0x0000, WEAK_CS }, + { 0xFE51, 0x0000, NEUTRAL_ON }, + { 0xFE52, 0x0000, WEAK_CS }, + { 0xFE54, 0x0000, NEUTRAL_ON }, + { 0xFE55, 0x0000, WEAK_CS }, + { 0xFE56, 0xFE5E, NEUTRAL_ON }, + { 0xFE5F, 0x0000, WEAK_ET }, + { 0xFE60, 0xFE61, NEUTRAL_ON }, + { 
0xFE62, 0xFE63, WEAK_ET }, + { 0xFE64, 0xFE68, NEUTRAL_ON }, + { 0xFE69, 0xFE6A, WEAK_ET }, + { 0xFE6B, 0x0000, NEUTRAL_ON }, + { 0xFE70, 0xFEFC, STRONG_AL }, + { 0xFEFF, 0x0000, WEAK_BN }, + { 0xFF01, 0xFF02, NEUTRAL_ON }, + { 0xFF03, 0xFF05, WEAK_ET }, + { 0xFF06, 0xFF0A, NEUTRAL_ON }, + { 0xFF0B, 0x0000, WEAK_ET }, + { 0xFF0C, 0x0000, WEAK_CS }, + { 0xFF0D, 0x0000, WEAK_ET }, + { 0xFF0E, 0x0000, WEAK_CS }, + { 0xFF0F, 0x0000, WEAK_ES }, + { 0xFF10, 0xFF19, WEAK_EN }, + { 0xFF1A, 0x0000, WEAK_CS }, + { 0xFF1B, 0xFF20, NEUTRAL_ON }, + { 0xFF3B, 0xFF40, NEUTRAL_ON }, + { 0xFF5B, 0xFF65, NEUTRAL_ON }, + { 0xFFE0, 0xFFE1, WEAK_ET }, + { 0xFFE2, 0xFFE4, NEUTRAL_ON }, + { 0xFFE5, 0xFFE6, WEAK_ET }, + { 0xFFE8, 0xFFEE, NEUTRAL_ON }, + { 0xFFF9, 0xFFFB, WEAK_BN }, + { 0xFFFC, 0xFFFD, NEUTRAL_ON }, + { 0x1D167, 0x1D169, WEAK_NSM }, + { 0x1D173, 0x1D17A, WEAK_BN }, + { 0x1D17B, 0x1D182, WEAK_NSM }, + { 0x1D185, 0x1D18B, WEAK_NSM }, + { 0x1D1AA, 0x1D1AD, WEAK_NSM }, + { 0x1D7CE, 0x1D7FF, WEAK_EN }, + { 0xE0001, 0xE007F, WEAK_BN } }; + int i; + + bidi_type_table = Fmake_char_table (Qnil, make_number (STRONG_L)); + staticpro (&bidi_type_table); + + for (i = 0; i < sizeof bidi_type / sizeof bidi_type[0]; i++) + char_table_set_range (bidi_type_table, bidi_type[i].from, + bidi_type[i].to ? 
bidi_type[i].to : bidi_type[i].from, + make_number (bidi_type[i].type)); + + fallback_paragraph_start_re = + XSYMBOL (Fintern_soft (build_string ("paragraph-start"), Qnil))->value; + if (!STRINGP (fallback_paragraph_start_re)) + fallback_paragraph_start_re = build_string ("\f\\|[ \t]*$"); + staticpro (&fallback_paragraph_start_re); + Qparagraph_start = intern ("paragraph-start"); + staticpro (&Qparagraph_start); + fallback_paragraph_separate_re = + XSYMBOL (Fintern_soft (build_string ("paragraph-separate"), Qnil))->value; + if (!STRINGP (fallback_paragraph_separate_re)) + fallback_paragraph_separate_re = build_string ("[ \t\f]*$"); + staticpro (&fallback_paragraph_separate_re); + Qparagraph_separate = intern ("paragraph-separate"); + staticpro (&Qparagraph_separate); + bidi_initialized = 1; +} + +/* Return the bidi type of a character CH, subject to the current + directional OVERRIDE. */ +bidi_type_t +bidi_get_type (int ch, bidi_dir_t override) +{ + bidi_type_t default_type; + + if (ch == BIDI_EOB) + return NEUTRAL_B; + if (ch < 0 || ch > MAX_CHAR) + abort (); + + default_type = (bidi_type_t) XINT (CHAR_TABLE_REF (bidi_type_table, ch)); + + if (override == NEUTRAL_DIR) + return default_type; + + switch (default_type) + { + /* Although UAX#9 does not tell, it doesn't make sense to + override NEUTRAL_B and LRM/RLM characters. */ + case NEUTRAL_B: + case LRE: + case LRO: + case RLE: + case RLO: + case PDF: + return default_type; + default: + switch (ch) + { + case LRM_CHAR: + case RLM_CHAR: + return default_type; + default: + if (override == L2R) /* X6 */ + return STRONG_L; + else if (override == R2L) + return STRONG_R; + else + abort (); /* can't happen: handled above */ + } + } +} + +void +bidi_check_type (bidi_type_t type) +{ + if (type < UNKNOWN_BT || type > NEUTRAL_ON) + abort (); +} + +/* Given a bidi TYPE of a character, return its category. 
*/ +bidi_category_t +bidi_get_category (bidi_type_t type) +{ + switch (type) + { + case UNKNOWN_BT: + return UNKNOWN_BC; + case STRONG_L: + case STRONG_R: + case STRONG_AL: + case LRE: + case LRO: + case RLE: + case RLO: + return STRONG; + case PDF: /* ??? really?? */ + case WEAK_EN: + case WEAK_ES: + case WEAK_ET: + case WEAK_AN: + case WEAK_CS: + case WEAK_NSM: + case WEAK_BN: + return WEAK; + case NEUTRAL_B: + case NEUTRAL_S: + case NEUTRAL_WS: + case NEUTRAL_ON: + return NEUTRAL; + default: + abort (); + } +} + +/* Return the mirrored character of C, if any. + + Note: The conditions in UAX#9 clause L4 must be tested by the + caller. */ +/* FIXME: exceedingly temporary! Should consult the Unicode database + of character properties. */ +int +bidi_mirror_char (int c) +{ + static const char mirrored_pairs[] = "()<>[]{}"; + const char *p = c > 0 && c < 128 ? strchr (mirrored_pairs, c) : NULL; + + if (p) + { + size_t i = p - mirrored_pairs; + + return mirrored_pairs [(i ^ 1)]; + } + return c; +} + +/* Copy the bidi iterator from FROM to TO. To save cycles, this only + copies the part of the level stack that is actually in use. */ +static inline void +bidi_copy_it (struct bidi_it *to, struct bidi_it *from) +{ + int i; + + /* Copy everything except the level stack and beyond. */ + memcpy (to, from, ((size_t)&((struct bidi_it *)0)->level_stack[0])); + + /* Copy the active part of the level stack. */ + to->level_stack[0] = from->level_stack[0]; /* level zero is always in use */ + for (i = 1; i <= from->stack_idx; i++) + to->level_stack[i] = from->level_stack[i]; +} + +/* Caching the bidi iterator states. */ + +static struct bidi_it bidi_cache[1000]; /* FIXME: make this dynamically allocated! 
*/ +static int bidi_cache_idx; +static int bidi_cache_last_idx; + +static inline void +bidi_cache_reset (void) +{ + bidi_cache_idx = 0; + bidi_cache_last_idx = -1; +} + +static inline void +bidi_cache_fetch_state (int idx, struct bidi_it *bidi_it) +{ + int current_scan_dir = bidi_it->scan_dir; + + if (idx < 0 || idx >= bidi_cache_idx) + abort (); + + bidi_copy_it (bidi_it, &bidi_cache[idx]); + bidi_it->scan_dir = current_scan_dir; + bidi_cache_last_idx = idx; +} + +/* Find a cached state with a given CHARPOS and resolved embedding + level less than or equal to LEVEL. If LEVEL is -1, disregard the + resolved levels in cached states. DIR, if non-zero, means search + in that direction from the last cache hit; it is overridden when + CHARPOS differs from the position of the last cache hit, in which + case the direction is derived from that comparison. If DIR is + zero and CHARPOS equals the position of the last cache hit, the + search goes backwards from the last cached slot. Value is the + index of the cache slot found, or -1 if there is none. */ +static inline int +bidi_cache_search (int charpos, int level, int dir) +{ + int i, i_start; + + if (bidi_cache_idx) + { + if (charpos < bidi_cache[bidi_cache_last_idx].charpos) + dir = -1; + else if (charpos > bidi_cache[bidi_cache_last_idx].charpos) + dir = 1; + if (dir) + i_start = bidi_cache_last_idx; + else + { + dir = -1; + i_start = bidi_cache_idx - 1; + } + + if (dir < 0) + { + /* Linear search for now; FIXME! */ + for (i = i_start; i >= 0; i--) + if (bidi_cache[i].charpos == charpos + && (level == -1 || bidi_cache[i].resolved_level <= level)) + return i; + } + else + { + for (i = i_start; i < bidi_cache_idx; i++) + if (bidi_cache[i].charpos == charpos + && (level == -1 || bidi_cache[i].resolved_level <= level)) + return i; + } + } + + return -1; +} + +/* Find a cached state where the resolved level changes to a value + that is lower than LEVEL, and return its cache slot index. DIR is + the direction to search, starting with the last used cache slot. + BEFORE, if non-zero, means return the index of the slot that is + ``before'' the level change in the search direction. 
That is, + given the cached levels like this: + + 1122333442211 + AB C + + and assuming we are at the position cached at the slot marked with + C, searching backwards (DIR = -1) for LEVEL = 2 will return the + index of slot B or A, depending whether BEFORE is, respectively, + non-zero or zero. */ +static int +bidi_cache_find_level_change (int level, int dir, int before) +{ + if (bidi_cache_idx) + { + int i = dir ? bidi_cache_last_idx : bidi_cache_idx - 1; + int incr = before ? 1 : 0; + + if (!dir) + dir = -1; + else if (!incr) + i += dir; + + if (dir < 0) + { + while (i >= incr) + { + if (bidi_cache[i - incr].resolved_level >= 0 + && bidi_cache[i - incr].resolved_level < level) + return i; + i--; + } + } + else + { + while (i < bidi_cache_idx - incr) + { + if (bidi_cache[i + incr].resolved_level >= 0 + && bidi_cache[i + incr].resolved_level < level) + return i; + i++; + } + } + } + + return -1; +} + +static inline void +bidi_cache_iterator_state (struct bidi_it *bidi_it, int resolved) +{ + int idx; + + /* We should never cache on backward scans. */ + if (bidi_it->scan_dir == -1) + abort (); + idx = bidi_cache_search (bidi_it->charpos, -1, 1); + + if (idx < 0) + { + idx = bidi_cache_idx; + /* Don't overrun the cache limit. */ + if (idx > sizeof (bidi_cache) / sizeof (bidi_cache[0]) - 1) + abort (); + /* Don't violate cache integrity: character positions should + correspond to cache positions 1:1. */ + if (idx > 0 && bidi_it->charpos != bidi_cache[idx - 1].charpos + 1) + abort (); + bidi_copy_it (&bidi_cache[idx], bidi_it); + if (!resolved) + bidi_cache[idx].resolved_level = -1; + bidi_cache[idx].new_paragraph = 0; + } + else + { + /* Copy only the members which could have changed, to avoid + costly copying of the entire struct. 
*/ + bidi_cache[idx].type = bidi_it->type; + bidi_check_type (bidi_it->type); + bidi_cache[idx].type_after_w1 = bidi_it->type_after_w1; + bidi_check_type (bidi_it->type_after_w1); + if (resolved) + bidi_cache[idx].resolved_level = bidi_it->resolved_level; + else + bidi_cache[idx].resolved_level = -1; + bidi_cache[idx].invalid_levels = bidi_it->invalid_levels; + bidi_cache[idx].invalid_rl_levels = bidi_it->invalid_rl_levels; + bidi_cache[idx].next_for_neutral = bidi_it->next_for_neutral; + bidi_cache[idx].next_for_ws = bidi_it->next_for_ws; + bidi_cache[idx].ignore_bn_limit = bidi_it->ignore_bn_limit; + } + + bidi_cache_last_idx = idx; + if (idx >= bidi_cache_idx) + bidi_cache_idx = idx + 1; +} + +static inline bidi_type_t +bidi_cache_find (int charpos, int level, struct bidi_it *bidi_it) +{ + int i = bidi_cache_search (charpos, level, bidi_it->scan_dir); + + if (i >= 0) + { + bidi_dir_t current_scan_dir = bidi_it->scan_dir; + + *bidi_it = bidi_cache[i]; + bidi_cache_last_idx = i; + /* Don't let the scan direction from the cached state override + the current scan direction. */ + bidi_it->scan_dir = current_scan_dir; + return bidi_it->type; + } + + return UNKNOWN_BT; +} + +static inline int +bidi_peek_at_next_level (struct bidi_it *bidi_it) +{ + if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1) + abort (); + return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; +} + +/* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. + Value is the non-negative length of the paragraph separator + following the buffer position, -1 if position is at the beginning + of a new paragraph, or -2 if position is neither at beginning nor + at end of a paragraph. 
*/ +EMACS_INT +bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) +{ + Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate, + Fcurrent_buffer ()); + Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start, + Fcurrent_buffer ()); + EMACS_INT val; + + if (!STRINGP (sep_re)) + sep_re = fallback_paragraph_separate_re; + if (!STRINGP (start_re)) + start_re = fallback_paragraph_start_re; + + val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); + if (val < 0) + { + if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0) + val = -1; + else + val = -2; + } + + return val; +} + +/* Determine the start-of-run (sor) directional type given the two + embedding levels on either side of the run boundary. Also, update + the saved info about previously seen characters, since that info is + generally valid for a single level run. */ +static inline void +bidi_set_sor_type (struct bidi_it *bidi_it, int level_before, int level_after) +{ + int higher_level = level_before > level_after ? level_before : level_after; + + /* The prev_was_pdf gork is required for when we have several PDFs + in a row. In that case, we want to compute the sor type for the + next level run only once: when we see the first PDF. That's + because the sor type depends only on the higher of the two levels + that we find on the two sides of the level boundary (see UAX#9, + clause X10), and so we don't need to know the final embedding + level to which we descend after processing all the PDFs. */ + if (!bidi_it->prev_was_pdf || level_before < level_after) + /* FIXME: should the default sor direction be user selectable? */ + bidi_it->sor = (higher_level & 1) != 0 ? R2L : L2R; + if (level_before > level_after) + bidi_it->prev_was_pdf = 1; + + bidi_it->prev.type = UNKNOWN_BT; + bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 = + bidi_it->last_strong.orig_type = UNKNOWN_BT; + bidi_it->prev_for_neutral.type = bidi_it->sor == R2L ? 
STRONG_R : STRONG_L; + bidi_it->prev_for_neutral.charpos = bidi_it->charpos; + bidi_it->prev_for_neutral.bytepos = bidi_it->bytepos; + bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 = + bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; + bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ +} + +static void +bidi_line_init (struct bidi_it *bidi_it) +{ + bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ + bidi_it->resolved_level = bidi_it->level_stack[0].level; + bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ + bidi_it->invalid_levels = 0; + bidi_it->invalid_rl_levels = -1; + bidi_it->next_en_pos = -1; + bidi_it->next_for_ws.type = UNKNOWN_BT; + bidi_set_sor_type (bidi_it, + bidi_it->paragraph_dir == R2L ? 1 : 0, + bidi_it->level_stack[0].level); /* X10 */ + + bidi_cache_reset (); +} + +/* Find the beginning of this paragraph by looking back in the buffer. + Value is the byte position of the paragraph's beginning. */ +static EMACS_INT +bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte) +{ + Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ()); + EMACS_INT limit = ZV, limit_byte = ZV_BYTE; + + if (!STRINGP (re)) + re = fallback_paragraph_start_re; + while (pos_byte > BEGV_BYTE + && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) + { + pos = find_next_newline_no_quit (pos - 1, -1); + pos_byte = CHAR_TO_BYTE (pos); + } + return pos_byte; +} + +/* Determine the direction, a.k.a. base embedding level, of the + paragraph we are about to iterate through. If DIR is either L2R or + R2L, just use that. Otherwise, determine the paragraph direction + from the first strong character of the paragraph. + + Note that this gives the paragraph separator the same direction as + the preceding paragraph, even though Emacs generally views the + separator as not belonging to any paragraph. 
*/ +void +bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) +{ + EMACS_INT bytepos = bidi_it->bytepos; + + /* Special case for an empty buffer. */ + if (bytepos == BEGV_BYTE && bytepos == ZV_BYTE) + dir = L2R; + /* We should never be called at EOB or before BEGV. */ + else if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) + abort (); + + if (dir == L2R) + { + bidi_it->paragraph_dir = L2R; + bidi_it->new_paragraph = 0; + } + else if (dir == R2L) + { + bidi_it->paragraph_dir = R2L; + bidi_it->new_paragraph = 0; + } + else if (dir == NEUTRAL_DIR) /* P2 */ + { + int ch, ch_len; + EMACS_INT pos; + bidi_type_t type; + EMACS_INT sep_len; + + /* If we are inside a paragraph separator, we are just waiting + for the separator to be exhausted; use the previous paragraph + direction. But don't do that if we have been just reseated, + because we need to reinitialize below in that case. */ + if (!bidi_it->first_elt + && bidi_it->charpos < bidi_it->separator_limit) + return; + + /* If we are on a newline, get past it to where the next + paragraph might start. But don't do that at BEGV since then + we are potentially in a new paragraph that doesn't yet + exist. */ + pos = bidi_it->charpos; + if (bytepos > BEGV_BYTE && FETCH_CHAR (bytepos) == '\n') + { + bytepos++; + pos++; + } + + /* We are either at the beginning of a paragraph or in the + middle of it. Find where this paragraph starts. */ + bytepos = bidi_find_paragraph_start (pos, bytepos); + + /* We should always be at the beginning of a new line at this + point. */ + if (!(bytepos == BEGV_BYTE || FETCH_CHAR (bytepos - 1) == '\n')) + abort (); + + bidi_it->separator_limit = -1; + bidi_it->new_paragraph = 0; + ch = FETCH_CHAR (bytepos); + ch_len = CHAR_BYTES (ch); + pos = BYTE_TO_CHAR (bytepos); + type = bidi_get_type (ch, NEUTRAL_DIR); + + for (pos++, bytepos += ch_len; + /* NOTE: UAX#9 says to search only for L, AL, or R types of + characters, and ignore RLE, RLO, LRE, and LRO. 
However, + I'm not sure it makes sense to omit those 4; should try + with and without that to see the effect. */ + (bidi_get_category (type) != STRONG) + || (bidi_ignore_explicit_marks_for_paragraph_level + && (type == RLE || type == RLO + || type == LRE || type == LRO)); + type = bidi_get_type (ch, NEUTRAL_DIR)) + { + if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1) + break; + if (bytepos >= ZV_BYTE) + { + /* Pretend there's a paragraph separator at end of buffer. */ + type = NEUTRAL_B; + break; + } + FETCH_CHAR_ADVANCE (ch, pos, bytepos); + } + if (type == STRONG_R || type == STRONG_AL) /* P3 */ + bidi_it->paragraph_dir = R2L; + else if (type == STRONG_L) + bidi_it->paragraph_dir = L2R; + } + else + abort (); + + /* Contrary to UAX#9 clause P3, we only default the paragraph + direction to L2R if we have no previous usable paragraph + direction. */ + if (bidi_it->paragraph_dir == NEUTRAL_DIR) + bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */ + if (bidi_it->paragraph_dir == R2L) + bidi_it->level_stack[0].level = 1; + else + bidi_it->level_stack[0].level = 0; + + bidi_line_init (bidi_it); +} + +/* Do whatever UAX#9 clause X8 says should be done at paragraph's + end. */ +static inline void +bidi_set_paragraph_end (struct bidi_it *bidi_it) +{ + bidi_it->invalid_levels = 0; + bidi_it->invalid_rl_levels = -1; + bidi_it->stack_idx = 0; + bidi_it->resolved_level = bidi_it->level_stack[0].level; +} + +/* Initialize the bidi iterator from buffer position CHARPOS. */ +void +bidi_init_it (EMACS_INT charpos, EMACS_INT bytepos, struct bidi_it *bidi_it) +{ + if (! 
bidi_initialized) + bidi_initialize (); + bidi_it->charpos = charpos; + bidi_it->bytepos = bytepos; + bidi_it->first_elt = 1; + bidi_set_paragraph_end (bidi_it); + bidi_it->new_paragraph = 1; + bidi_it->separator_limit = -1; + bidi_it->paragraph_dir = NEUTRAL_DIR; + bidi_it->type = NEUTRAL_B; + bidi_it->type_after_w1 = UNKNOWN_BT; + bidi_it->orig_type = UNKNOWN_BT; + bidi_it->prev_was_pdf = 0; + bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT; + bidi_it->last_strong.type = bidi_it->last_strong.type_after_w1 = + bidi_it->last_strong.orig_type = UNKNOWN_BT; + bidi_it->next_for_neutral.charpos = -1; + bidi_it->next_for_neutral.type = + bidi_it->next_for_neutral.type_after_w1 = + bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; + bidi_it->prev_for_neutral.charpos = -1; + bidi_it->prev_for_neutral.type = + bidi_it->prev_for_neutral.type_after_w1 = + bidi_it->prev_for_neutral.orig_type = UNKNOWN_BT; + bidi_it->sor = L2R; /* FIXME: should it be user-selectable? */ +} + +/* Push the current embedding level and override status; reset the + current level to LEVEL and the current override status to OVERRIDE. */ +static inline void +bidi_push_embedding_level (struct bidi_it *bidi_it, + int level, bidi_dir_t override) +{ + bidi_it->stack_idx++; + if (bidi_it->stack_idx >= BIDI_MAXLEVEL) + abort (); + bidi_it->level_stack[bidi_it->stack_idx].level = level; + bidi_it->level_stack[bidi_it->stack_idx].override = override; +} + +/* Pop the embedding level and directional override status from the + stack, and return the new level. */ +static inline int +bidi_pop_embedding_level (struct bidi_it *bidi_it) +{ + /* UAX#9 says to ignore invalid PDFs. */ + if (bidi_it->stack_idx > 0) + bidi_it->stack_idx--; + return bidi_it->level_stack[bidi_it->stack_idx].level; +} + +/* Record in SAVED_INFO the information about the current character. 
*/ +static inline void +bidi_remember_char (struct bidi_saved_info *saved_info, + struct bidi_it *bidi_it) +{ + saved_info->charpos = bidi_it->charpos; + saved_info->bytepos = bidi_it->bytepos; + saved_info->type = bidi_it->type; + bidi_check_type (bidi_it->type); + saved_info->type_after_w1 = bidi_it->type_after_w1; + bidi_check_type (bidi_it->type_after_w1); + saved_info->orig_type = bidi_it->orig_type; + bidi_check_type (bidi_it->orig_type); +} + +/* Resolve the type of a neutral character according to the type of + surrounding strong text and the current embedding level. */ +static inline bidi_type_t +bidi_resolve_neutral_1 (bidi_type_t prev_type, bidi_type_t next_type, int lev) +{ + /* N1: European and Arabic numbers are treated as though they were R. */ + if (next_type == WEAK_EN || next_type == WEAK_AN) + next_type = STRONG_R; + if (prev_type == WEAK_EN || prev_type == WEAK_AN) + prev_type = STRONG_R; + + if (next_type == prev_type) /* N1 */ + return next_type; + else if ((lev & 1) == 0) /* N2 */ + return STRONG_L; + else + return STRONG_R; +} + +static inline int +bidi_explicit_dir_char (int c) +{ + /* FIXME: this should be replaced with a lookup table with suitable + bits set, like standard C ctype macros do. */ + return (c == LRE_CHAR || c == LRO_CHAR + || c == RLE_CHAR || c == RLO_CHAR || c == PDF_CHAR); +} + +/* A helper function for bidi_resolve_explicit. It advances to the + next character in logical order and determines the new embedding + level and directional override, but does not take into account + empty embeddings. */ +static int +bidi_resolve_explicit_1 (struct bidi_it *bidi_it) +{ + int curchar; + bidi_type_t type; + int current_level; + int new_level; + bidi_dir_t override; + + if (bidi_it->bytepos < BEGV_BYTE /* after reseat to BEGV? 
*/ + || bidi_it->first_elt) + { + bidi_it->first_elt = 0; + if (bidi_it->charpos < BEGV) + bidi_it->charpos = BEGV; + bidi_it->bytepos = CHAR_TO_BYTE (bidi_it->charpos); + } + else if (bidi_it->bytepos < ZV_BYTE) /* don't move at ZV */ + { + bidi_it->charpos++; + if (bidi_it->ch_len == 0) + abort (); + bidi_it->bytepos += bidi_it->ch_len; + } + + current_level = bidi_it->level_stack[bidi_it->stack_idx].level; /* X1 */ + override = bidi_it->level_stack[bidi_it->stack_idx].override; + new_level = current_level; + + /* in case it is a unibyte character (not yet implemented) */ + /* _fetch_multibyte_char_len = 1; */ + if (bidi_it->bytepos >= ZV_BYTE) + { + curchar = BIDI_EOB; + bidi_it->ch_len = 1; + } + else + { + curchar = FETCH_CHAR (bidi_it->bytepos); + bidi_it->ch_len = CHAR_BYTES (curchar); + } + bidi_it->ch = curchar; + + /* Don't apply directional override here, as all the types we handle + below will not be affected by the override anyway, and we need + the original type unaltered. The override will be applied in + bidi_resolve_weak. */ + type = bidi_get_type (curchar, NEUTRAL_DIR); + bidi_it->orig_type = type; + bidi_check_type (bidi_it->orig_type); + + if (type != PDF) + bidi_it->prev_was_pdf = 0; + + bidi_it->type_after_w1 = UNKNOWN_BT; + + switch (type) + { + case RLE: /* X2 */ + case RLO: /* X4 */ + bidi_it->type_after_w1 = type; + bidi_check_type (bidi_it->type_after_w1); + type = WEAK_BN; /* X9/Retaining */ + if (bidi_it->ignore_bn_limit <= 0) + { + if (current_level <= BIDI_MAXLEVEL - 4) + { + /* Compute the least odd embedding level greater than + the current level. */ + new_level = ((current_level + 1) & ~1) + 1; + if (bidi_it->type_after_w1 == RLE) + override = NEUTRAL_DIR; + else + override = R2L; + if (current_level == BIDI_MAXLEVEL - 4) + bidi_it->invalid_rl_levels = 0; + bidi_push_embedding_level (bidi_it, new_level, override); + } + else + { + bidi_it->invalid_levels++; + /* See the commentary about invalid_rl_levels below. 
*/ + if (bidi_it->invalid_rl_levels < 0) + bidi_it->invalid_rl_levels = 0; + bidi_it->invalid_rl_levels++; + } + } + else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ + || bidi_it->next_en_pos > bidi_it->charpos) + type = WEAK_EN; + break; + case LRE: /* X3 */ + case LRO: /* X5 */ + bidi_it->type_after_w1 = type; + bidi_check_type (bidi_it->type_after_w1); + type = WEAK_BN; /* X9/Retaining */ + if (bidi_it->ignore_bn_limit <= 0) + { + if (current_level <= BIDI_MAXLEVEL - 5) + { + /* Compute the least even embedding level greater than + the current level. */ + new_level = ((current_level + 2) & ~1); + if (bidi_it->type_after_w1 == LRE) + override = NEUTRAL_DIR; + else + override = L2R; + bidi_push_embedding_level (bidi_it, new_level, override); + } + else + { + bidi_it->invalid_levels++; + /* invalid_rl_levels counts invalid levels encountered + while the embedding level was already too high for + LRE/LRO, but not for RLE/RLO. That is because + there may be exactly one PDF which we should not + ignore even though invalid_levels is non-zero. + invalid_rl_levels helps to know what PDF is + that. 
*/ + if (bidi_it->invalid_rl_levels >= 0) + bidi_it->invalid_rl_levels++; + } + } + else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ + || bidi_it->next_en_pos > bidi_it->charpos) + type = WEAK_EN; + break; + case PDF: /* X7 */ + bidi_it->type_after_w1 = type; + bidi_check_type (bidi_it->type_after_w1); + type = WEAK_BN; /* X9/Retaining */ + if (bidi_it->ignore_bn_limit <= 0) + { + if (!bidi_it->invalid_rl_levels) + { + new_level = bidi_pop_embedding_level (bidi_it); + bidi_it->invalid_rl_levels = -1; + if (bidi_it->invalid_levels) + bidi_it->invalid_levels--; + /* else nothing: UAX#9 says to ignore invalid PDFs */ + } + if (!bidi_it->invalid_levels) + new_level = bidi_pop_embedding_level (bidi_it); + else + { + bidi_it->invalid_levels--; + bidi_it->invalid_rl_levels--; + } + } + else if (bidi_it->prev.type_after_w1 == WEAK_EN /* W5/Retaining */ + || bidi_it->next_en_pos > bidi_it->charpos) + type = WEAK_EN; + break; + default: + /* Nothing. */ + break; + } + + bidi_it->type = type; + bidi_check_type (bidi_it->type); + + return new_level; +} + +/* Given an iterator state in BIDI_IT, advance one character position + in the buffer to the next character (in the logical order), resolve + any explicit embeddings and directional overrides, and return the + embedding level of the character after resolving explicit + directives and ignoring empty embeddings. */ +static int +bidi_resolve_explicit (struct bidi_it *bidi_it) +{ + int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; + int new_level = bidi_resolve_explicit_1 (bidi_it); + + if (prev_level < new_level + && bidi_it->type == WEAK_BN + && bidi_it->ignore_bn_limit == 0 /* only if not already known */ + && bidi_it->ch != BIDI_EOB /* not already at EOB */ + && bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos + + bidi_it->ch_len))) + { + /* Avoid pushing and popping embedding levels if the level run + is empty, as this breaks level runs where it shouldn't. 
+ UAX#9 removes all the explicit embedding and override codes, + so empty embeddings disappear without a trace. We need to + behave as if we did the same. */ + struct bidi_it saved_it; + int level = prev_level; + + bidi_copy_it (&saved_it, bidi_it); + + while (bidi_explicit_dir_char (FETCH_CHAR (bidi_it->bytepos + + bidi_it->ch_len))) + { + level = bidi_resolve_explicit_1 (bidi_it); + } + + if (level == prev_level) /* empty embedding */ + saved_it.ignore_bn_limit = bidi_it->charpos + 1; + else /* this embedding is non-empty */ + saved_it.ignore_bn_limit = -1; + + bidi_copy_it (bidi_it, &saved_it); + if (bidi_it->ignore_bn_limit > 0) + { + /* We pushed a level, but we shouldn't have. Undo that. */ + if (!bidi_it->invalid_rl_levels) + { + new_level = bidi_pop_embedding_level (bidi_it); + bidi_it->invalid_rl_levels = -1; + if (bidi_it->invalid_levels) + bidi_it->invalid_levels--; + } + if (!bidi_it->invalid_levels) + new_level = bidi_pop_embedding_level (bidi_it); + else + { + bidi_it->invalid_levels--; + bidi_it->invalid_rl_levels--; + } + } + } + + if (bidi_it->type == NEUTRAL_B) /* X8 */ + { + bidi_set_paragraph_end (bidi_it); + /* This is needed by bidi_resolve_weak below, and in L1. */ + bidi_it->type_after_w1 = bidi_it->type; + bidi_check_type (bidi_it->type_after_w1); + } + + return new_level; +} + +/* Advance in the buffer, resolve weak types and return the type of + the next character after weak type resolution. 
*/ +bidi_type_t +bidi_resolve_weak (struct bidi_it *bidi_it) +{ + bidi_type_t type; + bidi_dir_t override; + int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; + int new_level = bidi_resolve_explicit (bidi_it); + int next_char; + bidi_type_t type_of_next; + struct bidi_it saved_it; + + type = bidi_it->type; + override = bidi_it->level_stack[bidi_it->stack_idx].override; + + if (type == UNKNOWN_BT + || type == LRE + || type == LRO + || type == RLE + || type == RLO + || type == PDF) + abort (); + + if (new_level != prev_level + || bidi_it->type == NEUTRAL_B) + { + /* We've got a new embedding level run, compute the directional + type of sor and initialize per-run variables (UAX#9, clause + X10). */ + bidi_set_sor_type (bidi_it, prev_level, new_level); + } + else if (type == NEUTRAL_S || type == NEUTRAL_WS + || type == WEAK_BN || type == STRONG_AL) + bidi_it->type_after_w1 = type; /* needed in L1 */ + bidi_check_type (bidi_it->type_after_w1); + + /* Level and directional override status are already recorded in + bidi_it, and do not need any change; see X6. */ + if (override == R2L) /* X6 */ + type = STRONG_R; + else if (override == L2R) + type = STRONG_L; + else + { + if (type == WEAK_NSM) /* W1 */ + { + /* Note that we don't need to consider the case where the + prev character has its type overridden by an RLO or LRO: + such characters are outside the current level run, and + thus not relevant to this NSM. Thus, NSM gets the + orig_type of the previous character. */ + if (bidi_it->prev.type != UNKNOWN_BT) + type = bidi_it->prev.orig_type; + else if (bidi_it->sor == R2L) + type = STRONG_R; + else if (bidi_it->sor == L2R) + type = STRONG_L; + else /* shouldn't happen! 
*/ + abort (); + } + if (type == WEAK_EN /* W2 */ + && bidi_it->last_strong.type_after_w1 == STRONG_AL) + type = WEAK_AN; + else if (type == STRONG_AL) /* W3 */ + type = STRONG_R; + else if ((type == WEAK_ES /* W4 */ + && bidi_it->prev.type_after_w1 == WEAK_EN + && bidi_it->prev.orig_type == WEAK_EN) + || (type == WEAK_CS + && ((bidi_it->prev.type_after_w1 == WEAK_EN + && bidi_it->prev.orig_type == WEAK_EN) + || bidi_it->prev.type_after_w1 == WEAK_AN))) + { + next_char = + bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE + ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); + type_of_next = bidi_get_type (next_char, override); + + if (type_of_next == WEAK_BN + || bidi_explicit_dir_char (next_char)) + { + bidi_copy_it (&saved_it, bidi_it); + while (bidi_resolve_explicit (bidi_it) == new_level + && bidi_it->type == WEAK_BN) + ; + type_of_next = bidi_it->type; + bidi_copy_it (bidi_it, &saved_it); + } + + /* If the next character is EN, but the last strong-type + character is AL, that next EN will be changed to AN when + we process it in W2 above. So in that case, this ES + should not be changed into EN. */ + if (type == WEAK_ES + && type_of_next == WEAK_EN + && bidi_it->last_strong.type_after_w1 != STRONG_AL) + type = WEAK_EN; + else if (type == WEAK_CS) + { + if (bidi_it->prev.type_after_w1 == WEAK_AN + && (type_of_next == WEAK_AN + /* If the next character is EN, but the last + strong-type character is AL, that EN will later + be changed to AN when we process it in W2 above. + So in that case, this CS is effectively between + two ANs and should be changed into AN. 
*/ + || (type_of_next == WEAK_EN + && bidi_it->last_strong.type_after_w1 == STRONG_AL))) + type = WEAK_AN; + else if (bidi_it->prev.type_after_w1 == WEAK_EN + && type_of_next == WEAK_EN + && bidi_it->last_strong.type_after_w1 != STRONG_AL) + type = WEAK_EN; + } + } + else if (type == WEAK_ET /* W5: ET with EN before or after it */ + || type == WEAK_BN) /* W5/Retaining */ + { + if (bidi_it->prev.type_after_w1 == WEAK_EN /* ET/BN w/EN before it */ + || bidi_it->next_en_pos > bidi_it->charpos) + type = WEAK_EN; + else /* W5: ET/BN with EN after it. */ + { + EMACS_INT en_pos = bidi_it->charpos + 1; + + next_char = + bidi_it->bytepos + bidi_it->ch_len >= ZV_BYTE + ? BIDI_EOB : FETCH_CHAR (bidi_it->bytepos + bidi_it->ch_len); + type_of_next = bidi_get_type (next_char, override); + + if (type_of_next == WEAK_ET + || type_of_next == WEAK_BN + || bidi_explicit_dir_char (next_char)) + { + bidi_copy_it (&saved_it, bidi_it); + while (bidi_resolve_explicit (bidi_it) == new_level + && (bidi_it->type == WEAK_BN + || bidi_it->type == WEAK_ET)) + ; + type_of_next = bidi_it->type; + en_pos = bidi_it->charpos; + bidi_copy_it (bidi_it, &saved_it); + } + if (type_of_next == WEAK_EN) + { + /* If the last strong character is AL, the EN we've + found will become AN when we get to it (W2). */ + if (bidi_it->last_strong.type_after_w1 != STRONG_AL) + { + type = WEAK_EN; + /* Remember this EN position, to speed up processing + of the next ETs. */ + bidi_it->next_en_pos = en_pos; + } + else if (type == WEAK_BN) + type = NEUTRAL_ON; /* W6/Retaining */ + } + } + } + } + + if (type == WEAK_ES || type == WEAK_ET || type == WEAK_CS /* W6 */ + || (type == WEAK_BN + && (bidi_it->prev.type_after_w1 == WEAK_CS /* W6/Retaining */ + || bidi_it->prev.type_after_w1 == WEAK_ES + || bidi_it->prev.type_after_w1 == WEAK_ET))) + type = NEUTRAL_ON; + + /* Store the type we've got so far, before we clobber it with strong + types in W7 and while resolving neutral types. 
But leave alone + the original types that were recorded above, because we will need + them for the L1 clause. */ + if (bidi_it->type_after_w1 == UNKNOWN_BT) + bidi_it->type_after_w1 = type; + bidi_check_type (bidi_it->type_after_w1); + + if (type == WEAK_EN) /* W7 */ + { + if ((bidi_it->last_strong.type_after_w1 == STRONG_L) + || (bidi_it->last_strong.type == UNKNOWN_BT && bidi_it->sor == L2R)) + type = STRONG_L; + } + + bidi_it->type = type; + bidi_check_type (bidi_it->type); + return type; +} + +bidi_type_t +bidi_resolve_neutral (struct bidi_it *bidi_it) +{ + int prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; + bidi_type_t type = bidi_resolve_weak (bidi_it); + int current_level = bidi_it->level_stack[bidi_it->stack_idx].level; + + if (!(type == STRONG_R + || type == STRONG_L + || type == WEAK_BN + || type == WEAK_EN + || type == WEAK_AN + || type == NEUTRAL_B + || type == NEUTRAL_S + || type == NEUTRAL_WS + || type == NEUTRAL_ON)) + abort (); + + if (bidi_get_category (type) == NEUTRAL + || (type == WEAK_BN && prev_level == current_level)) + { + if (bidi_it->next_for_neutral.type != UNKNOWN_BT) + type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, + bidi_it->next_for_neutral.type, + current_level); + else + { + /* Arrrgh!! The UAX#9 algorithm is too deeply entrenched in + the assumption of batch-style processing; see clauses W4, + W5, and especially N1, which require to look far forward + (as well as back) in the buffer. May the fleas of a + thousand camels infest the armpits of those who design + supposedly general-purpose algorithms by looking at their + own implementations, and fail to consider other possible + implementations! */ + struct bidi_it saved_it; + bidi_type_t next_type; + + if (bidi_it->scan_dir == -1) + abort (); + + bidi_copy_it (&saved_it, bidi_it); + /* Scan the text forward until we find the first non-neutral + character, and then use that to resolve the neutral we + are dealing with now. 
We also cache the scanned iterator + states, to salvage some of the effort later. */ + bidi_cache_iterator_state (bidi_it, 0); + do { + /* Record the info about the previous character, so that + it will be cached below with this state. */ + if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */ + && bidi_it->type != WEAK_BN) + bidi_remember_char (&bidi_it->prev, bidi_it); + type = bidi_resolve_weak (bidi_it); + /* Paragraph separators have their levels fully resolved + at this point, so cache them as resolved. */ + bidi_cache_iterator_state (bidi_it, type == NEUTRAL_B); + /* FIXME: implement L1 here, by testing for a newline and + resetting the level for any sequence of whitespace + characters adjacent to it. */ + } while (!(type == NEUTRAL_B + || (type != WEAK_BN + && bidi_get_category (type) != NEUTRAL) + /* This is all per level run, so stop when we + reach the end of this level run. */ + || bidi_it->level_stack[bidi_it->stack_idx].level != + current_level)); + + bidi_remember_char (&saved_it.next_for_neutral, bidi_it); + + switch (type) + { + case STRONG_L: + case STRONG_R: + case STRONG_AL: + next_type = type; + break; + case WEAK_EN: + case WEAK_AN: + /* N1: ``European and Arabic numbers are treated as + though they were R.'' */ + next_type = STRONG_R; + saved_it.next_for_neutral.type = STRONG_R; + break; + case WEAK_BN: + if (!bidi_explicit_dir_char (bidi_it->ch)) + abort (); /* can't happen: BNs are skipped */ + /* FALLTHROUGH */ + case NEUTRAL_B: + /* Marched all the way to the end of this level run. + We need to use the eor type, whose information is + stored by bidi_set_sor_type in the prev_for_neutral + member. */ + if (saved_it.type != WEAK_BN + || bidi_get_category (bidi_it->prev.type_after_w1) == NEUTRAL) + { + next_type = bidi_it->prev_for_neutral.type; + saved_it.next_for_neutral.type = next_type; + bidi_check_type (next_type); + } + else + { + /* This is a BN which does not adjoin neutrals. + Leave its type alone. 
*/ + bidi_copy_it (bidi_it, &saved_it); + return bidi_it->type; + } + break; + default: + abort (); + } + type = bidi_resolve_neutral_1 (saved_it.prev_for_neutral.type, + next_type, current_level); + saved_it.type = type; + bidi_check_type (type); + bidi_copy_it (bidi_it, &saved_it); + } + } + return type; +} + +/* Given an iterator state in BIDI_IT, advance one character position + in the buffer to the next character (in the logical order), resolve + the bidi type of that next character, and return that type. */ +bidi_type_t +bidi_type_of_next_char (struct bidi_it *bidi_it) +{ + bidi_type_t type; + + /* This should always be called during a forward scan. */ + if (bidi_it->scan_dir != 1) + abort (); + + /* Reset the limit until which to ignore BNs if we step out of the + area where we found only empty levels. */ + if ((bidi_it->ignore_bn_limit > 0 + && bidi_it->ignore_bn_limit <= bidi_it->charpos) + || (bidi_it->ignore_bn_limit == -1 + && !bidi_explicit_dir_char (bidi_it->ch))) + bidi_it->ignore_bn_limit = 0; + + type = bidi_resolve_neutral (bidi_it); + + return type; +} + +/* Given an iterator state BIDI_IT, advance one character position in + the buffer to the next character (in the logical order), resolve + the embedding and implicit levels of that next character, and + return the resulting level. */ +int +bidi_level_of_next_char (struct bidi_it *bidi_it) +{ + bidi_type_t type; + int level, prev_level = -1; + struct bidi_saved_info next_for_neutral; + + if (bidi_it->scan_dir == 1) + { + /* There's no sense in trying to advance if we hit end of text. */ + if (bidi_it->ch == BIDI_EOB) + return bidi_it->resolved_level; + + /* Record the info about the previous character. 
*/ + if (bidi_it->type_after_w1 != WEAK_BN /* W1/Retaining */ + && bidi_it->type != WEAK_BN) + bidi_remember_char (&bidi_it->prev, bidi_it); + if (bidi_it->type_after_w1 == STRONG_R + || bidi_it->type_after_w1 == STRONG_L + || bidi_it->type_after_w1 == STRONG_AL) + bidi_remember_char (&bidi_it->last_strong, bidi_it); + /* FIXME: it sounds like we don't need both prev and + prev_for_neutral members, but I'm leaving them both for now. */ + if (bidi_it->type == STRONG_R || bidi_it->type == STRONG_L + || bidi_it->type == WEAK_EN || bidi_it->type == WEAK_AN) + bidi_remember_char (&bidi_it->prev_for_neutral, bidi_it); + + /* If we overstepped the characters used for resolving neutrals + and whitespace, invalidate their info in the iterator. */ + if (bidi_it->charpos >= bidi_it->next_for_neutral.charpos) + bidi_it->next_for_neutral.type = UNKNOWN_BT; + if (bidi_it->next_en_pos >= 0 + && bidi_it->charpos >= bidi_it->next_en_pos) + bidi_it->next_en_pos = -1; + if (bidi_it->next_for_ws.type != UNKNOWN_BT + && bidi_it->charpos >= bidi_it->next_for_ws.charpos) + bidi_it->next_for_ws.type = UNKNOWN_BT; + + /* This must be taken before we fill the iterator with the info + about the next char. If we scan backwards, the iterator + state must be already cached, so there's no need to know the + embedding level of the previous character, since we will be + returning to our caller shortly. */ + prev_level = bidi_it->level_stack[bidi_it->stack_idx].level; + } + next_for_neutral = bidi_it->next_for_neutral; + + /* Perhaps it is already cached. */ + type = bidi_cache_find (bidi_it->charpos + bidi_it->scan_dir, -1, bidi_it); + if (type != UNKNOWN_BT) + { + /* Don't lose the information for resolving neutrals! The + cached states could have been cached before their + next_for_neutral member was computed. If we are on our way + forward, we can simply take the info from the previous + state. 
*/ + if (bidi_it->scan_dir == 1 + && bidi_it->next_for_neutral.type == UNKNOWN_BT) + bidi_it->next_for_neutral = next_for_neutral; + + /* If resolved_level is -1, it means this state was cached + before it was completely resolved, so we cannot return + it. */ + if (bidi_it->resolved_level != -1) + return bidi_it->resolved_level; + } + if (bidi_it->scan_dir == -1) + /* If we are going backwards, the iterator state is already cached + from previous scans, and should be fully resolved. */ + abort (); + + if (type == UNKNOWN_BT) + type = bidi_type_of_next_char (bidi_it); + + if (type == NEUTRAL_B) + return bidi_it->resolved_level; + + level = bidi_it->level_stack[bidi_it->stack_idx].level; + if ((bidi_get_category (type) == NEUTRAL /* && type != NEUTRAL_B */) + || (type == WEAK_BN && prev_level == level)) + { + if (bidi_it->next_for_neutral.type == UNKNOWN_BT) + abort (); + + /* If the cached state shows a neutral character, it was not + resolved by bidi_resolve_neutral, so do it now. */ + type = bidi_resolve_neutral_1 (bidi_it->prev_for_neutral.type, + bidi_it->next_for_neutral.type, + level); + } + + if (!(type == STRONG_R + || type == STRONG_L + || type == WEAK_BN + || type == WEAK_EN + || type == WEAK_AN)) + abort (); + bidi_it->type = type; + bidi_check_type (bidi_it->type); + + /* For L1 below, we need to know, for each WS character, whether + it belongs to a sequence of WS characters preceding a newline + or a TAB or a paragraph separator. */ + if (bidi_it->orig_type == NEUTRAL_WS + && bidi_it->next_for_ws.type == UNKNOWN_BT) + { + int ch; + int clen = bidi_it->ch_len; + EMACS_INT bpos = bidi_it->bytepos; + EMACS_INT cpos = bidi_it->charpos; + bidi_type_t chtype; + + do { + /*_fetch_multibyte_char_len = 1;*/ + ch = bpos + clen >= ZV_BYTE ? BIDI_EOB : FETCH_CHAR (bpos + clen); + bpos += clen; + cpos++; + clen = (ch == BIDI_EOB ? 
1 : CHAR_BYTES (ch)); + if (ch == '\n' || ch == BIDI_EOB /* || ch == LINESEP_CHAR */) + chtype = NEUTRAL_B; + else + chtype = bidi_get_type (ch, NEUTRAL_DIR); + } while (chtype == NEUTRAL_WS || chtype == WEAK_BN + || bidi_explicit_dir_char (ch)); /* L1/Retaining */ + bidi_it->next_for_ws.type = chtype; + bidi_check_type (bidi_it->next_for_ws.type); + bidi_it->next_for_ws.charpos = cpos; + bidi_it->next_for_ws.bytepos = bpos; + } + + /* Resolve implicit levels, with a twist: PDFs get the embedding + level of the embedding they terminate. See below for the + reason. */ + if (bidi_it->orig_type == PDF + /* Don't do this if this formatting code didn't change the + embedding level due to invalid or empty embeddings. */ + && prev_level != level) + { + /* Don't look in UAX#9 for the reason for this: it's our own + private quirk. The reason is that we want the formatting + codes to be delivered so that they bracket the text of their + embedding. For example, given the text + + {RLO}teST{PDF} + + we want it to be displayed as + + {RLO}STet{PDF} + + not as + + STet{RLO}{PDF} + + which will result because we bump up the embedding level as + soon as we see the RLO and pop it as soon as we see the PDF, + so RLO itself has the same embedding level as "teST", and + thus would be normally delivered last, just before the PDF. + The switch below fiddles with the level of PDF so that this + ugly side effect does not happen. + + (This is, of course, only important if the formatting codes + are actually displayed, but Emacs does need to display them + if the user wants to.) 
*/ + level = prev_level; + } + else if (bidi_it->orig_type == NEUTRAL_B /* L1 */ + || bidi_it->orig_type == NEUTRAL_S + || bidi_it->ch == '\n' || bidi_it->ch == BIDI_EOB + /* || bidi_it->ch == LINESEP_CHAR */ + || (bidi_it->orig_type == NEUTRAL_WS + && (bidi_it->next_for_ws.type == NEUTRAL_B + || bidi_it->next_for_ws.type == NEUTRAL_S))) + level = bidi_it->level_stack[0].level; + else if ((level & 1) == 0) /* I1 */ + { + if (type == STRONG_R) + level++; + else if (type == WEAK_EN || type == WEAK_AN) + level += 2; + } + else /* I2 */ + { + if (type == STRONG_L || type == WEAK_EN || type == WEAK_AN) + level++; + } + + bidi_it->resolved_level = level; + return level; +} + +/* Move to the other edge of a level given by LEVEL. If END_FLAG is + non-zero, we are at the end of a level, and we need to prepare to + resume the scan of the lower level. + + If this level's other edge is cached, we simply jump to it, filling + the iterator structure with the iterator state on the other edge. + Otherwise, we walk the buffer until we come back to the same level + as LEVEL. + + Note: we are not talking here about a ``level run'' in the UAX#9 + sense of the term, but rather about a ``level'' which includes + all the levels higher than it. In other words, given the levels + like this: + + 11111112222222333333334443343222222111111112223322111 + A B C + + and assuming we are at point A scanning left to right, this + function moves to point C, whereas the UAX#9 ``level 2 run'' ends + at point B. */ +static void +bidi_find_other_level_edge (struct bidi_it *bidi_it, int level, int end_flag) +{ + int dir = end_flag ? -bidi_it->scan_dir : bidi_it->scan_dir; + int idx; + + /* Try the cache first. 
*/ + if ((idx = bidi_cache_find_level_change (level, dir, end_flag)) >= 0) + bidi_cache_fetch_state (idx, bidi_it); + else + { + int new_level; + + if (end_flag) + abort (); /* if we are at end of level, its edges must be cached */ + + bidi_cache_iterator_state (bidi_it, 1); + do { + new_level = bidi_level_of_next_char (bidi_it); + bidi_cache_iterator_state (bidi_it, 1); + } while (new_level >= level); + } +} + +void +bidi_get_next_char_visually (struct bidi_it *bidi_it) +{ + int old_level, new_level, next_level; + struct bidi_it sentinel; + + if (bidi_it->scan_dir == 0) + { + bidi_it->scan_dir = 1; /* default to logical order */ + } + + /* If we just passed a newline, initialize for the next line. */ + if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B) + bidi_line_init (bidi_it); + + /* Prepare the sentinel iterator state. */ + if (bidi_cache_idx == 0) + { + bidi_copy_it (&sentinel, bidi_it); + if (bidi_it->first_elt) + { + sentinel.charpos--; /* cached charpos needs to be monotonic */ + sentinel.bytepos--; + sentinel.ch = '\n'; /* doesn't matter, but why not? */ + sentinel.ch_len = 1; + } + } + + old_level = bidi_it->resolved_level; + new_level = bidi_level_of_next_char (bidi_it); + + /* Reordering of resolved levels (clause L2) is implemented by + jumping to the other edge of the level and flipping direction of + scanning the buffer whenever we find a level change. */ + if (new_level != old_level) + { + int ascending = new_level > old_level; + int level_to_search = ascending ? old_level + 1 : old_level; + int incr = ascending ? 1 : -1; + int expected_next_level = old_level + incr; + + /* If we don't have anything cached yet, we need to cache the + sentinel state, since we'll need it to record where to jump + when the last non-base level is exhausted. */ + if (bidi_cache_idx == 0) + bidi_cache_iterator_state (&sentinel, 1); + /* Jump (or walk) to the other edge of this level. 
*/ + bidi_find_other_level_edge (bidi_it, level_to_search, !ascending); + /* Switch scan direction and peek at the next character in the + new direction. */ + bidi_it->scan_dir = -bidi_it->scan_dir; + + /* The following loop handles the case where the resolved level + jumps by more than one. This is typical for numbers inside a + run of text with left-to-right embedding direction, but can + also happen in other situations. In those cases the decision + where to continue after a level change, and in what direction, + is tricky. For example, given a text like below: + + abcdefgh + 11336622 + + (where the numbers below the text show the resolved levels), + the result of reordering according to UAX#9 should be this: + + efdcghba + + This is implemented by the loop below which flips direction + and jumps to the other edge of the level each time it finds + the new level not to be the expected one. The expected level + is always one more or one less than the previous one. */ + next_level = bidi_peek_at_next_level (bidi_it); + while (next_level != expected_next_level) + { + expected_next_level += incr; + level_to_search += incr; + bidi_find_other_level_edge (bidi_it, level_to_search, !ascending); + bidi_it->scan_dir = -bidi_it->scan_dir; + next_level = bidi_peek_at_next_level (bidi_it); + } + + /* Finally, deliver the next character in the new direction. */ + next_level = bidi_level_of_next_char (bidi_it); + } + + /* Take note when we have just processed the newline that precedes + the end of the paragraph. The next time we are about to be + called, set_iterator_to_next will automatically reinit the + paragraph direction, if needed. We do this at the newline before + the paragraph separator, because the next character might not be + the first character of the next paragraph, due to the bidi + reordering. 
*/ + if (bidi_it->scan_dir == 1 + && bidi_it->orig_type == NEUTRAL_B + && bidi_it->bytepos < ZV_BYTE) + { + EMACS_INT sep_len = + bidi_at_paragraph_end (bidi_it->charpos + 1, + bidi_it->bytepos + bidi_it->ch_len); + if (sep_len >= 0) + { + bidi_it->new_paragraph = 1; + /* Record the buffer position of the last character of the + paragraph separator. */ + bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len; + } + } + + if (bidi_it->scan_dir == 1 && bidi_cache_idx) + { + /* If we are at paragraph's base embedding level and beyond the + last cached position, the cache's job is done and we can + discard it. */ + if (bidi_it->resolved_level == bidi_it->level_stack[0].level + && bidi_it->charpos > bidi_cache[bidi_cache_idx - 1].charpos) + bidi_cache_reset (); + /* But as long as we are caching during forward scan, we must + cache each state, or else the cache integrity will be + compromised: it assumes cached states correspond to buffer + positions 1:1. */ + else + bidi_cache_iterator_state (bidi_it, 1); + } +} + +/* This is meant to be called from within the debugger, whenever you + wish to examine the cache contents. */ +void +bidi_dump_cached_states (void) +{ + int i; + int ndigits = 1; + + if (bidi_cache_idx == 0) + { + fprintf (stderr, "The cache is empty.\n"); + return; + } + fprintf (stderr, "Total of %d state%s in cache:\n", + bidi_cache_idx, bidi_cache_idx == 1 ? "" : "s"); + + for (i = bidi_cache[bidi_cache_idx - 1].charpos; i > 0; i /= 10) + ndigits++; + fputs ("ch ", stderr); + for (i = 0; i < bidi_cache_idx; i++) + fprintf (stderr, "%*c", ndigits, bidi_cache[i].ch); + fputs ("\n", stderr); + fputs ("lvl ", stderr); + for (i = 0; i < bidi_cache_idx; i++) + fprintf (stderr, "%*d", ndigits, bidi_cache[i].resolved_level); + fputs ("\n", stderr); + fputs ("pos ", stderr); + for (i = 0; i < bidi_cache_idx; i++) + fprintf (stderr, "%*d", ndigits, bidi_cache[i].charpos); + fputs ("\n", stderr); +}
--- a/src/buffer.c Fri Jan 01 13:44:02 2010 -0500 +++ b/src/buffer.c Fri Jan 01 14:30:06 2010 -0500 @@ -2261,6 +2261,8 @@ swapfield (undo_list, Lisp_Object); swapfield (mark, Lisp_Object); swapfield (enable_multibyte_characters, Lisp_Object); + swapfield (bidi_display_reordering, Lisp_Object); + swapfield (bidi_paragraph_direction, Lisp_Object); /* FIXME: Not sure what we should do with these *_marker fields. Hopefully they're just nil anyway. */ swapfield (pt_marker, Lisp_Object); @@ -5186,7 +5188,9 @@ buffer_defaults.truncate_lines = Qnil; buffer_defaults.word_wrap = Qnil; buffer_defaults.ctl_arrow = Qt; + buffer_defaults.bidi_display_reordering = Qnil; buffer_defaults.direction_reversed = Qnil; + buffer_defaults.bidi_paragraph_direction = Qnil; buffer_defaults.cursor_type = Qt; buffer_defaults.extra_line_spacing = Qnil; buffer_defaults.cursor_in_non_selected_windows = Qt; @@ -5271,7 +5275,9 @@ XSETFASTINT (buffer_local_flags.syntax_table, idx); ++idx; XSETFASTINT (buffer_local_flags.cache_long_line_scans, idx); ++idx; XSETFASTINT (buffer_local_flags.category_table, idx); ++idx; + XSETFASTINT (buffer_local_flags.bidi_display_reordering, idx); ++idx; XSETFASTINT (buffer_local_flags.direction_reversed, idx); ++idx; + XSETFASTINT (buffer_local_flags.bidi_paragraph_direction, idx); ++idx; XSETFASTINT (buffer_local_flags.buffer_file_coding_system, idx); /* Make this one a permanent local. */ buffer_permanent_local_flags[idx++] = 1; @@ -5528,11 +5534,6 @@ doc: /* Default value of `ctl-arrow' for buffers that do not override it. This is the same as (default-value 'ctl-arrow). */); - DEFVAR_LISP_NOPRO ("default-direction-reversed", - &buffer_defaults.direction_reversed, - doc: /* Default value of `direction-reversed' for buffers that do not override it. -This is the same as (default-value 'direction-reversed). 
*/); - DEFVAR_LISP_NOPRO ("default-enable-multibyte-characters", &buffer_defaults.enable_multibyte_characters, doc: /* *Default value of `enable-multibyte-characters' for buffers not overriding it. @@ -5789,11 +5790,29 @@ This variable is never applied to a way of decoding a file while reading it. */); - DEFVAR_PER_BUFFER ("direction-reversed", &current_buffer->direction_reversed, - Qnil, - doc: /* *Non-nil means lines in the buffer are displayed right to left. */); - - DEFVAR_PER_BUFFER ("truncate-lines", &current_buffer->truncate_lines, Qnil, + DEFVAR_PER_BUFFER ("direction-reversed", + &current_buffer->direction_reversed, Qnil, + doc: /* Non-nil means set beginning of lines at the right edge of the window. +See also the variable `bidi-display-reordering'. */); + + DEFVAR_PER_BUFFER ("bidi-display-reordering", + &current_buffer->bidi_display_reordering, Qnil, + doc: /* Non-nil means reorder bidirectional text for display in the visual order. +See also the variable `direction-reversed'. */); + + DEFVAR_PER_BUFFER ("bidi-paragraph-direction", + &current_buffer->bidi_paragraph_direction, Qnil, + doc: /* *If non-nil, forces directionality of text paragraphs in the buffer. + +If this is nil (the default), the direction of each paragraph is +determined by the first strong directional character of its text. +The values of `right-to-left' and `left-to-right' override that. +Any other value is treated as nil. + +This variable has no effect unless the buffer's value of +\`bidi-display-reordering' is non-nil. */); + + DEFVAR_PER_BUFFER ("truncate-lines", &current_buffer->truncate_lines, Qnil, doc: /* *Non-nil means do not display continuation lines. Instead, give each line of text just one screen line.
--- a/src/buffer.h Fri Jan 01 13:44:02 2010 -0500 +++ b/src/buffer.h Fri Jan 01 14:30:06 2010 -0500 @@ -658,8 +658,16 @@ Lisp_Object word_wrap; /* Non-nil means display ctl chars with uparrow. */ Lisp_Object ctl_arrow; - /* Non-nil means display text from right to left. */ + /* Non-nil means reorder bidirectional text for display in the + visual order. */ + Lisp_Object bidi_display_reordering; + /* Non-nil means set beginning of lines at the right edge of + windows. */ Lisp_Object direction_reversed; + /* If non-nil, specifies which direction of text to force in all the + paragraphs of the buffer. Nil means determine paragraph + direction dynamically for each paragraph. */ + Lisp_Object bidi_paragraph_direction; /* Non-nil means do selective display; see doc string in syms_of_buffer (buffer.c) for details. */ Lisp_Object selective_display;
--- a/src/dispextern.h Fri Jan 01 13:44:02 2010 -0500 +++ b/src/dispextern.h Fri Jan 01 14:30:06 2010 -0500 @@ -370,6 +370,16 @@ /* Non-zero means don't display cursor here. */ unsigned avoid_cursor_p : 1; + /* Resolved bidirectional level of this character [0..63]. */ + unsigned resolved_level : 5; + + /* Resolved bidirectional type of this character, see enum + bidi_type_t below. Note that according to UAX#9, only some + values (STRONG_L, STRONG_R, WEAK_AN, WEAK_EN, WEAK_BN, and + NEUTRAL_B) can appear in the resolved type, so we only reserve + space for those that can. */ + unsigned bidi_type : 3; + #define FACE_ID_BITS 20 /* Face of the glyph. This is a realized face ID, @@ -739,14 +749,18 @@ /* First position in this row. This is the text position, including overlay position information etc, where the display of this row started, and can thus be less the position of the first glyph - (e.g. due to invisible text or horizontal scrolling). */ + (e.g. due to invisible text or horizontal scrolling). BIDI Note: + This is the smallest character position in the row, but not + necessarily the character that is the leftmost on the display. */ struct display_pos start; /* Text position at the end of this row. This is the position after the last glyph on this row. It can be greater than the last glyph position + 1, due to truncation, invisible text etc. In an up-to-date display, this should always be equal to the start - position of the next row. */ + position of the next row. BIDI Note: this is the character whose + buffer position is the largest, but not necessarily the rightmost + one on the display. */ struct display_pos end; /* Non-zero means the overlay arrow bitmap is on this line. @@ -872,6 +886,10 @@ the bottom line of the window, but not end of the buffer. */ unsigned indicate_bottom_line_p : 1; + /* Non-zero means the row was reversed to display text in a + right-to-left paragraph. 
*/ + unsigned reversed_p : 1; + /* Continuation lines width at the start of the row. */ int continuation_lines_width; @@ -924,12 +942,18 @@ (MATRIX_ROW ((MATRIX), (ROW))->used[TEXT_AREA]) /* Return the character/ byte position at which the display of ROW - starts. */ + starts. BIDI Note: this is the smallest character/byte position + among characters in ROW, i.e. the first logical-order character + displayed by ROW, which is not necessarily the smallest horizontal + position. */ #define MATRIX_ROW_START_CHARPOS(ROW) ((ROW)->start.pos.charpos) #define MATRIX_ROW_START_BYTEPOS(ROW) ((ROW)->start.pos.bytepos) -/* Return the character/ byte position at which ROW ends. */ +/* Return the character/ byte position at which ROW ends. BIDI Note: + this is the largest character/byte position among characters in + ROW, i.e. the last logical-order character displayed by ROW, which + is not necessarily the largest horizontal position. */ #define MATRIX_ROW_END_CHARPOS(ROW) ((ROW)->end.pos.charpos) #define MATRIX_ROW_END_BYTEPOS(ROW) ((ROW)->end.pos.bytepos) @@ -1702,7 +1726,93 @@ extern int face_change_count; - +/* For reordering of bidirectional text. */ +#define BIDI_MAXLEVEL 64 + +/* Data type for describing the bidirectional character types. The + first 7 must be at the beginning, because they are the only values + valid in the `bidi_type' member of `struct glyph'; we only reserve + 3 bits for it, so we cannot use there values larger than 7. 
*/ +typedef enum { + UNKNOWN_BT = 0, + STRONG_L, /* strong left-to-right */ + STRONG_R, /* strong right-to-left */ + WEAK_EN, /* european number */ + WEAK_AN, /* arabic number */ + WEAK_BN, /* boundary neutral */ + NEUTRAL_B, /* paragraph separator */ + STRONG_AL, /* arabic right-to-left letter */ + LRE, /* left-to-right embedding */ + LRO, /* left-to-right override */ + RLE, /* right-to-left embedding */ + RLO, /* right-to-left override */ + PDF, /* pop directional format */ + WEAK_ES, /* european number separator */ + WEAK_ET, /* european number terminator */ + WEAK_CS, /* common separator */ + WEAK_NSM, /* non-spacing mark */ + NEUTRAL_S, /* segment separator */ + NEUTRAL_WS, /* whitespace */ + NEUTRAL_ON /* other neutrals */ +} bidi_type_t; + +/* The basic directionality data type. */ +typedef enum { NEUTRAL_DIR, L2R, R2L } bidi_dir_t; + +/* Data type for storing information about characters we need to + remember. */ +struct bidi_saved_info { + int bytepos, charpos; /* character's buffer position */ + bidi_type_t type; /* character's resolved bidi type */ + bidi_type_t type_after_w1; /* original type of the character, after W1 */ + bidi_type_t orig_type; /* type as we found it in the buffer */ +}; + +/* Data type for keeping track of saved embedding levels and override + status information. */ +struct bidi_stack { + int level; + bidi_dir_t override; +}; + +/* Data type for iterating over bidi text. 
*/ +struct bidi_it { + EMACS_INT bytepos; /* iterator's position in buffer */ + EMACS_INT charpos; + int ch; /* character itself */ + int ch_len; /* length of its multibyte sequence */ + bidi_type_t type; /* bidi type of this character, after + resolving weak and neutral types */ + bidi_type_t type_after_w1; /* original type, after overrides and W1 */ + bidi_type_t orig_type; /* original type, as found in the buffer */ + int resolved_level; /* final resolved level of this character */ + int invalid_levels; /* how many PDFs to ignore */ + int invalid_rl_levels; /* how many PDFs from RLE/RLO to ignore */ + int prev_was_pdf; /* if non-zero, previous char was PDF */ + struct bidi_saved_info prev; /* info about previous character */ + struct bidi_saved_info last_strong; /* last-seen strong directional char */ + struct bidi_saved_info next_for_neutral; /* surrounding characters for... */ + struct bidi_saved_info prev_for_neutral; /* ...resolving neutrals */ + struct bidi_saved_info next_for_ws; /* character after sequence of ws */ + EMACS_INT next_en_pos; /* position of next EN char for ET */ + EMACS_INT ignore_bn_limit; /* position until which to ignore BNs */ + bidi_dir_t sor; /* direction of start-of-run in effect */ + int scan_dir; /* direction of text scan */ + int stack_idx; /* index of current data on the stack */ + /* Note: Everything from here on is not copied/saved when the bidi + iterator state is saved, pushed, or popped. So only put here + stuff that is not part of the bidi iterator's state! */ + struct bidi_stack level_stack[BIDI_MAXLEVEL]; /* stack of embedding levels */ + int first_elt; /* if non-zero, examine current char first */ + bidi_dir_t paragraph_dir; /* current paragraph direction */ + int new_paragraph; /* if non-zero, we expect a new paragraph */ + EMACS_INT separator_limit; /* where paragraph separator should end */ +}; + +/* Value is non-zero when the bidi iterator is at base paragraph + embedding level. 
*/ +#define BIDI_AT_BASE_LEVEL(BIDI_IT) \ + (BIDI_IT).resolved_level == (BIDI_IT).level_stack[0].level /*********************************************************************** @@ -1854,7 +1964,7 @@ NUM_IT_METHODS }; -#define IT_STACK_SIZE 4 +#define IT_STACK_SIZE 5 /* Iterator for composition (both for static and automatic). */ struct composition_it @@ -1902,6 +2012,13 @@ text, overlay strings, end of text etc., which see. */ EMACS_INT stop_charpos; + /* Previous stop position, i.e. the last one before the current + buffer position. */ + EMACS_INT prev_stop; + + /* Last stop_pos at the current paragraph's embedding level. */ + EMACS_INT base_level_stop; + /* Maximum string or buffer position + 1. ZV when iterating over current_buffer. */ EMACS_INT end_charpos; @@ -2008,6 +2125,8 @@ int string_nchars; EMACS_INT end_charpos; EMACS_INT stop_charpos; + EMACS_INT prev_stop; + EMACS_INT base_level_stop; struct composition_it cmp_it; int face_id; @@ -2233,6 +2352,14 @@ /* Face of the right fringe glyph. */ unsigned right_user_fringe_face_id : FACE_ID_BITS; + + /* Non-zero means we need to reorder bidirectional text for display + in the visual order. */ + int bidi_p; + + /* For iterating over bidirectional text. 
*/ + struct bidi_it bidi_it; + bidi_dir_t paragraph_embedding; }; @@ -2704,12 +2831,20 @@ Function Prototypes ***********************************************************************/ +/* Defined in bidi.c */ + +extern void bidi_init_it P_ ((EMACS_INT, EMACS_INT, struct bidi_it *)); +extern void bidi_get_next_char_visually P_ ((struct bidi_it *)); +extern void bidi_paragraph_init P_ ((bidi_dir_t, struct bidi_it *)); +extern int bidi_mirror_char P_ ((int)); + /* Defined in xdisp.c */ struct glyph_row *row_containing_pos P_ ((struct window *, int, struct glyph_row *, struct glyph_row *, int)); -int string_buffer_position P_ ((struct window *, Lisp_Object, int)); +EMACS_INT string_buffer_position P_ ((struct window *, Lisp_Object, + EMACS_INT)); int line_bottom_y P_ ((struct it *)); int display_prop_intangible_p P_ ((Lisp_Object)); void resize_echo_area_exactly P_ ((void));
--- a/src/dispnew.c Fri Jan 01 13:44:02 2010 -0500 +++ b/src/dispnew.c Fri Jan 01 14:30:06 2010 -0500 @@ -3500,6 +3500,8 @@ || !display_completed /* Give up if buffer appears in two places. */ || buffer_shared > 1 + /* Give up if we need to reorder bidirectional text. */ + || !NILP (current_buffer->bidi_display_reordering) /* Give up if currently displaying a message instead of the minibuffer contents. */ || (EQ (selected_window, minibuf_window) @@ -3776,6 +3778,10 @@ if (!display_completed || cursor_in_echo_area) return 0; + /* Give up if we need to reorder bidirectional text. */ + if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering)) + return 0; + /* Give up if the buffer's direction is reversed. */ if (!NILP (XBUFFER (w->buffer)->direction_reversed)) return 0;
--- a/src/term.c Fri Jan 01 13:44:02 2010 -0500 +++ b/src/term.c Fri Jan 01 14:30:06 2010 -0500 @@ -1545,6 +1545,26 @@ + it->glyph_row->used[it->area]); end = it->glyph_row->glyphs[1 + it->area]; + /* If the glyph row is reversed, we need to prepend the glyph rather + than append it. */ + if (it->glyph_row->reversed_p && it->area == TEXT_AREA) + { + struct glyph *g; + int move_by = it->pixel_width; + + /* Make room for the new glyphs. */ + if (move_by > end - glyph) /* don't overstep end of this area */ + move_by = end - glyph; + for (g = glyph - 1; g >= it->glyph_row->glyphs[it->area]; g--) + g[move_by] = *g; + glyph = it->glyph_row->glyphs[it->area]; + end = glyph + move_by; + } + + /* BIDI Note: we put the glyphs of a "multi-pixel" character left to + right, even in the REVERSED_P case, since (a) all of its u.ch are + identical, and (b) the PADDING_P flag needs to be set for the + leftmost one, because we write to the terminal left-to-right. */ for (i = 0; i < it->pixel_width && glyph < end; ++i) @@ -1556,6 +1576,18 @@ glyph->padding_p = i > 0; glyph->charpos = CHARPOS (it->position); glyph->object = it->object; + if (it->bidi_p) + { + glyph->resolved_level = it->bidi_it.resolved_level; + if ((it->bidi_it.type & 7) != it->bidi_it.type) + abort (); + glyph->bidi_type = it->bidi_it.type; + } + else + { + glyph->resolved_level = 0; + glyph->bidi_type = UNKNOWN_BT; + } ++it->glyph_row->used[it->area]; ++glyph;
--- a/src/window.h Fri Jan 01 13:44:02 2010 -0500 +++ b/src/window.h Fri Jan 01 14:30:06 2010 -0500 @@ -117,7 +117,10 @@ /* The buffer displayed in this window */ /* Of the fields vchild, hchild and buffer, only one is non-nil. */ Lisp_Object buffer; - /* A marker pointing to where in the text to start displaying */ + /* A marker pointing to where in the text to start displaying. + BIDI Note: This is the _logical-order_ start, i.e. the smallest + buffer position visible in the window, not necessarily the + character displayed in the top left corner of the window. */ Lisp_Object start; /* A marker pointing to where in the text point is in this window, used only when the window is not selected.
--- a/src/xdisp.c Fri Jan 01 13:44:02 2010 -0500 +++ b/src/xdisp.c Fri Jan 01 14:30:06 2010 -0500 @@ -248,6 +248,7 @@ Lisp_Object Qgrow_only; Lisp_Object Qinhibit_eval_during_redisplay; Lisp_Object Qbuffer_position, Qposition, Qobject; +Lisp_Object Qright_to_left, Qleft_to_right; /* Cursor shapes */ Lisp_Object Qbar, Qhbar, Qbox, Qhollow; @@ -2658,6 +2659,9 @@ /* Are multibyte characters enabled in current_buffer? */ it->multibyte_p = !NILP (current_buffer->enable_multibyte_characters); + /* Do we need to reorder bidirectional text? */ + it->bidi_p = !NILP (current_buffer->bidi_display_reordering); + /* Non-zero if we should highlight the region. */ highlight_region_p = (!NILP (Vtransient_mark_mode) @@ -2803,6 +2807,21 @@ it->start_of_box_run_p = 1; } + /* If we are to reorder bidirectional text, init the bidi + iterator. */ + if (it->bidi_p) + { + /* Note the paragraph direction that this buffer wants to + use. */ + if (EQ (current_buffer->bidi_paragraph_direction, Qleft_to_right)) + it->paragraph_embedding = L2R; + else if (EQ (current_buffer->bidi_paragraph_direction, Qright_to_left)) + it->paragraph_embedding = R2L; + else + it->paragraph_embedding = NEUTRAL_DIR; + bidi_init_it (charpos, bytepos, &it->bidi_it); + } + /* If a buffer position was specified, set the iterator there, getting overlays and face properties from that position. */ if (charpos >= BUF_BEG (current_buffer)) @@ -4575,43 +4594,46 @@ return 0; } - -/* Determine which buffer position in W's buffer STRING comes from. - AROUND_CHARPOS is an approximate position where it could come from. - Value is the buffer position or 0 if it couldn't be determined. +/* Look for STRING in overlays and text properties in W's buffer, + between character positions FROM and TO (excluding TO). + BACK_P non-zero means look back (in this case, TO is supposed to be + less than FROM). + Value is the first character position where STRING was found, or + zero if it wasn't found before hitting TO. 
W's buffer must be current. - This function is necessary because we don't record buffer positions - in glyphs generated from strings (to keep struct glyph small). This function may only use code that doesn't eval because it is called asynchronously from note_mouse_highlight. */ -int -string_buffer_position (w, string, around_charpos) +static EMACS_INT +string_buffer_position_lim (w, string, from, to, back_p) struct window *w; Lisp_Object string; - int around_charpos; + EMACS_INT from, to; + int back_p; { Lisp_Object limit, prop, pos; - const int MAX_DISTANCE = 1000; int found = 0; - pos = make_number (around_charpos); - limit = make_number (min (XINT (pos) + MAX_DISTANCE, ZV)); - while (!found && !EQ (pos, limit)) - { - prop = Fget_char_property (pos, Qdisplay, Qnil); - if (!NILP (prop) && display_prop_string_p (prop, string)) - found = 1; - else - pos = Fnext_single_char_property_change (pos, Qdisplay, Qnil, limit); - } - - if (!found) - { - pos = make_number (around_charpos); - limit = make_number (max (XINT (pos) - MAX_DISTANCE, BEGV)); + pos = make_number (from); + + if (!back_p) /* looking forward */ + { + limit = make_number (min (to, ZV)); + while (!found && !EQ (pos, limit)) + { + prop = Fget_char_property (pos, Qdisplay, Qnil); + if (!NILP (prop) && display_prop_string_p (prop, string)) + found = 1; + else + pos = Fnext_single_char_property_change (pos, Qdisplay, Qnil, + limit); + } + } + else /* looking back */ + { + limit = make_number (max (to, BEGV)); while (!found && !EQ (pos, limit)) { prop = Fget_char_property (pos, Qdisplay, Qnil); @@ -4626,6 +4648,35 @@ return found ? XINT (pos) : 0; } +/* Determine which buffer position in W's buffer STRING comes from. + AROUND_CHARPOS is an approximate position where it could come from. + Value is the buffer position or 0 if it couldn't be determined. + + W's buffer must be current. 
+ + This function is necessary because we don't record buffer positions + in glyphs generated from strings (to keep struct glyph small). + This function may only use code that doesn't eval because it is + called asynchronously from note_mouse_highlight. */ + +EMACS_INT +string_buffer_position (w, string, around_charpos) + struct window *w; + Lisp_Object string; + EMACS_INT around_charpos; +{ + Lisp_Object limit, prop, pos; + const int MAX_DISTANCE = 1000; + EMACS_INT found = string_buffer_position_lim (w, string, around_charpos, + around_charpos + MAX_DISTANCE, + 0); + + if (!found) + found = string_buffer_position_lim (w, string, around_charpos, + around_charpos - MAX_DISTANCE, 1); + return found; +} + /*********************************************************************** @@ -5092,6 +5143,8 @@ p = it->stack + it->sp; p->stop_charpos = it->stop_charpos; + p->prev_stop = it->prev_stop; + p->base_level_stop = it->base_level_stop; p->cmp_it = it->cmp_it; xassert (it->face_id >= 0); p->face_id = it->face_id; @@ -5142,6 +5195,8 @@ --it->sp; p = it->stack + it->sp; it->stop_charpos = p->stop_charpos; + it->prev_stop = p->prev_stop; + it->base_level_stop = p->base_level_stop; it->cmp_it = p->cmp_it; it->face_id = p->face_id; it->current = p->current; @@ -5319,8 +5374,8 @@ if (IT_CHARPOS (*it) <= BEGV) break; - /* If selective > 0, then lines indented more than that values - are invisible. */ + /* If selective > 0, then lines indented more than its value are + invisible. 
*/ if (it->selective > 0 && indented_beyond_p (IT_CHARPOS (*it), IT_BYTEPOS (*it), (double) it->selective)) /* iftc */ @@ -5514,9 +5569,14 @@ it->sp = 0; it->string_from_display_prop_p = 0; it->face_before_selective_p = 0; + if (it->bidi_p) + it->bidi_it.first_elt = 1; if (set_stop_p) - it->stop_charpos = CHARPOS (pos); + { + it->stop_charpos = CHARPOS (pos); + it->base_level_stop = CHARPOS (pos); + } } @@ -5620,7 +5680,7 @@ /*********************************************************************** Iteration - ***********************************************************************/ +***********************************************************************/ /* Map enum it_method value to corresponding next_element_from_* function. */ @@ -5672,6 +5732,13 @@ if (it->what == IT_CHARACTER) { + /* UAX#9, L4: "A character is depicted by a mirrored glyph if + and only if (a) the resolved directionality of that character + is R..." */ + /* FIXME: Do we need an exception for characters from display + tables? */ + if (it->bidi_p && it->bidi_it.type == STRONG_R) + it->c = bidi_mirror_char (it->c); /* Map via display table or translate control characters. IT->c, IT->len etc. have been set to the next character by the function call above. If we have a display table, and it @@ -5686,7 +5753,7 @@ Lisp_Object dv; struct charset *unibyte = CHARSET_FROM_ID (charset_unibyte); enum { char_is_other = 0, char_is_nbsp, char_is_soft_hyphen } - nbsp_or_shy = char_is_other; + nbsp_or_shy = char_is_other; int decoded = it->c; if (it->dp @@ -5904,12 +5971,12 @@ happen actually, but due to bugs it may happen. Let's print the char as is, there's not much meaningful we can do with it. 
*/ - str[0] = it->c; - str[1] = it->c >> 8; - str[2] = it->c >> 16; - str[3] = it->c >> 24; - len = 4; - } + str[0] = it->c; + str[1] = it->c >> 8; + str[2] = it->c >> 16; + str[3] = it->c >> 24; + len = 4; + } for (i = 0; i < len; i++) { @@ -6078,8 +6145,22 @@ else { xassert (it->len != 0); - IT_BYTEPOS (*it) += it->len; - IT_CHARPOS (*it) += 1; + + if (!it->bidi_p) + { + IT_BYTEPOS (*it) += it->len; + IT_CHARPOS (*it) += 1; + } + else + { + /* If this is a new paragraph, determine its base + direction (a.k.a. its base embedding level). */ + if (it->bidi_it.new_paragraph) + bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it); + bidi_get_next_char_visually (&it->bidi_it); + IT_BYTEPOS (*it) = it->bidi_it.bytepos; + IT_CHARPOS (*it) = it->bidi_it.charpos; + } xassert (IT_BYTEPOS (*it) == CHAR_TO_BYTE (IT_CHARPOS (*it))); } break; @@ -6232,7 +6313,7 @@ it->face_id = it->saved_face_id; /* KFS: This code used to check ip->dpvec[0] instead of the current element. - That seemed totally bogus - so I changed it... */ + That seemed totally bogus - so I changed it... */ gc = it->dpvec[it->current.dpvec_index]; if (GLYPH_CODE_P (gc) && GLYPH_CODE_CHAR_VALID_P (gc)) @@ -6467,6 +6548,43 @@ return 1; } +/* Scan forward from CHARPOS in the current buffer, until we find a + stop position > current IT's position. Then handle the stop + position before that. + + This is called when we are reordering bidirectional text. The + caller should save and restore IT and in particular the bidi_p + flag, because this function modifies them. */ + +static void +handle_stop_backwards (it, charpos) + struct it *it; + EMACS_INT charpos; +{ + struct text_pos pos1; + EMACS_INT where_we_are = IT_CHARPOS (*it); + EMACS_INT next_stop; + + /* Scan in strict logical order. */ + it->bidi_p = 0; + do + { + it->prev_stop = charpos; + SET_TEXT_POS (pos1, charpos, CHAR_TO_BYTE (charpos)); + reseat_1 (it, pos1, 0); + compute_stop_pos (it); + /* We must advance forward, right? 
*/ + if (it->stop_charpos <= it->prev_stop) + abort (); + charpos = it->stop_charpos; + } + while (charpos <= where_we_are); + + next_stop = it->stop_charpos; + it->stop_charpos = it->prev_stop; + handle_stop (it); + it->stop_charpos = next_stop; +} /* Load IT with the next display element from current_buffer. Value is zero if end of buffer reached. IT->stop_charpos is the next @@ -6481,6 +6599,57 @@ xassert (IT_CHARPOS (*it) >= BEGV); + /* With bidi reordering, the character to display might not be the + character at IT_CHARPOS. BIDI_IT.FIRST_ELT non-zero means that + we were reseat()ed to a new buffer position, which is potentially + a different paragraph. */ + if (it->bidi_p && it->bidi_it.first_elt) + { + it->bidi_it.charpos = IT_CHARPOS (*it); + it->bidi_it.bytepos = IT_BYTEPOS (*it); + /* If we are at the beginning of a line, we can produce the next + element right away. */ + if (it->bidi_it.bytepos == BEGV_BYTE + /* FIXME: Should support all Unicode line separators. */ + || FETCH_CHAR (it->bidi_it.bytepos - 1) == '\n' + || FETCH_CHAR (it->bidi_it.bytepos) == '\n') + { + bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it); + /* If the paragraph base direction is R2L, its glyphs should + be reversed. */ + if (it->glyph_row && (it->bidi_it.level_stack[0].level & 1) != 0) + it->glyph_row->reversed_p = 1; + bidi_get_next_char_visually (&it->bidi_it); + } + else + { + int orig_bytepos = IT_BYTEPOS (*it); + + /* We need to prime the bidi iterator starting at the line's + beginning, before we will be able to produce the next + element. 
*/ + IT_CHARPOS (*it) = find_next_newline_no_quit (IT_CHARPOS (*it), -1); + IT_BYTEPOS (*it) = CHAR_TO_BYTE (IT_CHARPOS (*it)); + it->bidi_it.charpos = IT_CHARPOS (*it); + it->bidi_it.bytepos = IT_BYTEPOS (*it); + bidi_paragraph_init (it->paragraph_embedding, &it->bidi_it); + if (it->glyph_row && (it->bidi_it.level_stack[0].level & 1) != 0) + it->glyph_row->reversed_p = 1; + do { + /* Now return to buffer position where we were asked to + get the next display element, and produce that. */ + bidi_get_next_char_visually (&it->bidi_it); + } while (it->bidi_it.bytepos != orig_bytepos + && it->bidi_it.bytepos < ZV_BYTE); + } + + it->bidi_it.first_elt = 0; /* paranoia: bidi.c does this */ + /* Adjust IT's position information to where we ended up. */ + IT_CHARPOS (*it) = it->bidi_it.charpos; + IT_BYTEPOS (*it) = it->bidi_it.bytepos; + SET_TEXT_POS (it->position, IT_CHARPOS (*it), IT_BYTEPOS (*it)); + } + if (IT_CHARPOS (*it) >= it->stop_charpos) { if (IT_CHARPOS (*it) >= it->end_charpos) @@ -6506,12 +6675,50 @@ success_p = 0; } } - else - { + else if (!(!it->bidi_p + || BIDI_AT_BASE_LEVEL (it->bidi_it) + || IT_CHARPOS (*it) == it->stop_charpos)) + { + /* With bidi non-linear iteration, we could find ourselves + far beyond the last computed stop_charpos, with several + other stop positions in between that we missed. Scan + them all now, in buffer's logical order, until we find + and handle the last stop_charpos that precedes our + current position. */ + struct it save_it = *it; + + handle_stop_backwards (it, it->stop_charpos); + it->bidi_p = 1; + it->current = save_it.current; + it->position = save_it.position; + return GET_NEXT_DISPLAY_ELEMENT (it); + } + else + { + /* If we are at base paragraph embedding level, take note of + the last stop position seen at this level. 
*/ + if (BIDI_AT_BASE_LEVEL (it->bidi_it)) + it->base_level_stop = it->stop_charpos; handle_stop (it); return GET_NEXT_DISPLAY_ELEMENT (it); } } + else if (it->bidi_p && IT_CHARPOS (*it) < it->prev_stop) + { + struct it save_it = *it; + + if (it->base_level_stop <= 0) + it->base_level_stop = 1; + if (IT_CHARPOS (*it) < it->base_level_stop) + abort (); + if (BIDI_AT_BASE_LEVEL (it->bidi_it)) + abort (); + handle_stop_backwards (it, it->base_level_stop); + it->bidi_p = 1; + it->current = save_it.current; + it->position = save_it.position; + return GET_NEXT_DISPLAY_ELEMENT (it); + } else { /* No face changes, overlays etc. in sight, so just return a @@ -6665,9 +6872,9 @@ line on the display without producing glyphs. OP should be a bit mask including some or all of these bits: - MOVE_TO_X: Stop on reaching x-position TO_X. - MOVE_TO_POS: Stop on reaching buffer or string position TO_CHARPOS. - Regardless of OP's value, stop in reaching the end of the display line. + MOVE_TO_X: Stop upon reaching x-position TO_X. + MOVE_TO_POS: Stop upon reaching buffer or string position TO_CHARPOS. + Regardless of OP's value, stop upon reaching the end of the display line. TO_X is normally a value 0 <= TO_X <= IT->last_visible_x. This means, in particular, that TO_X includes window's horizontal @@ -11028,6 +11235,17 @@ && overlay_touches_p (Z - end)) unchanged_p = 0; } + + /* Under bidi reordering, adding or deleting a character in the + beginning of a paragraph, before the first strong directional + character, can change the base direction of the paragraph (unless + the buffer specifies a fixed paragraph direction), which will + require to redisplay the whole paragraph. It might be worthwhile + to find the paragraph limits and widen the range of redisplayed + lines to that, but for now just give up this optimization. 
*/ + if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering) + && NILP (XBUFFER (w->buffer)->bidi_paragraph_direction)) + unchanged_p = 0; } return unchanged_p; @@ -12318,160 +12536,363 @@ struct glyph *glyph = row->glyphs[TEXT_AREA]; struct glyph *end = glyph + row->used[TEXT_AREA]; struct glyph *cursor = NULL; - /* The first glyph that starts a sequence of glyphs from a string - that is a value of a display property. */ - struct glyph *string_start; - /* The X coordinate of string_start. */ - int string_start_x; /* The last known character position in row. */ int last_pos = MATRIX_ROW_START_CHARPOS (row) + delta; - /* The last known character position before string_start. */ - int string_before_pos; int x = row->x; int cursor_x = x; - /* Last buffer position covered by an overlay. */ - int cursor_from_overlay_pos = 0; - int pt_old = PT - delta; - - /* Skip over glyphs not having an object at the start of the row. - These are special glyphs like truncation marks on terminal - frames. */ + EMACS_INT pt_old = PT - delta; + EMACS_INT pos_before = MATRIX_ROW_START_CHARPOS (row) + delta; + EMACS_INT pos_after = MATRIX_ROW_END_CHARPOS (row) + delta; + struct glyph *glyph_before = glyph - 1, *glyph_after = end; + /* Non-zero means we've found a match for cursor position, but that + glyph has the avoid_cursor_p flag set. */ + int match_with_avoid_cursor = 0; + /* Non-zero means we've seen at least one glyph that came from a + display string. */ + int string_seen = 0; + /* Largest buffer position seen so far during scan of glyph row. */ + EMACS_INT bpos_max = last_pos; + /* Last buffer position covered by an overlay string with an integer + `cursor' property. */ + EMACS_INT bpos_covered = 0; + + /* Skip over glyphs not having an object at the start and the end of + the row. These are special glyphs like truncation marks on + terminal frames. 
*/ if (row->displays_text_p) - while (glyph < end - && INTEGERP (glyph->object) - && glyph->charpos < 0) + { + if (!row->reversed_p) + { + while (glyph < end + && INTEGERP (glyph->object) + && glyph->charpos < 0) + { + x += glyph->pixel_width; + ++glyph; + } + while (end > glyph + && INTEGERP ((end - 1)->object) + /* CHARPOS is zero for blanks inserted by + extend_face_to_end_of_line. */ + && (end - 1)->charpos <= 0) + --end; + glyph_before = glyph - 1; + glyph_after = end; + } + else + { + struct glyph *g; + + /* If the glyph row is reversed, we need to process it from back + to front, so swap the edge pointers. */ + end = glyph - 1; + glyph += row->used[TEXT_AREA] - 1; + /* Reverse the known positions in the row. */ + last_pos = pos_after = MATRIX_ROW_START_CHARPOS (row) + delta; + pos_before = MATRIX_ROW_END_CHARPOS (row) + delta; + + while (glyph > end + 1 + && INTEGERP (glyph->object) + && glyph->charpos < 0) + { + --glyph; + x -= glyph->pixel_width; + } + if (INTEGERP (glyph->object) && glyph->charpos < 0) + --glyph; + /* By default, put the cursor on the rightmost glyph. */ + for (g = end + 1; g < glyph; g++) + x += g->pixel_width; + cursor_x = x; + while (end < glyph + && INTEGERP ((end + 1)->object) + && (end + 1)->charpos <= 0) + ++end; + glyph_before = glyph + 1; + glyph_after = end; + } + } + else if (row->reversed_p) + { + /* In R2L rows that don't display text, put the cursor on the + rightmost glyph. Case in point: an empty last line that is + part of an R2L paragraph. */ + cursor = end - 1; + x = -1; /* will be computed below, at lable compute_x */ + } + + /* Step 1: Try to find the glyph whose character position + corresponds to point. If that's not possible, find 2 glyphs + whose character positions are the closest to point, one before + point, the other after it. 
*/ + if (!row->reversed_p) + while (/* not marched to end of glyph row */ + glyph < end + /* glyph was not inserted by redisplay for internal purposes */ + && !INTEGERP (glyph->object)) { + if (BUFFERP (glyph->object)) + { + EMACS_INT dpos = glyph->charpos - pt_old; + + if (glyph->charpos > bpos_max) + bpos_max = glyph->charpos; + if (!glyph->avoid_cursor_p) + { + /* If we hit point, we've found the glyph on which to + display the cursor. */ + if (dpos == 0) + { + match_with_avoid_cursor = 0; + break; + } + /* See if we've found a better approximation to + POS_BEFORE or to POS_AFTER. Note that we want the + first (leftmost) glyph of all those that are the + closest from below, and the last (rightmost) of all + those from above. */ + if (0 > dpos && dpos > pos_before - pt_old) + { + pos_before = glyph->charpos; + glyph_before = glyph; + } + else if (0 < dpos && dpos <= pos_after - pt_old) + { + pos_after = glyph->charpos; + glyph_after = glyph; + } + } + else if (dpos == 0) + match_with_avoid_cursor = 1; + } + else if (STRINGP (glyph->object)) + { + Lisp_Object chprop; + int glyph_pos = glyph->charpos; + + chprop = Fget_char_property (make_number (glyph_pos), Qcursor, + glyph->object); + if (INTEGERP (chprop)) + { + bpos_covered = bpos_max + XINT (chprop); + /* If the `cursor' property covers buffer positions up + to and including point, we should display cursor on + this glyph. */ + /* Implementation note: bpos_max == pt_old when, e.g., + we are in an empty line, where bpos_max is set to + MATRIX_ROW_START_CHARPOS, see above. */ + if (bpos_max <= pt_old && bpos_covered >= pt_old) + { + cursor = glyph; + break; + } + } + + string_seen = 1; + } x += glyph->pixel_width; ++glyph; } - - string_start = NULL; - while (glyph < end - && !INTEGERP (glyph->object) - && (!BUFFERP (glyph->object) - || (last_pos = glyph->charpos) < pt_old - || glyph->avoid_cursor_p)) - { - if (! 
STRINGP (glyph->object)) - { - string_start = NULL; - x += glyph->pixel_width; - ++glyph; - /* If we are beyond the cursor position computed from the - last overlay seen, that overlay is not in effect for - current cursor position. Reset the cursor information - computed from that overlay. */ - if (cursor_from_overlay_pos - && last_pos >= cursor_from_overlay_pos) - { - cursor_from_overlay_pos = 0; - cursor = NULL; - } - } - else - { - if (string_start == NULL) - { - string_before_pos = last_pos; - string_start = glyph; - string_start_x = x; - } - /* Skip all glyphs from a string. */ - do - { - Lisp_Object cprop; - int pos; - if ((cursor == NULL || glyph > cursor) - && (cprop = Fget_char_property (make_number ((glyph)->charpos), - Qcursor, (glyph)->object), - !NILP (cprop)) - && (pos = string_buffer_position (w, glyph->object, - string_before_pos), - (pos == 0 /* from overlay */ - || pos == pt_old))) - { - /* Compute the first buffer position after the overlay. - If the `cursor' property tells us how many positions - are associated with the overlay, use that. Otherwise, - estimate from the buffer positions of the glyphs - before and after the overlay. */ - cursor_from_overlay_pos = (pos ? 0 : last_pos - + (INTEGERP (cprop) ? 
XINT (cprop) : 0)); - cursor = glyph; - cursor_x = x; - } + else if (glyph > end) /* row is reversed */ + while (!INTEGERP (glyph->object)) + { + if (BUFFERP (glyph->object)) + { + EMACS_INT dpos = glyph->charpos - pt_old; + + if (glyph->charpos > bpos_max) + bpos_max = glyph->charpos; + if (!glyph->avoid_cursor_p) + { + if (dpos == 0) + { + match_with_avoid_cursor = 0; + break; + } + if (0 > dpos && dpos > pos_before - pt_old) + { + pos_before = glyph->charpos; + glyph_before = glyph; + } + else if (0 < dpos && dpos <= pos_after - pt_old) + { + pos_after = glyph->charpos; + glyph_after = glyph; + } + } + else if (dpos == 0) + match_with_avoid_cursor = 1; + } + else if (STRINGP (glyph->object)) + { + Lisp_Object chprop; + int glyph_pos = glyph->charpos; + + chprop = Fget_char_property (make_number (glyph_pos), Qcursor, + glyph->object); + if (INTEGERP (chprop)) + { + bpos_covered = bpos_max + XINT (chprop); + /* If the `cursor' property covers buffer positions up + to and including point, we should display cursor on + this glyph. */ + if (bpos_max <= pt_old && bpos_covered >= pt_old) + { + cursor = glyph; + break; + } + } + string_seen = 1; + } + --glyph; + if (glyph == end) + break; + x -= glyph->pixel_width; + } + + /* Step 2: If we didn't find an exact match for point, we need to + look for a proper place to put the cursor among glyphs between + GLYPH_BEFORE and GLYPH_AFTER. */ + if (!(BUFFERP (glyph->object) && glyph->charpos == pt_old) + && bpos_covered < pt_old) + { + if (row->ends_in_ellipsis_p && pos_after == last_pos) + { + EMACS_INT ellipsis_pos; + + /* Scan back over the ellipsis glyphs. */ + if (!row->reversed_p) + { + ellipsis_pos = (glyph - 1)->charpos; + while (glyph > row->glyphs[TEXT_AREA] + && (glyph - 1)->charpos == ellipsis_pos) + glyph--, x -= glyph->pixel_width; + /* That loop always goes one position too far, including + the glyph before the ellipsis. So scan forward over + that one. 
*/ x += glyph->pixel_width; - ++glyph; - } - while (glyph < end && EQ (glyph->object, string_start->object)); - } - } - + glyph++; + } + else /* row is reversed */ + { + ellipsis_pos = (glyph + 1)->charpos; + while (glyph < row->glyphs[TEXT_AREA] + row->used[TEXT_AREA] - 1 + && (glyph + 1)->charpos == ellipsis_pos) + glyph++, x += glyph->pixel_width; + x -= glyph->pixel_width; + glyph--; + } + } + else if (match_with_avoid_cursor + /* zero-width characters produce no glyphs */ + || eabs (glyph_after - glyph_before) == 1) + { + cursor = glyph_after; + x = -1; + } + else if (string_seen) + { + int incr = row->reversed_p ? -1 : +1; + + /* Need to find the glyph that came out of a string which is + present at point. That glyph is somewhere between + GLYPH_BEFORE and GLYPH_AFTER, and it came from a string + positioned between POS_BEFORE and POS_AFTER in the + buffer. */ + struct glyph *stop = glyph_after; + EMACS_INT pos = pos_before; + + x = -1; + for (glyph = glyph_before + incr; + row->reversed_p ? glyph > stop : glyph < stop; ) + { + + /* Any glyphs that come from the buffer are here because + of bidi reordering. Skip them, and only pay + attention to glyphs that came from some string. */ + if (STRINGP (glyph->object)) + { + Lisp_Object str; + EMACS_INT tem; + + str = glyph->object; + tem = string_buffer_position_lim (w, str, pos, pos_after, 0); + if (pos <= tem) + { + /* If the string from which this glyph came is + found in the buffer at point, then we've + found the glyph we've been looking for. */ + if (tem == pt_old) + { + /* The glyphs from this string could have + been reordered. Find the one with the + smallest string position. Or there could + be a character in the string with the + `cursor' property, which means display + cursor on that character's glyph. 
*/ + int strpos = glyph->charpos; + + cursor = glyph; + for (glyph += incr; + EQ (glyph->object, str); + glyph += incr) + { + Lisp_Object cprop; + int gpos = glyph->charpos; + + cprop = Fget_char_property (make_number (gpos), + Qcursor, + glyph->object); + if (!NILP (cprop)) + { + cursor = glyph; + break; + } + if (glyph->charpos < strpos) + { + strpos = glyph->charpos; + cursor = glyph; + } + } + + goto compute_x; + } + pos = tem + 1; /* don't find previous instances */ + } + /* This string is not what we want; skip all of the + glyphs that came from it. */ + do + glyph += incr; + while ((row->reversed_p ? glyph > stop : glyph < stop) + && EQ (glyph->object, str)); + } + else + glyph += incr; + } + + /* If we reached the end of the line, and END was from a string, + the cursor is not on this line. */ + if (glyph == end + && STRINGP ((glyph - incr)->object) + && row->continued_p) + return 0; + } + } + + compute_x: if (cursor != NULL) - { - glyph = cursor; - x = cursor_x; - } - else if (row->ends_in_ellipsis_p && glyph == end) - { - /* Scan back over the ellipsis glyphs, decrementing positions. */ - while (glyph > row->glyphs[TEXT_AREA] - && (glyph - 1)->charpos == last_pos) - glyph--, x -= glyph->pixel_width; - /* That loop always goes one position too far, including the - glyph before the ellipsis. So scan forward over that one. */ - x += glyph->pixel_width; - glyph++; - } - else if (string_start - && (glyph == end || !BUFFERP (glyph->object) || last_pos > pt_old)) - { - /* We may have skipped over point because the previous glyphs - are from string. As there's no easy way to know the - character position of the current glyph, find the correct - glyph on point by scanning from string_start again. 
*/ - Lisp_Object limit; - Lisp_Object string; - struct glyph *stop = glyph; - int pos; - - limit = make_number (pt_old + 1); - glyph = string_start; - x = string_start_x; - string = glyph->object; - pos = string_buffer_position (w, string, string_before_pos); - /* If POS == 0, STRING is from overlay. We skip such glyphs - because we always put the cursor after overlay strings. */ - while (pos == 0 && glyph < stop) - { - string = glyph->object; - SKIP_GLYPHS (glyph, stop, x, EQ (glyph->object, string)); - if (glyph < stop) - pos = string_buffer_position (w, glyph->object, string_before_pos); - } - - while (glyph < stop) - { - pos = XINT (Fnext_single_char_property_change - (make_number (pos), Qdisplay, Qnil, limit)); - if (pos > pt_old) - break; - /* Skip glyphs from the same string. */ - string = glyph->object; - SKIP_GLYPHS (glyph, stop, x, EQ (glyph->object, string)); - /* Skip glyphs from an overlay. */ - while (glyph < stop - && ! string_buffer_position (w, glyph->object, pos)) - { - string = glyph->object; - SKIP_GLYPHS (glyph, stop, x, EQ (glyph->object, string)); - } - } - - /* If we reached the end of the line, and END was from a string, - the cursor is not on this line. */ - if (glyph == end && row->continued_p) - return 0; + glyph = cursor; + if (x < 0) + { + struct glyph *g; + + /* Need to compute x that corresponds to GLYPH. 
*/ + for (g = row->glyphs[TEXT_AREA], x = row->x; g < glyph; g++) + { + if (g >= row->glyphs[TEXT_AREA] + row->used[TEXT_AREA]) + abort (); + x += g->pixel_width; + } } w->cursor.hpos = glyph - row->glyphs[TEXT_AREA]; @@ -14469,15 +14890,39 @@ { struct glyph *glyph = row->glyphs[TEXT_AREA] + w->cursor.hpos; struct glyph *end = glyph + row->used[TEXT_AREA]; + struct glyph *orig_glyph = glyph; + struct cursor_pos orig_cursor = w->cursor; for (; glyph < end && (!BUFFERP (glyph->object) - || glyph->charpos < PT); + || glyph->charpos != PT); glyph++) { w->cursor.hpos++; w->cursor.x += glyph->pixel_width; } + /* With bidi reordering, charpos changes non-linearly + with hpos, so the right glyph could be to the + left. */ + if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering) + && (!BUFFERP (glyph->object) || glyph->charpos != PT)) + { + struct glyph *start_glyph = row->glyphs[TEXT_AREA]; + + glyph = orig_glyph - 1; + orig_cursor.hpos--; + orig_cursor.x -= glyph->pixel_width; + for (; glyph >= start_glyph + && (!BUFFERP (glyph->object) + || glyph->charpos != PT); + glyph--) + { + w->cursor.hpos--; + w->cursor.x -= glyph->pixel_width; + } + if (BUFFERP (glyph->object) && glyph->charpos == PT) + w->cursor = orig_cursor; + } } } @@ -14921,6 +15366,18 @@ if (!NILP (XBUFFER (w->buffer)->word_wrap)) GIVE_UP (21); + /* Under bidi reordering, adding or deleting a character in the + beginning of a paragraph, before the first strong directional + character, can change the base direction of the paragraph (unless + the buffer specifies a fixed paragraph direction), which will + require to redisplay the whole paragraph. It might be worthwhile + to find the paragraph limits and widen the range of redisplayed + lines to that, but for now just give up this optimization and + redisplay from scratch. 
*/ + if (!NILP (XBUFFER (w->buffer)->bidi_display_reordering) + && NILP (XBUFFER (w->buffer)->bidi_paragraph_direction)) + GIVE_UP (22); + /* Make sure beg_unchanged and end_unchanged are up to date. Do it only if buffer has really changed. The reason is that the gap is initially at Z for freshly visited files. The code below would @@ -16496,6 +16953,7 @@ int wrap_row_used = -1, wrap_row_ascent, wrap_row_height; int wrap_row_phys_ascent, wrap_row_phys_height; int wrap_row_extra_line_spacing; + struct display_pos row_end; /* We always start displaying at hpos zero even if hscrolled. */ xassert (it->hpos == 0 && it->current_x == 0); @@ -16520,6 +16978,10 @@ row->displays_text_p = 1; row->starts_in_middle_of_char_p = it->starts_in_middle_of_char_p; it->starts_in_middle_of_char_p = 0; + /* If the paragraph base direction is R2L, its glyphs should be + reversed. */ + if (it->bidi_p && (it->bidi_it.level_stack[0].level & 1) != 0) + row->reversed_p = 1; /* Arrange the overlays nicely for our purposes. Usually, we call display_line on only one line at a time, in which case this @@ -16584,6 +17046,12 @@ it->continuation_lines_width = 0; row->ends_at_zv_p = 1; + /* A row that displays right-to-left text must always have + its last face extended all the way to the end of line, + even if this row ends in ZV. */ + if (row->reversed_p) + extend_face_to_end_of_line (it); + row_end = it->current; break; } @@ -16818,6 +17286,7 @@ it->max_phys_descent = phys_descent; } + row_end = it->current; break; } else if (new_x > it->first_visible_x) @@ -16851,7 +17320,10 @@ /* End of this display line if row is continued. */ if (row->continued_p || row->ends_at_zv_p) - break; + { + row_end = it->current; + break; + } } at_end_of_line: @@ -16877,8 +17349,22 @@ row->glyphs[TEXT_AREA]->charpos = CHARPOS (it->position); /* Consume the line end. This skips over invisible lines. 
*/ + if (it->bidi_p) + { + /* When we are reordering bidi text, we still need the + next character in logical order, to set row->end + correctly below. */ + push_it (it); + it->bidi_p = 0; + set_iterator_to_next (it, 1); + row_end = it->current; + pop_it (it); + it->bidi_p = 1; + } set_iterator_to_next (it, 1); it->continuation_lines_width = 0; + if (!it->bidi_p) + row_end = it->current; break; } @@ -16916,6 +17402,7 @@ it->continuation_lines_width = 0; row->ends_at_zv_p = 1; row->exact_window_width_line_p = 1; + row_end = it->current; break; } if (ITERATOR_AT_END_OF_LINE_P (it)) @@ -16931,6 +17418,7 @@ row->ends_at_zv_p = FETCH_BYTE (IT_BYTEPOS (*it) - 1) != '\n'; it->hpos = hpos_before; it->current_x = x_before; + row_end = it->current; break; } } @@ -16991,7 +17479,7 @@ compute_line_metrics (it); /* Remember the position at which this line ends. */ - row->end = it->current; + row->end = row_end; /* Record whether this row ends inside an ellipsis. */ row->ends_in_ellipsis_p @@ -17028,7 +17516,7 @@ it->current_y += row->height; ++it->vpos; ++it->glyph_row; - it->start = it->current; + it->start = row_end; return row->displays_text_p; } @@ -20581,6 +21069,13 @@ glyph->u.ch = it->char_to_display; glyph->slice = null_glyph_slice; glyph->font_type = FONT_TYPE_UNKNOWN; + if (it->bidi_p) + { + glyph->resolved_level = it->bidi_it.resolved_level; + if ((it->bidi_it.type & 7) != it->bidi_it.type) + abort (); + glyph->bidi_type = it->bidi_it.type; + } ++it->glyph_row->used[area]; } else @@ -20633,6 +21128,13 @@ glyph->face_id = it->face_id; glyph->slice = null_glyph_slice; glyph->font_type = FONT_TYPE_UNKNOWN; + if (it->bidi_p) + { + glyph->resolved_level = it->bidi_it.resolved_level; + if ((it->bidi_it.type & 7) != it->bidi_it.type) + abort (); + glyph->bidi_type = it->bidi_it.type; + } ++it->glyph_row->used[area]; } else @@ -20807,6 +21309,13 @@ glyph->u.img_id = img->id; glyph->slice = slice; glyph->font_type = FONT_TYPE_UNKNOWN; + if (it->bidi_p) + { + 
glyph->resolved_level = it->bidi_it.resolved_level; + if ((it->bidi_it.type & 7) != it->bidi_it.type) + abort (); + glyph->bidi_type = it->bidi_it.type; + } ++it->glyph_row->used[area]; } else @@ -20853,6 +21362,13 @@ glyph->u.stretch.height = height; glyph->slice = null_glyph_slice; glyph->font_type = FONT_TYPE_UNKNOWN; + if (it->bidi_p) + { + glyph->resolved_level = it->bidi_it.resolved_level; + if ((it->bidi_it.type & 7) != it->bidi_it.type) + abort (); + glyph->bidi_type = it->bidi_it.type; + } ++it->glyph_row->used[area]; } else @@ -23030,7 +23546,7 @@ associated with the end position, which must not be highlighted. */ Lisp_Object prev_object; - int pos; + EMACS_INT pos; while (glyph > row->glyphs[TEXT_AREA]) { @@ -23662,7 +24178,8 @@ && XFASTINT (w->last_modified) == BUF_MODIFF (b) && XFASTINT (w->last_overlay_modified) == BUF_OVERLAY_MODIFF (b)) { - int hpos, vpos, pos, i, dx, dy, area; + int hpos, vpos, i, dx, dy, area; + EMACS_INT pos; struct glyph *glyph; Lisp_Object object; Lisp_Object mouse_face = Qnil, overlay = Qnil, position; @@ -23950,7 +24467,7 @@ struct glyph_row *r = MATRIX_ROW (w->current_matrix, vpos); int start = MATRIX_ROW_START_CHARPOS (r); - int pos = string_buffer_position (w, object, start); + EMACS_INT pos = string_buffer_position (w, object, start); if (pos > 0) { help = Fget_char_property (make_number (pos), @@ -24005,7 +24522,8 @@ struct glyph_row *r = MATRIX_ROW (w->current_matrix, vpos); int start = MATRIX_ROW_START_CHARPOS (r); - int pos = string_buffer_position (w, object, start); + EMACS_INT pos = string_buffer_position (w, object, + start); if (pos > 0) pointer = Fget_char_property (make_number (pos), Qpointer, w->buffer); @@ -24814,6 +25332,11 @@ staticpro (&previous_help_echo_string); help_echo_pos = -1; + Qright_to_left = intern ("right-to-left"); + staticpro (&Qright_to_left); + Qleft_to_right = intern ("left-to-right"); + staticpro (&Qleft_to_right); + #ifdef HAVE_WINDOW_SYSTEM DEFVAR_BOOL ("x-stretch-cursor", 
&x_stretch_cursor_p, doc: /* *Non-nil means draw block cursor as wide as the glyph under it.