comparison src/bidi.c @ 107595:69c12db7031d

Retrospective commit from 2009-10-05. Continue working on paragraph base direction. Support per-buffer default paragraph direction. buffer.h (struct buffer): New member paragraph_direction. buffer.c (init_buffer_once): Initialize it. (syms_of_buffer): Declare Lisp variables default-paragraph-direction and paragraph-direction. dispextern.h (struct it): New member paragraph_embedding. xdisp.c (init_iterator): Initialize it from the buffer's value of paragraph-direction. <Qright_to_left, Qleft_to_right>: New variables. (syms_of_xdisp): Initialize and staticpro them. (set_iterator_to_next, next_element_from_buffer): Use the value of paragraph_embedding to determine the paragraph direction. bidi.c (bidi_line_init): Fix second argument to bidi_set_sor_type. (bidi_init_it): Initialize paragraph_dir to NEUTRAL_DIR. (bidi_get_next_char_visually): Record the last character of the separator in separator_limit, not the character after that. (bidi_find_paragraph_start): Accept character and byte positions instead of the whole iterator structure. All callers changed.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 01 Jan 2010 06:22:52 -0500
parents 40b49fa464cf
children 866e76f8ad75
comparison
equal deleted inserted replaced
107594:40b49fa464cf 107595:69c12db7031d
805 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ 805 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
806 bidi_it->invalid_levels = 0; 806 bidi_it->invalid_levels = 0;
807 bidi_it->invalid_rl_levels = -1; 807 bidi_it->invalid_rl_levels = -1;
808 bidi_it->next_en_pos = -1; 808 bidi_it->next_en_pos = -1;
809 bidi_it->next_for_ws.type = UNKNOWN_BT; 809 bidi_it->next_for_ws.type = UNKNOWN_BT;
810 bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir, 810 bidi_set_sor_type (bidi_it,
811 bidi_it->paragraph_dir == R2L ? 1 : 0,
811 bidi_it->level_stack[0].level); /* X10 */ 812 bidi_it->level_stack[0].level); /* X10 */
812 813
813 bidi_cache_reset (); 814 bidi_cache_reset ();
814 } 815 }
815 816
816 /* Find the beginning of this paragraph by looking back in the buffer. 817 /* Find the beginning of this paragraph by looking back in the buffer.
817 Value is the byte position of the paragraph's beginning. */ 818 Value is the byte position of the paragraph's beginning. */
818 static EMACS_INT 819 static EMACS_INT
819 bidi_find_paragraph_start (struct bidi_it *bidi_it) 820 bidi_find_paragraph_start (EMACS_INT pos, EMACS_INT pos_byte)
820 { 821 {
821 Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ()); 822 Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ());
822 EMACS_INT pos = bidi_it->charpos;
823 EMACS_INT pos_byte = bidi_it->bytepos;
824 EMACS_INT limit = ZV, limit_byte = ZV_BYTE; 823 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
825 824
826 if (!STRINGP (re)) 825 if (!STRINGP (re))
827 re = fallback_paragraph_start_re; 826 re = fallback_paragraph_start_re;
828 while (pos_byte > BEGV_BYTE 827 while (pos_byte > BEGV_BYTE
833 } 832 }
834 return pos_byte; 833 return pos_byte;
835 } 834 }
836 835
837 /* Determine the direction, a.k.a. base embedding level, of the 836 /* Determine the direction, a.k.a. base embedding level, of the
838 paragraph we are about to iterate through. */ 837 paragraph we are about to iterate through. If DIR is either L2R or
838 R2L, just use that. Otherwise, determine the paragraph direction
839 from the first strong character of the paragraph.
840
841 Note that this gives the paragraph separator the same direction as
842 the preceding paragraph, even though Emacs generally views the
843 separator as not belonging to any paragraph. */
839 void 844 void
840 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) 845 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
841 { 846 {
842 EMACS_INT bytepos = bidi_it->bytepos; 847 EMACS_INT bytepos = bidi_it->bytepos;
843 848
866 for the separator to be exhausted; use the previous paragraph 871 for the separator to be exhausted; use the previous paragraph
867 direction. */ 872 direction. */
868 if (bidi_it->charpos < bidi_it->separator_limit) 873 if (bidi_it->charpos < bidi_it->separator_limit)
869 return; 874 return;
870 875
871 /* If we are before another paragraph separator, continue 876 /* If we are on a newline, get past it to where the next
872 through that with the previous paragraph direction. */ 877 paragraph might start. */
873 sep_len = bidi_at_paragraph_end (bidi_it->charpos, bytepos); 878 if (FETCH_CHAR (bytepos) == '\n')
874 if (sep_len >= 0)
875 { 879 {
876 bidi_it->separator_limit += sep_len + 1; 880 bytepos++;
877 return; 881 pos = bidi_it->charpos + 1;
878 } 882 }
879 else if (sep_len == -2) 883
880 /* We are in the middle of a paragraph. Search back to where 884 /* We are either at the beginning of a paragraph or in the
881 this paragraph starts. */ 885 middle of it. Find where this paragraph starts. */
882 bytepos = bidi_find_paragraph_start (bidi_it); 886 bytepos = bidi_find_paragraph_start (pos, bytepos);
883 887
884 /* We should always be at the beginning of a new line at this 888 /* We should always be at the beginning of a new line at this
885 point. */ 889 point. */
886 if (!(bytepos == BEGV_BYTE 890 if (!(bytepos == BEGV_BYTE || FETCH_CHAR (bytepos - 1) == '\n'))
887 || FETCH_CHAR (bytepos) == '\n'
888 || FETCH_CHAR (bytepos - 1) == '\n'))
889 abort (); 891 abort ();
890 892
891 bidi_it->separator_limit = -1; 893 bidi_it->separator_limit = -1;
892 bidi_it->new_paragraph = 0; 894 bidi_it->new_paragraph = 0;
893 ch = FETCH_CHAR (bytepos); 895 ch = FETCH_CHAR (bytepos);
916 bidi_it->paragraph_dir = L2R; 918 bidi_it->paragraph_dir = L2R;
917 } 919 }
918 else 920 else
919 abort (); 921 abort ();
920 922
921 /* Contrary to UAX#9 clause P3, we only default to L2R if we have no 923 /* Contrary to UAX#9 clause P3, we only default the paragraph
922 previous usable paragraph direction. */ 924 direction to L2R if we have no previous usable paragraph
925 direction. */
923 if (bidi_it->paragraph_dir == NEUTRAL_DIR) 926 if (bidi_it->paragraph_dir == NEUTRAL_DIR)
924 bidi_it->paragraph_dir = L2R; /* P3 */ 927 bidi_it->paragraph_dir = L2R; /* P3 and ``higher protocols'' */
925 if (bidi_it->paragraph_dir == R2L) 928 if (bidi_it->paragraph_dir == R2L)
926 bidi_it->level_stack[0].level == 1; 929 bidi_it->level_stack[0].level = 1;
927 else 930 else
928 bidi_it->level_stack[0].level == 0; 931 bidi_it->level_stack[0].level = 0;
929 932
930 bidi_line_init (bidi_it); 933 bidi_line_init (bidi_it);
931 } 934 }
932 935
933 /* Do whatever UAX#9 clause X8 says should be done at paragraph's 936 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
951 bidi_it->bytepos = bytepos; 954 bidi_it->bytepos = bytepos;
952 bidi_it->first_elt = 1; 955 bidi_it->first_elt = 1;
953 bidi_set_paragraph_end (bidi_it); 956 bidi_set_paragraph_end (bidi_it);
954 bidi_it->new_paragraph = 1; 957 bidi_it->new_paragraph = 1;
955 bidi_it->separator_limit = -1; 958 bidi_it->separator_limit = -1;
959 bidi_it->paragraph_dir = NEUTRAL_DIR;
956 bidi_it->type = NEUTRAL_B; 960 bidi_it->type = NEUTRAL_B;
957 bidi_it->type_after_w1 = UNKNOWN_BT; 961 bidi_it->type_after_w1 = UNKNOWN_BT;
958 bidi_it->orig_type = UNKNOWN_BT; 962 bidi_it->orig_type = UNKNOWN_BT;
959 bidi_it->prev_was_pdf = 0; 963 bidi_it->prev_was_pdf = 0;
960 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT; 964 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
1941 1945
1942 /* Finally, deliver the next character in the new direction. */ 1946 /* Finally, deliver the next character in the new direction. */
1943 next_level = bidi_level_of_next_char (bidi_it); 1947 next_level = bidi_level_of_next_char (bidi_it);
1944 } 1948 }
1945 1949
1946 /* Take note when we are at the end of the paragraph. The next time 1950 /* Take note when we have just processed the newline that precedes
1947 we are about to be called, set_iterator_to_next will 1951 the end of the paragraph. The next time we are about to be
1948 automatically reinit the paragraph direction, if needed. */ 1952 called, set_iterator_to_next will automatically reinit the
1953 paragraph direction, if needed. We do this at the newline before
1954 the paragraph separator, because the next character might not be
1955 the first character of the next paragraph, due to the bidi
1956 reordering. */
1949 if (bidi_it->scan_dir == 1 1957 if (bidi_it->scan_dir == 1
1950 && bidi_it->orig_type == NEUTRAL_B 1958 && bidi_it->orig_type == NEUTRAL_B
1951 && bidi_it->bytepos < ZV_BYTE) 1959 && bidi_it->bytepos < ZV_BYTE)
1952 { 1960 {
1953 EMACS_INT sep_len = 1961 EMACS_INT sep_len =
1954 bidi_at_paragraph_end (bidi_it->charpos + 1, 1962 bidi_at_paragraph_end (bidi_it->charpos + 1,
1955 bidi_it->bytepos + bidi_it->ch_len); 1963 bidi_it->bytepos + bidi_it->ch_len);
1956 if (sep_len >= 0) 1964 if (sep_len >= 0)
1957 { 1965 {
1958 bidi_it->new_paragraph = 1; 1966 bidi_it->new_paragraph = 1;
1959 /* Record the buffer position of the first character after 1967 /* Record the buffer position of the last character of the
1960 the paragraph separator. */ 1968 paragraph separator. */
1961 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len + 1; 1969 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len;
1962 } 1970 }
1963 } 1971 }
1964 1972
1965 if (bidi_it->scan_dir == 1 && bidi_cache_idx) 1973 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1966 { 1974 {