comparison src/bidi.c @ 107594:40b49fa464cf

Retrospective commit from 2009-10-04. Continue working on determining the paragraph's base direction. bidi.c (bidi_at_paragraph_end): Check for paragraph-start if paragraph-separate failed to match. Return the length of the matched separator. (bidi_line_init): New function. (bidi_paragraph_init): Use bidi_line_init. Do nothing if in the middle of a paragraph-separate sequence. Don't override existing paragraph direction if no strong characters are found in this paragraph. Set separator_limit according to what bidi_at_paragraph_end returns. Reset new_paragraph flag when a new paragraph is found. (bidi_init_it): Reset separator_limit. dispextern.h (struct bidi_it): New member separator_limit. bidi.c (bidi_find_paragraph_start): Return the byte position of the paragraph beginning. xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the new_paragraph flag is set in the bidi iterator. bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use the buffer-local value of paragraph-start and paragraph-separate.
author Eli Zaretskii <eliz@gnu.org>
date Fri, 01 Jan 2010 06:17:13 -0500
parents a551e4109c04
children 69c12db7031d
comparison
equal deleted inserted replaced
107593:a551e4109c04 107594:40b49fa464cf
731 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1) 731 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1)
732 abort (); 732 abort ();
733 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; 733 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level;
734 } 734 }
735 735
736 /* Return non-zero if buffer's byte position POS is the end of a 736 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph.
737 paragraph. */ 737 Value is the non-negative length of the paragraph separator
738 int 738 following the buffer position, -1 if position is at the beginning
739 of a new paragraph, or -2 if position is neither at beginning nor
740 at end of a paragraph. */
741 EMACS_INT
739 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) 742 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos)
740 { 743 {
741 Lisp_Object re = XSYMBOL (Qparagraph_separate)->value; 744 Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate,
742 745 Fcurrent_buffer ());
743 if (!STRINGP (re)) 746 Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start,
744 re = fallback_paragraph_separate_re; 747 Fcurrent_buffer ());
745 748 EMACS_INT val;
746 return fast_looking_at (re, charpos, bytepos, ZV, ZV_BYTE, Qnil) > 0; 749
750 if (!STRINGP (sep_re))
751 sep_re = fallback_paragraph_separate_re;
752 if (!STRINGP (start_re))
753 start_re = fallback_paragraph_start_re;
754
755 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil);
756 if (val < 0)
757 {
758 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0)
759 val = -1;
760 else
761 val = -2;
762 }
763
764 return val;
747 } 765 }
748 766
749 /* Determine the start-of-run (sor) directional type given the two 767 /* Determine the start-of-run (sor) directional type given the two
750 embedding levels on either side of the run boundary. Also, update 768 embedding levels on either side of the run boundary. Also, update
751 the saved info about previously seen characters, since that info is 769 the saved info about previously seen characters, since that info is
777 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 = 795 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 =
778 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; 796 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT;
779 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ 797 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */
780 } 798 }
781 799
782 /* Find the beginning of this paragraph by looking back in the
783 buffer. */
784 static void 800 static void
801 bidi_line_init (struct bidi_it *bidi_it)
802 {
803 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */
804 bidi_it->resolved_level = bidi_it->level_stack[0].level;
805 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */
806 bidi_it->invalid_levels = 0;
807 bidi_it->invalid_rl_levels = -1;
808 bidi_it->next_en_pos = -1;
809 bidi_it->next_for_ws.type = UNKNOWN_BT;
810 bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir,
811 bidi_it->level_stack[0].level); /* X10 */
812
813 bidi_cache_reset ();
814 }
815
816 /* Find the beginning of this paragraph by looking back in the buffer.
817 Value is the byte position of the paragraph's beginning. */
818 static EMACS_INT
785 bidi_find_paragraph_start (struct bidi_it *bidi_it) 819 bidi_find_paragraph_start (struct bidi_it *bidi_it)
786 { 820 {
787 Lisp_Object re = XSYMBOL (Qparagraph_start)->value; 821 Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ());
788 EMACS_INT pos = bidi_it->charpos; 822 EMACS_INT pos = bidi_it->charpos;
789 EMACS_INT pos_byte = bidi_it->bytepos; 823 EMACS_INT pos_byte = bidi_it->bytepos;
790 EMACS_INT limit = ZV, limit_byte = ZV_BYTE; 824 EMACS_INT limit = ZV, limit_byte = ZV_BYTE;
791 825
792 if (!STRINGP (re)) 826 if (!STRINGP (re))
793 re = fallback_paragraph_start_re; 827 re = fallback_paragraph_start_re;
794 while (pos_byte > BEGV_BYTE 828 while (pos_byte > BEGV_BYTE
795 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) 829 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0)
796 { 830 {
797 find_next_newline_no_quit (pos, -1); 831 pos = find_next_newline_no_quit (pos - 1, -1);
798 } 832 pos_byte = CHAR_TO_BYTE (pos);
799 } 833 }
800 834 return pos_byte;
835 }
836
837 /* Determine the direction, a.k.a. base embedding level, of the
838 paragraph we are about to iterate through. */
801 void 839 void
802 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) 840 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it)
803 { 841 {
804 EMACS_INT bytepos = bidi_it->bytepos; 842 EMACS_INT bytepos = bidi_it->bytepos;
805 843
806 /* We should never be called at EOB or before BEGV. */ 844 /* We should never be called at EOB or before BEGV. */
807 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) 845 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE)
808 abort (); 846 abort ();
809 847
810 bidi_it->level_stack[0].level = 0; /* default for L2R */ 848 if (dir == L2R)
811 bidi_it->paragraph_dir = L2R; 849 {
812 if (dir == R2L) 850 bidi_it->paragraph_dir = L2R;
813 bidi_it->level_stack[0].level = 1; 851 bidi_it->new_paragraph = 0;
852 }
853 else if (dir == R2L)
854 {
855 bidi_it->paragraph_dir = R2L;
856 bidi_it->new_paragraph = 0;
857 }
814 else if (dir == NEUTRAL_DIR) /* P2 */ 858 else if (dir == NEUTRAL_DIR) /* P2 */
815 { 859 {
816 int ch, ch_len; 860 int ch, ch_len;
817 EMACS_INT pos; 861 EMACS_INT pos;
818 bidi_type_t type; 862 bidi_type_t type;
819 863 EMACS_INT sep_len;
820 /* Search back to where this paragraph starts. */ 864
821 bidi_find_paragraph_start (bidi_it); 865 /* If we are inside a paragraph separator, we are just waiting
866 for the separator to be exhausted; use the previous paragraph
867 direction. */
868 if (bidi_it->charpos < bidi_it->separator_limit)
869 return;
870
871 /* If we are before another paragraph separator, continue
872 through that with the previous paragraph direction. */
873 sep_len = bidi_at_paragraph_end (bidi_it->charpos, bytepos);
874 if (sep_len >= 0)
875 {
876 bidi_it->separator_limit += sep_len + 1;
877 return;
878 }
879 else if (sep_len == -2)
880 /* We are in the middle of a paragraph. Search back to where
881 this paragraph starts. */
882 bytepos = bidi_find_paragraph_start (bidi_it);
822 883
823 /* We should always be at the beginning of a new line at this 884 /* We should always be at the beginning of a new line at this
824 point. */ 885 point. */
825 if (!(bytepos == BEGV_BYTE 886 if (!(bytepos == BEGV_BYTE
826 || FETCH_CHAR (bytepos) == '\n' 887 || FETCH_CHAR (bytepos) == '\n'
827 || FETCH_CHAR (bytepos - 1) == '\n')) 888 || FETCH_CHAR (bytepos - 1) == '\n'))
828 abort (); 889 abort ();
829 890
891 bidi_it->separator_limit = -1;
892 bidi_it->new_paragraph = 0;
830 ch = FETCH_CHAR (bytepos); 893 ch = FETCH_CHAR (bytepos);
831 ch_len = CHAR_BYTES (ch); 894 ch_len = CHAR_BYTES (ch);
832 pos = bidi_it->charpos; 895 pos = BYTE_TO_CHAR (bytepos);
833 type = bidi_get_type (ch, NEUTRAL_DIR); 896 type = bidi_get_type (ch, NEUTRAL_DIR);
834 897
835 for (pos++, bytepos += ch_len; 898 for (pos++, bytepos += ch_len;
836 /* NOTE: UAX#9 says to search only for L, AL, or R types of 899 /* NOTE: UAX#9 says to search only for L, AL, or R types of
837 characters, and ignore RLE, RLO, LRE, and LRO. However, 900 characters, and ignore RLE, RLO, LRE, and LRO. However,
841 || (bidi_ignore_explicit_marks_for_paragraph_level 904 || (bidi_ignore_explicit_marks_for_paragraph_level
842 && (type == RLE || type == RLO 905 && (type == RLE || type == RLO
843 || type == LRE || type == LRO)); 906 || type == LRE || type == LRO));
844 type = bidi_get_type (ch, NEUTRAL_DIR)) 907 type = bidi_get_type (ch, NEUTRAL_DIR))
845 { 908 {
846 if (type == NEUTRAL_B || bidi_at_paragraph_end (pos, bytepos)) 909 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1)
847 break; 910 break;
848 FETCH_CHAR_ADVANCE (ch, pos, bytepos); 911 FETCH_CHAR_ADVANCE (ch, pos, bytepos);
849 } 912 }
850 if (type == STRONG_R || type == STRONG_AL) /* P3 */ 913 if (type == STRONG_R || type == STRONG_AL) /* P3 */
851 bidi_it->level_stack[0].level = 1; 914 bidi_it->paragraph_dir = R2L;
852 } 915 else if (type == STRONG_L)
853 if (bidi_it->level_stack[0].level == 1) 916 bidi_it->paragraph_dir = L2R;
854 bidi_it->paragraph_dir = R2L; 917 }
855 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ 918 else
856 bidi_it->resolved_level = bidi_it->level_stack[0].level; 919 abort ();
857 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ 920
858 bidi_it->invalid_levels = 0; 921 /* Contrary to UAX#9 clause P3, we only default to L2R if we have no
859 bidi_it->invalid_rl_levels = -1; 922 previous usable paragraph direction. */
860 bidi_it->new_paragraph = 0; 923 if (bidi_it->paragraph_dir == NEUTRAL_DIR)
861 bidi_it->next_en_pos = -1; 924 bidi_it->paragraph_dir = L2R; /* P3 */
862 bidi_it->next_for_ws.type = UNKNOWN_BT; 925 if (bidi_it->paragraph_dir == R2L)
863 bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir, 926 bidi_it->level_stack[0].level == 1;
864 bidi_it->level_stack[0].level); /* X10 */ 927 else
865 928 bidi_it->level_stack[0].level == 0;
866 bidi_cache_reset (); 929
930 bidi_line_init (bidi_it);
867 } 931 }
868 932
869 /* Do whatever UAX#9 clause X8 says should be done at paragraph's 933 /* Do whatever UAX#9 clause X8 says should be done at paragraph's
870 end. */ 934 end. */
871 static inline void 935 static inline void
886 bidi_it->charpos = charpos; 950 bidi_it->charpos = charpos;
887 bidi_it->bytepos = bytepos; 951 bidi_it->bytepos = bytepos;
888 bidi_it->first_elt = 1; 952 bidi_it->first_elt = 1;
889 bidi_set_paragraph_end (bidi_it); 953 bidi_set_paragraph_end (bidi_it);
890 bidi_it->new_paragraph = 1; 954 bidi_it->new_paragraph = 1;
955 bidi_it->separator_limit = -1;
891 bidi_it->type = NEUTRAL_B; 956 bidi_it->type = NEUTRAL_B;
892 bidi_it->type_after_w1 = UNKNOWN_BT; 957 bidi_it->type_after_w1 = UNKNOWN_BT;
893 bidi_it->orig_type = UNKNOWN_BT; 958 bidi_it->orig_type = UNKNOWN_BT;
894 bidi_it->prev_was_pdf = 0; 959 bidi_it->prev_was_pdf = 0;
895 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT; 960 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT;
1800 if (bidi_it->scan_dir == 0) 1865 if (bidi_it->scan_dir == 0)
1801 { 1866 {
1802 bidi_it->scan_dir = 1; /* default to logical order */ 1867 bidi_it->scan_dir = 1; /* default to logical order */
1803 } 1868 }
1804 1869
1870 /* If we just passed a newline, initialize for the next line. */
1871 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B)
1872 bidi_line_init (bidi_it);
1873
1805 /* Prepare the sentinel iterator state. */ 1874 /* Prepare the sentinel iterator state. */
1806 if (bidi_cache_idx == 0) 1875 if (bidi_cache_idx == 0)
1807 { 1876 {
1808 bidi_copy_it (&sentinel, bidi_it); 1877 bidi_copy_it (&sentinel, bidi_it);
1809 if (bidi_it->first_elt) 1878 if (bidi_it->first_elt)
1873 /* Finally, deliver the next character in the new direction. */ 1942 /* Finally, deliver the next character in the new direction. */
1874 next_level = bidi_level_of_next_char (bidi_it); 1943 next_level = bidi_level_of_next_char (bidi_it);
1875 } 1944 }
1876 1945
1877 /* Take note when we are at the end of the paragraph. The next time 1946 /* Take note when we are at the end of the paragraph. The next time
1878 we are about to be called, next_element_from_buffer will 1947 we are about to be called, set_iterator_to_next will
1879 automatically reinit the paragraph direction, if needed. */ 1948 automatically reinit the paragraph direction, if needed. */
1880 if (bidi_it->scan_dir == 1 1949 if (bidi_it->scan_dir == 1
1881 && bidi_it->type == NEUTRAL_B 1950 && bidi_it->orig_type == NEUTRAL_B
1882 && bidi_it->bytepos < ZV_BYTE 1951 && bidi_it->bytepos < ZV_BYTE)
1883 && bidi_at_paragraph_end (bidi_it->charpos + 1, 1952 {
1884 bidi_it->bytepos + bidi_it->ch_len)) 1953 EMACS_INT sep_len =
1885 bidi_it->new_paragraph = 1; 1954 bidi_at_paragraph_end (bidi_it->charpos + 1,
1955 bidi_it->bytepos + bidi_it->ch_len);
1956 if (sep_len >= 0)
1957 {
1958 bidi_it->new_paragraph = 1;
1959 /* Record the buffer position of the first character after
1960 the paragraph separator. */
1961 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len + 1;
1962 }
1963 }
1886 1964
1887 if (bidi_it->scan_dir == 1 && bidi_cache_idx) 1965 if (bidi_it->scan_dir == 1 && bidi_cache_idx)
1888 { 1966 {
1889 /* If we are at paragraph's base embedding level and beyond the 1967 /* If we are at paragraph's base embedding level and beyond the
1890 last cached position, the cache's job is done and we can 1968 last cached position, the cache's job is done and we can