Mercurial > emacs
comparison src/bidi.c @ 107594:40b49fa464cf
Retrospective commit from 2009-10-04.
Continue working on determining the paragraph's base direction.
bidi.c (bidi_at_paragraph_end): Check for paragraph-start if
paragraph-separate failed to match. Return the length of the
matched separator.
(bidi_line_init): New function.
(bidi_paragraph_init): Use bidi_line_init. Do nothing if in the
middle of a paragraph-separate sequence. Don't override the existing
paragraph direction if no strong characters are found in this
paragraph. Set separator_limit according to what
bidi_at_paragraph_end returns. Reset the new_paragraph flag when a
new paragraph is found.
(bidi_init_it): Reset separator_limit.
dispextern.h (struct bidi_it): New member separator_limit.
bidi.c (bidi_find_paragraph_start): Return the byte position of
the paragraph beginning.
xdisp.c (set_iterator_to_next): Call bidi_paragraph_init if the
new_paragraph flag is set in the bidi iterator.
bidi.c (bidi_at_paragraph_end, bidi_find_paragraph_start): Use
the buffer-local value of paragraph-start and paragraph-separate.
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Fri, 01 Jan 2010 06:17:13 -0500 |
parents | a551e4109c04 |
children | 69c12db7031d |
comparison
equal
deleted
inserted
replaced
107593:a551e4109c04 | 107594:40b49fa464cf |
---|---|
731 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1) | 731 if (bidi_cache_idx == 0 || bidi_cache_last_idx == -1) |
732 abort (); | 732 abort (); |
733 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; | 733 return bidi_cache[bidi_cache_last_idx + bidi_it->scan_dir].resolved_level; |
734 } | 734 } |
735 | 735 |
736 /* Return non-zero if buffer's byte position POS is the end of a | 736 /* Check if buffer position CHARPOS/BYTEPOS is the end of a paragraph. |
737 paragraph. */ | 737 Value is the non-negative length of the paragraph separator |
738 int | 738 following the buffer position, -1 if position is at the beginning |
739 of a new paragraph, or -2 if position is neither at beginning nor | |
740 at end of a paragraph. */ | |
741 EMACS_INT | |
739 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) | 742 bidi_at_paragraph_end (EMACS_INT charpos, EMACS_INT bytepos) |
740 { | 743 { |
741 Lisp_Object re = XSYMBOL (Qparagraph_separate)->value; | 744 Lisp_Object sep_re = Fbuffer_local_value (Qparagraph_separate, |
742 | 745 Fcurrent_buffer ()); |
743 if (!STRINGP (re)) | 746 Lisp_Object start_re = Fbuffer_local_value (Qparagraph_start, |
744 re = fallback_paragraph_separate_re; | 747 Fcurrent_buffer ()); |
745 | 748 EMACS_INT val; |
746 return fast_looking_at (re, charpos, bytepos, ZV, ZV_BYTE, Qnil) > 0; | 749 |
750 if (!STRINGP (sep_re)) | |
751 sep_re = fallback_paragraph_separate_re; | |
752 if (!STRINGP (start_re)) | |
753 start_re = fallback_paragraph_start_re; | |
754 | |
755 val = fast_looking_at (sep_re, charpos, bytepos, ZV, ZV_BYTE, Qnil); | |
756 if (val < 0) | |
757 { | |
758 if (fast_looking_at (start_re, charpos, bytepos, ZV, ZV_BYTE, Qnil) >= 0) | |
759 val = -1; | |
760 else | |
761 val = -2; | |
762 } | |
763 | |
764 return val; | |
747 } | 765 } |
748 | 766 |
749 /* Determine the start-of-run (sor) directional type given the two | 767 /* Determine the start-of-run (sor) directional type given the two |
750 embedding levels on either side of the run boundary. Also, update | 768 embedding levels on either side of the run boundary. Also, update |
751 the saved info about previously seen characters, since that info is | 769 the saved info about previously seen characters, since that info is |
777 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 = | 795 bidi_it->next_for_neutral.type = bidi_it->next_for_neutral.type_after_w1 = |
778 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; | 796 bidi_it->next_for_neutral.orig_type = UNKNOWN_BT; |
779 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ | 797 bidi_it->ignore_bn_limit = 0; /* meaning it's unknown */ |
780 } | 798 } |
781 | 799 |
782 /* Find the beginning of this paragraph by looking back in the | |
783 buffer. */ | |
784 static void | 800 static void |
801 bidi_line_init (struct bidi_it *bidi_it) | |
802 { | |
803 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ | |
804 bidi_it->resolved_level = bidi_it->level_stack[0].level; | |
805 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ | |
806 bidi_it->invalid_levels = 0; | |
807 bidi_it->invalid_rl_levels = -1; | |
808 bidi_it->next_en_pos = -1; | |
809 bidi_it->next_for_ws.type = UNKNOWN_BT; | |
810 bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir, | |
811 bidi_it->level_stack[0].level); /* X10 */ | |
812 | |
813 bidi_cache_reset (); | |
814 } | |
815 | |
816 /* Find the beginning of this paragraph by looking back in the buffer. | |
817 Value is the byte position of the paragraph's beginning. */ | |
818 static EMACS_INT | |
785 bidi_find_paragraph_start (struct bidi_it *bidi_it) | 819 bidi_find_paragraph_start (struct bidi_it *bidi_it) |
786 { | 820 { |
787 Lisp_Object re = XSYMBOL (Qparagraph_start)->value; | 821 Lisp_Object re = Fbuffer_local_value (Qparagraph_start, Fcurrent_buffer ()); |
788 EMACS_INT pos = bidi_it->charpos; | 822 EMACS_INT pos = bidi_it->charpos; |
789 EMACS_INT pos_byte = bidi_it->bytepos; | 823 EMACS_INT pos_byte = bidi_it->bytepos; |
790 EMACS_INT limit = ZV, limit_byte = ZV_BYTE; | 824 EMACS_INT limit = ZV, limit_byte = ZV_BYTE; |
791 | 825 |
792 if (!STRINGP (re)) | 826 if (!STRINGP (re)) |
793 re = fallback_paragraph_start_re; | 827 re = fallback_paragraph_start_re; |
794 while (pos_byte > BEGV_BYTE | 828 while (pos_byte > BEGV_BYTE |
795 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) | 829 && fast_looking_at (re, pos, pos_byte, limit, limit_byte, Qnil) < 0) |
796 { | 830 { |
797 find_next_newline_no_quit (pos, -1); | 831 pos = find_next_newline_no_quit (pos - 1, -1); |
798 } | 832 pos_byte = CHAR_TO_BYTE (pos); |
799 } | 833 } |
800 | 834 return pos_byte; |
835 } | |
836 | |
837 /* Determine the direction, a.k.a. base embedding level, of the | |
838 paragraph we are about to iterate through. */ | |
801 void | 839 void |
802 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) | 840 bidi_paragraph_init (bidi_dir_t dir, struct bidi_it *bidi_it) |
803 { | 841 { |
804 EMACS_INT bytepos = bidi_it->bytepos; | 842 EMACS_INT bytepos = bidi_it->bytepos; |
805 | 843 |
806 /* We should never be called at EOB or before BEGV. */ | 844 /* We should never be called at EOB or before BEGV. */ |
807 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) | 845 if (bytepos >= ZV_BYTE || bytepos < BEGV_BYTE) |
808 abort (); | 846 abort (); |
809 | 847 |
810 bidi_it->level_stack[0].level = 0; /* default for L2R */ | 848 if (dir == L2R) |
811 bidi_it->paragraph_dir = L2R; | 849 { |
812 if (dir == R2L) | 850 bidi_it->paragraph_dir = L2R; |
813 bidi_it->level_stack[0].level = 1; | 851 bidi_it->new_paragraph = 0; |
852 } | |
853 else if (dir == R2L) | |
854 { | |
855 bidi_it->paragraph_dir = R2L; | |
856 bidi_it->new_paragraph = 0; | |
857 } | |
814 else if (dir == NEUTRAL_DIR) /* P2 */ | 858 else if (dir == NEUTRAL_DIR) /* P2 */ |
815 { | 859 { |
816 int ch, ch_len; | 860 int ch, ch_len; |
817 EMACS_INT pos; | 861 EMACS_INT pos; |
818 bidi_type_t type; | 862 bidi_type_t type; |
819 | 863 EMACS_INT sep_len; |
820 /* Search back to where this paragraph starts. */ | 864 |
821 bidi_find_paragraph_start (bidi_it); | 865 /* If we are inside a paragraph separator, we are just waiting |
866 for the separator to be exhausted; use the previous paragraph | |
867 direction. */ | |
868 if (bidi_it->charpos < bidi_it->separator_limit) | |
869 return; | |
870 | |
871 /* If we are before another paragraph separator, continue | |
872 through that with the previous paragraph direction. */ | |
873 sep_len = bidi_at_paragraph_end (bidi_it->charpos, bytepos); | |
874 if (sep_len >= 0) | |
875 { | |
876 bidi_it->separator_limit += sep_len + 1; | |
877 return; | |
878 } | |
879 else if (sep_len == -2) | |
880 /* We are in the middle of a paragraph. Search back to where | |
881 this paragraph starts. */ | |
882 bytepos = bidi_find_paragraph_start (bidi_it); | |
822 | 883 |
823 /* We should always be at the beginning of a new line at this | 884 /* We should always be at the beginning of a new line at this |
824 point. */ | 885 point. */ |
825 if (!(bytepos == BEGV_BYTE | 886 if (!(bytepos == BEGV_BYTE |
826 || FETCH_CHAR (bytepos) == '\n' | 887 || FETCH_CHAR (bytepos) == '\n' |
827 || FETCH_CHAR (bytepos - 1) == '\n')) | 888 || FETCH_CHAR (bytepos - 1) == '\n')) |
828 abort (); | 889 abort (); |
829 | 890 |
891 bidi_it->separator_limit = -1; | |
892 bidi_it->new_paragraph = 0; | |
830 ch = FETCH_CHAR (bytepos); | 893 ch = FETCH_CHAR (bytepos); |
831 ch_len = CHAR_BYTES (ch); | 894 ch_len = CHAR_BYTES (ch); |
832 pos = bidi_it->charpos; | 895 pos = BYTE_TO_CHAR (bytepos); |
833 type = bidi_get_type (ch, NEUTRAL_DIR); | 896 type = bidi_get_type (ch, NEUTRAL_DIR); |
834 | 897 |
835 for (pos++, bytepos += ch_len; | 898 for (pos++, bytepos += ch_len; |
836 /* NOTE: UAX#9 says to search only for L, AL, or R types of | 899 /* NOTE: UAX#9 says to search only for L, AL, or R types of |
837 characters, and ignore RLE, RLO, LRE, and LRO. However, | 900 characters, and ignore RLE, RLO, LRE, and LRO. However, |
841 || (bidi_ignore_explicit_marks_for_paragraph_level | 904 || (bidi_ignore_explicit_marks_for_paragraph_level |
842 && (type == RLE || type == RLO | 905 && (type == RLE || type == RLO |
843 || type == LRE || type == LRO)); | 906 || type == LRE || type == LRO)); |
844 type = bidi_get_type (ch, NEUTRAL_DIR)) | 907 type = bidi_get_type (ch, NEUTRAL_DIR)) |
845 { | 908 { |
846 if (type == NEUTRAL_B || bidi_at_paragraph_end (pos, bytepos)) | 909 if (type == NEUTRAL_B && bidi_at_paragraph_end (pos, bytepos) >= -1) |
847 break; | 910 break; |
848 FETCH_CHAR_ADVANCE (ch, pos, bytepos); | 911 FETCH_CHAR_ADVANCE (ch, pos, bytepos); |
849 } | 912 } |
850 if (type == STRONG_R || type == STRONG_AL) /* P3 */ | 913 if (type == STRONG_R || type == STRONG_AL) /* P3 */ |
851 bidi_it->level_stack[0].level = 1; | 914 bidi_it->paragraph_dir = R2L; |
852 } | 915 else if (type == STRONG_L) |
853 if (bidi_it->level_stack[0].level == 1) | 916 bidi_it->paragraph_dir = L2R; |
854 bidi_it->paragraph_dir = R2L; | 917 } |
855 bidi_it->scan_dir = 1; /* FIXME: do we need to have control on this? */ | 918 else |
856 bidi_it->resolved_level = bidi_it->level_stack[0].level; | 919 abort (); |
857 bidi_it->level_stack[0].override = NEUTRAL_DIR; /* X1 */ | 920 |
858 bidi_it->invalid_levels = 0; | 921 /* Contrary to UAX#9 clause P3, we only default to L2R if we have no |
859 bidi_it->invalid_rl_levels = -1; | 922 previous usable paragraph direction. */ |
860 bidi_it->new_paragraph = 0; | 923 if (bidi_it->paragraph_dir == NEUTRAL_DIR) |
861 bidi_it->next_en_pos = -1; | 924 bidi_it->paragraph_dir = L2R; /* P3 */ |
862 bidi_it->next_for_ws.type = UNKNOWN_BT; | 925 if (bidi_it->paragraph_dir == R2L) |
863 bidi_set_sor_type (bidi_it, bidi_it->paragraph_dir, | 926 bidi_it->level_stack[0].level == 1; |
864 bidi_it->level_stack[0].level); /* X10 */ | 927 else |
865 | 928 bidi_it->level_stack[0].level == 0; |
866 bidi_cache_reset (); | 929 |
930 bidi_line_init (bidi_it); | |
867 } | 931 } |
868 | 932 |
869 /* Do whatever UAX#9 clause X8 says should be done at paragraph's | 933 /* Do whatever UAX#9 clause X8 says should be done at paragraph's |
870 end. */ | 934 end. */ |
871 static inline void | 935 static inline void |
886 bidi_it->charpos = charpos; | 950 bidi_it->charpos = charpos; |
887 bidi_it->bytepos = bytepos; | 951 bidi_it->bytepos = bytepos; |
888 bidi_it->first_elt = 1; | 952 bidi_it->first_elt = 1; |
889 bidi_set_paragraph_end (bidi_it); | 953 bidi_set_paragraph_end (bidi_it); |
890 bidi_it->new_paragraph = 1; | 954 bidi_it->new_paragraph = 1; |
955 bidi_it->separator_limit = -1; | |
891 bidi_it->type = NEUTRAL_B; | 956 bidi_it->type = NEUTRAL_B; |
892 bidi_it->type_after_w1 = UNKNOWN_BT; | 957 bidi_it->type_after_w1 = UNKNOWN_BT; |
893 bidi_it->orig_type = UNKNOWN_BT; | 958 bidi_it->orig_type = UNKNOWN_BT; |
894 bidi_it->prev_was_pdf = 0; | 959 bidi_it->prev_was_pdf = 0; |
895 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT; | 960 bidi_it->prev.type = bidi_it->prev.type_after_w1 = UNKNOWN_BT; |
1800 if (bidi_it->scan_dir == 0) | 1865 if (bidi_it->scan_dir == 0) |
1801 { | 1866 { |
1802 bidi_it->scan_dir = 1; /* default to logical order */ | 1867 bidi_it->scan_dir = 1; /* default to logical order */ |
1803 } | 1868 } |
1804 | 1869 |
1870 /* If we just passed a newline, initialize for the next line. */ | |
1871 if (!bidi_it->first_elt && bidi_it->orig_type == NEUTRAL_B) | |
1872 bidi_line_init (bidi_it); | |
1873 | |
1805 /* Prepare the sentinel iterator state. */ | 1874 /* Prepare the sentinel iterator state. */ |
1806 if (bidi_cache_idx == 0) | 1875 if (bidi_cache_idx == 0) |
1807 { | 1876 { |
1808 bidi_copy_it (&sentinel, bidi_it); | 1877 bidi_copy_it (&sentinel, bidi_it); |
1809 if (bidi_it->first_elt) | 1878 if (bidi_it->first_elt) |
1873 /* Finally, deliver the next character in the new direction. */ | 1942 /* Finally, deliver the next character in the new direction. */ |
1874 next_level = bidi_level_of_next_char (bidi_it); | 1943 next_level = bidi_level_of_next_char (bidi_it); |
1875 } | 1944 } |
1876 | 1945 |
1877 /* Take note when we are at the end of the paragraph. The next time | 1946 /* Take note when we are at the end of the paragraph. The next time |
1878 we are about to be called, next_element_from_buffer will | 1947 we are about to be called, set_iterator_to_next will |
1879 automatically reinit the paragraph direction, if needed. */ | 1948 automatically reinit the paragraph direction, if needed. */ |
1880 if (bidi_it->scan_dir == 1 | 1949 if (bidi_it->scan_dir == 1 |
1881 && bidi_it->type == NEUTRAL_B | 1950 && bidi_it->orig_type == NEUTRAL_B |
1882 && bidi_it->bytepos < ZV_BYTE | 1951 && bidi_it->bytepos < ZV_BYTE) |
1883 && bidi_at_paragraph_end (bidi_it->charpos + 1, | 1952 { |
1884 bidi_it->bytepos + bidi_it->ch_len)) | 1953 EMACS_INT sep_len = |
1885 bidi_it->new_paragraph = 1; | 1954 bidi_at_paragraph_end (bidi_it->charpos + 1, |
1955 bidi_it->bytepos + bidi_it->ch_len); | |
1956 if (sep_len >= 0) | |
1957 { | |
1958 bidi_it->new_paragraph = 1; | |
1959 /* Record the buffer position of the first character after | |
1960 the paragraph separator. */ | |
1961 bidi_it->separator_limit = bidi_it->charpos + 1 + sep_len + 1; | |
1962 } | |
1963 } | |
1886 | 1964 |
1887 if (bidi_it->scan_dir == 1 && bidi_cache_idx) | 1965 if (bidi_it->scan_dir == 1 && bidi_cache_idx) |
1888 { | 1966 { |
1889 /* If we are at paragraph's base embedding level and beyond the | 1967 /* If we are at paragraph's base embedding level and beyond the |
1890 last cached position, the cache's job is done and we can | 1968 last cached position, the cache's job is done and we can |