comparison src/syntax.c @ 25445:d678b229c05a

(lisp_parse_state, back_comment, Fmodify_syntax_entry) (Fforward_comment, scan_lists, scan_sexps_forward) (Fparse_partial_sexp): Add support for nested comments. (forw_comment): New subroutine used in them.
author Richard M. Stallman <rms@gnu.org>
date Sun, 29 Aug 1999 21:32:52 +0000
parents 3f78f1fed4e0
children 863b49f711d6
comparison
equal deleted inserted replaced
25444:37b1636d73d5 25445:d678b229c05a
23 #include <ctype.h> 23 #include <ctype.h>
24 #include "lisp.h" 24 #include "lisp.h"
25 #include "commands.h" 25 #include "commands.h"
26 #include "buffer.h" 26 #include "buffer.h"
27 #include "charset.h" 27 #include "charset.h"
28 #include <assert.h>
28 29
29 /* Make syntax table lookup grant data in gl_state. */ 30 /* Make syntax table lookup grant data in gl_state. */
30 #define SYNTAX_ENTRY_VIA_PROPERTY 31 #define SYNTAX_ENTRY_VIA_PROPERTY
31 32
32 #include "syntax.h" 33 #include "syntax.h"
54 55
55 struct lisp_parse_state 56 struct lisp_parse_state
56 { 57 {
57 int depth; /* Depth at end of parsing. */ 58 int depth; /* Depth at end of parsing. */
58 int instring; /* -1 if not within string, else desired terminator. */ 59 int instring; /* -1 if not within string, else desired terminator. */
59 int incomment; /* Nonzero if within a comment at end of parsing. */ 60 int incomment; /* -1 if in unnestable comment else comment nesting */
60 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */ 61 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
61 int quoted; /* Nonzero if just after an escape char at end of parsing */ 62 int quoted; /* Nonzero if just after an escape char at end of parsing */
62 int thislevelstart; /* Char number of most recent start-of-expression at current level */ 63 int thislevelstart; /* Char number of most recent start-of-expression at current level */
63 int prevlevelstart; /* Char number of start of containing expression */ 64 int prevlevelstart; /* Char number of start of containing expression */
64 int location; /* Char number at which parsing stopped. */ 65 int location; /* Char number at which parsing stopped. */
83 static int find_start_begv; 84 static int find_start_begv;
84 static int find_start_modiff; 85 static int find_start_modiff;
85 86
86 87
87 static int find_defun_start P_ ((int, int)); 88 static int find_defun_start P_ ((int, int));
88 static int back_comment P_ ((int, int, int, int, int *, int *)); 89 static int back_comment P_ ((int, int, int, int, int, int *, int *));
89 static int char_quoted P_ ((int, int)); 90 static int char_quoted P_ ((int, int));
90 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object)); 91 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object));
91 static Lisp_Object scan_lists P_ ((int, int, int, int)); 92 static Lisp_Object scan_lists P_ ((int, int, int, int));
92 static void scan_sexps_forward P_ ((struct lisp_parse_state *, 93 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
93 int, int, int, int, 94 int, int, int, int,
440 441
441 Global syntax data remains valid for backward search starting at 442 Global syntax data remains valid for backward search starting at
442 the returned value (or at FROM, if the search was not successful). */ 443 the returned value (or at FROM, if the search was not successful). */
443 444
444 static int 445 static int
445 back_comment (from, from_byte, stop, comstyle, charpos_ptr, bytepos_ptr) 446 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
446 int from, from_byte, stop; 447 int from, from_byte, stop;
447 int comstyle; 448 int comnested, comstyle;
448 int *charpos_ptr, *bytepos_ptr; 449 int *charpos_ptr, *bytepos_ptr;
449 { 450 {
450 /* Look back, counting the parity of string-quotes, 451 /* Look back, counting the parity of string-quotes,
451 and recording the comment-starters seen. 452 and recording the comment-starters seen.
452 When we reach a safe place, assume that's not in a string; 453 When we reach a safe place, assume that's not in a string;
470 /* Place where the containing defun starts, 471 /* Place where the containing defun starts,
471 or 0 if we didn't come across it yet. */ 472 or 0 if we didn't come across it yet. */
472 int defun_start = 0; 473 int defun_start = 0;
473 int defun_start_byte = 0; 474 int defun_start_byte = 0;
474 register enum syntaxcode code; 475 register enum syntaxcode code;
476 int nesting = 1; /* current comment nesting */
475 int c; 477 int c;
476 478
477 /* At beginning of range to scan, we're outside of strings; 479 /* At beginning of range to scan, we're outside of strings;
478 that determines quote parity to the comment-end. */ 480 that determines quote parity to the comment-end. */
479 while (from != stop) 481 while (from != stop)
537 else if (my_stringend != (code == Sstring_fence 539 else if (my_stringend != (code == Sstring_fence
538 ? ST_STRING_STYLE : ST_COMMENT_STYLE)) 540 ? ST_STRING_STYLE : ST_COMMENT_STYLE))
539 string_lossage = 1; 541 string_lossage = 1;
540 } 542 }
541 543
542 /* Record comment-starters according to that
543 quote-parity to the comment-end. */
544 if (code == Scomment) 544 if (code == Scomment)
545 { 545 /* FIXME: we should also check that the comstyle is correct
546 if the Scomment is a single-char. */
547 {
548 if (comnested && --nesting <= 0 && parity == 0 && !string_lossage)
549 /* nested comments have to be balanced, so we don't need to
550 keep looking for earlier ones. We use here the same (slightly
551 incorrect) reasoning as below: since it is followed by uniform
552 paired string quotes, this comment-start has to be outside of
553 strings, else the comment-end itself would be inside a string. */
554 goto done;
555
556 /* Record comment-starters according to that
557 quote-parity to the comment-end. */
546 comstart_parity = parity; 558 comstart_parity = parity;
547 comstart_pos = from; 559 comstart_pos = from;
548 comstart_byte = from_byte; 560 comstart_byte = from_byte;
549 } 561 }
550 562
551 /* If we find another earlier comment-ender, 563 /* If we find another earlier comment-ender,
552 any comment-starts earlier than that don't count 564 any comment-starts earlier than that don't count
553 (because they go with the earlier comment-ender). */ 565 (because they go with the earlier comment-ender). */
554 if (code == Sendcomment 566 if (code == Sendcomment
555 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle) 567 && SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)) == comstyle)
556 break; 568 if (comnested)
569 nesting++;
570 else
571 break;
557 572
558 /* Assume a defun-start point is outside of strings. */ 573 /* Assume a defun-start point is outside of strings. */
559 if (code == Sopen 574 if (code == Sopen
560 && (from == stop 575 && (from == stop
561 || (temp_byte = dec_bytepos (from_byte), 576 || (temp_byte = dec_bytepos (from_byte),
576 /* If the earliest comment starter 591 /* If the earliest comment starter
577 is followed by uniform paired string quotes or none, 592 is followed by uniform paired string quotes or none,
578 we know it can't be inside a string 593 we know it can't be inside a string
579 since if it were then the comment ender would be inside one. 594 since if it were then the comment ender would be inside one.
580 So it does start a comment. Skip back to it. */ 595 So it does start a comment. Skip back to it. */
581 else if (comstart_parity == 0 && !string_lossage) 596 else if (!comnested && comstart_parity == 0 && !string_lossage)
582 { 597 {
583 from = comstart_pos; 598 from = comstart_pos;
584 from_byte = comstart_byte; 599 from_byte = comstart_byte;
585 /* Globals are correct now. */ 600 /* Globals are correct now. */
586 } 601 }
614 } 629 }
615 from_byte = CHAR_TO_BYTE (from); 630 from_byte = CHAR_TO_BYTE (from);
616 UPDATE_SYNTAX_TABLE_FORWARD (from - 1); 631 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
617 } 632 }
618 633
634 done:
619 *charpos_ptr = from; 635 *charpos_ptr = from;
620 *bytepos_ptr = from_byte; 636 *bytepos_ptr = from_byte;
621 637
622 return from; 638 return from;
623 } 639 }
820 Only single-character comment start and end sequences are represented thus.\n\ 836 Only single-character comment start and end sequences are represented thus.\n\
821 Two-character sequences are represented as described below.\n\ 837 Two-character sequences are represented as described below.\n\
822 The second character of S is the matching parenthesis,\n\ 838 The second character of S is the matching parenthesis,\n\
823 used only if the first character is `(' or `)'.\n\ 839 used only if the first character is `(' or `)'.\n\
824 Any additional characters are flags.\n\ 840 Any additional characters are flags.\n\
825 Defined flags are the characters 1, 2, 3, 4, b, and p.\n\ 841 Defined flags are the characters 1, 2, 3, 4, b, p, and n.\n\
826 1 means CHAR is the start of a two-char comment start sequence.\n\ 842 1 means CHAR is the start of a two-char comment start sequence.\n\
827 2 means CHAR is the second character of such a sequence.\n\ 843 2 means CHAR is the second character of such a sequence.\n\
828 3 means CHAR is the start of a two-char comment end sequence.\n\ 844 3 means CHAR is the start of a two-char comment end sequence.\n\
829 4 means CHAR is the second character of such a sequence.\n\ 845 4 means CHAR is the second character of such a sequence.\n\
830 \n\ 846 \n\
832 language modes such as C++. By default, all comment sequences are of style\n\ 848 language modes such as C++. By default, all comment sequences are of style\n\
833 a, but you can set the comment sequence style to b (on the second character\n\ 849 a, but you can set the comment sequence style to b (on the second character\n\
834 of a comment-start, or the first character of a comment-end sequence) using\n\ 850 of a comment-start, or the first character of a comment-end sequence) using\n\
835 this flag:\n\ 851 this flag:\n\
836 b means CHAR is part of comment sequence b.\n\ 852 b means CHAR is part of comment sequence b.\n\
853 n means CHAR is part of a nestable comment sequence.\n\
837 \n\ 854 \n\
838 p means CHAR is a prefix character for `backward-prefix-chars';\n\ 855 p means CHAR is a prefix character for `backward-prefix-chars';\n\
839 such characters are treated as whitespace when they occur\n\ 856 such characters are treated as whitespace when they occur\n\
840 between expressions.") 857 between expressions.")
841 (char, s, table) 858 (char, s, table)
911 val |= 1 << 20; 928 val |= 1 << 20;
912 break; 929 break;
913 930
914 case 'b': 931 case 'b':
915 val |= 1 << 21; 932 val |= 1 << 21;
933 break;
934
935 case 'n':
936 val |= 1 << 22;
916 break; 937 break;
917 } 938 }
918 939
919 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match)) 940 if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
920 newentry = XVECTOR (Vsyntax_code_object)->contents[val]; 941 newentry = XVECTOR (Vsyntax_code_object)->contents[val];
1602 1623
1603 return make_number (PT - start_point); 1624 return make_number (PT - start_point);
1604 } 1625 }
1605 } 1626 }
1606 1627
1628 /* Jump over a comment, assuming we are at the beginning of one.
1629 FROM is the current position.
1630 FROM_BYTE is the bytepos corresponding to FROM.
1631 Do not move past STOP (a charpos).
1632 The comment over which we have to jump is of style STYLE
1633 (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
1634 NESTING should be positive to indicate the nesting at the beginning
1635 for nested comments and should be zero or negative else.
1636 ST_COMMENT_STYLE cannot be nested.
1637 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
1638 (or 0 If the search cannot start in the middle of a two-character).
1639
1640 If successful, return 1 and store the charpos of the comment's end
1641 into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
1642 Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
1643 corresponding bytepos into *BYTEPOS_PTR and the current nesting
1644 (as defined for state.incomment) in *INCOMMENT_PTR.
1645
1646 The comment end is the last character of the comment rather than the
1647 character just after the comment.
1648
1649 Global syntax data is assumed to initially be valid for FROM and
1650 remains valid for forward search starting at the returned position. */
1651
1652 static int
1653 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
1654 charpos_ptr, bytepos_ptr, incomment_ptr)
1655 int from, from_byte, stop;
1656 int nesting, style, prev_syntax;
1657 int *charpos_ptr, *bytepos_ptr, *incomment_ptr;
1658 {
1659 register int c, c1;
1660 register enum syntaxcode code;
1661 register int syntax;
1662
1663 if (nesting <= 0) nesting = -1;
1664
1665 /* Enter the loop in the middle so that we find
1666 a 2-char comment ender if we start in the middle of it. */
1667 syntax = prev_syntax;
1668 if (syntax != 0) goto forw_incomment;
1669
1670 while (1)
1671 {
1672 if (from == stop)
1673 {
1674 *incomment_ptr = nesting;
1675 *charpos_ptr = from;
1676 *bytepos_ptr = from_byte;
1677 return 0;
1678 }
1679 c = FETCH_CHAR (from_byte);
1680 syntax = SYNTAX_WITH_FLAGS (c);
1681 code = syntax & 0xff;
1682 if (code == Sendcomment
1683 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1684 && --nesting <= 0)
1685 /* we have encountered a comment end of the same style
1686 as the comment sequence which began this comment
1687 section */
1688 break;
1689 if (code == Scomment_fence
1690 && style == ST_COMMENT_STYLE)
1691 /* we have encountered a comment end of the same style
1692 as the comment sequence which began this comment
1693 section. */
1694 break;
1695 if (nesting > 0
1696 && code == Scomment
1697 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
1698 /* we have encountered a nested comment of the same style
1699 as the comment sequence which began this comment section */
1700 nesting++;
1701 INC_BOTH (from, from_byte);
1702 UPDATE_SYNTAX_TABLE_FORWARD (from);
1703
1704 forw_incomment:
1705 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
1706 && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
1707 && (c1 = FETCH_CHAR (from_byte),
1708 SYNTAX_COMEND_SECOND (c1)))
1709 if (--nesting <= 0)
1710 /* we have encountered a comment end of the same style
1711 as the comment sequence which began this comment
1712 section */
1713 break;
1714 else
1715 {
1716 INC_BOTH (from, from_byte);
1717 UPDATE_SYNTAX_TABLE_FORWARD (from);
1718 }
1719 if (nesting > 0
1720 && from < stop
1721 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
1722 && (c1 = FETCH_CHAR (from_byte),
1723 SYNTAX_COMMENT_STYLE (c1) == style
1724 && SYNTAX_COMSTART_SECOND (c1)))
1725 /* we have encountered a nested comment of the same style
1726 as the comment sequence which began this comment
1727 section */
1728 {
1729 INC_BOTH (from, from_byte);
1730 UPDATE_SYNTAX_TABLE_FORWARD (from);
1731 nesting++;
1732 }
1733 }
1734 *charpos_ptr = from;
1735 *bytepos_ptr = from_byte;
1736 return 1;
1737 }
1738
1607 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0, 1739 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
1608 "Move forward across up to N comments. If N is negative, move backward.\n\ 1740 "Move forward across up to N comments. If N is negative, move backward.\n\
1609 Stop scanning if we find something other than a comment or whitespace.\n\ 1741 Stop scanning if we find something other than a comment or whitespace.\n\
1610 Set point to where scanning stops.\n\ 1742 Set point to where scanning stops.\n\
1611 If N comments are found as expected, with nothing except whitespace\n\ 1743 If N comments are found as expected, with nothing except whitespace\n\
1617 int from_byte; 1749 int from_byte;
1618 register int stop; 1750 register int stop;
1619 register int c, c1; 1751 register int c, c1;
1620 register enum syntaxcode code; 1752 register enum syntaxcode code;
1621 int comstyle = 0; /* style of comment encountered */ 1753 int comstyle = 0; /* style of comment encountered */
1754 int comnested = 0; /* whether the comment is nestable or not */
1622 int found; 1755 int found;
1623 int count1; 1756 int count1;
1624 int out_charpos, out_bytepos; 1757 int out_charpos, out_bytepos;
1758 int dummy;
1625 1759
1626 CHECK_NUMBER (count, 0); 1760 CHECK_NUMBER (count, 0);
1627 count1 = XINT (count); 1761 count1 = XINT (count);
1628 stop = count1 > 0 ? ZV : BEGV; 1762 stop = count1 > 0 ? ZV : BEGV;
1629 1763
1644 { 1778 {
1645 SET_PT_BOTH (from, from_byte); 1779 SET_PT_BOTH (from, from_byte);
1646 immediate_quit = 0; 1780 immediate_quit = 0;
1647 return Qnil; 1781 return Qnil;
1648 } 1782 }
1649 UPDATE_SYNTAX_TABLE_FORWARD (from);
1650 c = FETCH_CHAR (from_byte); 1783 c = FETCH_CHAR (from_byte);
1651 code = SYNTAX (c); 1784 code = SYNTAX (c);
1652 comstart_first = SYNTAX_COMSTART_FIRST (c); 1785 comstart_first = SYNTAX_COMSTART_FIRST (c);
1786 comnested = SYNTAX_COMMENT_NESTED (c);
1653 INC_BOTH (from, from_byte); 1787 INC_BOTH (from, from_byte);
1654 UPDATE_SYNTAX_TABLE_FORWARD (from); 1788 UPDATE_SYNTAX_TABLE_FORWARD (from);
1655 comstyle = 0; 1789 comstyle = 0;
1656 if (from < stop && comstart_first 1790 if (from < stop && comstart_first
1657 && (c1 = FETCH_CHAR (from_byte), 1791 && (c1 = FETCH_CHAR (from_byte),
1662 the comment style this sequence begins so that later, 1796 the comment style this sequence begins so that later,
1663 only a comment end of the same style actually ends 1797 only a comment end of the same style actually ends
1664 the comment section. */ 1798 the comment section. */
1665 code = Scomment; 1799 code = Scomment;
1666 comstyle = SYNTAX_COMMENT_STYLE (c1); 1800 comstyle = SYNTAX_COMMENT_STYLE (c1);
1801 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1667 INC_BOTH (from, from_byte); 1802 INC_BOTH (from, from_byte);
1803 UPDATE_SYNTAX_TABLE_FORWARD (from);
1668 } 1804 }
1669 } 1805 }
1670 while (code == Swhitespace || code == Sendcomment); 1806 while (code == Swhitespace || code == Sendcomment);
1671 1807
1672 if (code == Scomment_fence) 1808 if (code == Scomment_fence)
1677 DEC_BOTH (from, from_byte); 1813 DEC_BOTH (from, from_byte);
1678 SET_PT_BOTH (from, from_byte); 1814 SET_PT_BOTH (from, from_byte);
1679 return Qnil; 1815 return Qnil;
1680 } 1816 }
1681 /* We're at the start of a comment. */ 1817 /* We're at the start of a comment. */
1682 while (1) 1818 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
1683 { 1819 &out_charpos, &out_bytepos, &dummy);
1684 if (from == stop) 1820 from = out_charpos; from_byte = out_bytepos;
1685 { 1821 if (!found)
1686 immediate_quit = 0; 1822 {
1687 SET_PT_BOTH (from, from_byte); 1823 immediate_quit = 0;
1688 return Qnil; 1824 SET_PT_BOTH (from, from_byte);
1689 } 1825 return Qnil;
1690 UPDATE_SYNTAX_TABLE_FORWARD (from); 1826 }
1691 c = FETCH_CHAR (from_byte); 1827 INC_BOTH (from, from_byte);
1692 INC_BOTH (from, from_byte); 1828 UPDATE_SYNTAX_TABLE_FORWARD (from);
1693 if (SYNTAX (c) == Sendcomment
1694 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1695 /* we have encountered a comment end of the same style
1696 as the comment sequence which began this comment
1697 section */
1698 break;
1699 if (SYNTAX (c) == Scomment_fence
1700 && comstyle == ST_COMMENT_STYLE)
1701 /* we have encountered a comment end of the same style
1702 as the comment sequence which began this comment
1703 section. */
1704 break;
1705 if (from < stop && SYNTAX_COMEND_FIRST (c)
1706 && SYNTAX_COMMENT_STYLE (c) == comstyle
1707 && (c1 = FETCH_CHAR (from_byte),
1708 UPDATE_SYNTAX_TABLE_FORWARD (from),
1709 SYNTAX_COMEND_SECOND (c1)))
1710 /* we have encountered a comment end of the same style
1711 as the comment sequence which began this comment
1712 section */
1713 {
1714 INC_BOTH (from, from_byte);
1715 break;
1716 }
1717 }
1718 /* We have skipped one comment. */ 1829 /* We have skipped one comment. */
1719 count1--; 1830 count1--;
1720 } 1831 }
1721 1832
1722 while (count1 < 0) 1833 while (count1 < 0)
1741 goto leave; 1852 goto leave;
1742 } 1853 }
1743 c = FETCH_CHAR (from_byte); 1854 c = FETCH_CHAR (from_byte);
1744 code = SYNTAX (c); 1855 code = SYNTAX (c);
1745 comstyle = 0; 1856 comstyle = 0;
1857 comnested = SYNTAX_COMMENT_NESTED (c);
1746 if (code == Sendcomment) 1858 if (code == Sendcomment)
1747 comstyle = SYNTAX_COMMENT_STYLE (c); 1859 comstyle = SYNTAX_COMMENT_STYLE (c);
1748 comstart_second = SYNTAX_COMSTART_SECOND (c); 1860 comstart_second = SYNTAX_COMSTART_SECOND (c);
1749 if (from > stop && SYNTAX_COMEND_SECOND (c) 1861 if (from > stop && SYNTAX_COMEND_SECOND (c)
1750 && prev_char_comend_first (from, from_byte) 1862 && prev_char_comend_first (from, from_byte)
1755 sequence of the same style. */ 1867 sequence of the same style. */
1756 DEC_BOTH (from, from_byte); 1868 DEC_BOTH (from, from_byte);
1757 code = Sendcomment; 1869 code = Sendcomment;
1758 /* Calling char_quoted, above, set up global syntax position 1870 /* Calling char_quoted, above, set up global syntax position
1759 at the new value of FROM. */ 1871 at the new value of FROM. */
1760 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)); 1872 c1 = FETCH_CHAR (from_byte);
1873 comstyle = SYNTAX_COMMENT_STYLE (c1);
1874 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1761 } 1875 }
1762 if (from > stop && comstart_second 1876 if (from > stop && comstart_second
1763 && prev_char_comstart_first (from, from_byte) 1877 && prev_char_comstart_first (from, from_byte)
1764 && !char_quoted (from - 1, dec_bytepos (from_byte))) 1878 && !char_quoted (from - 1, dec_bytepos (from_byte)))
1765 { 1879 {
1766 /* We must record the comment style encountered so that
1767 later, we can match only the proper comment begin
1768 sequence of the same style. */
1769 code = Scomment; 1880 code = Scomment;
1770 DEC_BOTH (from, from_byte); 1881 DEC_BOTH (from, from_byte);
1771 } 1882 }
1772 1883
1773 if (code == Scomment_fence) 1884 if (code == Scomment_fence)
1796 goto leave; 1907 goto leave;
1797 } 1908 }
1798 } 1909 }
1799 else if (code == Sendcomment) 1910 else if (code == Sendcomment)
1800 { 1911 {
1801 found = back_comment (from, from_byte, stop, comstyle, 1912 found = back_comment (from, from_byte, stop, comnested, comstyle,
1802 &out_charpos, &out_bytepos); 1913 &out_charpos, &out_bytepos);
1803 if (found != -1) 1914 if (found != -1)
1804 from = out_charpos, from_byte = out_bytepos; 1915 from = out_charpos, from_byte = out_bytepos;
1805 /* We have skipped one comment. */ 1916 /* We have skipped one comment. */
1806 break; 1917 break;
1835 int quoted; 1946 int quoted;
1836 int mathexit = 0; 1947 int mathexit = 0;
1837 register enum syntaxcode code, temp_code; 1948 register enum syntaxcode code, temp_code;
1838 int min_depth = depth; /* Err out if depth gets less than this. */ 1949 int min_depth = depth; /* Err out if depth gets less than this. */
1839 int comstyle = 0; /* style of comment encountered */ 1950 int comstyle = 0; /* style of comment encountered */
1951 int comnested = 0; /* whether the comment is nestable or not */
1840 int temp_pos; 1952 int temp_pos;
1841 int last_good = from; 1953 int last_good = from;
1842 int found; 1954 int found;
1843 int from_byte; 1955 int from_byte;
1844 int out_bytepos, out_charpos; 1956 int out_bytepos, out_charpos;
1845 int temp; 1957 int temp, dummy;
1846 1958
1847 if (depth > 0) min_depth = 0; 1959 if (depth > 0) min_depth = 0;
1848 1960
1849 if (from > ZV) from = ZV; 1961 if (from > ZV) from = ZV;
1850 if (from < BEGV) from = BEGV; 1962 if (from < BEGV) from = BEGV;
1862 int comstart_first, prefix; 1974 int comstart_first, prefix;
1863 UPDATE_SYNTAX_TABLE_FORWARD (from); 1975 UPDATE_SYNTAX_TABLE_FORWARD (from);
1864 c = FETCH_CHAR (from_byte); 1976 c = FETCH_CHAR (from_byte);
1865 code = SYNTAX (c); 1977 code = SYNTAX (c);
1866 comstart_first = SYNTAX_COMSTART_FIRST (c); 1978 comstart_first = SYNTAX_COMSTART_FIRST (c);
1979 comnested = SYNTAX_COMMENT_NESTED (c);
1867 prefix = SYNTAX_PREFIX (c); 1980 prefix = SYNTAX_PREFIX (c);
1868 if (depth == min_depth) 1981 if (depth == min_depth)
1869 last_good = from; 1982 last_good = from;
1870 INC_BOTH (from, from_byte); 1983 INC_BOTH (from, from_byte);
1871 UPDATE_SYNTAX_TABLE_FORWARD (from); 1984 UPDATE_SYNTAX_TABLE_FORWARD (from);
1877 are ignoring all text inside comments. We must record 1990 are ignoring all text inside comments. We must record
1878 the comment style this sequence begins so that later, 1991 the comment style this sequence begins so that later,
1879 only a comment end of the same style actually ends 1992 only a comment end of the same style actually ends
1880 the comment section */ 1993 the comment section */
1881 code = Scomment; 1994 code = Scomment;
1882 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)); 1995 c1 = FETCH_CHAR (from_byte);
1996 comstyle = SYNTAX_COMMENT_STYLE (c1);
1997 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
1883 INC_BOTH (from, from_byte); 1998 INC_BOTH (from, from_byte);
1884 UPDATE_SYNTAX_TABLE_FORWARD (from); 1999 UPDATE_SYNTAX_TABLE_FORWARD (from);
1885 } 2000 }
1886 2001
1887 if (prefix) 2002 if (prefix)
1920 } 2035 }
1921 INC_BOTH (from, from_byte); 2036 INC_BOTH (from, from_byte);
1922 } 2037 }
1923 goto done; 2038 goto done;
1924 2039
2040 case Scomment_fence:
2041 comstyle = ST_COMMENT_STYLE;
2042 /* FALLTHROUGH */
1925 case Scomment: 2043 case Scomment:
1926 case Scomment_fence:
1927 if (!parse_sexp_ignore_comments) break; 2044 if (!parse_sexp_ignore_comments) break;
1928 while (1) 2045 UPDATE_SYNTAX_TABLE_FORWARD (from);
2046 found = forw_comment (from, from_byte, stop,
2047 comnested, comstyle, 0,
2048 &out_charpos, &out_bytepos, &dummy);
2049 from = out_charpos, from_byte = out_bytepos;
2050 if (!found)
1929 { 2051 {
1930 if (from == stop) 2052 if (depth == 0)
1931 { 2053 goto done;
1932 if (depth == 0) 2054 goto lose;
1933 goto done;
1934 goto lose;
1935 }
1936 UPDATE_SYNTAX_TABLE_FORWARD (from);
1937 c = FETCH_CHAR (from_byte);
1938 INC_BOTH (from, from_byte);
1939 if (code == Scomment
1940 ? (SYNTAX (c) == Sendcomment
1941 && SYNTAX_COMMENT_STYLE (c) == comstyle)
1942 : (SYNTAX (c) == Scomment_fence))
1943 /* we have encountered a comment end of the same style
1944 as the comment sequence which began this comment
1945 section */
1946 break;
1947 if (from < stop && SYNTAX_COMEND_FIRST (c)
1948 && SYNTAX_COMMENT_STYLE (c) == comstyle
1949 && (UPDATE_SYNTAX_TABLE_FORWARD (from),
1950 SYNTAX_COMEND_SECOND (FETCH_CHAR (from_byte)))
1951 && code == Scomment)
1952 /* we have encountered a comment end of the same style
1953 as the comment sequence which began this comment
1954 section */
1955 {
1956 INC_BOTH (from, from_byte);
1957 break;
1958 }
1959 } 2055 }
2056 INC_BOTH (from, from_byte);
2057 UPDATE_SYNTAX_TABLE_FORWARD (from);
1960 break; 2058 break;
1961 2059
1962 case Smath: 2060 case Smath:
1963 if (!sexpflag) 2061 if (!sexpflag)
1964 break; 2062 break;
2036 c = FETCH_CHAR (from_byte); 2134 c = FETCH_CHAR (from_byte);
2037 code = SYNTAX (c); 2135 code = SYNTAX (c);
2038 if (depth == min_depth) 2136 if (depth == min_depth)
2039 last_good = from; 2137 last_good = from;
2040 comstyle = 0; 2138 comstyle = 0;
2139 comnested = SYNTAX_COMMENT_NESTED (c);
2041 if (code == Sendcomment) 2140 if (code == Sendcomment)
2042 comstyle = SYNTAX_COMMENT_STYLE (c); 2141 comstyle = SYNTAX_COMMENT_STYLE (c);
2043 if (from > stop && SYNTAX_COMEND_SECOND (c) 2142 if (from > stop && SYNTAX_COMEND_SECOND (c)
2044 && prev_char_comend_first (from, from_byte) 2143 && prev_char_comend_first (from, from_byte)
2045 && parse_sexp_ignore_comments) 2144 && parse_sexp_ignore_comments)
2048 later, we can match only the proper comment begin 2147 later, we can match only the proper comment begin
2049 sequence of the same style. */ 2148 sequence of the same style. */
2050 DEC_BOTH (from, from_byte); 2149 DEC_BOTH (from, from_byte);
2051 UPDATE_SYNTAX_TABLE_BACKWARD (from); 2150 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2052 code = Sendcomment; 2151 code = Sendcomment;
2053 comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)); 2152 c1 = FETCH_CHAR (from_byte);
2153 comstyle = SYNTAX_COMMENT_STYLE (c1);
2154 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2054 } 2155 }
2055 2156
2056 /* Quoting turns anything except a comment-ender 2157 /* Quoting turns anything except a comment-ender
2057 into a word character. Note that this if cannot be true 2158 into a word character. Note that this if cannot be true
2058 if we decremented FROM in the if-statement above. */ 2159 if we decremented FROM in the if-statement above. */
2129 break; 2230 break;
2130 2231
2131 case Sendcomment: 2232 case Sendcomment:
2132 if (!parse_sexp_ignore_comments) 2233 if (!parse_sexp_ignore_comments)
2133 break; 2234 break;
2134 found = back_comment (from, from_byte, stop, comstyle, 2235 found = back_comment (from, from_byte, stop, comnested, comstyle,
2135 &out_charpos, &out_bytepos); 2236 &out_charpos, &out_bytepos);
2136 if (found != -1) 2237 if (found != -1)
2137 from = out_charpos, from_byte = out_bytepos; 2238 from = out_charpos, from_byte = out_bytepos;
2138 break; 2239 break;
2139 2240
2300 int commentstop; 2401 int commentstop;
2301 { 2402 {
2302 struct lisp_parse_state state; 2403 struct lisp_parse_state state;
2303 2404
2304 register enum syntaxcode code; 2405 register enum syntaxcode code;
2406 int c1;
2407 int comnested;
2305 struct level { int last, prev; }; 2408 struct level { int last, prev; };
2306 struct level levelstart[100]; 2409 struct level levelstart[100];
2307 register struct level *curlevel = levelstart; 2410 register struct level *curlevel = levelstart;
2308 struct level *endlevel = levelstart + 100; 2411 struct level *endlevel = levelstart + 100;
2309 int prev;
2310 register int depth; /* Paren depth of current scanning location. 2412 register int depth; /* Paren depth of current scanning location.
2311 level - levelstart equals this except 2413 level - levelstart equals this except
2312 when the depth becomes negative. */ 2414 when the depth becomes negative. */
2313 int mindepth; /* Lowest DEPTH value seen. */ 2415 int mindepth; /* Lowest DEPTH value seen. */
2314 int start_quoted = 0; /* Nonzero means starting after a char quote */ 2416 int start_quoted = 0; /* Nonzero means starting after a char quote */
2316 int prev_from; /* Keep one character before FROM. */ 2418 int prev_from; /* Keep one character before FROM. */
2317 int prev_from_byte; 2419 int prev_from_byte;
2318 int prev_from_syntax; 2420 int prev_from_syntax;
2319 int boundary_stop = commentstop == -1; 2421 int boundary_stop = commentstop == -1;
2320 int nofence; 2422 int nofence;
2423 int found;
2424 int out_bytepos, out_charpos;
2321 int temp; 2425 int temp;
2322 2426
2323 prev_from = from; 2427 prev_from = from;
2324 prev_from_byte = from_byte; 2428 prev_from_byte = from_byte;
2325 if (from != BEGV) 2429 if (from != BEGV)
2363 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE) 2467 ? ( INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2364 : -1); 2468 : -1);
2365 2469
2366 oldstate = Fcdr (oldstate); 2470 oldstate = Fcdr (oldstate);
2367 tem = Fcar (oldstate); 2471 tem = Fcar (oldstate);
2368 state.incomment = !NILP (tem); 2472 state.incomment = ( !NILP (tem)
2473 ? ( INTEGERP (tem) ? XINT (tem) : -1)
2474 : 0);
2369 2475
2370 oldstate = Fcdr (oldstate); 2476 oldstate = Fcdr (oldstate);
2371 tem = Fcar (oldstate); 2477 tem = Fcar (oldstate);
2372 start_quoted = !NILP (tem); 2478 start_quoted = !NILP (tem);
2373 2479
2374 /* if the eight element of the list is nil, we are in comment 2480 /* if the eighth element of the list is nil, we are in comment
2375 style a. If it is non-nil, we are in comment style b */ 2481 style a. If it is non-nil, we are in comment style b */
2376 oldstate = Fcdr (oldstate); 2482 oldstate = Fcdr (oldstate);
2377 oldstate = Fcdr (oldstate); 2483 oldstate = Fcdr (oldstate);
2378 tem = Fcar (oldstate); 2484 tem = Fcar (oldstate);
2379 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table) 2485 state.comstyle = NILP (tem) ? 0 : ( EQ (tem, Qsyntax_table)
2429 { 2535 {
2430 INC_FROM; 2536 INC_FROM;
2431 code = prev_from_syntax & 0xff; 2537 code = prev_from_syntax & 0xff;
2432 2538
2433 if (code == Scomment) 2539 if (code == Scomment)
2434 state.comstr_start = prev_from; 2540 {
2541 state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
2542 1 : -1);
2543 state.comstr_start = prev_from;
2544 }
2435 else if (code == Scomment_fence) 2545 else if (code == Scomment_fence)
2436 { 2546 {
2437 /* Record the comment style we have entered so that only 2547 /* Record the comment style we have entered so that only
2438 the comment-end sequence of the same style actually 2548 the comment-end sequence of the same style actually
2439 terminates the comment section. */ 2549 terminates the comment section. */
2440 state.comstyle = ( code == Scomment_fence 2550 state.comstyle = ST_COMMENT_STYLE;
2441 ? ST_COMMENT_STYLE 2551 state.incomment = -1;
2442 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte)));
2443 state.comstr_start = prev_from; 2552 state.comstr_start = prev_from;
2444 if (code != Scomment_fence)
2445 INC_FROM;
2446 code = Scomment; 2553 code = Scomment;
2447 } 2554 }
2448 else if (from < end) 2555 else if (from < end)
2449 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)) 2556 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax))
2450 if (SYNTAX_COMSTART_SECOND (FETCH_CHAR (from_byte))) 2557 if (c1 = FETCH_CHAR (from_byte),
2558 SYNTAX_COMSTART_SECOND (c1))
2451 /* Duplicate code to avoid a complex if-expression 2559 /* Duplicate code to avoid a complex if-expression
2452 which causes trouble for the SGI compiler. */ 2560 which causes trouble for the SGI compiler. */
2453 { 2561 {
2454 /* Record the comment style we have entered so that only 2562 /* Record the comment style we have entered so that only
2455 the comment-end sequence of the same style actually 2563 the comment-end sequence of the same style actually
2456 terminates the comment section. */ 2564 terminates the comment section. */
2457 state.comstyle = ( code == Scomment_fence 2565 state.comstyle = SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte));
2458 ? ST_COMMENT_STYLE 2566 comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
2459 : SYNTAX_COMMENT_STYLE (FETCH_CHAR (from_byte))); 2567 comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2568 state.incomment = comnested ? 1 : -1;
2460 state.comstr_start = prev_from; 2569 state.comstr_start = prev_from;
2461 if (code != Scomment_fence) 2570 INC_FROM;
2462 INC_FROM;
2463 code = Scomment; 2571 code = Scomment;
2464 } 2572 }
2465 2573
2466 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax)) 2574 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
2467 continue; 2575 continue;
2506 break; 2614 break;
2507 2615
2508 startincomment: 2616 startincomment:
2509 if (commentstop == 1) 2617 if (commentstop == 1)
2510 goto done; 2618 goto done;
2511 if (from != BEGV)
2512 {
2513 /* Enter the loop in the middle so that we find
2514 a 2-char comment ender if we start in the middle of it. */
2515 goto startincomment_1;
2516 }
2517 /* At beginning of buffer, enter the loop the ordinary way. */
2518 state.incomment = 1;
2519 goto commentloop; 2619 goto commentloop;
2520 2620
2521 case Scomment: 2621 case Scomment:
2522 state.incomment = 1; 2622 assert (state.incomment != 0); /* state.incomment = -1; */
2523 if (commentstop || boundary_stop) goto done; 2623 if (commentstop || boundary_stop) goto done;
2524 commentloop: 2624 commentloop:
2525 while (1) 2625 /* The (from == BEGV) test is to enter the loop in the middle so
2526 { 2626 that we find a 2-char comment ender even if we start in the
2527 if (from == end) goto done; 2627 middle of it. */
2528 prev = FETCH_CHAR (from_byte); 2628 found = forw_comment (from, from_byte, end,
2529 if (SYNTAX (prev) == Sendcomment 2629 state.incomment, state.comstyle,
2530 && SYNTAX_COMMENT_STYLE (prev) == state.comstyle) 2630 (from == BEGV) ? 0 : prev_from_syntax,
2531 /* Only terminate the comment section if the endcomment 2631 &out_charpos, &out_bytepos, &state.incomment);
2532 of the same style as the start sequence has been 2632 from = out_charpos; from_byte = out_bytepos;
2533 encountered. */ 2633 /* Beware! prev_from and friends are invalid now.
2534 break; 2634 Luckily, the `done' doesn't use them and the INC_FROM
2535 if (state.comstyle == ST_COMMENT_STYLE 2635 sets them to a sane value without looking at them. */
2536 && SYNTAX (prev) == Scomment_fence) 2636 if (!found) goto done;
2537 break;
2538 INC_FROM;
2539 startincomment_1:
2540 if (from < end && SYNTAX_FLAGS_COMEND_FIRST (prev_from_syntax)
2541 && SYNTAX_COMEND_SECOND (FETCH_CHAR (from_byte))
2542 && (SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax)
2543 == state.comstyle))
2544 /* Only terminate the comment section if the end-comment
2545 sequence of the same style as the start sequence has
2546 been encountered. */
2547 break;
2548 }
2549 INC_FROM; 2637 INC_FROM;
2550 state.incomment = 0; 2638 state.incomment = 0;
2551 state.comstyle = 0; /* reset the comment style */ 2639 state.comstyle = 0; /* reset the comment style */
2552 if (boundary_stop) goto done; 2640 if (boundary_stop) goto done;
2553 break; 2641 break;
2666 1. character address of start of innermost containing list; nil if none.\n\ 2754 1. character address of start of innermost containing list; nil if none.\n\
2667 2. character address of start of last complete sexp terminated.\n\ 2755 2. character address of start of last complete sexp terminated.\n\
2668 3. non-nil if inside a string.\n\ 2756 3. non-nil if inside a string.\n\
2669 (it is the character that will terminate the string,\n\ 2757 (it is the character that will terminate the string,\n\
2670 or t if the string should be terminated by a generic string delimiter.)\n\ 2758 or t if the string should be terminated by a generic string delimiter.)\n\
2671 4. t if inside a comment.\n\ 2759 4. nil if outside a comment, t if inside a non-nestable comment, \n\
2760 else an integer (the current comment nesting).\n\
2672 5. t if following a quote character.\n\ 2761 5. t if following a quote character.\n\
2673 6. the minimum paren-depth encountered during this scan.\n\ 2762 6. the minimum paren-depth encountered during this scan.\n\
2674 7. t if in a comment of style b; `syntax-table' if the comment\n\ 2763 7. t if in a comment of style b; `syntax-table' if the comment\n\
2675 should be terminated by a generic comment delimiter.\n\ 2764 should be terminated by a generic comment delimiter.\n\
2676 8. character address of start of comment or string; nil if not in one.\n\ 2765 8. character address of start of comment or string; nil if not in one.\n\
2717 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart), 2806 Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
2718 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart), 2807 Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
2719 Fcons (state.instring >= 0 2808 Fcons (state.instring >= 0
2720 ? (state.instring == ST_STRING_STYLE 2809 ? (state.instring == ST_STRING_STYLE
2721 ? Qt : make_number (state.instring)) : Qnil, 2810 ? Qt : make_number (state.instring)) : Qnil,
2722 Fcons (state.incomment ? Qt : Qnil, 2811 Fcons (state.incomment < 0 ? Qt :
2812 (state.incomment == 0 ? Qnil :
2813 make_number (state.incomment)),
2723 Fcons (state.quoted ? Qt : Qnil, 2814 Fcons (state.quoted ? Qt : Qnil,
2724 Fcons (make_number (state.mindepth), 2815 Fcons (make_number (state.mindepth),
2725 Fcons ((state.comstyle 2816 Fcons ((state.comstyle
2726 ? (state.comstyle == ST_COMMENT_STYLE 2817 ? (state.comstyle == ST_COMMENT_STYLE
2727 ? Qsyntax_table : Qt) : 2818 ? Qsyntax_table : Qt) :