Mercurial > emacs
comparison src/syntax.c @ 56088:a7a676c680d4
Include regex.h.
(skip_chars): New arg HANDLE_ISO_CLASSES. Callers changed.
If requested, make a list of classes, then check the scanned
chars for membership in them.
(in_classes): New function.
(Fskip_chars_forward): Doc fix.
author | Richard M. Stallman <rms@gnu.org> |
---|---|
date | Sun, 13 Jun 2004 22:25:34 +0000 |
parents | c1e92ca4c785 |
children | 6725db0f57d5 |
comparison
equal
deleted
inserted
replaced
56087:3d12da599e18 | 56088:a7a676c680d4 |
---|---|
24 #include "lisp.h" | 24 #include "lisp.h" |
25 #include "commands.h" | 25 #include "commands.h" |
26 #include "buffer.h" | 26 #include "buffer.h" |
27 #include "charset.h" | 27 #include "charset.h" |
28 #include "keymap.h" | 28 #include "keymap.h" |
29 #include "regex.h" | |
29 | 30 |
30 /* Make syntax table lookup grant data in gl_state. */ | 31 /* Make syntax table lookup grant data in gl_state. */ |
31 #define SYNTAX_ENTRY_VIA_PROPERTY | 32 #define SYNTAX_ENTRY_VIA_PROPERTY |
32 | 33 |
33 #include "syntax.h" | 34 #include "syntax.h" |
95 | 96 |
96 | 97 |
97 static int find_defun_start P_ ((int, int)); | 98 static int find_defun_start P_ ((int, int)); |
98 static int back_comment P_ ((int, int, int, int, int, int *, int *)); | 99 static int back_comment P_ ((int, int, int, int, int, int *, int *)); |
99 static int char_quoted P_ ((int, int)); | 100 static int char_quoted P_ ((int, int)); |
100 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object)); | 101 static Lisp_Object skip_chars P_ ((int, int, Lisp_Object, Lisp_Object, int)); |
101 static Lisp_Object scan_lists P_ ((int, int, int, int)); | 102 static Lisp_Object scan_lists P_ ((int, int, int, int)); |
102 static void scan_sexps_forward P_ ((struct lisp_parse_state *, | 103 static void scan_sexps_forward P_ ((struct lisp_parse_state *, |
103 int, int, int, int, | 104 int, int, int, int, |
104 int, Lisp_Object, int)); | 105 int, Lisp_Object, int)); |
106 static int in_classes P_ ((int, Lisp_Object)); | |
105 | 107 |
106 | 108 |
107 struct gl_state_s gl_state; /* Global state of syntax parser. */ | 109 struct gl_state_s gl_state; /* Global state of syntax parser. */ |
108 | 110 |
109 INTERVAL interval_of (); | 111 INTERVAL interval_of (); |
1319 STRING is like the inside of a `[...]' in a regular expression | 1321 STRING is like the inside of a `[...]' in a regular expression |
1320 except that `]' is never special and `\\' quotes `^', `-' or `\\' | 1322 except that `]' is never special and `\\' quotes `^', `-' or `\\' |
1321 (but not as the end of a range; quoting is never needed there). | 1323 (but not as the end of a range; quoting is never needed there). |
1322 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. | 1324 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter. |
1323 With arg "^a-zA-Z", skips nonletters stopping before first letter. | 1325 With arg "^a-zA-Z", skips nonletters stopping before first letter. |
1324 Returns the distance traveled, either zero or positive. | 1326 Char classes, e.g. `[:alpha:]', are supported. |
1325 Note that char classes, e.g. `[:alpha:]', are not currently supported; | 1327 |
1326 they will be treated as literals. */) | 1328 Returns the distance traveled, either zero or positive. */) |
1327 (string, lim) | 1329 (string, lim) |
1328 Lisp_Object string, lim; | 1330 Lisp_Object string, lim; |
1329 { | 1331 { |
1330 return skip_chars (1, 0, string, lim); | 1332 return skip_chars (1, 0, string, lim, 1); |
1331 } | 1333 } |
1332 | 1334 |
1333 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0, | 1335 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0, |
1334 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM. | 1336 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM. |
1335 See `skip-chars-forward' for details. | 1337 See `skip-chars-forward' for details. |
1336 Returns the distance traveled, either zero or negative. */) | 1338 Returns the distance traveled, either zero or negative. */) |
1337 (string, lim) | 1339 (string, lim) |
1338 Lisp_Object string, lim; | 1340 Lisp_Object string, lim; |
1339 { | 1341 { |
1340 return skip_chars (0, 0, string, lim); | 1342 return skip_chars (0, 0, string, lim, 1); |
1341 } | 1343 } |
1342 | 1344 |
1343 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0, | 1345 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0, |
1344 doc: /* Move point forward across chars in specified syntax classes. | 1346 doc: /* Move point forward across chars in specified syntax classes. |
1345 SYNTAX is a string of syntax code characters. | 1347 SYNTAX is a string of syntax code characters. |
1347 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. | 1349 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1348 This function returns the distance traveled, either zero or positive. */) | 1350 This function returns the distance traveled, either zero or positive. */) |
1349 (syntax, lim) | 1351 (syntax, lim) |
1350 Lisp_Object syntax, lim; | 1352 Lisp_Object syntax, lim; |
1351 { | 1353 { |
1352 return skip_chars (1, 1, syntax, lim); | 1354 return skip_chars (1, 1, syntax, lim, 0); |
1353 } | 1355 } |
1354 | 1356 |
1355 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0, | 1357 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0, |
1356 doc: /* Move point backward across chars in specified syntax classes. | 1358 doc: /* Move point backward across chars in specified syntax classes. |
1357 SYNTAX is a string of syntax code characters. | 1359 SYNTAX is a string of syntax code characters. |
1359 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. | 1361 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX. |
1360 This function returns the distance traveled, either zero or negative. */) | 1362 This function returns the distance traveled, either zero or negative. */) |
1361 (syntax, lim) | 1363 (syntax, lim) |
1362 Lisp_Object syntax, lim; | 1364 Lisp_Object syntax, lim; |
1363 { | 1365 { |
1364 return skip_chars (0, 1, syntax, lim); | 1366 return skip_chars (0, 1, syntax, lim, 0); |
1365 } | 1367 } |
1366 | 1368 |
1367 static Lisp_Object | 1369 static Lisp_Object |
1368 skip_chars (forwardp, syntaxp, string, lim) | 1370 skip_chars (forwardp, syntaxp, string, lim, handle_iso_classes) |
1369 int forwardp, syntaxp; | 1371 int forwardp, syntaxp; |
1370 Lisp_Object string, lim; | 1372 Lisp_Object string, lim; |
1373 int handle_iso_classes; | |
1371 { | 1374 { |
1372 register unsigned int c; | 1375 register unsigned int c; |
1373 unsigned char fastmap[0400]; | 1376 unsigned char fastmap[0400]; |
1374 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters | 1377 /* If SYNTAXP is 0, STRING may contain multi-byte form of characters |
1375 of which codes don't fit in FASTMAP. In that case, set the | 1378 of which codes don't fit in FASTMAP. In that case, set the |
1381 int multibyte = !NILP (current_buffer->enable_multibyte_characters); | 1384 int multibyte = !NILP (current_buffer->enable_multibyte_characters); |
1382 int string_multibyte; | 1385 int string_multibyte; |
1383 int size_byte; | 1386 int size_byte; |
1384 const unsigned char *str; | 1387 const unsigned char *str; |
1385 int len; | 1388 int len; |
1389 Lisp_Object iso_classes; | |
1386 | 1390 |
1387 CHECK_STRING (string); | 1391 CHECK_STRING (string); |
1388 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2); | 1392 char_ranges = (int *) alloca (SCHARS (string) * (sizeof (int)) * 2); |
1389 string_multibyte = STRING_MULTIBYTE (string); | 1393 string_multibyte = STRING_MULTIBYTE (string); |
1390 str = SDATA (string); | 1394 str = SDATA (string); |
1391 size_byte = SBYTES (string); | 1395 size_byte = SBYTES (string); |
1396 iso_classes = Qnil; | |
1392 | 1397 |
1393 /* Adjust the multibyteness of the string to that of the buffer. */ | 1398 /* Adjust the multibyteness of the string to that of the buffer. */ |
1394 if (multibyte != string_multibyte) | 1399 if (multibyte != string_multibyte) |
1395 { | 1400 { |
1396 int nbytes; | 1401 int nbytes; |
1442 | 1447 |
1443 if (syntaxp) | 1448 if (syntaxp) |
1444 fastmap[syntax_spec_code[c & 0377]] = 1; | 1449 fastmap[syntax_spec_code[c & 0377]] = 1; |
1445 else | 1450 else |
1446 { | 1451 { |
1452 if (handle_iso_classes && c == '[' | |
1453 && i_byte < size_byte | |
1454 && STRING_CHAR (str + i_byte, size_byte - i_byte) == ':') | |
1455 { | |
1456 const unsigned char *class_beg = str + i_byte + 1; | |
1457 const unsigned char *class_end = class_beg; | |
1458 const unsigned char *class_limit = str + size_byte; | |
1459 /* Leave room for the null. */ | |
1460 unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1]; | |
1461 re_wctype_t cc; | |
1462 | |
1463 if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH) | |
1464 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH; | |
1465 | |
1466 while (class_end != class_limit | |
1467 && ! (*class_end >= 0200 | |
1468 || *class_end <= 040 | |
1469 || (*class_end == ':' | |
1470 && class_end[1] == ']'))) | |
1471 class_end++; | |
1472 | |
1473 if (class_end == class_limit | |
1474 || *class_end >= 0200 | |
1475 || *class_end <= 040) | |
1476 error ("Invalid ISO C character class"); | |
1477 | |
1478 bcopy (class_beg, class_name, class_end - class_beg); | |
1479 class_name[class_end - class_beg] = 0; | |
1480 | |
1481 cc = re_wctype (class_name); | |
1482 if (cc == 0) | |
1483 error ("Invalid ISO C character class"); | |
1484 | |
1485 iso_classes = Fcons (make_number (cc), iso_classes); | |
1486 | |
1487 i_byte = class_end + 2 - str; | |
1488 continue; | |
1489 } | |
1490 | |
1447 if (c == '\\') | 1491 if (c == '\\') |
1448 { | 1492 { |
1449 if (i_byte == size_byte) | 1493 if (i_byte == size_byte) |
1450 break; | 1494 break; |
1451 | 1495 |
1635 break; | 1679 break; |
1636 p = GAP_END_ADDR; | 1680 p = GAP_END_ADDR; |
1637 stop = endp; | 1681 stop = endp; |
1638 } | 1682 } |
1639 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); | 1683 c = STRING_CHAR_AND_LENGTH (p, MAX_MULTIBYTE_LENGTH, nbytes); |
1684 | |
1685 if (! NILP (iso_classes) && in_classes (c, iso_classes)) | |
1686 { | |
1687 if (negate) | |
1688 break; | |
1689 else | |
1690 goto fwd_ok; | |
1691 } | |
1692 | |
1640 if (SINGLE_BYTE_CHAR_P (c)) | 1693 if (SINGLE_BYTE_CHAR_P (c)) |
1641 { | 1694 { |
1642 if (!fastmap[c]) | 1695 if (!fastmap[c]) |
1643 break; | 1696 break; |
1644 } | 1697 } |
1657 if (c >= char_ranges[i] && c <= char_ranges[i + 1]) | 1710 if (c >= char_ranges[i] && c <= char_ranges[i + 1]) |
1658 break; | 1711 break; |
1659 if (!(negate ^ (i < n_char_ranges))) | 1712 if (!(negate ^ (i < n_char_ranges))) |
1660 break; | 1713 break; |
1661 } | 1714 } |
1715 fwd_ok: | |
1662 p += nbytes, pos++, pos_byte += nbytes; | 1716 p += nbytes, pos++, pos_byte += nbytes; |
1663 } | 1717 } |
1664 else | 1718 else |
1665 while (1) | 1719 while (1) |
1666 { | 1720 { |
1669 if (p >= endp) | 1723 if (p >= endp) |
1670 break; | 1724 break; |
1671 p = GAP_END_ADDR; | 1725 p = GAP_END_ADDR; |
1672 stop = endp; | 1726 stop = endp; |
1673 } | 1727 } |
1728 | |
1729 if (!NILP (iso_classes) && in_classes (*p, iso_classes)) | |
1730 { | |
1731 if (negate) | |
1732 break; | |
1733 else | |
1734 goto fwd_unibyte_ok; | |
1735 } | |
1736 | |
1674 if (!fastmap[*p]) | 1737 if (!fastmap[*p]) |
1675 break; | 1738 break; |
1739 | |
1740 fwd_unibyte_ok: | |
1676 p++, pos++; | 1741 p++, pos++; |
1677 } | 1742 } |
1678 } | 1743 } |
1679 else | 1744 else |
1680 { | 1745 { |
1696 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes); | 1761 PARSE_MULTIBYTE_SEQ (p, MAX_MULTIBYTE_LENGTH, nbytes); |
1697 if (prev_p - p > nbytes) | 1762 if (prev_p - p > nbytes) |
1698 p = prev_p - 1, c = *p, nbytes = 1; | 1763 p = prev_p - 1, c = *p, nbytes = 1; |
1699 else | 1764 else |
1700 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); | 1765 c = STRING_CHAR (p, MAX_MULTIBYTE_LENGTH); |
1766 | |
1767 if (! NILP (iso_classes) && in_classes (c, iso_classes)) | |
1768 { | |
1769 if (negate) | |
1770 break; | |
1771 else | |
1772 goto back_ok; | |
1773 } | |
1774 | |
1701 if (SINGLE_BYTE_CHAR_P (c)) | 1775 if (SINGLE_BYTE_CHAR_P (c)) |
1702 { | 1776 { |
1703 if (!fastmap[c]) | 1777 if (!fastmap[c]) |
1704 break; | 1778 break; |
1705 } | 1779 } |
1710 if (c >= char_ranges[i] && c <= char_ranges[i + 1]) | 1784 if (c >= char_ranges[i] && c <= char_ranges[i + 1]) |
1711 break; | 1785 break; |
1712 if (!(negate ^ (i < n_char_ranges))) | 1786 if (!(negate ^ (i < n_char_ranges))) |
1713 break; | 1787 break; |
1714 } | 1788 } |
1789 back_ok: | |
1715 pos--, pos_byte -= nbytes; | 1790 pos--, pos_byte -= nbytes; |
1716 } | 1791 } |
1717 else | 1792 else |
1718 while (1) | 1793 while (1) |
1719 { | 1794 { |
1722 if (p <= endp) | 1797 if (p <= endp) |
1723 break; | 1798 break; |
1724 p = GPT_ADDR; | 1799 p = GPT_ADDR; |
1725 stop = endp; | 1800 stop = endp; |
1726 } | 1801 } |
1802 | |
1803 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes)) | |
1804 { | |
1805 if (negate) | |
1806 break; | |
1807 else | |
1808 goto back_unibyte_ok; | |
1809 } | |
1810 | |
1727 if (!fastmap[p[-1]]) | 1811 if (!fastmap[p[-1]]) |
1728 break; | 1812 break; |
1813 | |
1814 back_unibyte_ok: | |
1729 p--, pos--; | 1815 p--, pos--; |
1730 } | 1816 } |
1731 } | 1817 } |
1732 } | 1818 } |
1733 | 1819 |
1745 SET_PT_BOTH (pos, pos_byte); | 1831 SET_PT_BOTH (pos, pos_byte); |
1746 immediate_quit = 0; | 1832 immediate_quit = 0; |
1747 | 1833 |
1748 return make_number (PT - start_point); | 1834 return make_number (PT - start_point); |
1749 } | 1835 } |
1836 } | |
1837 | |
1838 /* Return 1 if character C belongs to one of the ISO classes | |
1839 in the list ISO_CLASSES. Each class is represented by an | |
1840 integer which is its type according to re_wctype. */ | |
1841 | |
1842 static int | |
1843 in_classes (c, iso_classes) | |
1844 int c; | |
1845 Lisp_Object iso_classes; | |
1846 { | |
1847 int fits_class = 0; | |
1848 | |
1849 while (! NILP (iso_classes)) | |
1850 { | |
1851 Lisp_Object elt; | |
1852 elt = XCAR (iso_classes); | |
1853 iso_classes = XCDR (iso_classes); | |
1854 | |
1855 if (re_iswctype (c, XFASTINT (elt))) | |
1856 fits_class = 1; | |
1857 } | |
1858 | |
1859 return fits_class; | |
1750 } | 1860 } |
1751 | 1861 |
1752 /* Jump over a comment, assuming we are at the beginning of one. | 1862 /* Jump over a comment, assuming we are at the beginning of one. |
1753 FROM is the current position. | 1863 FROM is the current position. |
1754 FROM_BYTE is the bytepos corresponding to FROM. | 1864 FROM_BYTE is the bytepos corresponding to FROM. |