486
|
1 /* Declarations having to do with GNU Emacs syntax tables.
|
75227
|
2 Copyright (C) 1985, 1993, 1994, 1997, 1998, 2001, 2002, 2003, 2004,
|
79759
|
3 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
486
|
4
|
|
5 This file is part of GNU Emacs.
|
|
6
|
94994
|
7 GNU Emacs is free software: you can redistribute it and/or modify
|
486
|
8 it under the terms of the GNU General Public License as published by
|
94994
|
9 the Free Software Foundation, either version 3 of the License, or
|
|
10 (at your option) any later version.
|
486
|
11
|
|
12 GNU Emacs is distributed in the hope that it will be useful,
|
|
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 GNU General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
94994
|
18 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
|
486
|
19
|
|
20
|
|
21 extern Lisp_Object Qsyntax_table_p;
|
20349
|
22 extern void update_syntax_table P_ ((int, int, int, Lisp_Object));
|
486
|
23
|
|
/* The standard syntax table is stored in buffer_defaults, where it
   will automatically be used in all new buffers.  The expansion is
   an lvalue, so it can be both read and assigned.  */
#define Vstandard_syntax_table buffer_defaults.syntax_table
|
|
27
|
13143
|
/* A syntax table is a chartable whose elements are cons cells
   (CODE+FLAGS . MATCHING-CHAR).  MATCHING-CHAR can be nil if the char
   is not a kind of parenthesis.

   The low 8 bits of CODE+FLAGS is a code, as follows:  */

enum syntaxcode
  {
    Swhitespace,     /* for a whitespace character */
    Spunct,          /* for random punctuation characters */
    Sword,           /* for a word constituent */
    Ssymbol,         /* symbol constituent but not word constituent */
    Sopen,           /* for a beginning delimiter */
    Sclose,          /* for an ending delimiter */
    Squote,          /* for a prefix character like Lisp ' */
    Sstring,         /* for a string-grouping character like Lisp " */
    Smath,           /* for delimiters like $ in TeX.  */
    Sescape,         /* for a character that begins a C-style escape */
    Scharquote,      /* for a character that quotes the following character */
    Scomment,        /* for a comment-starting character */
    Sendcomment,     /* for a comment-ending character */
    Sinherit,        /* use the standard syntax table for this character */
    Scomment_fence,  /* Starts/ends comment which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Sstring_fence,   /* Starts/ends string which is delimited on the
                        other side by any char with the same syntaxcode.  */
    Smax             /* Upper bound on codes that are meaningful */
  };
|
|
56
|
17045
|
/* Set the syntax entry VAL for char C in table TABLE.
   This is a thin wrapper around CHAR_TABLE_SET; each argument is
   passed through exactly once.  */

#define SET_RAW_SYNTAX_ENTRY(table, c, val)     \
  CHAR_TABLE_SET ((table), (c), (val))
|
5441
|
61
|
88390
|
/* Set the syntax entry VAL for char-range RANGE in table TABLE.
   RANGE is a cons (FROM . TO) specifying the range of characters.
   Expands to a call of Fset_char_table_range and yields its value.  */

#define SET_RAW_SYNTAX_ENTRY_RANGE(table, range, val)   \
  Fset_char_table_range ((table), (range), (val))
|
17045
|
67
|
17465
|
/* SYNTAX_ENTRY (C) fetches the raw entry for character C.

   When SYNTAX_ENTRY_VIA_PROPERTY is defined, the entry comes from
   gl_state.global_code if gl_state.use_global is set, and otherwise
   from the table lookup SYNTAX_ENTRY_INT (C).  Without
   SYNTAX_ENTRY_VIA_PROPERTY, SYNTAX_ENTRY is simply another name for
   SYNTAX_ENTRY_INT.

   The lookup itself is done by CHAR_TABLE_REF (defined elsewhere);
   per the original comment it does inheritance.  */

/* CURRENT_SYNTAX_TABLE gives the syntax table valid for the current
   position: the table recorded in gl_state (which may come from text
   properties) when SYNTAX_ENTRY_VIA_PROPERTY is defined, and the
   current buffer's syntax table otherwise.  */

#ifdef SYNTAX_ENTRY_VIA_PROPERTY
#  define SYNTAX_ENTRY(c)                                               \
    (gl_state.use_global ? gl_state.global_code : SYNTAX_ENTRY_INT (c))
#  define CURRENT_SYNTAX_TABLE gl_state.current_syntax_table
#else
#  define SYNTAX_ENTRY SYNTAX_ENTRY_INT
#  define CURRENT_SYNTAX_TABLE current_buffer->syntax_table
#endif

/* Look up character C in CURRENT_SYNTAX_TABLE, ignoring any global
   override in gl_state.  */
#define SYNTAX_ENTRY_INT(c) CHAR_TABLE_REF (CURRENT_SYNTAX_TABLE, (c))
|
17045
|
85
|
|
/* Extract the information from the entry for character C
   in the current syntax table.

   SYNTAX (C) yields the syntax code (the low 8 bits of the entry's
   car) as an enum syntaxcode.
   SYNTAX_WITH_FLAGS (C) yields the whole integer in the entry's car,
   code and flags together.
   SYNTAX_MATCH (C) yields the entry's cdr, the matching character.

   When the entry is not a cons, the first two yield Swhitespace and
   SYNTAX_MATCH yields Qnil.

   With GCC the entry is held in a block-local temporary inside a
   statement expression.  Other compilers go through the single
   shared variable syntax_temp, so there the macros overwrite that
   variable on every use.  */

#ifdef __GNUC__
#define SYNTAX(c)                                                       \
  ({ Lisp_Object _syntax_temp;                                          \
     _syntax_temp = SYNTAX_ENTRY (c);                                   \
     (CONSP (_syntax_temp)                                              \
      ? (enum syntaxcode) (XINT (XCAR (_syntax_temp)) & 0xff)           \
      : Swhitespace); })

#define SYNTAX_WITH_FLAGS(c)                                            \
  ({ Lisp_Object _syntax_temp;                                          \
     _syntax_temp = SYNTAX_ENTRY (c);                                   \
     (CONSP (_syntax_temp)                                              \
      ? XINT (XCAR (_syntax_temp))                                      \
      : (int) Swhitespace); })

#define SYNTAX_MATCH(c)                                                 \
  ({ Lisp_Object _syntax_temp;                                          \
     _syntax_temp = SYNTAX_ENTRY (c);                                   \
     (CONSP (_syntax_temp)                                              \
      ? XCDR (_syntax_temp)                                             \
      : Qnil); })
#else
extern Lisp_Object syntax_temp;
#define SYNTAX(c)                                                       \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (CONSP (syntax_temp)                                                 \
    ? (enum syntaxcode) (XINT (XCAR (syntax_temp)) & 0xff)              \
    : Swhitespace))

#define SYNTAX_WITH_FLAGS(c)                                            \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (CONSP (syntax_temp)                                                 \
    ? XINT (XCAR (syntax_temp))                                         \
    : (int) Swhitespace))

#define SYNTAX_MATCH(c)                                                 \
  (syntax_temp = SYNTAX_ENTRY ((c)),                                    \
   (CONSP (syntax_temp)                                                 \
    ? XCDR (syntax_temp)                                                \
    : Qnil))
#endif
|
486
|
130
|
25446
|
/* Then there are seven single-bit flags that have the following meanings:
  1. This character is the first of a two-character comment-start sequence.
  2. This character is the second of a two-character comment-start sequence.
  3. This character is the first of a two-character comment-end sequence.
  4. This character is the second of a two-character comment-end sequence.
  5. This character is a prefix, for backward-prefix-chars.
  6. see below
  7. This character is part of a nestable comment sequence.
  Flag N is stored in bit 15 + N of CODE+FLAGS (flag 1 is bit 16,
  flag 7 is bit 22).
  Note that any two-character sequence whose first character has flag 1
  and whose second character has flag 2 will be interpreted as a comment start.

  Flag 6 is used to discriminate between two different comment styles.
  Languages such as C++ allow two orthogonal syntax start/end pairs
  and flag 6 is used to determine whether a comment-end sequence or an
  Sendcomment character ends style a or b.  Comment start sequences
  can start style a or b.
  Style a is always the default.
  */
|
486
|
148
|
21206
|
/* These macros extract a particular flag for a given character.
   Each one looks up C with SYNTAX_WITH_FLAGS and yields 1 if the
   flag bit is set and 0 otherwise.  The flags occupy bits 16
   through 22 of the value.  */

/* C is the first char of a two-character comment-start sequence.  */
#define SYNTAX_COMSTART_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 16) & 1)

/* C is the second char of a two-character comment-start sequence.  */
#define SYNTAX_COMSTART_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 17) & 1)

/* C is the first char of a two-character comment-end sequence.  */
#define SYNTAX_COMEND_FIRST(c) ((SYNTAX_WITH_FLAGS (c) >> 18) & 1)

/* C is the second char of a two-character comment-end sequence.  */
#define SYNTAX_COMEND_SECOND(c) ((SYNTAX_WITH_FLAGS (c) >> 19) & 1)

/* C is a prefix character, for backward-prefix-chars.  */
#define SYNTAX_PREFIX(c) ((SYNTAX_WITH_FLAGS (c) >> 20) & 1)

/* Comment style selector for C: 0 or 1.  */
#define SYNTAX_COMMENT_STYLE(c) ((SYNTAX_WITH_FLAGS (c) >> 21) & 1)

/* C is part of a nestable comment sequence.  */
#define SYNTAX_COMMENT_NESTED(c) ((SYNTAX_WITH_FLAGS (c) >> 22) & 1)
|
|
164
|
21206
|
/* These macros extract specific flags from an integer
   that holds the syntax code and the flags.  Each yields 1 if the
   flag bit is set in FLAGS and 0 otherwise; FLAGS is evaluated
   exactly once.  */

/* First char of a two-character comment-start sequence (bit 16).  */
#define SYNTAX_FLAGS_COMSTART_FIRST(flags) (((flags) >> 16) & 1)

/* Second char of a two-character comment-start sequence (bit 17).  */
#define SYNTAX_FLAGS_COMSTART_SECOND(flags) (((flags) >> 17) & 1)

/* First char of a two-character comment-end sequence (bit 18).  */
#define SYNTAX_FLAGS_COMEND_FIRST(flags) (((flags) >> 18) & 1)

/* Second char of a two-character comment-end sequence (bit 19).  */
#define SYNTAX_FLAGS_COMEND_SECOND(flags) (((flags) >> 19) & 1)

/* Prefix character, for backward-prefix-chars (bit 20).  */
#define SYNTAX_FLAGS_PREFIX(flags) (((flags) >> 20) & 1)

/* Comment style selector (bit 21).  */
#define SYNTAX_FLAGS_COMMENT_STYLE(flags) (((flags) >> 21) & 1)

/* Part of a nestable comment sequence (bit 22).  */
#define SYNTAX_FLAGS_COMMENT_NESTED(flags) (((flags) >> 22) & 1)
|
|
181
|
486
|
/* This array, indexed by a character, contains the syntax code which that
   character signifies (as a char).  For example,
   (enum syntaxcode) syntax_spec_code['w'] is Sword.
   It has 0400 (256) elements, one per possible unsigned char value.  */

extern unsigned char syntax_spec_code[0400];

/* Indexed by syntax code, give the letter that describes it.
   It has 16 elements.  */

extern char syntax_code_spec[16];
|
|
191
|
20649
|
/* Convert the byte offset BYTEPOS into a character position,
   for the object recorded in gl_state with SETUP_SYNTAX_TABLE_FOR_OBJECT.

   The value is meant for use in the UPDATE_SYNTAX_TABLE... macros.
   These macros do nothing when parse_sexp_lookup_properties is 0,
   so we return 0 in that case, for speed.

   Otherwise the conversion depends on gl_state.object:
   - a string: string_byte_to_char on that string;
   - a buffer: BYTEPOS is taken relative to that buffer's BEGV_BYTE
     and the result is relative to its BEGV (both 1-based);
   - nil: the same, using the current buffer's BEGV_BYTE and BEGV;
   - anything else: BYTEPOS is returned unchanged.  */

#define SYNTAX_TABLE_BYTE_TO_CHAR(bytepos)                              \
  (! parse_sexp_lookup_properties                                       \
   ? 0                                                                  \
   : STRINGP (gl_state.object)                                          \
   ? string_byte_to_char (gl_state.object, (bytepos))                   \
   : BUFFERP (gl_state.object)                                          \
   ? (buf_bytepos_to_charpos                                            \
      (XBUFFER (gl_state.object),                                       \
       (bytepos) + BUF_BEGV_BYTE (XBUFFER (gl_state.object)) - 1)       \
      - BUF_BEGV (XBUFFER (gl_state.object)) + 1)                       \
   : NILP (gl_state.object)                                             \
   ? BYTE_TO_CHAR ((bytepos) + BEGV_BYTE - 1) - BEGV + 1                \
   : (bytepos))
|
|
210
|
21481
|
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position before CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS has reached gl_state.e_property.  In that case it calls
   update_syntax_table at CHARPOS + gl_state.offset with direction 1.
   The value is 1 if update_syntax_table was called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_FORWARD(charpos)                    \
  (parse_sexp_lookup_properties                                 \
   && (charpos) >= gl_state.e_property                          \
   ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,   \
                           gl_state.object),                    \
      1)                                                        \
   : 0)
|
17465
|
221
|
21481
|
/* Make syntax table state (gl_state) good for CHARPOS, assuming it is
   currently good for a position after CHARPOS.

   Does nothing unless parse_sexp_lookup_properties is nonzero and
   CHARPOS is before gl_state.b_property.  In that case it calls
   update_syntax_table at CHARPOS + gl_state.offset with direction -1.
   The value is 1 if update_syntax_table was called, 0 otherwise.  */

#define UPDATE_SYNTAX_TABLE_BACKWARD(charpos)                   \
  (parse_sexp_lookup_properties                                 \
   && (charpos) < gl_state.b_property                           \
   ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0,  \
                           gl_state.object),                    \
      1)                                                        \
   : 0)
|
17465
|
232
|
21481
|
/* Make syntax table good for CHARPOS.

   Does nothing when parse_sexp_lookup_properties is zero.  Otherwise,
   if CHARPOS is before gl_state.b_property, call update_syntax_table
   at CHARPOS + gl_state.offset with direction -1; else if CHARPOS has
   reached gl_state.e_property, call it with direction 1.
   The value is 1 if update_syntax_table was called, 0 otherwise.

   CHARPOS may be evaluated more than once, so it should be free of
   side effects.  */

#define UPDATE_SYNTAX_TABLE(charpos)                                    \
  (parse_sexp_lookup_properties                                         \
   && ((charpos) < gl_state.b_property                                  \
       ? (update_syntax_table ((charpos) + gl_state.offset, -1, 0,      \
                               gl_state.object),                        \
          1)                                                            \
       : ((charpos) >= gl_state.e_property                              \
          ? (update_syntax_table ((charpos) + gl_state.offset, 1, 0,    \
                                  gl_state.object),                     \
             1)                                                         \
          : 0)))
|
17465
|
247
|
|
/* This macro should be called with FROM at the start of forward
   search, or after the last position of the backward search.  It
   makes sure that the first char is picked up with correct table, so
   one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   call.
   Sign of COUNT gives the direction of the search.

   It resets gl_state to cover the whole accessible part of the
   current buffer (BEGV up to ZV) with the buffer's own syntax table,
   object nil and offset 0.  Then, if parse_sexp_lookup_properties is
   nonzero, it calls update_syntax_table at FROM (forward) or FROM - 1
   (backward); the backward call is skipped when FROM is at BEGV.

   FROM and COUNT may be evaluated more than once.  */

#define SETUP_SYNTAX_TABLE(FROM, COUNT)                                 \
do                                                                      \
  {                                                                     \
    gl_state.b_property = BEGV;                                         \
    gl_state.e_property = ZV + 1;                                       \
    gl_state.object = Qnil;                                             \
    gl_state.use_global = 0;                                            \
    gl_state.offset = 0;                                                \
    gl_state.current_syntax_table = current_buffer->syntax_table;       \
    if (parse_sexp_lookup_properties)                                   \
      {                                                                 \
        if ((COUNT) > 0 || (FROM) > BEGV)                               \
          {                                                             \
            update_syntax_table ((COUNT) > 0 ? (FROM) : (FROM) - 1,     \
                                 (COUNT), 1, Qnil);                     \
          }                                                             \
      }                                                                 \
  }                                                                     \
while (0)
|
17465
|
271
|
|
/* Like SETUP_SYNTAX_TABLE, but in OBJECT.  If OBJECT is nil, use the
   current buffer.  If it is t, ignore properties altogether.

   This is meant for regex.c to use.  For buffers, regex.c passes arguments
   to the UPDATE_SYNTAX_TABLE macros which are relative to BEGV.
   So if it is a buffer (or nil), we set the offset field to BEGV - 1;
   adding the offset to such a 1-based relative position then gives
   the absolute buffer position.

   The initial valid range [b_property, e_property) is set by kind:
   - buffer or nil: 1 up to ZV - BEGV + 1 of that buffer;
   - t: 0 up to 1500000000, a bound chosen by the original code;
   - anything else (treated as a string): 0 up to 1 + SCHARS (OBJECT).

   If parse_sexp_lookup_properties is nonzero, update_syntax_table is
   then called at FROM + offset for a forward scan (COUNT > 0), or one
   position earlier for a backward scan.

   COUNT is parenthesized at each use, so an argument such as `a | b'
   is compared and passed as a whole.  FROM and COUNT may be evaluated
   more than once.  */

#define SETUP_SYNTAX_TABLE_FOR_OBJECT(OBJECT, FROM, COUNT)              \
do                                                                      \
  {                                                                     \
    gl_state.object = (OBJECT);                                         \
    if (BUFFERP (gl_state.object))                                      \
      {                                                                 \
        struct buffer *buf = XBUFFER (gl_state.object);                 \
        gl_state.b_property = 1;                                        \
        gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;        \
        gl_state.offset = BUF_BEGV (buf) - 1;                           \
      }                                                                 \
    else if (NILP (gl_state.object))                                    \
      {                                                                 \
        gl_state.b_property = 1;                                        \
        gl_state.e_property = ZV - BEGV + 1;                            \
        gl_state.offset = BEGV - 1;                                     \
      }                                                                 \
    else if (EQ (gl_state.object, Qt))                                  \
      {                                                                 \
        gl_state.b_property = 0;                                        \
        gl_state.e_property = 1500000000;                               \
        gl_state.offset = 0;                                            \
      }                                                                 \
    else                                                                \
      {                                                                 \
        gl_state.b_property = 0;                                        \
        gl_state.e_property = 1 + SCHARS (gl_state.object);             \
        gl_state.offset = 0;                                            \
      }                                                                 \
    gl_state.use_global = 0;                                            \
    gl_state.current_syntax_table = current_buffer->syntax_table;       \
    if (parse_sexp_lookup_properties)                                   \
      update_syntax_table (((FROM) + gl_state.offset                    \
                            + ((COUNT) > 0 ? 0 : -1)),                  \
                           (COUNT), 1, gl_state.object);                \
  }                                                                     \
while (0)
|
17465
|
316
|
|
317 struct gl_state_s
|
|
318 {
|
20649
|
319 Lisp_Object object; /* The object we are scanning. */
|
17465
|
320 int start; /* Where to stop. */
|
|
321 int stop; /* Where to stop. */
|
|
322 int use_global; /* Whether to use global_code
|
|
323 or c_s_t. */
|
|
324 Lisp_Object global_code; /* Syntax code of current char. */
|
|
325 Lisp_Object current_syntax_table; /* Syntax table for current pos. */
|
|
326 Lisp_Object old_prop; /* Syntax-table prop at prev pos. */
|
39795
|
327 int b_property; /* First index where c_s_t is valid. */
|
17465
|
328 int e_property; /* First index where c_s_t is
|
|
329 not valid. */
|
|
330 INTERVAL forward_i; /* Where to start lookup on forward */
|
|
331 INTERVAL backward_i; /* or backward movement. The
|
|
332 data in c_s_t is valid
|
|
333 between these intervals,
|
|
334 and possibly at the
|
|
335 intervals too, depending
|
|
336 on: */
|
18678
|
337 /* Offset for positions specified to UPDATE_SYNTAX_TABLE. */
|
|
338 int offset;
|
17465
|
339 };
|
|
340
|
|
341 extern struct gl_state_s gl_state;
|
|
342 extern int parse_sexp_lookup_properties;
|
20349
|
343 extern INTERVAL interval_of P_ ((int, Lisp_Object));
|
|
344
|
|
345 extern int scan_words P_ ((int, int));
|
52401
|
346
|
|
347 /* arch-tag: 28833cca-cd73-4741-8c85-a3111166a0e0
|
|
348 (do not change this comment) */
|