89909
|
1 ;;; ebnf-ebx.el --- parser for EBNF used to specify XML (EBNFX)
|
|
2
|
|
3 ;; Copyright (C) 2004 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: Vinicius Jose Latorre <viniciusjl@ig.com.br>
|
|
6 ;; Maintainer: Vinicius Jose Latorre <viniciusjl@ig.com.br>
|
|
7 ;; Time-stamp: <2004/04/03 16:45:34 vinicius>
|
|
8 ;; Keywords: wp, ebnf, PostScript
|
|
9 ;; Version: 1.1
|
|
10
|
|
11 ;; This file is part of GNU Emacs.
|
|
12
|
|
13 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
14 ;; it under the terms of the GNU General Public License as published by
|
|
15 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
16 ;; any later version.
|
|
17
|
|
18 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
21 ;; GNU General Public License for more details.
|
|
22
|
|
23 ;; You should have received a copy of the GNU General Public License
|
|
24 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
25 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
26 ;; Boston, MA 02111-1307, USA.
|
|
27
|
|
28 ;;; Commentary:
|
|
29
|
|
30 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
31 ;;
|
|
32 ;;
|
|
33 ;; This is part of ebnf2ps package.
|
|
34 ;;
|
|
35 ;; This package defines a parser for EBNF used to specify XML (EBNFX).
|
|
36 ;;
|
|
37 ;; See ebnf2ps.el for documentation.
|
|
38 ;;
|
|
39 ;;
|
|
40 ;; EBNFX Syntax
|
|
41 ;; ------------
|
|
42 ;;
|
|
43 ;; See the URL:
|
|
44 ;; `http://www.w3.org/TR/2004/REC-xml-20040204/#sec-notation'
|
|
45 ;; (Extensible Markup Language (XML) 1.0 (Third Edition))
|
|
46 ;;
|
|
47 ;;
|
|
48 ;; rule ::= symbol '::=' expression
|
|
49 ;; /* rules are separated by at least one blank line. */
|
|
50 ;;
|
|
51 ;; expression ::= concatenation ('|' concatenation)*
|
|
52 ;;
|
|
53 ;; concatenation ::= exception*
|
|
54 ;;
|
|
55 ;; exception ::= term ('-' term)?
|
|
56 ;;
|
|
57 ;; term ::= factor ('*' | '+' | '?')?
|
|
58 ;;
|
|
59 ;; factor ::= hex-char+
|
|
60 ;; | '[' '^'? ( char ( '-' char )? )+ ']'
|
|
61 ;; | '"' 'string' '"'
|
|
62 ;; | "'" "string" "'"
|
|
63 ;; | '(' expression ')'
|
|
64 ;; | symbol
|
|
65 ;;
|
|
66 ;; symbol ::= 'upper or lower case letter'
|
|
67 ;; ('upper or lower case letter' | '-' | '_')*
|
|
68 ;; /* upper and lower 8-bit accentuated characters are included */
|
|
69 ;;
|
|
70 ;; hex-char ::= '#x' [0-9A-Fa-f]+
|
|
71 ;;
|
|
72 ;; char ::= hex-char | 'any character except control characters'
|
|
73 ;; /* 8-bit accentuated characters are included */
|
|
74 ;;
|
|
75 ;; any-char ::= char | 'newline' | 'tab'
|
|
76 ;;
|
|
77 ;; ignore ::= '[' ('wfc' | 'WFC' | 'vc' | 'VC') ':' ( any-char - ']' )* ']'
|
|
78 ;;
|
|
79 ;; comment ::= '/*' ( any-char - '*/' )* '*/'
|
|
80 ;;
|
|
81 ;;
|
|
82 ;; Below is the Notation section extracted from the URL cited above.
|
|
83 ;;
|
|
84 ;; 6 Notation
|
|
85 ;;
|
|
86 ;; The formal grammar of XML is given in this specification using a simple
|
|
87 ;; Extended Backus-Naur Form (EBNF) notation. Each rule in the grammar defines
|
|
88 ;; one symbol, in the form
|
|
89 ;;
|
|
90 ;; symbol ::= expression
|
|
91 ;;
|
|
92 ;; Symbols are written with an initial capital letter if they are the start
|
|
93 ;; symbol of a regular language, otherwise with an initial lowercase letter.
|
|
94 ;; Literal strings are quoted.
|
|
95 ;;
|
|
96 ;; Within the expression on the right-hand side of a rule, the following
|
|
97 ;; expressions are used to match strings of one or more characters:
|
|
98 ;;
|
|
99 ;; #xN
|
|
100 ;;
|
|
101 ;; where N is a hexadecimal integer, the expression matches the character
|
|
102 ;; whose number (code point) in ISO/IEC 10646 is N. The number of leading
|
|
103 ;; zeros in the #xN form is insignificant.
|
|
104 ;;
|
|
105 ;; [a-zA-Z], [#xN-#xN]
|
|
106 ;;
|
|
107 ;; matches any Char with a value in the range(s) indicated (inclusive).
|
|
108 ;;
|
|
109 ;; [abc], [#xN#xN#xN]
|
|
110 ;;
|
|
111 ;; matches any Char with a value among the characters enumerated.
|
|
112 ;; Enumerations and ranges can be mixed in one set of brackets.
|
|
113 ;;
|
|
114 ;; [^a-z], [^#xN-#xN]
|
|
115 ;;
|
|
116 ;; matches any Char with a value outside the range indicated.
|
|
117 ;;
|
|
118 ;; [^abc], [^#xN#xN#xN]
|
|
119 ;;
|
|
120 ;; matches any Char with a value not among the characters given.
|
|
121 ;; Enumerations and ranges of forbidden values can be mixed in one set of
|
|
122 ;; brackets.
|
|
123 ;;
|
|
124 ;; "string"
|
|
125 ;;
|
|
126 ;; matches a literal string matching that given inside the double quotes.
|
|
127 ;;
|
|
128 ;; 'string'
|
|
129 ;;
|
|
130 ;; matches a literal string matching that given inside the single quotes.
|
|
131 ;;
|
|
132 ;; These symbols may be combined to match more complex patterns as follows,
|
|
133 ;; where A and B represent simple expressions:
|
|
134 ;;
|
|
135 ;; (expression)
|
|
136 ;;
|
|
137 ;; expression is treated as a unit and may be combined as described in this
|
|
138 ;; list.
|
|
139 ;;
|
|
140 ;; A?
|
|
141 ;;
|
|
142 ;; matches A or nothing; optional A.
|
|
143 ;;
|
|
144 ;; A B
|
|
145 ;;
|
|
146 ;; matches A followed by B. This operator has higher precedence than
|
|
147 ;; alternation; thus A B | C D is identical to (A B) | (C D).
|
|
148 ;;
|
|
149 ;; A | B
|
|
150 ;;
|
|
151 ;; matches A or B.
|
|
152 ;;
|
|
153 ;; A - B
|
|
154 ;;
|
|
155 ;; matches any string that matches A but does not match B.
|
|
156 ;;
|
|
157 ;; A+
|
|
158 ;;
|
|
159 ;; matches one or more occurrences of A. Concatenation has higher
|
|
160 ;; precedence than alternation; thus A+ | B+ is identical to (A+) | (B+).
|
|
161 ;;
|
|
162 ;; A*
|
|
163 ;;
|
|
164 ;; matches zero or more occurrences of A. Concatenation has higher
|
|
165 ;; precedence than alternation; thus A* | B* is identical to (A*) | (B*).
|
|
166 ;;
|
|
167 ;; Other notations used in the productions are:
|
|
168 ;;
|
|
169 ;; /* ... */
|
|
170 ;;
|
|
171 ;; comment.
|
|
172 ;;
|
|
173 ;; [ wfc: ... ]
|
|
174 ;;
|
|
175 ;; well-formedness constraint; this identifies by name a constraint on
|
|
176 ;; well-formed documents associated with a production.
|
|
177 ;;
|
|
178 ;; [ vc: ... ]
|
|
179 ;;
|
|
180 ;; validity constraint; this identifies by name a constraint on valid
|
|
181 ;; documents associated with a production.
|
|
182 ;;
|
|
183 ;;
|
|
184 ;; Differences Between EBNFX And ebnf2ps EBNFX
|
|
185 ;; -------------------------------------------
|
|
186 ;;
|
|
187 ;; Besides the characters that EBNFX accepts, ebnf2ps EBNFX accepts also the
|
|
188 ;; underscore (_) and minus (-) for rule name and European 8-bit accentuated
|
|
189 ;; characters (from \240 to \377) for rule name, string and comment. Also
|
|
190 ;; rule name can start with upper case letter.
|
|
191 ;;
|
|
192 ;;
|
|
193 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
194
|
|
195 ;;; Code:
|
|
196
|
|
197
|
|
198 (require 'ebnf-otz)
|
|
199
|
|
200
|
|
(defvar ebnf-ebx-lex nil
  "Text of the last token scanned by the function `ebnf-ebx-lex'.

The function `ebnf-ebx-lex' stores here the buffer text of the
terminal or non-terminal it has just recognized; the parser reads
it right after receiving a `terminal' or `non-terminal' token.")
|
|
203
|
|
204
|
|
205 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
206 ;; Syntactic analyzer
|
|
207
|
|
208
|
|
209 ;;; rulelist ::= rule+
|
|
210
|
|
(defun ebnf-ebx-parser (start)
  "Parse the EBNFX rules found between START and `ebnf-limit'.

Point is moved to START for the scan and restored afterwards.
Progress is reported through `ebnf-message-float'.  Each parsed rule
is offered to `ebnf-add-empty-rule-list'; the rules for which that
call returns nil are collected.  Return the collected rules, most
recently parsed first.  Signal an error if no token at all is found."
  (let ((size (1+ (- ebnf-limit start)))
        (offset (1- start))
        (saved-point (point))
        (productions nil)
        lookahead parsed)
    (goto-char start)
    (setq lookahead (ebnf-ebx-lex))
    (when (eq lookahead 'end-of-input)
      (error "Invalid EBNFX file format"))
    ;; a leading end of rule (blank lines before the first rule) is ignored
    (when (eq lookahead 'end-of-rule)
      (setq lookahead (ebnf-ebx-lex)))
    (while (not (eq lookahead 'end-of-input))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) offset) 100.0) size))
      ;; `ebnf-ebx-rule' returns (NEXT-TOKEN . PRODUCTION)
      (setq parsed (ebnf-ebx-rule lookahead)
            lookahead (car parsed))
      (unless (ebnf-add-empty-rule-list (cdr parsed))
        (push (cdr parsed) productions)))
    (goto-char saved-point)
    productions))
|
|
234
|
|
235
|
|
236 ;;; rule ::= symbol '::=' expression
|
|
237
|
|
238
|
|
(defun ebnf-ebx-rule (token)
  "Parse one rule: symbol `::=' expression.

TOKEN is the token already read by the caller; it must be
`non-terminal', in which case the rule name is the current value of
the variable `ebnf-ebx-lex'.  The pending `ebnf-action' is captured
for this rule and reset to nil.  Return (NEXT-TOKEN . PRODUCTION),
where NEXT-TOKEN is the token that follows the rule terminator."
  (let ((rule-name ebnf-ebx-lex)
        (pending-action ebnf-action)
        body)
    (setq ebnf-action nil)
    (unless (eq token 'non-terminal)
      (error "Invalid rule name"))
    (unless (eq (ebnf-ebx-lex) 'production)
      (error "Invalid rule: missing `::='"))
    ;; BODY is (TERMINATING-TOKEN . EXPRESSION)
    (setq body (ebnf-ebx-expression))
    (unless (memq (car body) '(end-of-rule end-of-input))
      (error "Invalid rule: there is no end of rule"))
    (ebnf-eps-add-production rule-name)
    (cons (ebnf-ebx-lex)
          (ebnf-make-production rule-name (cdr body) pending-action))))
|
|
256
|
|
257
|
|
258 ;; expression ::= concatenation ('|' concatenation)*
|
|
259
|
|
260
|
|
(defun ebnf-ebx-expression ()
  "Parse: concatenation (`|' concatenation)*.

Read concatenations for as long as each one is followed by an
`alternative' token, then hand the accumulated alternatives (most
recent first) and the last (TOKEN . CONCATENATION) pair to
`ebnf-token-alternative' and return its result."
  (let ((alternatives nil)
        (current (ebnf-ebx-concatenation (ebnf-ebx-lex))))
    (while (eq (car current) 'alternative)
      (push (cdr current) alternatives)
      (setq current (ebnf-ebx-concatenation (ebnf-ebx-lex))))
    (ebnf-token-alternative alternatives current)))
|
|
268
|
|
269
|
|
270 ;; concatenation ::= exception*
|
|
271
|
|
272
|
|
(defun ebnf-ebx-concatenation (token)
  "Parse a sequence of exceptions starting at TOKEN.

At least one element is required; otherwise signal \"Empty element\".
Elements are collected, most recent first, until
`ebnf-ebx-exception' yields no element.  Return (NEXT-TOKEN . NODE),
where NODE is built by `ebnf-token-sequence' and NEXT-TOKEN is the
token that stopped the sequence."
  (let* ((parsed (ebnf-ebx-exception token))
         (next (car parsed))
         (element (cdr parsed))
         (elements nil))
    (unless element
      (error "Empty element"))
    (while element
      (push element elements)
      (setq parsed (ebnf-ebx-exception next)
            next (car parsed)
            element (cdr parsed)))
    (cons next (ebnf-token-sequence elements))))
|
|
286
|
|
287
|
|
288 ;;; exception ::= term ('-' term)?
|
|
289
|
|
290
|
|
(defun ebnf-ebx-exception (token)
  "Parse: term (`-' term)?  starting at TOKEN.

Return (NEXT-TOKEN . NODE).  When the first term is followed by an
`exception' token, a second term is parsed and both are combined
with `ebnf-make-except'; otherwise the first term's pair is
returned unchanged."
  (let ((base (ebnf-ebx-term token)))
    (cond
     ;; plain term, no `-' follows
     ((not (eq (car base) 'exception))
      base)
     ;; term - term
     (t
      (let* ((subtracted (ebnf-ebx-term (ebnf-ebx-lex)))
             (node (ebnf-make-except (cdr base) (cdr subtracted))))
        (cons (car subtracted) node))))))
|
|
298
|
|
299
|
|
300
|
|
301 ;;; term ::= factor ('*' | '+' | '?')?
|
|
302
|
|
303
|
|
(defun ebnf-ebx-term (token)
  "Parse: factor (`*' | `+' | `?')?  starting at TOKEN.

Return (NEXT-TOKEN . NODE).  When TOKEN does not start a factor,
NODE is nil and NEXT-TOKEN is TOKEN itself.  Otherwise the token
after the factor is read; a repetition or option suffix wraps the
factor and one more token is read."
  (let ((node (ebnf-ebx-factor token))
        (next token))
    (when node
      (setq next (ebnf-ebx-lex))
      (let ((suffix-p (memq next '(zero-or-more one-or-more optional))))
        (cond ((eq next 'zero-or-more)
               (setq node (ebnf-make-zero-or-more node)))
              ((eq next 'one-or-more)
               (setq node (ebnf-make-one-or-more node)))
              ((eq next 'optional)
               (setq node (ebnf-token-optional node))))
        ;; the suffix has been consumed: fetch the token that follows it
        (when suffix-p
          (setq next (ebnf-ebx-lex)))))
    (cons next node)))
|
|
318
|
|
319
|
|
320 ;;; factor ::= hex-char+
|
|
321 ;;; | '[' '^'? ( char ( '-' char )? )+ ']'
|
|
322 ;;; | '"' 'string' '"'
|
|
323 ;;; | "'" "string" "'"
|
|
324 ;;; | '(' expression ')'
|
|
325 ;;; | symbol
|
|
326 ;;;
|
|
327 ;;; symbol ::= 'upper or lower case letter'
|
|
328 ;;; ('upper or lower case letter' | '-' | '_')*
|
|
329 ;;; /* upper and lower 8-bit accentuated characters are included */
|
|
330 ;;;
|
|
331 ;;; hex-char ::= '#x' [0-9A-Fa-f]+
|
|
332 ;;;
|
|
333 ;;; char ::= hex-char | 'any character except control characters'
|
|
334 ;;; /* 8-bit accentuated characters are included */
|
|
335 ;;;
|
|
336 ;;; any-char ::= char | 'newline' | 'tab'
|
|
337
|
|
338
|
|
(defun ebnf-ebx-factor (token)
  "Build the node for the factor introduced by TOKEN.

A `terminal' or `non-terminal' token yields a node made from the
text held in the variable `ebnf-ebx-lex'.  A `begin-group' token
parses a parenthesized expression and requires a closing `)'.
Any other token is not a factor and yields nil."
  (cond
   ;; group
   ((eq token 'begin-group)
    (let ((group (ebnf-ebx-expression)))
      (unless (eq (car group) 'end-group)
        (error "Missing `)'"))
      (cdr group)))
   ;; non-terminal
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-ebx-lex))
   ;; terminal
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-ebx-lex))
   ;; no element: fall through, `cond' returns nil
   ))
|
|
357
|
|
358
|
|
359 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
360 ;; Lexical analyzer
|
|
361
|
|
362
|
|
(defconst ebnf-ebx-token-table
  (make-vector 256 'error)
  "Vector mapping a character code (0 to 255) to a lexical token.

Every entry starts as `error'; `ebnf-ebx-initialize' fills in the
entries for the characters EBNFX accepts.")
|
|
365
|
|
366
|
|
(defun ebnf-ebx-initialize ()
  "Initialize EBNFX token table.

Letters A-Z, a-z and the 8-bit characters from octal 240 to octal
377 are marked `non-terminal'; line breaks, spacing, form feed and
the punctuation used by EBNFX get their own tokens.  Entries that
are not touched here keep whatever value they already had (`error'
in a fresh table)."
  ;; characters that may start a non-terminal name
  (dolist (range '((?A . ?Z)            ; printable character: A-Z
                   (?a . ?z)            ; printable character: a-z
                   (?\240 . ?\377)))    ; European 8-bit accentuated characters
    (let ((code (car range)))
      (while (<= code (cdr range))
        (aset ebnf-ebx-token-table code 'non-terminal)
        (setq code (1+ code)))))
  ;; single characters with a dedicated token
  (dolist (entry '((?\n   . end-of-rule)   ; [NL] linefeed
                   (?\r   . end-of-rule)   ; [CR] carriage return
                   (?\013 . space)         ; [VT] vertical tab
                   (?\t   . space)         ; [HT] horizontal tab
                   (?\s   . space)         ; [SP] space
                   (?\f   . form-feed)     ; [FF] form feed
                   (?#    . hash)
                   (?\"   . double-quote)
                   (?\'   . single-quote)
                   (?\(   . begin-group)
                   (?\)   . end-group)
                   (?-    . exception)
                   (?:    . colon)
                   (?\[   . begin-square)
                   (?|    . alternative)
                   (?*    . zero-or-more)
                   (?+    . one-or-more)
                   (?\?   . optional)))
    (aset ebnf-ebx-token-table (car entry) (cdr entry)))
  ;; comment character (kept last so the function's value is unchanged)
  (aset ebnf-ebx-token-table ?/ 'comment))
|
|
409
|
|
410
|
|
;; The 8-bit range 240-377 (octal) is not written in the string itself;
;; it is passed to `ebnf-range-regexp', which produces the final value.
(defconst ebnf-ebx-non-terminal-chars
  (ebnf-range-regexp "-_A-Za-z" ?\240 ?\377)
  "Characters accepted inside a non-terminal name.

Covers `-', `_', ASCII letters and the characters from octal 240 to
octal 377.  The lexer passes this value to `ebnf-buffer-substring'
to extract a rule name.")

(defconst ebnf-ebx-non-terminal-letter-chars
  (ebnf-range-regexp "A-Za-z" ?\240 ?\377)
  "Like `ebnf-ebx-non-terminal-chars' but without `-' and `_'.")
|
|
416
|
|
417
|
|
(defun ebnf-ebx-lex ()
  "Lexical analyzer for EBNFX.

Return a lexical token.

Scanning starts at point.  Spaces, comments, form feeds, line breaks
that `ebnf-ebx-skip-end-of-rule' does not treat as an end of rule,
and `[wfc: ...]' / `[vc: ...]' constraints are skipped first; the
skipping stops as soon as point reaches `ebnf-limit'.

The value is one of `end-of-input', `end-of-rule', `terminal',
`non-terminal', `production', or, for a one-character token, the
symbol stored for that character in `ebnf-ebx-token-table'.  For
`terminal' and `non-terminal' the matched text is stored in the
variable `ebnf-ebx-lex'.  Signal an error on an illegal character
or on a `:' that does not start `::='.

See documentation for variable `ebnf-ebx-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces and comments
      (while (and
              ;; Never classify the character at or beyond `ebnf-limit':
              ;; the skip helpers cannot move past the limit, so without
              ;; this test a line break sitting just after the limit would
              ;; be re-read forever.
              (< (point) ebnf-limit)
              (if (> (following-char) 255)
                  ;; the token table only covers codes 0 to 255
                  (progn
                    (setq token 'error)
                    nil)
                (setq token (aref ebnf-ebx-token-table (following-char)))
                (cond
                 ((eq token 'space)
                  (skip-chars-forward " \013\t" ebnf-limit)
                  t)
                 ((eq token 'comment)
                  (ebnf-ebx-skip-comment))
                 ((eq token 'form-feed)
                  (forward-char)
                  (setq ebnf-action 'form-feed))
                 ((eq token 'end-of-rule)
                  ;; non-nil only when this was not a real end of rule
                  (ebnf-ebx-skip-end-of-rule))
                 ((and (eq token 'begin-square)
                       (let ((case-fold-search t))
                         (looking-at "\\[\\(wfc\\|vc\\):")))
                  (ebnf-ebx-skip-constraint))
                 (t nil)
                 )))))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
        'end-of-input)
       ;; error
       ((eq token 'error)
        (error "Illegal character"))
       ;; end of rule
       ((eq token 'end-of-rule)
        'end-of-rule)
       ;; terminal: #x [0-9A-Fa-f]+
       ((eq token 'hash)
        (setq ebnf-ebx-lex (ebnf-ebx-character))
        'terminal)
       ;; terminal: "string"
       ((eq token 'double-quote)
        (setq ebnf-ebx-lex (ebnf-ebx-string ?\"))
        'terminal)
       ;; terminal: 'string'
       ((eq token 'single-quote)
        (setq ebnf-ebx-lex (ebnf-ebx-string ?\'))
        'terminal)
       ;; terminal: [ ^? ( char ( - char )? )+ ]
       ((eq token 'begin-square)
        (setq ebnf-ebx-lex (ebnf-ebx-range))
        'terminal)
       ;; non-terminal: NAME
       ((eq token 'non-terminal)
        (setq ebnf-ebx-lex
              (ebnf-buffer-substring ebnf-ebx-non-terminal-chars))
        'non-terminal)
       ;; colon: ::=
       ((eq token 'colon)
        (or (looking-at "::=")
            (error "Missing `::=' token"))
        (forward-char 3)
        'production)
       ;; miscellaneous: (, ), *, +, ?, |, -
       (t
        (forward-char)
        token)
       ))))
|
|
492
|
|
493
|
|
;; The range 177-237 (octal) is not written in the string itself;
;; it is passed to `ebnf-range-regexp', which produces the final value.
(defconst ebnf-ebx-constraint-chars
  (ebnf-range-regexp "^\000-\010\016-\037]" ?\177 ?\237)
  "Character set skipped inside a `[wfc: ...]' or `[vc: ...]' constraint.

Given to `skip-chars-forward' by `ebnf-ebx-skip-constraint'.  It is
a negated set that leaves out `]' and the control characters listed
in the string.")
|
|
497
|
|
498
|
|
(defun ebnf-ebx-skip-constraint ()
  "Skip a constraint up to and including its closing `]'.

Move forward over the characters of `ebnf-ebx-constraint-chars', no
further than `ebnf-limit'.  Signal an error if nothing could be
skipped or if the character reached is not `]'.  Return t."
  (when (<= (skip-chars-forward ebnf-ebx-constraint-chars ebnf-limit) 0)
    (error "Invalid character"))
  (when (/= (following-char) ?\])
    (error "Missing end of constraint `]'"))
  (forward-char)
  t)
|
|
506
|
|
507
|
|
508
|
|
(defun ebnf-ebx-skip-end-of-rule ()
  "Skip line breaks, the spacing after them and any comment that follows.

An end of rule is a run of two or more consecutive end of line
characters.  Line breaks, spaces and comments are skipped
repeatedly, never moving past `ebnf-limit'.  Return nil when an
end of rule was seen, t when only isolated line breaks were
skipped (so the caller keeps skipping)."
  (let (eor-p)
    (while (progn
             ;; end of rule ==> 2 or more consecutive end of lines
             (when (> (skip-chars-forward "\r\n" ebnf-limit) 1)
               (setq eor-p t))
             ;; skip spaces
             (skip-chars-forward " \013\t" ebnf-limit)
             ;; skip comments -- but only inside the region being parsed:
             ;; a `/' found at `ebnf-limit' belongs to text outside it and
             ;; scanning it would signal a spurious comment error.
             (and (< (point) ebnf-limit)
                  (= (following-char) ?/)
                  (ebnf-ebx-skip-comment))))
    (not eor-p)))
|
|
521
|
|
522
|
|
;; The range 177-237 (octal) is not written in the strings themselves;
;; it is passed to `ebnf-range-regexp', which produces the final values.
(defconst ebnf-ebx-comment-chars
  (ebnf-range-regexp "^\000-\010\016-\037\\*" ?\177 ?\237)
  "Character set skipped inside a comment body.

Given to `skip-chars-forward' by `ebnf-ebx-skip-comment'.  It is a
negated set that leaves out `*' and the control characters listed
in the string.")

(defconst ebnf-ebx-filename-chars
  (ebnf-range-regexp "^\000-\037\\*" ?\177 ?\237)
  "Character set accepted in an EPS file name inside a comment.

Used by `ebnf-ebx-eps-filename'.  It is a negated set that leaves
out `*' and the control characters from octal 000 to octal 037.")
|
|
528
|
|
529
|
|
(defun ebnf-ebx-skip-comment ()
  "Skip a `/* ... */' comment; point must be on the opening `/'.

The first character after `/*' selects an action.  While
`ebnf-eps-executing' is non-nil, `[' adds and `]' removes the EPS
context named by the file name that follows.  In every other case
`ebnf-action' is set to the `ebnf-comment-table' entry for that
character, or to nil when the character code is beyond the table.

Signal an error if the comment does not start with `/*' or is not
closed before `ebnf-limit'.  Return t with point after the closing
`*/'."
  (forward-char)
  (or (= (following-char) ?*)
      (error "Invalid beginning of comment"))
  (forward-char)
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-ebx-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-ebx-eps-filename)))
   ;; any other action in comment
   (t
    (let ((char (following-char)))
      ;; A character outside the table (e.g. a non-8-bit character) simply
      ;; carries no action; indexing the table with it would signal
      ;; `args-out-of-range'.
      (setq ebnf-action
            (and (< char (length ebnf-comment-table))
                 (aref ebnf-comment-table char)))))
   )
  ;; advance from `*' to `*' until one is immediately followed by `/'
  (while (progn
           (skip-chars-forward ebnf-ebx-comment-chars ebnf-limit)
           (or (= (following-char) ?*)
               (error "Missing end of comment"))
           (forward-char)
           (and (/= (following-char) ?/)
                (< (point) ebnf-limit))))
  ;; check for a valid end of comment
  (and (>= (point) ebnf-limit)
       (error "Missing end of comment"))
  (forward-char)
  t)
|
|
558
|
|
559
|
|
(defun ebnf-ebx-eps-filename ()
  "Read the EPS file name that follows the `[' or `]' at point.

The name is made of `ebnf-ebx-filename-chars' characters and may
contain `*', as long as the `*' is not the one that closes the
comment.  Return the name as a string.

When the name is ended by the closing `*/', point is left on that
closing `*', so that the caller's comment scan finds the end of the
comment it expects."
  (forward-char)
  (let (fname nchar)
    (while (progn
             (setq fname
                   (concat fname
                           (ebnf-buffer-substring ebnf-ebx-filename-chars)))
             ;; continue only when the run of `*' just skipped is part of
             ;; the name, i.e. it is not followed by `/'
             (and (< (point) ebnf-limit)
                  (> (setq nchar (skip-chars-forward "*" ebnf-limit)) 0)
                  (< (point) ebnf-limit)
                  (/= (following-char) ?/)))
      (setq fname (concat fname (make-string nchar ?*))
            nchar nil))
    (if (or (not nchar) (= nchar 0))
        fname
      (when (and (< (point) ebnf-limit)
                 (= (following-char) ?/))
        ;; The last `*' skipped closes the comment: it is not part of the
        ;; name, and point must go back onto it.  Otherwise the caller
        ;; would skip the `/' and run on to the next `*' in the buffer.
        (backward-char)
        (setq nchar (1- nchar)))
      (concat fname (make-string nchar ?*)))))
|
|
579
|
|
580
|
|
;; The 8-bit range 240-377 (octal) is not written in the strings
;; themselves; it is passed to `ebnf-range-regexp', which produces the
;; final values.
(defconst ebnf-ebx-double-string-chars
  (ebnf-range-regexp "\t -!#-~" ?\240 ?\377)
  "Character set accepted inside a double-quoted string.

Given to `skip-chars-forward' by `ebnf-ebx-string'.  Tab and the
printable ASCII characters except the double quote are listed.")

(defconst ebnf-ebx-single-string-chars
  (ebnf-range-regexp "\t -&(-~" ?\240 ?\377)
  "Character set accepted inside a single-quoted string.

Given to `skip-chars-forward' by `ebnf-ebx-string'.  Tab and the
printable ASCII characters except the single quote are listed.")
|
|
586
|
|
587
|
|
(defun ebnf-ebx-string (delim)
  "Read the quoted string whose opening delimiter is at point.

DELIM is the delimiter character: the double quote selects
`ebnf-ebx-double-string-chars', anything else selects
`ebnf-ebx-single-string-chars'.  Return the text between the
delimiters, without text properties, and leave point after the
closing delimiter.  Signal an error if the closing delimiter is
not found."
  ;; step over the opening delimiter
  (forward-char)
  (let ((start (point))
        (allowed (if (= delim ?\")
                     ebnf-ebx-double-string-chars
                   ebnf-ebx-single-string-chars))
        end)
    (skip-chars-forward allowed ebnf-limit)
    (unless (= (following-char) delim)
      (error "Missing string delimiter `%c'" delim))
    (setq end (point))
    ;; step over the closing delimiter
    (forward-char)
    (buffer-substring-no-properties start end)))
|
|
603
|
|
604
|
|
(defun ebnf-ebx-character ()
  "Read a hexadecimal character `#x' [0-9A-Fa-f]+ starting at point.

Return its text, leading `#x' included, without text properties.
Point is moved by `ebnf-ebx-hex-character', which also signals the
error for a malformed character."
  (let ((start (point)))
    (ebnf-ebx-hex-character)
    (buffer-substring-no-properties start (point))))
|
|
612
|
|
613
|
|
(defun ebnf-ebx-range ()
  "Read a character range [ ^? ( char ( - char )? )+ ] starting at point.

Point must be on the opening `['.  An optional `^' and then an
optional `-' are accepted right after it.  Return the whole text
of the range, brackets included, without text properties, and
leave point after the closing `]'.  Signal an error if
`ebnf-limit' is reached first."
  (let ((start (point))
        (more t))
    ;; opening bracket, optional negation, optional literal minus
    (forward-char)
    (when (= (following-char) ?^)
      (forward-char))
    (when (= (following-char) ?-)
      (forward-char))
    ;; one or more: char ( - char )?
    (while more
      (ebnf-ebx-any-character)
      (when (= (following-char) ?-)
        (forward-char)
        (ebnf-ebx-any-character))
      (setq more (and (/= (following-char) ?\])
                      (< (point) ebnf-limit))))
    (when (>= (point) ebnf-limit)
      (error "Missing end of character range `]'"))
    ;; closing bracket
    (forward-char)
    (buffer-substring-no-properties start (point))))
|
|
635
|
|
636
|
|
(defun ebnf-ebx-any-character ()
  "Step over one character of a character range.

A `#' is handed to `ebnf-ebx-hex-character' with errors disabled,
so a `#' that does not start `#x' counts as an ordinary character.
Any other character is stepped over when it is a space or printable
ASCII character other than `-' and `]', or lies between octal 240
and octal 377.  Everything else signals an error."
  (let ((code (following-char)))
    (cond
     ((= code ?#)
      (ebnf-ebx-hex-character t))
     ;; rejected: control characters, `-', `]', 177-237 and non-8-bit codes
     ((or (< code ?\s)
          (= code ?-)
          (= code ?\])
          (and (< ?~ code) (< code ?\240))
          (< ?\377 code))
      (error "Invalid character `%c'" code))
     (t
      (forward-char)))))
|
|
649
|
|
650
|
|
(defun ebnf-ebx-hex-character (&optional no-error)
  "Step over a hexadecimal character `#x' [0-9A-Fa-f]+ at point.

Point must be on the `#', which is always stepped over.  When an
`x' follows, at least one hexadecimal digit is required after it
(digits are not read past `ebnf-limit'); return t.  When no `x'
follows, return NO-ERROR if it is non-nil, leaving point just after
the `#'; otherwise signal an error."
  (forward-char)
  (cond
   ((= (following-char) ?x)
    (forward-char)
    (if (> (skip-chars-forward "0-9A-Fa-f" ebnf-limit) 0)
        t
      (error "Invalid hexadecimal character")))
   ;; a lone `#': tolerated only on request
   (no-error)
   (t
    (error "Invalid hexadecimal character"))))
|
|
660
|
|
661
|
|
662 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
663
|
|
664
|
|
665 (provide 'ebnf-ebx)
|
|
666
|
|
667 ;;; arch-tag: bfe2f95b-66bc-4dc6-8b7e-b7831e68f5fb
|
|
668 ;;; ebnf-ebx.el ends here
|