Mercurial > emacs
annotate lisp/nxml/rng-cmpct.el @ 97537:1e0f10eba754
*** empty log message ***
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Mon, 18 Aug 2008 12:04:35 +0000 |
parents | e374c747704b |
children | a9dc0e7c3f2b |
rev | line source |
---|---|
86361 | 1 ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas |
2 | |
87665 | 3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc. |
86361 | 4 |
5 ;; Author: James Clark | |
6 ;; Keywords: XML, RelaxNG | |
7 | |
86545 | 8 ;; This file is part of GNU Emacs. |
9 | |
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
10 ;; GNU Emacs is free software: you can redistribute it and/or modify |
86545 | 11 ;; it under the terms of the GNU General Public License as published by |
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
12 ;; the Free Software Foundation, either version 3 of the License, or |
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
13 ;; (at your option) any later version. |
86361 | 14 |
86545 | 15 ;; GNU Emacs is distributed in the hope that it will be useful, |
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 ;; GNU General Public License for more details. | |
86361 | 19 |
86545 | 20 ;; You should have received a copy of the GNU General Public License |
94666
d495d4d5452f
Switch to recommended form of GPLv3 permissions notice.
Glenn Morris <rgm@gnu.org>
parents:
87665
diff
changeset
|
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. |
86361 | 22 |
23 ;;; Commentary: | |
24 | |
25 ;; This parses a RELAX NG Compact Syntax schema into the form | |
26 ;; specified in rng-pttrn.el. | |
27 ;; | |
28 ;; RELAX NG Compact Syntax is specified by | |
29 ;; http://relaxng.org/compact.html | |
30 ;; | |
31 ;; This file uses the prefix "rng-c-". | |
32 | |
33 ;;; Code: | |
34 | |
35 (require 'nxml-util) | |
36 (require 'rng-util) | |
37 (require 'rng-uri) | |
38 (require 'rng-pttrn) | |
39 | |
40 ;;;###autoload | |
41 (defun rng-c-load-schema (filename) | |
42 "Load a schema in RELAX NG compact syntax from FILENAME. | |
43 Return a pattern." | |
44 (rng-c-parse-file filename)) | |
45 | |
46 ;;; Error handling | |
47 | |
48 (put 'rng-c-incorrect-schema | |
49 'error-conditions | |
50 '(error rng-error nxml-file-parse-error rng-c-incorrect-schema)) | |
51 | |
52 (put 'rng-c-incorrect-schema | |
53 'error-message | |
54 "Incorrect schema") | |
55 | |
56 (defun rng-c-signal-incorrect-schema (filename pos message) | |
57 (nxml-signal-file-parse-error filename | |
58 pos | |
59 message | |
60 'rng-c-incorrect-schema)) | |
61 | |
62 ;;; Lexing | |
63 | |
64 (defconst rng-c-keywords | |
65 '("attribute" | |
66 "default" | |
67 "datatypes" | |
68 "div" | |
69 "element" | |
70 "empty" | |
71 "external" | |
72 "grammar" | |
73 "include" | |
74 "inherit" | |
75 "list" | |
76 "mixed" | |
77 "namespace" | |
78 "notAllowed" | |
79 "parent" | |
80 "start" | |
81 "string" | |
82 "text" | |
83 "token") | |
84 "List of strings that are keywords in the compact syntax.") | |
85 | |
86 (defconst rng-c-anchored-keyword-re | |
87 (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'") | |
88 "Regular expression to match a keyword in the compact syntax.") | |
89 | |
90 (defvar rng-c-syntax-table nil | |
91 "Syntax table for parsing the compact syntax.") | |
92 | |
93 (if rng-c-syntax-table | |
94 () | |
95 (setq rng-c-syntax-table (make-syntax-table)) | |
96 (modify-syntax-entry ?# "<" rng-c-syntax-table) | |
97 (modify-syntax-entry ?\n ">" rng-c-syntax-table) | |
98 (modify-syntax-entry ?- "w" rng-c-syntax-table) | |
99 (modify-syntax-entry ?. "w" rng-c-syntax-table) | |
100 (modify-syntax-entry ?_ "w" rng-c-syntax-table) | |
101 (modify-syntax-entry ?: "_" rng-c-syntax-table)) | |
102 | |
103 (defconst rng-c-literal-1-re | |
104 "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'" | |
105 "Regular expression to match a single-quoted literal.") | |
106 | |
107 (defconst rng-c-literal-2-re | |
108 (replace-regexp-in-string "'" "\"" rng-c-literal-1-re) | |
109 "Regular expression to match a double-quoted literal.") | |
110 | |
111 (defconst rng-c-ncname-re "\\w+") | |
112 | |
113 (defconst rng-c-anchored-ncname-re | |
114 (concat "\\`" rng-c-ncname-re "\\'")) | |
115 | |
116 (defconst rng-c-token-re | |
117 (concat "[&|]=" "\\|" | |
118 "[][()|&,*+?{}~=-]" "\\|" | |
119 rng-c-literal-1-re "\\|" | |
120 rng-c-literal-2-re "\\|" | |
121 rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|" | |
122 "\\\\" rng-c-ncname-re "\\|" | |
123 ">>") | |
124 "Regular expression to match a token in the compact syntax.") | |
125 | |
126 (defun rng-c-init-buffer () | |
127 (setq case-fold-search nil) ; automatically becomes buffer-local when set | |
128 (set-buffer-multibyte t) | |
129 (set-syntax-table rng-c-syntax-table)) | |
130 | |
131 (defvar rng-c-current-token nil) | |
132 (make-variable-buffer-local 'rng-c-current-token) | |
133 | |
134 (defun rng-c-advance () | |
135 (cond ((looking-at rng-c-token-re) | |
136 (setq rng-c-current-token (match-string 0)) | |
137 (goto-char (match-end 0)) | |
138 (forward-comment (point-max))) | |
139 ((= (point) (point-max)) | |
140 (setq rng-c-current-token "")) | |
141 (t (rng-c-error "Invalid token")))) | |
142 | |
143 (defconst rng-c-anchored-datatype-name-re | |
144 (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'")) | |
145 | |
146 (defsubst rng-c-current-token-keyword-p () | |
147 (string-match rng-c-anchored-keyword-re rng-c-current-token)) | |
148 | |
149 (defsubst rng-c-current-token-prefixed-name-p () | |
150 (string-match rng-c-anchored-datatype-name-re rng-c-current-token)) | |
151 | |
152 (defsubst rng-c-current-token-literal-p () | |
153 (string-match "\\`['\"]" rng-c-current-token)) | |
154 | |
155 (defsubst rng-c-current-token-quoted-identifier-p () | |
156 (string-match "\\`\\\\" rng-c-current-token)) | |
157 | |
158 (defsubst rng-c-current-token-ncname-p () | |
159 (string-match rng-c-anchored-ncname-re rng-c-current-token)) | |
160 | |
161 (defsubst rng-c-current-token-ns-name-p () | |
162 (let ((len (length rng-c-current-token))) | |
163 (and (> len 0) | |
164 (= (aref rng-c-current-token (- len 1)) ?*)))) | |
165 | |
166 ;;; Namespaces | |
167 | |
168 (defvar rng-c-inherit-namespace nil) | |
169 | |
170 (defvar rng-c-default-namespace nil) | |
171 | |
172 (defvar rng-c-default-namespace-declared nil) | |
173 | |
174 (defvar rng-c-namespace-decls nil | |
175 "Alist of namespace declarations.") | |
176 | |
177 (defconst rng-c-no-namespace nil) | |
178 | |
179 (defun rng-c-declare-standard-namespaces () | |
180 (setq rng-c-namespace-decls | |
181 (cons (cons "xml" nxml-xml-namespace-uri) | |
182 rng-c-namespace-decls)) | |
183 (when (and (not rng-c-default-namespace-declared) | |
184 rng-c-inherit-namespace) | |
185 (setq rng-c-default-namespace rng-c-inherit-namespace))) | |
186 | |
187 (defun rng-c-expand-name (prefixed-name) | |
188 (let ((i (string-match ":" prefixed-name))) | |
189 (rng-make-name (rng-c-lookup-prefix (substring prefixed-name | |
190 0 | |
191 i)) | |
192 (substring prefixed-name (+ i 1))))) | |
193 | |
194 (defun rng-c-lookup-prefix (prefix) | |
195 (let ((binding (assoc prefix rng-c-namespace-decls))) | |
196 (or binding (rng-c-error "Undefined prefix %s" prefix)) | |
197 (cdr binding))) | |
198 | |
199 (defun rng-c-unqualified-namespace (attribute) | |
200 (if attribute | |
201 rng-c-no-namespace | |
202 rng-c-default-namespace)) | |
203 | |
204 (defun rng-c-make-context () | |
205 (cons rng-c-default-namespace rng-c-namespace-decls)) | |
206 | |
207 ;;; Datatypes | |
208 | |
209 (defconst rng-string-datatype | |
210 (rng-make-datatype rng-builtin-datatypes-uri "string")) | |
211 | |
212 (defconst rng-token-datatype | |
213 (rng-make-datatype rng-builtin-datatypes-uri "token")) | |
214 | |
215 (defvar rng-c-datatype-decls nil | |
216 "Alist of datatype declarations. | |
217 Contains a list of pairs (PREFIX . URI) where PREFIX is a string | |
218 and URI is a symbol.") | |
219 | |
220 (defun rng-c-declare-standard-datatypes () | |
221 (setq rng-c-datatype-decls | |
222 (cons (cons "xsd" rng-xsd-datatypes-uri) | |
223 rng-c-datatype-decls))) | |
224 | |
225 (defun rng-c-lookup-datatype-prefix (prefix) | |
226 (let ((binding (assoc prefix rng-c-datatype-decls))) | |
227 (or binding (rng-c-error "Undefined prefix %s" prefix)) | |
228 (cdr binding))) | |
229 | |
230 (defun rng-c-expand-datatype (prefixed-name) | |
231 (let ((i (string-match ":" prefixed-name))) | |
232 (rng-make-datatype | |
233 (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i)) | |
234 (substring prefixed-name (+ i 1))))) | |
235 | |
236 ;;; Grammars | |
237 | |
238 (defvar rng-c-current-grammar nil) | |
239 (defvar rng-c-parent-grammar nil) | |
240 | |
241 (defun rng-c-make-grammar () | |
242 (make-hash-table :test 'equal)) | |
243 | |
244 (defconst rng-c-about-override-slot 0) | |
245 (defconst rng-c-about-combine-slot 1) | |
246 | |
247 (defun rng-c-lookup-create (name grammar) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
248 "Return a def object for NAME. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
249 A def object is a pair \(ABOUT . REF) where REF is returned by |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
250 `rng-make-ref'. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
251 ABOUT is a two-element vector [OVERRIDE COMBINE]. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
252 COMBINE is either nil, choice or interleave. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
253 OVERRIDE is either nil, require or t." |
86361 | 254 (let ((def (gethash name grammar))) |
255 (if def | |
256 def | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
257 (progn |
86361 | 258 (setq def (cons (vector nil nil) (rng-make-ref name))) |
259 (puthash name def grammar) | |
260 def)))) | |
261 | |
262 (defun rng-c-make-ref (name) | |
263 (or rng-c-current-grammar | |
264 (rng-c-error "Reference not in a grammar")) | |
265 (cdr (rng-c-lookup-create name rng-c-current-grammar))) | |
266 | |
267 (defun rng-c-make-parent-ref (name) | |
268 (or rng-c-parent-grammar | |
269 (rng-c-error "Reference to non-existent parent grammar")) | |
270 (cdr (rng-c-lookup-create name rng-c-parent-grammar))) | |
271 | |
272 (defvar rng-c-overrides nil | |
273 "Contains a list of (NAME . DEF) pairs.") | |
274 | |
275 (defun rng-c-merge-combine (def combine name) | |
276 (let* ((about (car def)) | |
277 (current-combine (aref about rng-c-about-combine-slot))) | |
278 (if combine | |
279 (if current-combine | |
280 (or (eq combine current-combine) | |
281 (rng-c-error "Inconsistent combine for %s" name)) | |
282 (aset about rng-c-about-combine-slot combine)) | |
283 current-combine))) | |
284 | |
285 (defun rng-c-prepare-define (name combine in-include) | |
286 (let* ((def (rng-c-lookup-create name rng-c-current-grammar)) | |
287 (about (car def)) | |
288 (overridden (aref about rng-c-about-override-slot))) | |
289 (and in-include | |
290 (setq rng-c-overrides (cons (cons name def) rng-c-overrides))) | |
291 (cond (overridden (and (eq overridden 'require) | |
292 (aset about rng-c-about-override-slot t)) | |
293 nil) | |
294 (t (setq combine (rng-c-merge-combine def combine name)) | |
295 (and (rng-ref-get (cdr def)) | |
296 (not combine) | |
297 (rng-c-error "Duplicate definition of %s" name)) | |
298 def)))) | |
299 | |
300 (defun rng-c-start-include (overrides) | |
301 (mapcar (lambda (name-def) | |
302 (let* ((def (cdr name-def)) | |
303 (about (car def)) | |
304 (save (aref about rng-c-about-override-slot))) | |
305 (aset about rng-c-about-override-slot 'require) | |
306 (cons save name-def))) | |
307 overrides)) | |
308 | |
309 (defun rng-c-end-include (overrides) | |
310 (mapcar (lambda (o) | |
311 (let* ((saved (car o)) | |
312 (name-def (cdr o)) | |
313 (name (car name-def)) | |
314 (def (cdr name-def)) | |
315 (about (car def))) | |
316 (and (eq (aref about rng-c-about-override-slot) 'require) | |
317 (rng-c-error "Definition of %s in include did not override definition in included file" name)) | |
318 (aset about rng-c-about-override-slot saved))) | |
319 overrides)) | |
320 | |
321 (defun rng-c-define (def value) | |
322 (and def | |
323 (let ((current-value (rng-ref-get (cdr def)))) | |
324 (rng-ref-set (cdr def) | |
325 (if current-value | |
326 (if (eq (aref (car def) rng-c-about-combine-slot) | |
327 'choice) | |
328 (rng-make-choice (list current-value value)) | |
329 (rng-make-interleave (list current-value value))) | |
330 value))))) | |
331 | |
332 (defun rng-c-finish-grammar () | |
333 (maphash (lambda (key def) | |
334 (or (rng-ref-get (cdr def)) | |
335 (rng-c-error "Reference to undefined pattern %s" key))) | |
336 rng-c-current-grammar) | |
337 (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar) | |
338 (rng-c-error "No definition of start"))))) | |
339 | |
340 ;;; Parsing | |
341 | |
342 (defvar rng-c-escape-positions nil) | |
343 (make-variable-buffer-local 'rng-c-escape-positions) | |
344 | |
345 (defvar rng-c-file-name nil) | |
346 (make-variable-buffer-local 'rng-c-file-name) | |
347 | |
348 (defvar rng-c-file-index nil) | |
349 | |
350 (defun rng-c-parse-file (filename &optional context) | |
351 (save-excursion | |
352 (set-buffer (get-buffer-create (rng-c-buffer-name context))) | |
353 (erase-buffer) | |
354 (rng-c-init-buffer) | |
355 (setq rng-c-file-name | |
356 (car (insert-file-contents filename))) | |
357 (setq rng-c-escape-positions nil) | |
358 (rng-c-process-escapes) | |
359 (rng-c-parse-top-level context))) | |
360 | |
361 (defun rng-c-buffer-name (context) | |
362 (concat " *RNC Input" | |
363 (if context | |
364 (concat "<" | |
365 (number-to-string (setq rng-c-file-index | |
366 (1+ rng-c-file-index))) | |
367 ">*") | |
368 (setq rng-c-file-index 1) | |
369 "*"))) | |
370 | |
371 (defun rng-c-process-escapes () | |
372 ;; Check for any nuls, since we will use nul chars | |
373 ;; for internal purposes. | |
374 (let ((pos (search-forward "\C-@" nil t))) | |
375 (and pos | |
376 (rng-c-error "Nul character found (binary file?)"))) | |
377 (let ((offset 0)) | |
378 (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}" | |
379 (point-max) | |
380 t) | |
381 (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16)))) | |
382 (if (and ch (> ch 0)) | |
383 (let ((begin (match-beginning 0)) | |
384 (end (match-end 0))) | |
385 (delete-region begin end) | |
386 ;; Represent an escaped newline by nul, so | |
387 ;; that we can distinguish it from a literal newline. | |
388 ;; We will translate it back into a real newline later. | |
389 (insert (if (eq ch ?\n) 0 ch)) | |
390 (setq offset (+ offset (- end begin 1))) | |
391 (setq rng-c-escape-positions | |
392 (cons (cons (point) offset) | |
393 rng-c-escape-positions))) | |
394 (rng-c-error "Invalid character escape"))))) | |
395 (goto-char 1)) | |
396 | |
397 (defun rng-c-translate-position (pos) | |
398 (let ((tem rng-c-escape-positions)) | |
399 (while (and tem | |
400 (> (caar tem) pos)) | |
401 (setq tem (cdr tem))) | |
402 (if tem | |
403 (+ pos (cdar tem)) | |
404 pos))) | |
405 | |
406 (defun rng-c-error (&rest args) | |
407 (rng-c-signal-incorrect-schema rng-c-file-name | |
408 (rng-c-translate-position (point)) | |
409 (apply 'format args))) | |
410 | |
411 (defun rng-c-parse-top-level (context) | |
412 (let ((rng-c-namespace-decls nil) | |
413 (rng-c-default-namespace nil) | |
414 (rng-c-datatype-decls nil)) | |
415 (goto-char (point-min)) | |
416 (forward-comment (point-max)) | |
417 (rng-c-advance) | |
418 (rng-c-parse-decls) | |
419 (let ((p (if (eq context 'include) | |
420 (if (rng-c-implicit-grammar-p) | |
421 (rng-c-parse-grammar-body "") | |
422 (rng-c-parse-included-grammar)) | |
423 (if (rng-c-implicit-grammar-p) | |
424 (rng-c-parse-implicit-grammar) | |
425 (rng-c-parse-pattern))))) | |
426 (or (string-equal rng-c-current-token "") | |
427 (rng-c-error "Unexpected characters after pattern")) | |
428 p))) | |
429 | |
430 (defun rng-c-parse-included-grammar () | |
431 (or (string-equal rng-c-current-token "grammar") | |
432 (rng-c-error "Included schema is not a grammar")) | |
433 (rng-c-advance) | |
434 (rng-c-expect "{") | |
435 (rng-c-parse-grammar-body "}")) | |
436 | |
437 (defun rng-c-implicit-grammar-p () | |
438 (or (and (or (rng-c-current-token-prefixed-name-p) | |
439 (rng-c-current-token-quoted-identifier-p) | |
440 (and (rng-c-current-token-ncname-p) | |
441 (not (rng-c-current-token-keyword-p)))) | |
442 (looking-at "\\[")) | |
443 (and (string-equal rng-c-current-token "[") | |
444 (rng-c-parse-lead-annotation) | |
445 nil) | |
446 (member rng-c-current-token '("div" "include" "")) | |
447 (looking-at "[|&]?="))) | |
448 | |
449 (defun rng-c-parse-decls () | |
450 (setq rng-c-default-namespace-declared nil) | |
451 (while (progn | |
452 (let ((binding | |
453 (assoc rng-c-current-token | |
454 '(("namespace" . rng-c-parse-namespace) | |
455 ("datatypes" . rng-c-parse-datatypes) | |
456 ("default" . rng-c-parse-default))))) | |
457 (if binding | |
458 (progn | |
459 (rng-c-advance) | |
460 (funcall (cdr binding)) | |
461 t) | |
462 nil)))) | |
463 (rng-c-declare-standard-datatypes) | |
464 (rng-c-declare-standard-namespaces)) | |
465 | |
466 (defun rng-c-parse-datatypes () | |
467 (let ((prefix (rng-c-parse-identifier-or-keyword))) | |
468 (or (not (assoc prefix rng-c-datatype-decls)) | |
469 (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix)) | |
470 (rng-c-expect "=") | |
471 (setq rng-c-datatype-decls | |
472 (cons (cons prefix | |
473 (rng-make-datatypes-uri (rng-c-parse-literal))) | |
474 rng-c-datatype-decls)))) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
475 |
86361 | 476 (defun rng-c-parse-namespace () |
477 (rng-c-declare-namespace nil | |
478 (rng-c-parse-identifier-or-keyword))) | |
479 | |
480 (defun rng-c-parse-default () | |
481 (rng-c-expect "namespace") | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
482 (rng-c-declare-namespace t |
86361 | 483 (if (string-equal rng-c-current-token "=") |
484 nil | |
485 (rng-c-parse-identifier-or-keyword)))) | |
486 | |
487 (defun rng-c-declare-namespace (declare-default prefix) | |
488 (rng-c-expect "=") | |
489 (let ((ns (cond ((string-equal rng-c-current-token "inherit") | |
490 (rng-c-advance) | |
491 rng-c-inherit-namespace) | |
492 (t | |
493 (nxml-make-namespace (rng-c-parse-literal)))))) | |
494 (and prefix | |
495 (or (not (assoc prefix rng-c-namespace-decls)) | |
496 (rng-c-error "Duplicate namespace declaration for prefix %s" | |
497 prefix)) | |
498 (setq rng-c-namespace-decls | |
499 (cons (cons prefix ns) rng-c-namespace-decls))) | |
500 (and declare-default | |
501 (or (not rng-c-default-namespace-declared) | |
502 (rng-c-error "Duplicate default namespace declaration")) | |
503 (setq rng-c-default-namespace-declared t) | |
504 (setq rng-c-default-namespace ns)))) | |
505 | |
506 (defun rng-c-parse-implicit-grammar () | |
507 (let* ((rng-c-parent-grammar rng-c-current-grammar) | |
508 (rng-c-current-grammar (rng-c-make-grammar))) | |
509 (rng-c-parse-grammar-body "") | |
510 (rng-c-finish-grammar))) | |
511 | |
512 (defun rng-c-parse-grammar-body (close-token &optional in-include) | |
513 (while (not (string-equal rng-c-current-token close-token)) | |
514 (cond ((rng-c-current-token-keyword-p) | |
515 (let ((kw (intern rng-c-current-token))) | |
516 (cond ((eq kw 'start) | |
517 (rng-c-parse-define 'start in-include)) | |
518 ((eq kw 'div) | |
519 (rng-c-advance) | |
520 (rng-c-parse-div in-include)) | |
521 ((eq kw 'include) | |
522 (and in-include | |
523 (rng-c-error "Nested include")) | |
524 (rng-c-advance) | |
525 (rng-c-parse-include)) | |
526 (t (rng-c-error "Invalid grammar keyword"))))) | |
527 ((rng-c-current-token-ncname-p) | |
528 (if (looking-at "\\[") | |
529 (rng-c-parse-annotation-element) | |
530 (rng-c-parse-define rng-c-current-token | |
531 in-include))) | |
532 ((rng-c-current-token-quoted-identifier-p) | |
533 (if (looking-at "\\[") | |
534 (rng-c-parse-annotation-element) | |
535 (rng-c-parse-define (substring rng-c-current-token 1) | |
536 in-include))) | |
537 ((rng-c-current-token-prefixed-name-p) | |
538 (rng-c-parse-annotation-element)) | |
539 ((string-equal rng-c-current-token "[") | |
540 (rng-c-parse-lead-annotation) | |
541 (and (string-equal rng-c-current-token close-token) | |
542 (rng-c-error "Missing annotation subject")) | |
543 (and (looking-at "\\[") | |
544 (rng-c-error "Leading annotation applied to annotation"))) | |
545 (t (rng-c-error "Invalid grammar content")))) | |
546 (or (string-equal rng-c-current-token "") | |
547 (rng-c-advance))) | |
548 | |
549 (defun rng-c-parse-div (in-include) | |
550 (rng-c-expect "{") | |
551 (rng-c-parse-grammar-body "}" in-include)) | |
552 | |
553 (defun rng-c-parse-include () | |
554 (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) | |
555 (rng-c-inherit-namespace (rng-c-parse-opt-inherit)) | |
556 overrides) | |
557 (cond ((string-equal rng-c-current-token "{") | |
558 (rng-c-advance) | |
559 (let ((rng-c-overrides nil)) | |
560 (rng-c-parse-grammar-body "}" t) | |
561 (setq overrides rng-c-overrides)) | |
562 (setq overrides (rng-c-start-include overrides)) | |
563 (rng-c-parse-file filename 'include) | |
564 (rng-c-end-include overrides)) | |
565 (t (rng-c-parse-file filename 'include))))) | |
566 | |
567 (defun rng-c-parse-define (name in-include) | |
568 (rng-c-advance) | |
569 (let ((assign (assoc rng-c-current-token | |
570 '(("=" . nil) | |
571 ("|=" . choice) | |
572 ("&=" . interleave))))) | |
573 (or assign | |
574 (rng-c-error "Expected assignment operator")) | |
575 (rng-c-advance) | |
576 (let ((ref (rng-c-prepare-define name (cdr assign) in-include))) | |
577 (rng-c-define ref (rng-c-parse-pattern))))) | |
578 | |
579 (defvar rng-c-had-except nil) | |
580 | |
581 (defun rng-c-parse-pattern () | |
582 (let* ((rng-c-had-except nil) | |
583 (p (rng-c-parse-repeated)) | |
584 (op (assoc rng-c-current-token | |
585 '(("|" . rng-make-choice) | |
586 ("," . rng-make-group) | |
587 ("&" . rng-make-interleave))))) | |
588 (if op | |
589 (if rng-c-had-except | |
590 (rng-c-error "Parentheses required around pattern using -") | |
591 (let* ((patterns (cons p nil)) | |
592 (tail patterns) | |
593 (connector rng-c-current-token)) | |
594 (while (progn | |
595 (rng-c-advance) | |
596 (let ((newcdr (cons (rng-c-parse-repeated) nil))) | |
597 (setcdr tail newcdr) | |
598 (setq tail newcdr)) | |
599 (string-equal rng-c-current-token connector))) | |
600 (funcall (cdr op) patterns))) | |
601 p))) | |
602 | |
603 (defun rng-c-parse-repeated () | |
604 (let ((p (rng-c-parse-follow-annotations | |
605 (rng-c-parse-primary))) | |
606 (op (assoc rng-c-current-token | |
607 '(("*" . rng-make-zero-or-more) | |
608 ("+" . rng-make-one-or-more) | |
609 ("?" . rng-make-optional))))) | |
610 (if op | |
611 (if rng-c-had-except | |
612 (rng-c-error "Parentheses required around pattern using -") | |
613 (rng-c-parse-follow-annotations | |
614 (progn | |
615 (rng-c-advance) | |
616 (funcall (cdr op) p)))) | |
617 p))) | |
618 | |
619 (defun rng-c-parse-primary () | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
620 "Parse a primary expression. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
621 The current token must be the first token of the expression. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
622 After parsing the current token should be the token following |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
623 the primary expression." |
86361 | 624 (cond ((rng-c-current-token-keyword-p) |
625 (let ((parse-function (get (intern rng-c-current-token) | |
626 'rng-c-pattern))) | |
627 (or parse-function | |
628 (rng-c-error "Keyword %s does not introduce a pattern" | |
629 rng-c-current-token)) | |
630 (rng-c-advance) | |
631 (funcall parse-function))) | |
632 ((rng-c-current-token-ncname-p) | |
633 (rng-c-advance-with (rng-c-make-ref rng-c-current-token))) | |
634 ((string-equal rng-c-current-token "(") | |
635 (rng-c-advance) | |
636 (let ((p (rng-c-parse-pattern))) | |
637 (rng-c-expect ")") | |
638 p)) | |
639 ((rng-c-current-token-prefixed-name-p) | |
640 (let ((name (rng-c-expand-datatype rng-c-current-token))) | |
641 (rng-c-advance) | |
642 (rng-c-parse-data name))) | |
643 ((rng-c-current-token-literal-p) | |
644 (rng-make-value rng-token-datatype (rng-c-parse-literal) nil)) | |
645 ((rng-c-current-token-quoted-identifier-p) | |
646 (rng-c-advance-with | |
647 (rng-c-make-ref (substring rng-c-current-token 1)))) | |
648 ((string-equal rng-c-current-token "[") | |
649 (rng-c-parse-lead-annotation) | |
650 (rng-c-parse-primary)) | |
651 (t (rng-c-error "Invalid pattern")))) | |
652 | |
653 (defun rng-c-parse-parent () | |
654 (and (rng-c-current-token-keyword-p) | |
655 (rng-c-error "Keyword following parent was not quoted" | |
656 rng-c-current-token)) | |
657 (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword))) | |
658 | |
659 (defun rng-c-parse-literal () | |
660 (rng-c-fix-escaped-newlines | |
661 (apply 'concat (rng-c-parse-literal-segments)))) | |
662 | |
663 (defun rng-c-parse-literal-segments () | |
664 (let ((str (rng-c-parse-literal-segment))) | |
665 (cons str | |
666 (cond ((string-equal rng-c-current-token "~") | |
667 (rng-c-advance) | |
668 (rng-c-parse-literal-segments)) | |
669 (t nil))))) | |
670 | |
671 (defun rng-c-parse-literal-segment () | |
672 (or (rng-c-current-token-literal-p) | |
673 (rng-c-error "Expected a literal")) | |
674 (rng-c-advance-with | |
675 (let ((n (if (and (>= (length rng-c-current-token) 6) | |
676 (eq (aref rng-c-current-token 0) | |
677 (aref rng-c-current-token 1))) | |
678 3 | |
679 1))) | |
680 (substring rng-c-current-token n (- n))))) | |
681 | |
682 (defun rng-c-fix-escaped-newlines (str) | |
683 (let ((pos 0)) | |
684 (while (progn | |
685 (let ((n (string-match "\C-@" str pos))) | |
686 (and n | |
687 (aset str n ?\n) | |
688 (setq pos (1+ n))))))) | |
689 str) | |
690 | |
691 (defun rng-c-parse-identifier-or-keyword () | |
692 (cond ((rng-c-current-token-ncname-p) | |
693 (rng-c-advance-with rng-c-current-token)) | |
694 ((rng-c-current-token-quoted-identifier-p) | |
695 (rng-c-advance-with (substring rng-c-current-token 1))) | |
696 (t (rng-c-error "Expected identifier or keyword")))) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
697 |
86361 | 698 (put 'string 'rng-c-pattern 'rng-c-parse-string) |
699 (put 'token 'rng-c-pattern 'rng-c-parse-token) | |
700 (put 'element 'rng-c-pattern 'rng-c-parse-element) | |
701 (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute) | |
702 (put 'list 'rng-c-pattern 'rng-c-parse-list) | |
703 (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed) | |
704 (put 'text 'rng-c-pattern 'rng-c-parse-text) | |
705 (put 'empty 'rng-c-pattern 'rng-c-parse-empty) | |
706 (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed) | |
707 (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar) | |
708 (put 'parent 'rng-c-pattern 'rng-c-parse-parent) | |
709 (put 'external 'rng-c-pattern 'rng-c-parse-external) | |
710 | |
711 (defun rng-c-parse-element () | |
712 (let ((name-class (rng-c-parse-name-class nil))) | |
713 (rng-c-expect "{") | |
714 (let ((pattern (rng-c-parse-pattern))) | |
715 (rng-c-expect "}") | |
716 (rng-make-element name-class pattern)))) | |
717 | |
718 (defun rng-c-parse-attribute () | |
719 (let ((name-class (rng-c-parse-name-class 'attribute))) | |
720 (rng-c-expect "{") | |
721 (let ((pattern (rng-c-parse-pattern))) | |
722 (rng-c-expect "}") | |
723 (rng-make-attribute name-class pattern)))) | |
724 | |
725 (defun rng-c-parse-name-class (attribute) | |
726 (let* ((rng-c-had-except nil) | |
727 (name-class | |
728 (rng-c-parse-follow-annotations | |
729 (rng-c-parse-primary-name-class attribute)))) | |
730 (if (string-equal rng-c-current-token "|") | |
731 (let* ((name-classes (cons name-class nil)) | |
732 (tail name-classes)) | |
733 (or (not rng-c-had-except) | |
734 (rng-c-error "Parentheses required around name-class using - operator")) | |
735 (while (progn | |
736 (rng-c-advance) | |
737 (let ((newcdr | |
738 (cons (rng-c-parse-follow-annotations | |
739 (rng-c-parse-primary-name-class attribute)) | |
740 nil))) | |
741 (setcdr tail newcdr) | |
742 (setq tail newcdr)) | |
743 (string-equal rng-c-current-token "|"))) | |
744 (rng-make-choice-name-class name-classes)) | |
745 name-class))) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
746 |
86361 | 747 (defun rng-c-parse-primary-name-class (attribute) |
748 (cond ((rng-c-current-token-ncname-p) | |
749 (rng-c-advance-with | |
750 (rng-make-name-name-class | |
751 (rng-make-name (rng-c-unqualified-namespace attribute) | |
752 rng-c-current-token)))) | |
753 ((rng-c-current-token-prefixed-name-p) | |
754 (rng-c-advance-with | |
755 (rng-make-name-name-class | |
756 (rng-c-expand-name rng-c-current-token)))) | |
757 ((string-equal rng-c-current-token "*") | |
758 (let ((except (rng-c-parse-opt-except-name-class attribute))) | |
759 (if except | |
760 (rng-make-any-name-except-name-class except) | |
761 (rng-make-any-name-name-class)))) | |
762 ((rng-c-current-token-ns-name-p) | |
763 (let* ((ns | |
764 (rng-c-lookup-prefix (substring rng-c-current-token | |
765 0 | |
766 -2))) | |
767 (except (rng-c-parse-opt-except-name-class attribute))) | |
768 (if except | |
769 (rng-make-ns-name-except-name-class ns except) | |
770 (rng-make-ns-name-name-class ns)))) | |
771 ((string-equal rng-c-current-token "(") | |
772 (rng-c-advance) | |
773 (let ((name-class (rng-c-parse-name-class attribute))) | |
774 (rng-c-expect ")") | |
775 name-class)) | |
776 ((rng-c-current-token-quoted-identifier-p) | |
777 (rng-c-advance-with | |
778 (rng-make-name-name-class | |
779 (rng-make-name (rng-c-unqualified-namespace attribute) | |
780 (substring rng-c-current-token 1))))) | |
781 ((string-equal rng-c-current-token "[") | |
782 (rng-c-parse-lead-annotation) | |
783 (rng-c-parse-primary-name-class attribute)) | |
784 (t (rng-c-error "Bad name class")))) | |
785 | |
786 (defun rng-c-parse-opt-except-name-class (attribute) | |
787 (rng-c-advance) | |
788 (and (string-equal rng-c-current-token "-") | |
789 (or (not rng-c-had-except) | |
790 (rng-c-error "Parentheses required around name-class using - operator")) | |
791 (setq rng-c-had-except t) | |
792 (progn | |
793 (rng-c-advance) | |
794 (rng-c-parse-primary-name-class attribute)))) | |
795 | |
796 (defun rng-c-parse-mixed () | |
797 (rng-c-expect "{") | |
798 (let ((pattern (rng-make-mixed (rng-c-parse-pattern)))) | |
799 (rng-c-expect "}") | |
800 pattern)) | |
801 | |
802 (defun rng-c-parse-list () | |
803 (rng-c-expect "{") | |
804 (let ((pattern (rng-make-list (rng-c-parse-pattern)))) | |
805 (rng-c-expect "}") | |
806 pattern)) | |
807 | |
808 (defun rng-c-parse-text () | |
809 (rng-make-text)) | |
810 | |
811 (defun rng-c-parse-empty () | |
812 (rng-make-empty)) | |
813 | |
814 (defun rng-c-parse-not-allowed () | |
815 (rng-make-not-allowed)) | |
816 | |
817 (defun rng-c-parse-string () | |
818 (rng-c-parse-data rng-string-datatype)) | |
819 | |
820 (defun rng-c-parse-token () | |
821 (rng-c-parse-data rng-token-datatype)) | |
822 | |
823 (defun rng-c-parse-data (name) | |
824 (if (rng-c-current-token-literal-p) | |
825 (rng-make-value name | |
826 (rng-c-parse-literal) | |
827 (and (car name) | |
828 (rng-c-make-context))) | |
829 (let ((params (rng-c-parse-optional-params))) | |
830 (if (string-equal rng-c-current-token "-") | |
831 (progn | |
832 (if rng-c-had-except | |
833 (rng-c-error "Parentheses required around pattern using -") | |
834 (setq rng-c-had-except t)) | |
835 (rng-c-advance) | |
836 (rng-make-data-except name | |
837 params | |
838 (rng-c-parse-primary))) | |
839 (rng-make-data name params))))) | |
840 | |
841 (defun rng-c-parse-optional-params () | |
842 (and (string-equal rng-c-current-token "{") | |
843 (let* ((head (cons nil nil)) | |
844 (tail head)) | |
845 (rng-c-advance) | |
846 (while (not (string-equal rng-c-current-token "}")) | |
847 (and (string-equal rng-c-current-token "[") | |
848 (rng-c-parse-lead-annotation)) | |
849 (let ((name (rng-c-parse-identifier-or-keyword))) | |
850 (rng-c-expect "=") | |
851 (let ((newcdr (cons (cons (intern name) | |
852 (rng-c-parse-literal)) | |
853 nil))) | |
854 (setcdr tail newcdr) | |
855 (setq tail newcdr)))) | |
856 (rng-c-advance) | |
857 (cdr head)))) | |
858 | |
859 (defun rng-c-parse-external () | |
860 (let* ((filename (rng-c-expand-file (rng-c-parse-literal))) | |
861 (rng-c-inherit-namespace (rng-c-parse-opt-inherit))) | |
862 (rng-c-parse-file filename 'external))) | |
863 | |
864 (defun rng-c-expand-file (uri) | |
865 (condition-case err | |
866 (rng-uri-file-name (rng-uri-resolve uri | |
867 (rng-file-name-uri rng-c-file-name))) | |
868 (rng-uri-error | |
869 (rng-c-error (cadr err))))) | |
870 | |
871 (defun rng-c-parse-opt-inherit () | |
872 (cond ((string-equal rng-c-current-token "inherit") | |
873 (rng-c-advance) | |
874 (rng-c-expect "=") | |
875 (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword))) | |
876 (t rng-c-default-namespace))) | |
877 | |
878 (defun rng-c-parse-grammar () | |
879 (rng-c-expect "{") | |
880 (let* ((rng-c-parent-grammar rng-c-current-grammar) | |
881 (rng-c-current-grammar (rng-c-make-grammar))) | |
882 (rng-c-parse-grammar-body "}") | |
883 (rng-c-finish-grammar))) | |
884 | |
885 (defun rng-c-parse-lead-annotation () | |
886 (rng-c-parse-annotation-body) | |
887 (and (string-equal rng-c-current-token "[") | |
888 (rng-c-error "Multiple leading annotations"))) | |
889 | |
890 (defun rng-c-parse-follow-annotations (obj) | |
891 (while (string-equal rng-c-current-token ">>") | |
892 (rng-c-advance) | |
893 (if (rng-c-current-token-prefixed-name-p) | |
894 (rng-c-advance) | |
895 (rng-c-parse-identifier-or-keyword)) | |
896 (rng-c-parse-annotation-body t)) | |
897 obj) | |
898 | |
899 (defun rng-c-parse-annotation-element () | |
900 (rng-c-advance) | |
901 (rng-c-parse-annotation-body t)) | |
902 | |
903 ;; XXX need stricter checking of attribute names | |
904 ;; XXX don't allow attributes after text | |
905 | |
906 (defun rng-c-parse-annotation-body (&optional allow-text) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
907 "Current token is [. Parse up to matching ]. |
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
908 Current token after parse is token following ]." |
86361 | 909 (or (string-equal rng-c-current-token "[") |
910 (rng-c-error "Expected [")) | |
911 (rng-c-advance) | |
912 (while (not (string-equal rng-c-current-token "]")) | |
913 (cond ((rng-c-current-token-literal-p) | |
914 (or allow-text | |
915 (rng-c-error "Out of place text within annotation")) | |
916 (rng-c-parse-literal)) | |
917 (t | |
918 (if (rng-c-current-token-prefixed-name-p) | |
919 (rng-c-advance) | |
920 (rng-c-parse-identifier-or-keyword)) | |
921 (cond ((string-equal rng-c-current-token "[") | |
922 (rng-c-parse-annotation-body t)) | |
923 ((string-equal rng-c-current-token "=") | |
924 (rng-c-advance) | |
925 (rng-c-parse-literal)) | |
926 (t (rng-c-error "Expected = or [")))))) | |
927 (rng-c-advance)) | |
96496
e374c747704b
Fix typos, and general docstring cleanup.
Juanma Barranquero <lekktu@gmail.com>
parents:
94666
diff
changeset
|
928 |
86361 | 929 (defun rng-c-advance-with (pattern) |
930 (rng-c-advance) | |
931 pattern) | |
932 | |
933 (defun rng-c-expect (str) | |
934 (or (string-equal rng-c-current-token str) | |
935 (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token)) | |
936 (rng-c-advance)) | |
937 | |
938 (provide 'rng-cmpct) | |
939 | |
940 ;;; rng-cmpct.el | |
86379 | 941 |
942 ;; arch-tag: 90395eb1-283b-4146-bbc1-6d6ef1704e57 |