86361
|
1 ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas
|
|
2
|
|
3 ;; Copyright (C) 2003 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: James Clark
|
|
6 ;; Keywords: XML, RelaxNG
|
|
7
|
|
8 ;; This program is free software; you can redistribute it and/or
|
|
9 ;; modify it under the terms of the GNU General Public License as
|
|
10 ;; published by the Free Software Foundation; either version 2 of
|
|
11 ;; the License, or (at your option) any later version.
|
|
12
|
|
13 ;; This program is distributed in the hope that it will be
|
|
14 ;; useful, but WITHOUT ANY WARRANTY; without even the implied
|
|
15 ;; warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
|
16 ;; PURPOSE. See the GNU General Public License for more details.
|
|
17
|
|
18 ;; You should have received a copy of the GNU General Public
|
|
19 ;; License along with this program; if not, write to the Free
|
|
20 ;; Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
|
|
21 ;; MA 02111-1307 USA
|
|
22
|
|
23 ;;; Commentary:
|
|
24
|
|
25 ;; This parses a RELAX NG Compact Syntax schema into the form
|
|
26 ;; specified in rng-pttrn.el.
|
|
27 ;;
|
|
28 ;; RELAX NG Compact Syntax is specified by
|
|
29 ;; http://relaxng.org/compact.html
|
|
30 ;;
|
|
31 ;; This file uses the prefix "rng-c-".
|
|
32
|
|
33 ;;; Code:
|
|
34
|
|
35 (require 'nxml-util)
|
|
36 (require 'rng-util)
|
|
37 (require 'rng-uri)
|
|
38 (require 'rng-pttrn)
|
|
39
|
|
40 ;;;###autoload
|
|
41 (defun rng-c-load-schema (filename)
|
|
42 "Load a schema in RELAX NG compact syntax from FILENAME.
|
|
43 Return a pattern."
|
|
44 (rng-c-parse-file filename))
|
|
45
|
|
46 ;;; Error handling
|
|
47
|
|
48 (put 'rng-c-incorrect-schema
|
|
49 'error-conditions
|
|
50 '(error rng-error nxml-file-parse-error rng-c-incorrect-schema))
|
|
51
|
|
52 (put 'rng-c-incorrect-schema
|
|
53 'error-message
|
|
54 "Incorrect schema")
|
|
55
|
|
56 (defun rng-c-signal-incorrect-schema (filename pos message)
|
|
57 (nxml-signal-file-parse-error filename
|
|
58 pos
|
|
59 message
|
|
60 'rng-c-incorrect-schema))
|
|
61
|
|
62 ;;; Lexing
|
|
63
|
|
64 (defconst rng-c-keywords
|
|
65 '("attribute"
|
|
66 "default"
|
|
67 "datatypes"
|
|
68 "div"
|
|
69 "element"
|
|
70 "empty"
|
|
71 "external"
|
|
72 "grammar"
|
|
73 "include"
|
|
74 "inherit"
|
|
75 "list"
|
|
76 "mixed"
|
|
77 "namespace"
|
|
78 "notAllowed"
|
|
79 "parent"
|
|
80 "start"
|
|
81 "string"
|
|
82 "text"
|
|
83 "token")
|
|
84 "List of strings that are keywords in the compact syntax.")
|
|
85
|
|
86 (defconst rng-c-anchored-keyword-re
|
|
87 (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
|
|
88 "Regular expression to match a keyword in the compact syntax.")
|
|
89
|
|
90 (defvar rng-c-syntax-table nil
|
|
91 "Syntax table for parsing the compact syntax.")
|
|
92
|
|
93 (if rng-c-syntax-table
|
|
94 ()
|
|
95 (setq rng-c-syntax-table (make-syntax-table))
|
|
96 (modify-syntax-entry ?# "<" rng-c-syntax-table)
|
|
97 (modify-syntax-entry ?\n ">" rng-c-syntax-table)
|
|
98 (modify-syntax-entry ?- "w" rng-c-syntax-table)
|
|
99 (modify-syntax-entry ?. "w" rng-c-syntax-table)
|
|
100 (modify-syntax-entry ?_ "w" rng-c-syntax-table)
|
|
101 (modify-syntax-entry ?: "_" rng-c-syntax-table))
|
|
102
|
|
103 (defconst rng-c-literal-1-re
|
|
104 "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
|
|
105 "Regular expression to match a single-quoted literal.")
|
|
106
|
|
107 (defconst rng-c-literal-2-re
|
|
108 (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
|
|
109 "Regular expression to match a double-quoted literal.")
|
|
110
|
|
111 (defconst rng-c-ncname-re "\\w+")
|
|
112
|
|
113 (defconst rng-c-anchored-ncname-re
|
|
114 (concat "\\`" rng-c-ncname-re "\\'"))
|
|
115
|
|
116 (defconst rng-c-token-re
|
|
117 (concat "[&|]=" "\\|"
|
|
118 "[][()|&,*+?{}~=-]" "\\|"
|
|
119 rng-c-literal-1-re "\\|"
|
|
120 rng-c-literal-2-re "\\|"
|
|
121 rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
|
|
122 "\\\\" rng-c-ncname-re "\\|"
|
|
123 ">>")
|
|
124 "Regular expression to match a token in the compact syntax.")
|
|
125
|
|
126 (defun rng-c-init-buffer ()
|
|
127 (setq case-fold-search nil) ; automatically becomes buffer-local when set
|
|
128 (set-buffer-multibyte t)
|
|
129 (set-syntax-table rng-c-syntax-table))
|
|
130
|
|
131 (defvar rng-c-current-token nil)
|
|
132 (make-variable-buffer-local 'rng-c-current-token)
|
|
133
|
|
134 (defun rng-c-advance ()
|
|
135 (cond ((looking-at rng-c-token-re)
|
|
136 (setq rng-c-current-token (match-string 0))
|
|
137 (goto-char (match-end 0))
|
|
138 (forward-comment (point-max)))
|
|
139 ((= (point) (point-max))
|
|
140 (setq rng-c-current-token ""))
|
|
141 (t (rng-c-error "Invalid token"))))
|
|
142
|
|
143 (defconst rng-c-anchored-datatype-name-re
|
|
144 (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
|
|
145
|
|
146 (defsubst rng-c-current-token-keyword-p ()
|
|
147 (string-match rng-c-anchored-keyword-re rng-c-current-token))
|
|
148
|
|
149 (defsubst rng-c-current-token-prefixed-name-p ()
|
|
150 (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
|
|
151
|
|
152 (defsubst rng-c-current-token-literal-p ()
|
|
153 (string-match "\\`['\"]" rng-c-current-token))
|
|
154
|
|
155 (defsubst rng-c-current-token-quoted-identifier-p ()
|
|
156 (string-match "\\`\\\\" rng-c-current-token))
|
|
157
|
|
158 (defsubst rng-c-current-token-ncname-p ()
|
|
159 (string-match rng-c-anchored-ncname-re rng-c-current-token))
|
|
160
|
|
161 (defsubst rng-c-current-token-ns-name-p ()
|
|
162 (let ((len (length rng-c-current-token)))
|
|
163 (and (> len 0)
|
|
164 (= (aref rng-c-current-token (- len 1)) ?*))))
|
|
165
|
|
166 ;;; Namespaces
|
|
167
|
|
168 (defvar rng-c-inherit-namespace nil)
|
|
169
|
|
170 (defvar rng-c-default-namespace nil)
|
|
171
|
|
172 (defvar rng-c-default-namespace-declared nil)
|
|
173
|
|
174 (defvar rng-c-namespace-decls nil
|
|
175 "Alist of namespace declarations.")
|
|
176
|
|
177 (defconst rng-c-no-namespace nil)
|
|
178
|
|
179 (defun rng-c-declare-standard-namespaces ()
|
|
180 (setq rng-c-namespace-decls
|
|
181 (cons (cons "xml" nxml-xml-namespace-uri)
|
|
182 rng-c-namespace-decls))
|
|
183 (when (and (not rng-c-default-namespace-declared)
|
|
184 rng-c-inherit-namespace)
|
|
185 (setq rng-c-default-namespace rng-c-inherit-namespace)))
|
|
186
|
|
187 (defun rng-c-expand-name (prefixed-name)
|
|
188 (let ((i (string-match ":" prefixed-name)))
|
|
189 (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
|
|
190 0
|
|
191 i))
|
|
192 (substring prefixed-name (+ i 1)))))
|
|
193
|
|
194 (defun rng-c-lookup-prefix (prefix)
|
|
195 (let ((binding (assoc prefix rng-c-namespace-decls)))
|
|
196 (or binding (rng-c-error "Undefined prefix %s" prefix))
|
|
197 (cdr binding)))
|
|
198
|
|
199 (defun rng-c-unqualified-namespace (attribute)
|
|
200 (if attribute
|
|
201 rng-c-no-namespace
|
|
202 rng-c-default-namespace))
|
|
203
|
|
204 (defun rng-c-make-context ()
|
|
205 (cons rng-c-default-namespace rng-c-namespace-decls))
|
|
206
|
|
207 ;;; Datatypes
|
|
208
|
|
209 (defconst rng-string-datatype
|
|
210 (rng-make-datatype rng-builtin-datatypes-uri "string"))
|
|
211
|
|
212 (defconst rng-token-datatype
|
|
213 (rng-make-datatype rng-builtin-datatypes-uri "token"))
|
|
214
|
|
215 (defvar rng-c-datatype-decls nil
|
|
216 "Alist of datatype declarations.
|
|
217 Contains a list of pairs (PREFIX . URI) where PREFIX is a string
|
|
218 and URI is a symbol.")
|
|
219
|
|
220 (defun rng-c-declare-standard-datatypes ()
|
|
221 (setq rng-c-datatype-decls
|
|
222 (cons (cons "xsd" rng-xsd-datatypes-uri)
|
|
223 rng-c-datatype-decls)))
|
|
224
|
|
225 (defun rng-c-lookup-datatype-prefix (prefix)
|
|
226 (let ((binding (assoc prefix rng-c-datatype-decls)))
|
|
227 (or binding (rng-c-error "Undefined prefix %s" prefix))
|
|
228 (cdr binding)))
|
|
229
|
|
230 (defun rng-c-expand-datatype (prefixed-name)
|
|
231 (let ((i (string-match ":" prefixed-name)))
|
|
232 (rng-make-datatype
|
|
233 (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
|
|
234 (substring prefixed-name (+ i 1)))))
|
|
235
|
|
236 ;;; Grammars
|
|
237
|
|
238 (defvar rng-c-current-grammar nil)
|
|
239 (defvar rng-c-parent-grammar nil)
|
|
240
|
|
241 (defun rng-c-make-grammar ()
|
|
242 (make-hash-table :test 'equal))
|
|
243
|
|
244 (defconst rng-c-about-override-slot 0)
|
|
245 (defconst rng-c-about-combine-slot 1)
|
|
246
|
|
247 (defun rng-c-lookup-create (name grammar)
|
|
248 "Return a def object for NAME. A def object is a pair
|
|
249 \(ABOUT . REF) where REF is returned by `rng-make-ref'. ABOUT is a
|
|
250 two-element vector [OVERRIDE COMBINE]. COMBINE is either nil, choice
|
|
251 or interleave. OVERRIDE is either nil, require or t."
|
|
252 (let ((def (gethash name grammar)))
|
|
253 (if def
|
|
254 def
|
|
255 (progn
|
|
256 (setq def (cons (vector nil nil) (rng-make-ref name)))
|
|
257 (puthash name def grammar)
|
|
258 def))))
|
|
259
|
|
260 (defun rng-c-make-ref (name)
|
|
261 (or rng-c-current-grammar
|
|
262 (rng-c-error "Reference not in a grammar"))
|
|
263 (cdr (rng-c-lookup-create name rng-c-current-grammar)))
|
|
264
|
|
265 (defun rng-c-make-parent-ref (name)
|
|
266 (or rng-c-parent-grammar
|
|
267 (rng-c-error "Reference to non-existent parent grammar"))
|
|
268 (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
|
|
269
|
|
270 (defvar rng-c-overrides nil
|
|
271 "Contains a list of (NAME . DEF) pairs.")
|
|
272
|
|
273 (defun rng-c-merge-combine (def combine name)
|
|
274 (let* ((about (car def))
|
|
275 (current-combine (aref about rng-c-about-combine-slot)))
|
|
276 (if combine
|
|
277 (if current-combine
|
|
278 (or (eq combine current-combine)
|
|
279 (rng-c-error "Inconsistent combine for %s" name))
|
|
280 (aset about rng-c-about-combine-slot combine))
|
|
281 current-combine)))
|
|
282
|
|
283 (defun rng-c-prepare-define (name combine in-include)
|
|
284 (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
|
|
285 (about (car def))
|
|
286 (overridden (aref about rng-c-about-override-slot)))
|
|
287 (and in-include
|
|
288 (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
|
|
289 (cond (overridden (and (eq overridden 'require)
|
|
290 (aset about rng-c-about-override-slot t))
|
|
291 nil)
|
|
292 (t (setq combine (rng-c-merge-combine def combine name))
|
|
293 (and (rng-ref-get (cdr def))
|
|
294 (not combine)
|
|
295 (rng-c-error "Duplicate definition of %s" name))
|
|
296 def))))
|
|
297
|
|
298 (defun rng-c-start-include (overrides)
|
|
299 (mapcar (lambda (name-def)
|
|
300 (let* ((def (cdr name-def))
|
|
301 (about (car def))
|
|
302 (save (aref about rng-c-about-override-slot)))
|
|
303 (aset about rng-c-about-override-slot 'require)
|
|
304 (cons save name-def)))
|
|
305 overrides))
|
|
306
|
|
307 (defun rng-c-end-include (overrides)
|
|
308 (mapcar (lambda (o)
|
|
309 (let* ((saved (car o))
|
|
310 (name-def (cdr o))
|
|
311 (name (car name-def))
|
|
312 (def (cdr name-def))
|
|
313 (about (car def)))
|
|
314 (and (eq (aref about rng-c-about-override-slot) 'require)
|
|
315 (rng-c-error "Definition of %s in include did not override definition in included file" name))
|
|
316 (aset about rng-c-about-override-slot saved)))
|
|
317 overrides))
|
|
318
|
|
319 (defun rng-c-define (def value)
|
|
320 (and def
|
|
321 (let ((current-value (rng-ref-get (cdr def))))
|
|
322 (rng-ref-set (cdr def)
|
|
323 (if current-value
|
|
324 (if (eq (aref (car def) rng-c-about-combine-slot)
|
|
325 'choice)
|
|
326 (rng-make-choice (list current-value value))
|
|
327 (rng-make-interleave (list current-value value)))
|
|
328 value)))))
|
|
329
|
|
330 (defun rng-c-finish-grammar ()
|
|
331 (maphash (lambda (key def)
|
|
332 (or (rng-ref-get (cdr def))
|
|
333 (rng-c-error "Reference to undefined pattern %s" key)))
|
|
334 rng-c-current-grammar)
|
|
335 (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
|
|
336 (rng-c-error "No definition of start")))))
|
|
337
|
|
338 ;;; Parsing
|
|
339
|
|
340 (defvar rng-c-escape-positions nil)
|
|
341 (make-variable-buffer-local 'rng-c-escape-positions)
|
|
342
|
|
343 (defvar rng-c-file-name nil)
|
|
344 (make-variable-buffer-local 'rng-c-file-name)
|
|
345
|
|
346 (defvar rng-c-file-index nil)
|
|
347
|
|
348 (defun rng-c-parse-file (filename &optional context)
|
|
349 (save-excursion
|
|
350 (set-buffer (get-buffer-create (rng-c-buffer-name context)))
|
|
351 (erase-buffer)
|
|
352 (rng-c-init-buffer)
|
|
353 (setq rng-c-file-name
|
|
354 (car (insert-file-contents filename)))
|
|
355 (setq rng-c-escape-positions nil)
|
|
356 (rng-c-process-escapes)
|
|
357 (rng-c-parse-top-level context)))
|
|
358
|
|
359 (defun rng-c-buffer-name (context)
|
|
360 (concat " *RNC Input"
|
|
361 (if context
|
|
362 (concat "<"
|
|
363 (number-to-string (setq rng-c-file-index
|
|
364 (1+ rng-c-file-index)))
|
|
365 ">*")
|
|
366 (setq rng-c-file-index 1)
|
|
367 "*")))
|
|
368
|
|
369 (defun rng-c-process-escapes ()
|
|
370 ;; Check for any nuls, since we will use nul chars
|
|
371 ;; for internal purposes.
|
|
372 (let ((pos (search-forward "\C-@" nil t)))
|
|
373 (and pos
|
|
374 (rng-c-error "Nul character found (binary file?)")))
|
|
375 (let ((offset 0))
|
|
376 (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
|
|
377 (point-max)
|
|
378 t)
|
|
379 (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
|
|
380 (if (and ch (> ch 0))
|
|
381 (let ((begin (match-beginning 0))
|
|
382 (end (match-end 0)))
|
|
383 (delete-region begin end)
|
|
384 ;; Represent an escaped newline by nul, so
|
|
385 ;; that we can distinguish it from a literal newline.
|
|
386 ;; We will translate it back into a real newline later.
|
|
387 (insert (if (eq ch ?\n) 0 ch))
|
|
388 (setq offset (+ offset (- end begin 1)))
|
|
389 (setq rng-c-escape-positions
|
|
390 (cons (cons (point) offset)
|
|
391 rng-c-escape-positions)))
|
|
392 (rng-c-error "Invalid character escape")))))
|
|
393 (goto-char 1))
|
|
394
|
|
395 (defun rng-c-translate-position (pos)
|
|
396 (let ((tem rng-c-escape-positions))
|
|
397 (while (and tem
|
|
398 (> (caar tem) pos))
|
|
399 (setq tem (cdr tem)))
|
|
400 (if tem
|
|
401 (+ pos (cdar tem))
|
|
402 pos)))
|
|
403
|
|
404 (defun rng-c-error (&rest args)
|
|
405 (rng-c-signal-incorrect-schema rng-c-file-name
|
|
406 (rng-c-translate-position (point))
|
|
407 (apply 'format args)))
|
|
408
|
|
409 (defun rng-c-parse-top-level (context)
|
|
410 (let ((rng-c-namespace-decls nil)
|
|
411 (rng-c-default-namespace nil)
|
|
412 (rng-c-datatype-decls nil))
|
|
413 (goto-char (point-min))
|
|
414 (forward-comment (point-max))
|
|
415 (rng-c-advance)
|
|
416 (rng-c-parse-decls)
|
|
417 (let ((p (if (eq context 'include)
|
|
418 (if (rng-c-implicit-grammar-p)
|
|
419 (rng-c-parse-grammar-body "")
|
|
420 (rng-c-parse-included-grammar))
|
|
421 (if (rng-c-implicit-grammar-p)
|
|
422 (rng-c-parse-implicit-grammar)
|
|
423 (rng-c-parse-pattern)))))
|
|
424 (or (string-equal rng-c-current-token "")
|
|
425 (rng-c-error "Unexpected characters after pattern"))
|
|
426 p)))
|
|
427
|
|
428 (defun rng-c-parse-included-grammar ()
|
|
429 (or (string-equal rng-c-current-token "grammar")
|
|
430 (rng-c-error "Included schema is not a grammar"))
|
|
431 (rng-c-advance)
|
|
432 (rng-c-expect "{")
|
|
433 (rng-c-parse-grammar-body "}"))
|
|
434
|
|
435 (defun rng-c-implicit-grammar-p ()
|
|
436 (or (and (or (rng-c-current-token-prefixed-name-p)
|
|
437 (rng-c-current-token-quoted-identifier-p)
|
|
438 (and (rng-c-current-token-ncname-p)
|
|
439 (not (rng-c-current-token-keyword-p))))
|
|
440 (looking-at "\\["))
|
|
441 (and (string-equal rng-c-current-token "[")
|
|
442 (rng-c-parse-lead-annotation)
|
|
443 nil)
|
|
444 (member rng-c-current-token '("div" "include" ""))
|
|
445 (looking-at "[|&]?=")))
|
|
446
|
|
447 (defun rng-c-parse-decls ()
|
|
448 (setq rng-c-default-namespace-declared nil)
|
|
449 (while (progn
|
|
450 (let ((binding
|
|
451 (assoc rng-c-current-token
|
|
452 '(("namespace" . rng-c-parse-namespace)
|
|
453 ("datatypes" . rng-c-parse-datatypes)
|
|
454 ("default" . rng-c-parse-default)))))
|
|
455 (if binding
|
|
456 (progn
|
|
457 (rng-c-advance)
|
|
458 (funcall (cdr binding))
|
|
459 t)
|
|
460 nil))))
|
|
461 (rng-c-declare-standard-datatypes)
|
|
462 (rng-c-declare-standard-namespaces))
|
|
463
|
|
464 (defun rng-c-parse-datatypes ()
|
|
465 (let ((prefix (rng-c-parse-identifier-or-keyword)))
|
|
466 (or (not (assoc prefix rng-c-datatype-decls))
|
|
467 (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
|
|
468 (rng-c-expect "=")
|
|
469 (setq rng-c-datatype-decls
|
|
470 (cons (cons prefix
|
|
471 (rng-make-datatypes-uri (rng-c-parse-literal)))
|
|
472 rng-c-datatype-decls))))
|
|
473
|
|
474 (defun rng-c-parse-namespace ()
|
|
475 (rng-c-declare-namespace nil
|
|
476 (rng-c-parse-identifier-or-keyword)))
|
|
477
|
|
478 (defun rng-c-parse-default ()
|
|
479 (rng-c-expect "namespace")
|
|
480 (rng-c-declare-namespace t
|
|
481 (if (string-equal rng-c-current-token "=")
|
|
482 nil
|
|
483 (rng-c-parse-identifier-or-keyword))))
|
|
484
|
|
485 (defun rng-c-declare-namespace (declare-default prefix)
|
|
486 (rng-c-expect "=")
|
|
487 (let ((ns (cond ((string-equal rng-c-current-token "inherit")
|
|
488 (rng-c-advance)
|
|
489 rng-c-inherit-namespace)
|
|
490 (t
|
|
491 (nxml-make-namespace (rng-c-parse-literal))))))
|
|
492 (and prefix
|
|
493 (or (not (assoc prefix rng-c-namespace-decls))
|
|
494 (rng-c-error "Duplicate namespace declaration for prefix %s"
|
|
495 prefix))
|
|
496 (setq rng-c-namespace-decls
|
|
497 (cons (cons prefix ns) rng-c-namespace-decls)))
|
|
498 (and declare-default
|
|
499 (or (not rng-c-default-namespace-declared)
|
|
500 (rng-c-error "Duplicate default namespace declaration"))
|
|
501 (setq rng-c-default-namespace-declared t)
|
|
502 (setq rng-c-default-namespace ns))))
|
|
503
|
|
504 (defun rng-c-parse-implicit-grammar ()
|
|
505 (let* ((rng-c-parent-grammar rng-c-current-grammar)
|
|
506 (rng-c-current-grammar (rng-c-make-grammar)))
|
|
507 (rng-c-parse-grammar-body "")
|
|
508 (rng-c-finish-grammar)))
|
|
509
|
|
510 (defun rng-c-parse-grammar-body (close-token &optional in-include)
|
|
511 (while (not (string-equal rng-c-current-token close-token))
|
|
512 (cond ((rng-c-current-token-keyword-p)
|
|
513 (let ((kw (intern rng-c-current-token)))
|
|
514 (cond ((eq kw 'start)
|
|
515 (rng-c-parse-define 'start in-include))
|
|
516 ((eq kw 'div)
|
|
517 (rng-c-advance)
|
|
518 (rng-c-parse-div in-include))
|
|
519 ((eq kw 'include)
|
|
520 (and in-include
|
|
521 (rng-c-error "Nested include"))
|
|
522 (rng-c-advance)
|
|
523 (rng-c-parse-include))
|
|
524 (t (rng-c-error "Invalid grammar keyword")))))
|
|
525 ((rng-c-current-token-ncname-p)
|
|
526 (if (looking-at "\\[")
|
|
527 (rng-c-parse-annotation-element)
|
|
528 (rng-c-parse-define rng-c-current-token
|
|
529 in-include)))
|
|
530 ((rng-c-current-token-quoted-identifier-p)
|
|
531 (if (looking-at "\\[")
|
|
532 (rng-c-parse-annotation-element)
|
|
533 (rng-c-parse-define (substring rng-c-current-token 1)
|
|
534 in-include)))
|
|
535 ((rng-c-current-token-prefixed-name-p)
|
|
536 (rng-c-parse-annotation-element))
|
|
537 ((string-equal rng-c-current-token "[")
|
|
538 (rng-c-parse-lead-annotation)
|
|
539 (and (string-equal rng-c-current-token close-token)
|
|
540 (rng-c-error "Missing annotation subject"))
|
|
541 (and (looking-at "\\[")
|
|
542 (rng-c-error "Leading annotation applied to annotation")))
|
|
543 (t (rng-c-error "Invalid grammar content"))))
|
|
544 (or (string-equal rng-c-current-token "")
|
|
545 (rng-c-advance)))
|
|
546
|
|
547 (defun rng-c-parse-div (in-include)
|
|
548 (rng-c-expect "{")
|
|
549 (rng-c-parse-grammar-body "}" in-include))
|
|
550
|
|
551 (defun rng-c-parse-include ()
|
|
552 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
|
|
553 (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
|
|
554 overrides)
|
|
555 (cond ((string-equal rng-c-current-token "{")
|
|
556 (rng-c-advance)
|
|
557 (let ((rng-c-overrides nil))
|
|
558 (rng-c-parse-grammar-body "}" t)
|
|
559 (setq overrides rng-c-overrides))
|
|
560 (setq overrides (rng-c-start-include overrides))
|
|
561 (rng-c-parse-file filename 'include)
|
|
562 (rng-c-end-include overrides))
|
|
563 (t (rng-c-parse-file filename 'include)))))
|
|
564
|
|
565 (defun rng-c-parse-define (name in-include)
|
|
566 (rng-c-advance)
|
|
567 (let ((assign (assoc rng-c-current-token
|
|
568 '(("=" . nil)
|
|
569 ("|=" . choice)
|
|
570 ("&=" . interleave)))))
|
|
571 (or assign
|
|
572 (rng-c-error "Expected assignment operator"))
|
|
573 (rng-c-advance)
|
|
574 (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
|
|
575 (rng-c-define ref (rng-c-parse-pattern)))))
|
|
576
|
|
577 (defvar rng-c-had-except nil)
|
|
578
|
|
579 (defun rng-c-parse-pattern ()
|
|
580 (let* ((rng-c-had-except nil)
|
|
581 (p (rng-c-parse-repeated))
|
|
582 (op (assoc rng-c-current-token
|
|
583 '(("|" . rng-make-choice)
|
|
584 ("," . rng-make-group)
|
|
585 ("&" . rng-make-interleave)))))
|
|
586 (if op
|
|
587 (if rng-c-had-except
|
|
588 (rng-c-error "Parentheses required around pattern using -")
|
|
589 (let* ((patterns (cons p nil))
|
|
590 (tail patterns)
|
|
591 (connector rng-c-current-token))
|
|
592 (while (progn
|
|
593 (rng-c-advance)
|
|
594 (let ((newcdr (cons (rng-c-parse-repeated) nil)))
|
|
595 (setcdr tail newcdr)
|
|
596 (setq tail newcdr))
|
|
597 (string-equal rng-c-current-token connector)))
|
|
598 (funcall (cdr op) patterns)))
|
|
599 p)))
|
|
600
|
|
601 (defun rng-c-parse-repeated ()
|
|
602 (let ((p (rng-c-parse-follow-annotations
|
|
603 (rng-c-parse-primary)))
|
|
604 (op (assoc rng-c-current-token
|
|
605 '(("*" . rng-make-zero-or-more)
|
|
606 ("+" . rng-make-one-or-more)
|
|
607 ("?" . rng-make-optional)))))
|
|
608 (if op
|
|
609 (if rng-c-had-except
|
|
610 (rng-c-error "Parentheses required around pattern using -")
|
|
611 (rng-c-parse-follow-annotations
|
|
612 (progn
|
|
613 (rng-c-advance)
|
|
614 (funcall (cdr op) p))))
|
|
615 p)))
|
|
616
|
|
617 (defun rng-c-parse-primary ()
|
|
618 "Parse a primary expression. The current token must be the first
|
|
619 token of the expression. After parsing the current token should be
|
|
620 token following the primary expression."
|
|
621 (cond ((rng-c-current-token-keyword-p)
|
|
622 (let ((parse-function (get (intern rng-c-current-token)
|
|
623 'rng-c-pattern)))
|
|
624 (or parse-function
|
|
625 (rng-c-error "Keyword %s does not introduce a pattern"
|
|
626 rng-c-current-token))
|
|
627 (rng-c-advance)
|
|
628 (funcall parse-function)))
|
|
629 ((rng-c-current-token-ncname-p)
|
|
630 (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
|
|
631 ((string-equal rng-c-current-token "(")
|
|
632 (rng-c-advance)
|
|
633 (let ((p (rng-c-parse-pattern)))
|
|
634 (rng-c-expect ")")
|
|
635 p))
|
|
636 ((rng-c-current-token-prefixed-name-p)
|
|
637 (let ((name (rng-c-expand-datatype rng-c-current-token)))
|
|
638 (rng-c-advance)
|
|
639 (rng-c-parse-data name)))
|
|
640 ((rng-c-current-token-literal-p)
|
|
641 (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
|
|
642 ((rng-c-current-token-quoted-identifier-p)
|
|
643 (rng-c-advance-with
|
|
644 (rng-c-make-ref (substring rng-c-current-token 1))))
|
|
645 ((string-equal rng-c-current-token "[")
|
|
646 (rng-c-parse-lead-annotation)
|
|
647 (rng-c-parse-primary))
|
|
648 (t (rng-c-error "Invalid pattern"))))
|
|
649
|
|
650 (defun rng-c-parse-parent ()
|
|
651 (and (rng-c-current-token-keyword-p)
|
|
652 (rng-c-error "Keyword following parent was not quoted"
|
|
653 rng-c-current-token))
|
|
654 (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
|
|
655
|
|
656 (defun rng-c-parse-literal ()
|
|
657 (rng-c-fix-escaped-newlines
|
|
658 (apply 'concat (rng-c-parse-literal-segments))))
|
|
659
|
|
660 (defun rng-c-parse-literal-segments ()
|
|
661 (let ((str (rng-c-parse-literal-segment)))
|
|
662 (cons str
|
|
663 (cond ((string-equal rng-c-current-token "~")
|
|
664 (rng-c-advance)
|
|
665 (rng-c-parse-literal-segments))
|
|
666 (t nil)))))
|
|
667
|
|
668 (defun rng-c-parse-literal-segment ()
|
|
669 (or (rng-c-current-token-literal-p)
|
|
670 (rng-c-error "Expected a literal"))
|
|
671 (rng-c-advance-with
|
|
672 (let ((n (if (and (>= (length rng-c-current-token) 6)
|
|
673 (eq (aref rng-c-current-token 0)
|
|
674 (aref rng-c-current-token 1)))
|
|
675 3
|
|
676 1)))
|
|
677 (substring rng-c-current-token n (- n)))))
|
|
678
|
|
679 (defun rng-c-fix-escaped-newlines (str)
|
|
680 (let ((pos 0))
|
|
681 (while (progn
|
|
682 (let ((n (string-match "\C-@" str pos)))
|
|
683 (and n
|
|
684 (aset str n ?\n)
|
|
685 (setq pos (1+ n)))))))
|
|
686 str)
|
|
687
|
|
688 (defun rng-c-parse-identifier-or-keyword ()
|
|
689 (cond ((rng-c-current-token-ncname-p)
|
|
690 (rng-c-advance-with rng-c-current-token))
|
|
691 ((rng-c-current-token-quoted-identifier-p)
|
|
692 (rng-c-advance-with (substring rng-c-current-token 1)))
|
|
693 (t (rng-c-error "Expected identifier or keyword"))))
|
|
694
|
|
695 (put 'string 'rng-c-pattern 'rng-c-parse-string)
|
|
696 (put 'token 'rng-c-pattern 'rng-c-parse-token)
|
|
697 (put 'element 'rng-c-pattern 'rng-c-parse-element)
|
|
698 (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
|
|
699 (put 'list 'rng-c-pattern 'rng-c-parse-list)
|
|
700 (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
|
|
701 (put 'text 'rng-c-pattern 'rng-c-parse-text)
|
|
702 (put 'empty 'rng-c-pattern 'rng-c-parse-empty)
|
|
703 (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
|
|
704 (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
|
|
705 (put 'parent 'rng-c-pattern 'rng-c-parse-parent)
|
|
706 (put 'external 'rng-c-pattern 'rng-c-parse-external)
|
|
707
|
|
708 (defun rng-c-parse-element ()
|
|
709 (let ((name-class (rng-c-parse-name-class nil)))
|
|
710 (rng-c-expect "{")
|
|
711 (let ((pattern (rng-c-parse-pattern)))
|
|
712 (rng-c-expect "}")
|
|
713 (rng-make-element name-class pattern))))
|
|
714
|
|
715 (defun rng-c-parse-attribute ()
|
|
716 (let ((name-class (rng-c-parse-name-class 'attribute)))
|
|
717 (rng-c-expect "{")
|
|
718 (let ((pattern (rng-c-parse-pattern)))
|
|
719 (rng-c-expect "}")
|
|
720 (rng-make-attribute name-class pattern))))
|
|
721
|
|
722 (defun rng-c-parse-name-class (attribute)
|
|
723 (let* ((rng-c-had-except nil)
|
|
724 (name-class
|
|
725 (rng-c-parse-follow-annotations
|
|
726 (rng-c-parse-primary-name-class attribute))))
|
|
727 (if (string-equal rng-c-current-token "|")
|
|
728 (let* ((name-classes (cons name-class nil))
|
|
729 (tail name-classes))
|
|
730 (or (not rng-c-had-except)
|
|
731 (rng-c-error "Parentheses required around name-class using - operator"))
|
|
732 (while (progn
|
|
733 (rng-c-advance)
|
|
734 (let ((newcdr
|
|
735 (cons (rng-c-parse-follow-annotations
|
|
736 (rng-c-parse-primary-name-class attribute))
|
|
737 nil)))
|
|
738 (setcdr tail newcdr)
|
|
739 (setq tail newcdr))
|
|
740 (string-equal rng-c-current-token "|")))
|
|
741 (rng-make-choice-name-class name-classes))
|
|
742 name-class)))
|
|
743
|
|
744 (defun rng-c-parse-primary-name-class (attribute)
|
|
745 (cond ((rng-c-current-token-ncname-p)
|
|
746 (rng-c-advance-with
|
|
747 (rng-make-name-name-class
|
|
748 (rng-make-name (rng-c-unqualified-namespace attribute)
|
|
749 rng-c-current-token))))
|
|
750 ((rng-c-current-token-prefixed-name-p)
|
|
751 (rng-c-advance-with
|
|
752 (rng-make-name-name-class
|
|
753 (rng-c-expand-name rng-c-current-token))))
|
|
754 ((string-equal rng-c-current-token "*")
|
|
755 (let ((except (rng-c-parse-opt-except-name-class attribute)))
|
|
756 (if except
|
|
757 (rng-make-any-name-except-name-class except)
|
|
758 (rng-make-any-name-name-class))))
|
|
759 ((rng-c-current-token-ns-name-p)
|
|
760 (let* ((ns
|
|
761 (rng-c-lookup-prefix (substring rng-c-current-token
|
|
762 0
|
|
763 -2)))
|
|
764 (except (rng-c-parse-opt-except-name-class attribute)))
|
|
765 (if except
|
|
766 (rng-make-ns-name-except-name-class ns except)
|
|
767 (rng-make-ns-name-name-class ns))))
|
|
768 ((string-equal rng-c-current-token "(")
|
|
769 (rng-c-advance)
|
|
770 (let ((name-class (rng-c-parse-name-class attribute)))
|
|
771 (rng-c-expect ")")
|
|
772 name-class))
|
|
773 ((rng-c-current-token-quoted-identifier-p)
|
|
774 (rng-c-advance-with
|
|
775 (rng-make-name-name-class
|
|
776 (rng-make-name (rng-c-unqualified-namespace attribute)
|
|
777 (substring rng-c-current-token 1)))))
|
|
778 ((string-equal rng-c-current-token "[")
|
|
779 (rng-c-parse-lead-annotation)
|
|
780 (rng-c-parse-primary-name-class attribute))
|
|
781 (t (rng-c-error "Bad name class"))))
|
|
782
|
|
783 (defun rng-c-parse-opt-except-name-class (attribute)
|
|
784 (rng-c-advance)
|
|
785 (and (string-equal rng-c-current-token "-")
|
|
786 (or (not rng-c-had-except)
|
|
787 (rng-c-error "Parentheses required around name-class using - operator"))
|
|
788 (setq rng-c-had-except t)
|
|
789 (progn
|
|
790 (rng-c-advance)
|
|
791 (rng-c-parse-primary-name-class attribute))))
|
|
792
|
|
793 (defun rng-c-parse-mixed ()
|
|
794 (rng-c-expect "{")
|
|
795 (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
|
|
796 (rng-c-expect "}")
|
|
797 pattern))
|
|
798
|
|
799 (defun rng-c-parse-list ()
|
|
800 (rng-c-expect "{")
|
|
801 (let ((pattern (rng-make-list (rng-c-parse-pattern))))
|
|
802 (rng-c-expect "}")
|
|
803 pattern))
|
|
804
|
|
805 (defun rng-c-parse-text ()
|
|
806 (rng-make-text))
|
|
807
|
|
808 (defun rng-c-parse-empty ()
|
|
809 (rng-make-empty))
|
|
810
|
|
811 (defun rng-c-parse-not-allowed ()
|
|
812 (rng-make-not-allowed))
|
|
813
|
|
814 (defun rng-c-parse-string ()
|
|
815 (rng-c-parse-data rng-string-datatype))
|
|
816
|
|
817 (defun rng-c-parse-token ()
|
|
818 (rng-c-parse-data rng-token-datatype))
|
|
819
|
|
820 (defun rng-c-parse-data (name)
|
|
821 (if (rng-c-current-token-literal-p)
|
|
822 (rng-make-value name
|
|
823 (rng-c-parse-literal)
|
|
824 (and (car name)
|
|
825 (rng-c-make-context)))
|
|
826 (let ((params (rng-c-parse-optional-params)))
|
|
827 (if (string-equal rng-c-current-token "-")
|
|
828 (progn
|
|
829 (if rng-c-had-except
|
|
830 (rng-c-error "Parentheses required around pattern using -")
|
|
831 (setq rng-c-had-except t))
|
|
832 (rng-c-advance)
|
|
833 (rng-make-data-except name
|
|
834 params
|
|
835 (rng-c-parse-primary)))
|
|
836 (rng-make-data name params)))))
|
|
837
|
|
838 (defun rng-c-parse-optional-params ()
|
|
839 (and (string-equal rng-c-current-token "{")
|
|
840 (let* ((head (cons nil nil))
|
|
841 (tail head))
|
|
842 (rng-c-advance)
|
|
843 (while (not (string-equal rng-c-current-token "}"))
|
|
844 (and (string-equal rng-c-current-token "[")
|
|
845 (rng-c-parse-lead-annotation))
|
|
846 (let ((name (rng-c-parse-identifier-or-keyword)))
|
|
847 (rng-c-expect "=")
|
|
848 (let ((newcdr (cons (cons (intern name)
|
|
849 (rng-c-parse-literal))
|
|
850 nil)))
|
|
851 (setcdr tail newcdr)
|
|
852 (setq tail newcdr))))
|
|
853 (rng-c-advance)
|
|
854 (cdr head))))
|
|
855
|
|
856 (defun rng-c-parse-external ()
|
|
857 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
|
|
858 (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
|
|
859 (rng-c-parse-file filename 'external)))
|
|
860
|
|
861 (defun rng-c-expand-file (uri)
|
|
862 (condition-case err
|
|
863 (rng-uri-file-name (rng-uri-resolve uri
|
|
864 (rng-file-name-uri rng-c-file-name)))
|
|
865 (rng-uri-error
|
|
866 (rng-c-error (cadr err)))))
|
|
867
|
|
868 (defun rng-c-parse-opt-inherit ()
|
|
869 (cond ((string-equal rng-c-current-token "inherit")
|
|
870 (rng-c-advance)
|
|
871 (rng-c-expect "=")
|
|
872 (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
|
|
873 (t rng-c-default-namespace)))
|
|
874
|
|
875 (defun rng-c-parse-grammar ()
|
|
876 (rng-c-expect "{")
|
|
877 (let* ((rng-c-parent-grammar rng-c-current-grammar)
|
|
878 (rng-c-current-grammar (rng-c-make-grammar)))
|
|
879 (rng-c-parse-grammar-body "}")
|
|
880 (rng-c-finish-grammar)))
|
|
881
|
|
882 (defun rng-c-parse-lead-annotation ()
|
|
883 (rng-c-parse-annotation-body)
|
|
884 (and (string-equal rng-c-current-token "[")
|
|
885 (rng-c-error "Multiple leading annotations")))
|
|
886
|
|
887 (defun rng-c-parse-follow-annotations (obj)
|
|
888 (while (string-equal rng-c-current-token ">>")
|
|
889 (rng-c-advance)
|
|
890 (if (rng-c-current-token-prefixed-name-p)
|
|
891 (rng-c-advance)
|
|
892 (rng-c-parse-identifier-or-keyword))
|
|
893 (rng-c-parse-annotation-body t))
|
|
894 obj)
|
|
895
|
|
896 (defun rng-c-parse-annotation-element ()
|
|
897 (rng-c-advance)
|
|
898 (rng-c-parse-annotation-body t))
|
|
899
|
|
900 ;; XXX need stricter checking of attribute names
|
|
901 ;; XXX don't allow attributes after text
|
|
902
|
|
903 (defun rng-c-parse-annotation-body (&optional allow-text)
|
|
904 "Current token is [. Parse up to matching ]. Current token after
|
|
905 parse is token following ]."
|
|
906 (or (string-equal rng-c-current-token "[")
|
|
907 (rng-c-error "Expected ["))
|
|
908 (rng-c-advance)
|
|
909 (while (not (string-equal rng-c-current-token "]"))
|
|
910 (cond ((rng-c-current-token-literal-p)
|
|
911 (or allow-text
|
|
912 (rng-c-error "Out of place text within annotation"))
|
|
913 (rng-c-parse-literal))
|
|
914 (t
|
|
915 (if (rng-c-current-token-prefixed-name-p)
|
|
916 (rng-c-advance)
|
|
917 (rng-c-parse-identifier-or-keyword))
|
|
918 (cond ((string-equal rng-c-current-token "[")
|
|
919 (rng-c-parse-annotation-body t))
|
|
920 ((string-equal rng-c-current-token "=")
|
|
921 (rng-c-advance)
|
|
922 (rng-c-parse-literal))
|
|
923 (t (rng-c-error "Expected = or ["))))))
|
|
924 (rng-c-advance))
|
|
925
|
|
926 (defun rng-c-advance-with (pattern)
|
|
927 (rng-c-advance)
|
|
928 pattern)
|
|
929
|
|
930 (defun rng-c-expect (str)
|
|
931 (or (string-equal rng-c-current-token str)
|
|
932 (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
|
|
933 (rng-c-advance))
|
|
934
|
|
935 (provide 'rng-cmpct)
|
|
936
|
|
937 ;;; rng-cmpct.el
|
86379
|
938
|
|
939 ;; arch-tag: 90395eb1-283b-4146-bbc1-6d6ef1704e57
|