86361
|
1 ;;; rng-cmpct.el --- parsing of RELAX NG Compact Syntax schemas
|
|
2
|
87665
|
3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
|
86361
|
4
|
|
5 ;; Author: James Clark
|
|
6 ;; Keywords: XML, RelaxNG
|
|
7
|
86545
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 3, or (at your option)
|
|
13 ;; any later version.
|
86361
|
14
|
86545
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
86361
|
19
|
86545
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
23 ;; Boston, MA 02110-1301, USA.
|
86361
|
24
|
|
25 ;;; Commentary:
|
|
26
|
|
27 ;; This parses a RELAX NG Compact Syntax schema into the form
|
|
28 ;; specified in rng-pttrn.el.
|
|
29 ;;
|
|
30 ;; RELAX NG Compact Syntax is specified by
|
|
31 ;; http://relaxng.org/compact.html
|
|
32 ;;
|
|
33 ;; This file uses the prefix "rng-c-".
|
|
34
|
|
35 ;;; Code:
|
|
36
|
|
37 (require 'nxml-util)
|
|
38 (require 'rng-util)
|
|
39 (require 'rng-uri)
|
|
40 (require 'rng-pttrn)
|
|
41
|
|
42 ;;;###autoload
|
|
43 (defun rng-c-load-schema (filename)
|
|
44 "Load a schema in RELAX NG compact syntax from FILENAME.
|
|
45 Return a pattern."
|
|
46 (rng-c-parse-file filename))
|
|
47
|
|
48 ;;; Error handling
|
|
49
|
|
50 (put 'rng-c-incorrect-schema
|
|
51 'error-conditions
|
|
52 '(error rng-error nxml-file-parse-error rng-c-incorrect-schema))
|
|
53
|
|
54 (put 'rng-c-incorrect-schema
|
|
55 'error-message
|
|
56 "Incorrect schema")
|
|
57
|
|
58 (defun rng-c-signal-incorrect-schema (filename pos message)
|
|
59 (nxml-signal-file-parse-error filename
|
|
60 pos
|
|
61 message
|
|
62 'rng-c-incorrect-schema))
|
|
63
|
|
64 ;;; Lexing
|
|
65
|
|
66 (defconst rng-c-keywords
|
|
67 '("attribute"
|
|
68 "default"
|
|
69 "datatypes"
|
|
70 "div"
|
|
71 "element"
|
|
72 "empty"
|
|
73 "external"
|
|
74 "grammar"
|
|
75 "include"
|
|
76 "inherit"
|
|
77 "list"
|
|
78 "mixed"
|
|
79 "namespace"
|
|
80 "notAllowed"
|
|
81 "parent"
|
|
82 "start"
|
|
83 "string"
|
|
84 "text"
|
|
85 "token")
|
|
86 "List of strings that are keywords in the compact syntax.")
|
|
87
|
|
88 (defconst rng-c-anchored-keyword-re
|
|
89 (concat "\\`\\(" (regexp-opt rng-c-keywords) "\\)\\'")
|
|
90 "Regular expression to match a keyword in the compact syntax.")
|
|
91
|
|
92 (defvar rng-c-syntax-table nil
|
|
93 "Syntax table for parsing the compact syntax.")
|
|
94
|
|
95 (if rng-c-syntax-table
|
|
96 ()
|
|
97 (setq rng-c-syntax-table (make-syntax-table))
|
|
98 (modify-syntax-entry ?# "<" rng-c-syntax-table)
|
|
99 (modify-syntax-entry ?\n ">" rng-c-syntax-table)
|
|
100 (modify-syntax-entry ?- "w" rng-c-syntax-table)
|
|
101 (modify-syntax-entry ?. "w" rng-c-syntax-table)
|
|
102 (modify-syntax-entry ?_ "w" rng-c-syntax-table)
|
|
103 (modify-syntax-entry ?: "_" rng-c-syntax-table))
|
|
104
|
|
105 (defconst rng-c-literal-1-re
|
|
106 "'\\(''\\([^']\\|'[^']\\|''[^']\\)*''\\|[^'\n]*\\)'"
|
|
107 "Regular expression to match a single-quoted literal.")
|
|
108
|
|
109 (defconst rng-c-literal-2-re
|
|
110 (replace-regexp-in-string "'" "\"" rng-c-literal-1-re)
|
|
111 "Regular expression to match a double-quoted literal.")
|
|
112
|
|
113 (defconst rng-c-ncname-re "\\w+")
|
|
114
|
|
115 (defconst rng-c-anchored-ncname-re
|
|
116 (concat "\\`" rng-c-ncname-re "\\'"))
|
|
117
|
|
118 (defconst rng-c-token-re
|
|
119 (concat "[&|]=" "\\|"
|
|
120 "[][()|&,*+?{}~=-]" "\\|"
|
|
121 rng-c-literal-1-re "\\|"
|
|
122 rng-c-literal-2-re "\\|"
|
|
123 rng-c-ncname-re "\\(:\\(\\*\\|" rng-c-ncname-re "\\)\\)?" "\\|"
|
|
124 "\\\\" rng-c-ncname-re "\\|"
|
|
125 ">>")
|
|
126 "Regular expression to match a token in the compact syntax.")
|
|
127
|
|
128 (defun rng-c-init-buffer ()
|
|
129 (setq case-fold-search nil) ; automatically becomes buffer-local when set
|
|
130 (set-buffer-multibyte t)
|
|
131 (set-syntax-table rng-c-syntax-table))
|
|
132
|
|
133 (defvar rng-c-current-token nil)
|
|
134 (make-variable-buffer-local 'rng-c-current-token)
|
|
135
|
|
136 (defun rng-c-advance ()
|
|
137 (cond ((looking-at rng-c-token-re)
|
|
138 (setq rng-c-current-token (match-string 0))
|
|
139 (goto-char (match-end 0))
|
|
140 (forward-comment (point-max)))
|
|
141 ((= (point) (point-max))
|
|
142 (setq rng-c-current-token ""))
|
|
143 (t (rng-c-error "Invalid token"))))
|
|
144
|
|
145 (defconst rng-c-anchored-datatype-name-re
|
|
146 (concat "\\`" rng-c-ncname-re ":" rng-c-ncname-re "\\'"))
|
|
147
|
|
148 (defsubst rng-c-current-token-keyword-p ()
|
|
149 (string-match rng-c-anchored-keyword-re rng-c-current-token))
|
|
150
|
|
151 (defsubst rng-c-current-token-prefixed-name-p ()
|
|
152 (string-match rng-c-anchored-datatype-name-re rng-c-current-token))
|
|
153
|
|
154 (defsubst rng-c-current-token-literal-p ()
|
|
155 (string-match "\\`['\"]" rng-c-current-token))
|
|
156
|
|
157 (defsubst rng-c-current-token-quoted-identifier-p ()
|
|
158 (string-match "\\`\\\\" rng-c-current-token))
|
|
159
|
|
160 (defsubst rng-c-current-token-ncname-p ()
|
|
161 (string-match rng-c-anchored-ncname-re rng-c-current-token))
|
|
162
|
|
163 (defsubst rng-c-current-token-ns-name-p ()
|
|
164 (let ((len (length rng-c-current-token)))
|
|
165 (and (> len 0)
|
|
166 (= (aref rng-c-current-token (- len 1)) ?*))))
|
|
167
|
|
168 ;;; Namespaces
|
|
169
|
|
170 (defvar rng-c-inherit-namespace nil)
|
|
171
|
|
172 (defvar rng-c-default-namespace nil)
|
|
173
|
|
174 (defvar rng-c-default-namespace-declared nil)
|
|
175
|
|
176 (defvar rng-c-namespace-decls nil
|
|
177 "Alist of namespace declarations.")
|
|
178
|
|
179 (defconst rng-c-no-namespace nil)
|
|
180
|
|
181 (defun rng-c-declare-standard-namespaces ()
|
|
182 (setq rng-c-namespace-decls
|
|
183 (cons (cons "xml" nxml-xml-namespace-uri)
|
|
184 rng-c-namespace-decls))
|
|
185 (when (and (not rng-c-default-namespace-declared)
|
|
186 rng-c-inherit-namespace)
|
|
187 (setq rng-c-default-namespace rng-c-inherit-namespace)))
|
|
188
|
|
189 (defun rng-c-expand-name (prefixed-name)
|
|
190 (let ((i (string-match ":" prefixed-name)))
|
|
191 (rng-make-name (rng-c-lookup-prefix (substring prefixed-name
|
|
192 0
|
|
193 i))
|
|
194 (substring prefixed-name (+ i 1)))))
|
|
195
|
|
196 (defun rng-c-lookup-prefix (prefix)
|
|
197 (let ((binding (assoc prefix rng-c-namespace-decls)))
|
|
198 (or binding (rng-c-error "Undefined prefix %s" prefix))
|
|
199 (cdr binding)))
|
|
200
|
|
201 (defun rng-c-unqualified-namespace (attribute)
|
|
202 (if attribute
|
|
203 rng-c-no-namespace
|
|
204 rng-c-default-namespace))
|
|
205
|
|
206 (defun rng-c-make-context ()
|
|
207 (cons rng-c-default-namespace rng-c-namespace-decls))
|
|
208
|
|
209 ;;; Datatypes
|
|
210
|
|
211 (defconst rng-string-datatype
|
|
212 (rng-make-datatype rng-builtin-datatypes-uri "string"))
|
|
213
|
|
214 (defconst rng-token-datatype
|
|
215 (rng-make-datatype rng-builtin-datatypes-uri "token"))
|
|
216
|
|
217 (defvar rng-c-datatype-decls nil
|
|
218 "Alist of datatype declarations.
|
|
219 Contains a list of pairs (PREFIX . URI) where PREFIX is a string
|
|
220 and URI is a symbol.")
|
|
221
|
|
222 (defun rng-c-declare-standard-datatypes ()
|
|
223 (setq rng-c-datatype-decls
|
|
224 (cons (cons "xsd" rng-xsd-datatypes-uri)
|
|
225 rng-c-datatype-decls)))
|
|
226
|
|
227 (defun rng-c-lookup-datatype-prefix (prefix)
|
|
228 (let ((binding (assoc prefix rng-c-datatype-decls)))
|
|
229 (or binding (rng-c-error "Undefined prefix %s" prefix))
|
|
230 (cdr binding)))
|
|
231
|
|
232 (defun rng-c-expand-datatype (prefixed-name)
|
|
233 (let ((i (string-match ":" prefixed-name)))
|
|
234 (rng-make-datatype
|
|
235 (rng-c-lookup-datatype-prefix (substring prefixed-name 0 i))
|
|
236 (substring prefixed-name (+ i 1)))))
|
|
237
|
|
238 ;;; Grammars
|
|
239
|
|
240 (defvar rng-c-current-grammar nil)
|
|
241 (defvar rng-c-parent-grammar nil)
|
|
242
|
|
243 (defun rng-c-make-grammar ()
|
|
244 (make-hash-table :test 'equal))
|
|
245
|
|
246 (defconst rng-c-about-override-slot 0)
|
|
247 (defconst rng-c-about-combine-slot 1)
|
|
248
|
|
249 (defun rng-c-lookup-create (name grammar)
|
|
250 "Return a def object for NAME. A def object is a pair
|
|
251 \(ABOUT . REF) where REF is returned by `rng-make-ref'. ABOUT is a
|
|
252 two-element vector [OVERRIDE COMBINE]. COMBINE is either nil, choice
|
|
253 or interleave. OVERRIDE is either nil, require or t."
|
|
254 (let ((def (gethash name grammar)))
|
|
255 (if def
|
|
256 def
|
|
257 (progn
|
|
258 (setq def (cons (vector nil nil) (rng-make-ref name)))
|
|
259 (puthash name def grammar)
|
|
260 def))))
|
|
261
|
|
262 (defun rng-c-make-ref (name)
|
|
263 (or rng-c-current-grammar
|
|
264 (rng-c-error "Reference not in a grammar"))
|
|
265 (cdr (rng-c-lookup-create name rng-c-current-grammar)))
|
|
266
|
|
267 (defun rng-c-make-parent-ref (name)
|
|
268 (or rng-c-parent-grammar
|
|
269 (rng-c-error "Reference to non-existent parent grammar"))
|
|
270 (cdr (rng-c-lookup-create name rng-c-parent-grammar)))
|
|
271
|
|
272 (defvar rng-c-overrides nil
|
|
273 "Contains a list of (NAME . DEF) pairs.")
|
|
274
|
|
275 (defun rng-c-merge-combine (def combine name)
|
|
276 (let* ((about (car def))
|
|
277 (current-combine (aref about rng-c-about-combine-slot)))
|
|
278 (if combine
|
|
279 (if current-combine
|
|
280 (or (eq combine current-combine)
|
|
281 (rng-c-error "Inconsistent combine for %s" name))
|
|
282 (aset about rng-c-about-combine-slot combine))
|
|
283 current-combine)))
|
|
284
|
|
285 (defun rng-c-prepare-define (name combine in-include)
|
|
286 (let* ((def (rng-c-lookup-create name rng-c-current-grammar))
|
|
287 (about (car def))
|
|
288 (overridden (aref about rng-c-about-override-slot)))
|
|
289 (and in-include
|
|
290 (setq rng-c-overrides (cons (cons name def) rng-c-overrides)))
|
|
291 (cond (overridden (and (eq overridden 'require)
|
|
292 (aset about rng-c-about-override-slot t))
|
|
293 nil)
|
|
294 (t (setq combine (rng-c-merge-combine def combine name))
|
|
295 (and (rng-ref-get (cdr def))
|
|
296 (not combine)
|
|
297 (rng-c-error "Duplicate definition of %s" name))
|
|
298 def))))
|
|
299
|
|
300 (defun rng-c-start-include (overrides)
|
|
301 (mapcar (lambda (name-def)
|
|
302 (let* ((def (cdr name-def))
|
|
303 (about (car def))
|
|
304 (save (aref about rng-c-about-override-slot)))
|
|
305 (aset about rng-c-about-override-slot 'require)
|
|
306 (cons save name-def)))
|
|
307 overrides))
|
|
308
|
|
309 (defun rng-c-end-include (overrides)
|
|
310 (mapcar (lambda (o)
|
|
311 (let* ((saved (car o))
|
|
312 (name-def (cdr o))
|
|
313 (name (car name-def))
|
|
314 (def (cdr name-def))
|
|
315 (about (car def)))
|
|
316 (and (eq (aref about rng-c-about-override-slot) 'require)
|
|
317 (rng-c-error "Definition of %s in include did not override definition in included file" name))
|
|
318 (aset about rng-c-about-override-slot saved)))
|
|
319 overrides))
|
|
320
|
|
321 (defun rng-c-define (def value)
|
|
322 (and def
|
|
323 (let ((current-value (rng-ref-get (cdr def))))
|
|
324 (rng-ref-set (cdr def)
|
|
325 (if current-value
|
|
326 (if (eq (aref (car def) rng-c-about-combine-slot)
|
|
327 'choice)
|
|
328 (rng-make-choice (list current-value value))
|
|
329 (rng-make-interleave (list current-value value)))
|
|
330 value)))))
|
|
331
|
|
332 (defun rng-c-finish-grammar ()
|
|
333 (maphash (lambda (key def)
|
|
334 (or (rng-ref-get (cdr def))
|
|
335 (rng-c-error "Reference to undefined pattern %s" key)))
|
|
336 rng-c-current-grammar)
|
|
337 (rng-ref-get (cdr (or (gethash 'start rng-c-current-grammar)
|
|
338 (rng-c-error "No definition of start")))))
|
|
339
|
|
340 ;;; Parsing
|
|
341
|
|
342 (defvar rng-c-escape-positions nil)
|
|
343 (make-variable-buffer-local 'rng-c-escape-positions)
|
|
344
|
|
345 (defvar rng-c-file-name nil)
|
|
346 (make-variable-buffer-local 'rng-c-file-name)
|
|
347
|
|
348 (defvar rng-c-file-index nil)
|
|
349
|
|
350 (defun rng-c-parse-file (filename &optional context)
|
|
351 (save-excursion
|
|
352 (set-buffer (get-buffer-create (rng-c-buffer-name context)))
|
|
353 (erase-buffer)
|
|
354 (rng-c-init-buffer)
|
|
355 (setq rng-c-file-name
|
|
356 (car (insert-file-contents filename)))
|
|
357 (setq rng-c-escape-positions nil)
|
|
358 (rng-c-process-escapes)
|
|
359 (rng-c-parse-top-level context)))
|
|
360
|
|
361 (defun rng-c-buffer-name (context)
|
|
362 (concat " *RNC Input"
|
|
363 (if context
|
|
364 (concat "<"
|
|
365 (number-to-string (setq rng-c-file-index
|
|
366 (1+ rng-c-file-index)))
|
|
367 ">*")
|
|
368 (setq rng-c-file-index 1)
|
|
369 "*")))
|
|
370
|
|
371 (defun rng-c-process-escapes ()
|
|
372 ;; Check for any nuls, since we will use nul chars
|
|
373 ;; for internal purposes.
|
|
374 (let ((pos (search-forward "\C-@" nil t)))
|
|
375 (and pos
|
|
376 (rng-c-error "Nul character found (binary file?)")))
|
|
377 (let ((offset 0))
|
|
378 (while (re-search-forward "\\\\x+{\\([0-9a-fA-F]+\\)}"
|
|
379 (point-max)
|
|
380 t)
|
|
381 (let* ((ch (decode-char 'ucs (string-to-number (match-string 1) 16))))
|
|
382 (if (and ch (> ch 0))
|
|
383 (let ((begin (match-beginning 0))
|
|
384 (end (match-end 0)))
|
|
385 (delete-region begin end)
|
|
386 ;; Represent an escaped newline by nul, so
|
|
387 ;; that we can distinguish it from a literal newline.
|
|
388 ;; We will translate it back into a real newline later.
|
|
389 (insert (if (eq ch ?\n) 0 ch))
|
|
390 (setq offset (+ offset (- end begin 1)))
|
|
391 (setq rng-c-escape-positions
|
|
392 (cons (cons (point) offset)
|
|
393 rng-c-escape-positions)))
|
|
394 (rng-c-error "Invalid character escape")))))
|
|
395 (goto-char 1))
|
|
396
|
|
397 (defun rng-c-translate-position (pos)
|
|
398 (let ((tem rng-c-escape-positions))
|
|
399 (while (and tem
|
|
400 (> (caar tem) pos))
|
|
401 (setq tem (cdr tem)))
|
|
402 (if tem
|
|
403 (+ pos (cdar tem))
|
|
404 pos)))
|
|
405
|
|
406 (defun rng-c-error (&rest args)
|
|
407 (rng-c-signal-incorrect-schema rng-c-file-name
|
|
408 (rng-c-translate-position (point))
|
|
409 (apply 'format args)))
|
|
410
|
|
411 (defun rng-c-parse-top-level (context)
|
|
412 (let ((rng-c-namespace-decls nil)
|
|
413 (rng-c-default-namespace nil)
|
|
414 (rng-c-datatype-decls nil))
|
|
415 (goto-char (point-min))
|
|
416 (forward-comment (point-max))
|
|
417 (rng-c-advance)
|
|
418 (rng-c-parse-decls)
|
|
419 (let ((p (if (eq context 'include)
|
|
420 (if (rng-c-implicit-grammar-p)
|
|
421 (rng-c-parse-grammar-body "")
|
|
422 (rng-c-parse-included-grammar))
|
|
423 (if (rng-c-implicit-grammar-p)
|
|
424 (rng-c-parse-implicit-grammar)
|
|
425 (rng-c-parse-pattern)))))
|
|
426 (or (string-equal rng-c-current-token "")
|
|
427 (rng-c-error "Unexpected characters after pattern"))
|
|
428 p)))
|
|
429
|
|
430 (defun rng-c-parse-included-grammar ()
|
|
431 (or (string-equal rng-c-current-token "grammar")
|
|
432 (rng-c-error "Included schema is not a grammar"))
|
|
433 (rng-c-advance)
|
|
434 (rng-c-expect "{")
|
|
435 (rng-c-parse-grammar-body "}"))
|
|
436
|
|
437 (defun rng-c-implicit-grammar-p ()
|
|
438 (or (and (or (rng-c-current-token-prefixed-name-p)
|
|
439 (rng-c-current-token-quoted-identifier-p)
|
|
440 (and (rng-c-current-token-ncname-p)
|
|
441 (not (rng-c-current-token-keyword-p))))
|
|
442 (looking-at "\\["))
|
|
443 (and (string-equal rng-c-current-token "[")
|
|
444 (rng-c-parse-lead-annotation)
|
|
445 nil)
|
|
446 (member rng-c-current-token '("div" "include" ""))
|
|
447 (looking-at "[|&]?=")))
|
|
448
|
|
449 (defun rng-c-parse-decls ()
|
|
450 (setq rng-c-default-namespace-declared nil)
|
|
451 (while (progn
|
|
452 (let ((binding
|
|
453 (assoc rng-c-current-token
|
|
454 '(("namespace" . rng-c-parse-namespace)
|
|
455 ("datatypes" . rng-c-parse-datatypes)
|
|
456 ("default" . rng-c-parse-default)))))
|
|
457 (if binding
|
|
458 (progn
|
|
459 (rng-c-advance)
|
|
460 (funcall (cdr binding))
|
|
461 t)
|
|
462 nil))))
|
|
463 (rng-c-declare-standard-datatypes)
|
|
464 (rng-c-declare-standard-namespaces))
|
|
465
|
|
466 (defun rng-c-parse-datatypes ()
|
|
467 (let ((prefix (rng-c-parse-identifier-or-keyword)))
|
|
468 (or (not (assoc prefix rng-c-datatype-decls))
|
|
469 (rng-c-error "Duplicate datatypes declaration for prefix %s" prefix))
|
|
470 (rng-c-expect "=")
|
|
471 (setq rng-c-datatype-decls
|
|
472 (cons (cons prefix
|
|
473 (rng-make-datatypes-uri (rng-c-parse-literal)))
|
|
474 rng-c-datatype-decls))))
|
|
475
|
|
476 (defun rng-c-parse-namespace ()
|
|
477 (rng-c-declare-namespace nil
|
|
478 (rng-c-parse-identifier-or-keyword)))
|
|
479
|
|
480 (defun rng-c-parse-default ()
|
|
481 (rng-c-expect "namespace")
|
|
482 (rng-c-declare-namespace t
|
|
483 (if (string-equal rng-c-current-token "=")
|
|
484 nil
|
|
485 (rng-c-parse-identifier-or-keyword))))
|
|
486
|
|
487 (defun rng-c-declare-namespace (declare-default prefix)
|
|
488 (rng-c-expect "=")
|
|
489 (let ((ns (cond ((string-equal rng-c-current-token "inherit")
|
|
490 (rng-c-advance)
|
|
491 rng-c-inherit-namespace)
|
|
492 (t
|
|
493 (nxml-make-namespace (rng-c-parse-literal))))))
|
|
494 (and prefix
|
|
495 (or (not (assoc prefix rng-c-namespace-decls))
|
|
496 (rng-c-error "Duplicate namespace declaration for prefix %s"
|
|
497 prefix))
|
|
498 (setq rng-c-namespace-decls
|
|
499 (cons (cons prefix ns) rng-c-namespace-decls)))
|
|
500 (and declare-default
|
|
501 (or (not rng-c-default-namespace-declared)
|
|
502 (rng-c-error "Duplicate default namespace declaration"))
|
|
503 (setq rng-c-default-namespace-declared t)
|
|
504 (setq rng-c-default-namespace ns))))
|
|
505
|
|
506 (defun rng-c-parse-implicit-grammar ()
|
|
507 (let* ((rng-c-parent-grammar rng-c-current-grammar)
|
|
508 (rng-c-current-grammar (rng-c-make-grammar)))
|
|
509 (rng-c-parse-grammar-body "")
|
|
510 (rng-c-finish-grammar)))
|
|
511
|
|
512 (defun rng-c-parse-grammar-body (close-token &optional in-include)
|
|
513 (while (not (string-equal rng-c-current-token close-token))
|
|
514 (cond ((rng-c-current-token-keyword-p)
|
|
515 (let ((kw (intern rng-c-current-token)))
|
|
516 (cond ((eq kw 'start)
|
|
517 (rng-c-parse-define 'start in-include))
|
|
518 ((eq kw 'div)
|
|
519 (rng-c-advance)
|
|
520 (rng-c-parse-div in-include))
|
|
521 ((eq kw 'include)
|
|
522 (and in-include
|
|
523 (rng-c-error "Nested include"))
|
|
524 (rng-c-advance)
|
|
525 (rng-c-parse-include))
|
|
526 (t (rng-c-error "Invalid grammar keyword")))))
|
|
527 ((rng-c-current-token-ncname-p)
|
|
528 (if (looking-at "\\[")
|
|
529 (rng-c-parse-annotation-element)
|
|
530 (rng-c-parse-define rng-c-current-token
|
|
531 in-include)))
|
|
532 ((rng-c-current-token-quoted-identifier-p)
|
|
533 (if (looking-at "\\[")
|
|
534 (rng-c-parse-annotation-element)
|
|
535 (rng-c-parse-define (substring rng-c-current-token 1)
|
|
536 in-include)))
|
|
537 ((rng-c-current-token-prefixed-name-p)
|
|
538 (rng-c-parse-annotation-element))
|
|
539 ((string-equal rng-c-current-token "[")
|
|
540 (rng-c-parse-lead-annotation)
|
|
541 (and (string-equal rng-c-current-token close-token)
|
|
542 (rng-c-error "Missing annotation subject"))
|
|
543 (and (looking-at "\\[")
|
|
544 (rng-c-error "Leading annotation applied to annotation")))
|
|
545 (t (rng-c-error "Invalid grammar content"))))
|
|
546 (or (string-equal rng-c-current-token "")
|
|
547 (rng-c-advance)))
|
|
548
|
|
549 (defun rng-c-parse-div (in-include)
|
|
550 (rng-c-expect "{")
|
|
551 (rng-c-parse-grammar-body "}" in-include))
|
|
552
|
|
553 (defun rng-c-parse-include ()
|
|
554 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
|
|
555 (rng-c-inherit-namespace (rng-c-parse-opt-inherit))
|
|
556 overrides)
|
|
557 (cond ((string-equal rng-c-current-token "{")
|
|
558 (rng-c-advance)
|
|
559 (let ((rng-c-overrides nil))
|
|
560 (rng-c-parse-grammar-body "}" t)
|
|
561 (setq overrides rng-c-overrides))
|
|
562 (setq overrides (rng-c-start-include overrides))
|
|
563 (rng-c-parse-file filename 'include)
|
|
564 (rng-c-end-include overrides))
|
|
565 (t (rng-c-parse-file filename 'include)))))
|
|
566
|
|
567 (defun rng-c-parse-define (name in-include)
|
|
568 (rng-c-advance)
|
|
569 (let ((assign (assoc rng-c-current-token
|
|
570 '(("=" . nil)
|
|
571 ("|=" . choice)
|
|
572 ("&=" . interleave)))))
|
|
573 (or assign
|
|
574 (rng-c-error "Expected assignment operator"))
|
|
575 (rng-c-advance)
|
|
576 (let ((ref (rng-c-prepare-define name (cdr assign) in-include)))
|
|
577 (rng-c-define ref (rng-c-parse-pattern)))))
|
|
578
|
|
579 (defvar rng-c-had-except nil)
|
|
580
|
|
581 (defun rng-c-parse-pattern ()
|
|
582 (let* ((rng-c-had-except nil)
|
|
583 (p (rng-c-parse-repeated))
|
|
584 (op (assoc rng-c-current-token
|
|
585 '(("|" . rng-make-choice)
|
|
586 ("," . rng-make-group)
|
|
587 ("&" . rng-make-interleave)))))
|
|
588 (if op
|
|
589 (if rng-c-had-except
|
|
590 (rng-c-error "Parentheses required around pattern using -")
|
|
591 (let* ((patterns (cons p nil))
|
|
592 (tail patterns)
|
|
593 (connector rng-c-current-token))
|
|
594 (while (progn
|
|
595 (rng-c-advance)
|
|
596 (let ((newcdr (cons (rng-c-parse-repeated) nil)))
|
|
597 (setcdr tail newcdr)
|
|
598 (setq tail newcdr))
|
|
599 (string-equal rng-c-current-token connector)))
|
|
600 (funcall (cdr op) patterns)))
|
|
601 p)))
|
|
602
|
|
603 (defun rng-c-parse-repeated ()
|
|
604 (let ((p (rng-c-parse-follow-annotations
|
|
605 (rng-c-parse-primary)))
|
|
606 (op (assoc rng-c-current-token
|
|
607 '(("*" . rng-make-zero-or-more)
|
|
608 ("+" . rng-make-one-or-more)
|
|
609 ("?" . rng-make-optional)))))
|
|
610 (if op
|
|
611 (if rng-c-had-except
|
|
612 (rng-c-error "Parentheses required around pattern using -")
|
|
613 (rng-c-parse-follow-annotations
|
|
614 (progn
|
|
615 (rng-c-advance)
|
|
616 (funcall (cdr op) p))))
|
|
617 p)))
|
|
618
|
|
619 (defun rng-c-parse-primary ()
|
|
620 "Parse a primary expression. The current token must be the first
|
|
621 token of the expression. After parsing the current token should be
|
|
622 token following the primary expression."
|
|
623 (cond ((rng-c-current-token-keyword-p)
|
|
624 (let ((parse-function (get (intern rng-c-current-token)
|
|
625 'rng-c-pattern)))
|
|
626 (or parse-function
|
|
627 (rng-c-error "Keyword %s does not introduce a pattern"
|
|
628 rng-c-current-token))
|
|
629 (rng-c-advance)
|
|
630 (funcall parse-function)))
|
|
631 ((rng-c-current-token-ncname-p)
|
|
632 (rng-c-advance-with (rng-c-make-ref rng-c-current-token)))
|
|
633 ((string-equal rng-c-current-token "(")
|
|
634 (rng-c-advance)
|
|
635 (let ((p (rng-c-parse-pattern)))
|
|
636 (rng-c-expect ")")
|
|
637 p))
|
|
638 ((rng-c-current-token-prefixed-name-p)
|
|
639 (let ((name (rng-c-expand-datatype rng-c-current-token)))
|
|
640 (rng-c-advance)
|
|
641 (rng-c-parse-data name)))
|
|
642 ((rng-c-current-token-literal-p)
|
|
643 (rng-make-value rng-token-datatype (rng-c-parse-literal) nil))
|
|
644 ((rng-c-current-token-quoted-identifier-p)
|
|
645 (rng-c-advance-with
|
|
646 (rng-c-make-ref (substring rng-c-current-token 1))))
|
|
647 ((string-equal rng-c-current-token "[")
|
|
648 (rng-c-parse-lead-annotation)
|
|
649 (rng-c-parse-primary))
|
|
650 (t (rng-c-error "Invalid pattern"))))
|
|
651
|
|
652 (defun rng-c-parse-parent ()
|
|
653 (and (rng-c-current-token-keyword-p)
|
|
654 (rng-c-error "Keyword following parent was not quoted"
|
|
655 rng-c-current-token))
|
|
656 (rng-c-make-parent-ref (rng-c-parse-identifier-or-keyword)))
|
|
657
|
|
658 (defun rng-c-parse-literal ()
|
|
659 (rng-c-fix-escaped-newlines
|
|
660 (apply 'concat (rng-c-parse-literal-segments))))
|
|
661
|
|
662 (defun rng-c-parse-literal-segments ()
|
|
663 (let ((str (rng-c-parse-literal-segment)))
|
|
664 (cons str
|
|
665 (cond ((string-equal rng-c-current-token "~")
|
|
666 (rng-c-advance)
|
|
667 (rng-c-parse-literal-segments))
|
|
668 (t nil)))))
|
|
669
|
|
670 (defun rng-c-parse-literal-segment ()
|
|
671 (or (rng-c-current-token-literal-p)
|
|
672 (rng-c-error "Expected a literal"))
|
|
673 (rng-c-advance-with
|
|
674 (let ((n (if (and (>= (length rng-c-current-token) 6)
|
|
675 (eq (aref rng-c-current-token 0)
|
|
676 (aref rng-c-current-token 1)))
|
|
677 3
|
|
678 1)))
|
|
679 (substring rng-c-current-token n (- n)))))
|
|
680
|
|
681 (defun rng-c-fix-escaped-newlines (str)
|
|
682 (let ((pos 0))
|
|
683 (while (progn
|
|
684 (let ((n (string-match "\C-@" str pos)))
|
|
685 (and n
|
|
686 (aset str n ?\n)
|
|
687 (setq pos (1+ n)))))))
|
|
688 str)
|
|
689
|
|
690 (defun rng-c-parse-identifier-or-keyword ()
|
|
691 (cond ((rng-c-current-token-ncname-p)
|
|
692 (rng-c-advance-with rng-c-current-token))
|
|
693 ((rng-c-current-token-quoted-identifier-p)
|
|
694 (rng-c-advance-with (substring rng-c-current-token 1)))
|
|
695 (t (rng-c-error "Expected identifier or keyword"))))
|
|
696
|
|
697 (put 'string 'rng-c-pattern 'rng-c-parse-string)
|
|
698 (put 'token 'rng-c-pattern 'rng-c-parse-token)
|
|
699 (put 'element 'rng-c-pattern 'rng-c-parse-element)
|
|
700 (put 'attribute 'rng-c-pattern 'rng-c-parse-attribute)
|
|
701 (put 'list 'rng-c-pattern 'rng-c-parse-list)
|
|
702 (put 'mixed 'rng-c-pattern 'rng-c-parse-mixed)
|
|
703 (put 'text 'rng-c-pattern 'rng-c-parse-text)
|
|
704 (put 'empty 'rng-c-pattern 'rng-c-parse-empty)
|
|
705 (put 'notAllowed 'rng-c-pattern 'rng-c-parse-not-allowed)
|
|
706 (put 'grammar 'rng-c-pattern 'rng-c-parse-grammar)
|
|
707 (put 'parent 'rng-c-pattern 'rng-c-parse-parent)
|
|
708 (put 'external 'rng-c-pattern 'rng-c-parse-external)
|
|
709
|
|
710 (defun rng-c-parse-element ()
|
|
711 (let ((name-class (rng-c-parse-name-class nil)))
|
|
712 (rng-c-expect "{")
|
|
713 (let ((pattern (rng-c-parse-pattern)))
|
|
714 (rng-c-expect "}")
|
|
715 (rng-make-element name-class pattern))))
|
|
716
|
|
717 (defun rng-c-parse-attribute ()
|
|
718 (let ((name-class (rng-c-parse-name-class 'attribute)))
|
|
719 (rng-c-expect "{")
|
|
720 (let ((pattern (rng-c-parse-pattern)))
|
|
721 (rng-c-expect "}")
|
|
722 (rng-make-attribute name-class pattern))))
|
|
723
|
|
724 (defun rng-c-parse-name-class (attribute)
|
|
725 (let* ((rng-c-had-except nil)
|
|
726 (name-class
|
|
727 (rng-c-parse-follow-annotations
|
|
728 (rng-c-parse-primary-name-class attribute))))
|
|
729 (if (string-equal rng-c-current-token "|")
|
|
730 (let* ((name-classes (cons name-class nil))
|
|
731 (tail name-classes))
|
|
732 (or (not rng-c-had-except)
|
|
733 (rng-c-error "Parentheses required around name-class using - operator"))
|
|
734 (while (progn
|
|
735 (rng-c-advance)
|
|
736 (let ((newcdr
|
|
737 (cons (rng-c-parse-follow-annotations
|
|
738 (rng-c-parse-primary-name-class attribute))
|
|
739 nil)))
|
|
740 (setcdr tail newcdr)
|
|
741 (setq tail newcdr))
|
|
742 (string-equal rng-c-current-token "|")))
|
|
743 (rng-make-choice-name-class name-classes))
|
|
744 name-class)))
|
|
745
|
|
746 (defun rng-c-parse-primary-name-class (attribute)
|
|
747 (cond ((rng-c-current-token-ncname-p)
|
|
748 (rng-c-advance-with
|
|
749 (rng-make-name-name-class
|
|
750 (rng-make-name (rng-c-unqualified-namespace attribute)
|
|
751 rng-c-current-token))))
|
|
752 ((rng-c-current-token-prefixed-name-p)
|
|
753 (rng-c-advance-with
|
|
754 (rng-make-name-name-class
|
|
755 (rng-c-expand-name rng-c-current-token))))
|
|
756 ((string-equal rng-c-current-token "*")
|
|
757 (let ((except (rng-c-parse-opt-except-name-class attribute)))
|
|
758 (if except
|
|
759 (rng-make-any-name-except-name-class except)
|
|
760 (rng-make-any-name-name-class))))
|
|
761 ((rng-c-current-token-ns-name-p)
|
|
762 (let* ((ns
|
|
763 (rng-c-lookup-prefix (substring rng-c-current-token
|
|
764 0
|
|
765 -2)))
|
|
766 (except (rng-c-parse-opt-except-name-class attribute)))
|
|
767 (if except
|
|
768 (rng-make-ns-name-except-name-class ns except)
|
|
769 (rng-make-ns-name-name-class ns))))
|
|
770 ((string-equal rng-c-current-token "(")
|
|
771 (rng-c-advance)
|
|
772 (let ((name-class (rng-c-parse-name-class attribute)))
|
|
773 (rng-c-expect ")")
|
|
774 name-class))
|
|
775 ((rng-c-current-token-quoted-identifier-p)
|
|
776 (rng-c-advance-with
|
|
777 (rng-make-name-name-class
|
|
778 (rng-make-name (rng-c-unqualified-namespace attribute)
|
|
779 (substring rng-c-current-token 1)))))
|
|
780 ((string-equal rng-c-current-token "[")
|
|
781 (rng-c-parse-lead-annotation)
|
|
782 (rng-c-parse-primary-name-class attribute))
|
|
783 (t (rng-c-error "Bad name class"))))
|
|
784
|
|
785 (defun rng-c-parse-opt-except-name-class (attribute)
|
|
786 (rng-c-advance)
|
|
787 (and (string-equal rng-c-current-token "-")
|
|
788 (or (not rng-c-had-except)
|
|
789 (rng-c-error "Parentheses required around name-class using - operator"))
|
|
790 (setq rng-c-had-except t)
|
|
791 (progn
|
|
792 (rng-c-advance)
|
|
793 (rng-c-parse-primary-name-class attribute))))
|
|
794
|
|
795 (defun rng-c-parse-mixed ()
|
|
796 (rng-c-expect "{")
|
|
797 (let ((pattern (rng-make-mixed (rng-c-parse-pattern))))
|
|
798 (rng-c-expect "}")
|
|
799 pattern))
|
|
800
|
|
801 (defun rng-c-parse-list ()
|
|
802 (rng-c-expect "{")
|
|
803 (let ((pattern (rng-make-list (rng-c-parse-pattern))))
|
|
804 (rng-c-expect "}")
|
|
805 pattern))
|
|
806
|
|
807 (defun rng-c-parse-text ()
|
|
808 (rng-make-text))
|
|
809
|
|
810 (defun rng-c-parse-empty ()
|
|
811 (rng-make-empty))
|
|
812
|
|
813 (defun rng-c-parse-not-allowed ()
|
|
814 (rng-make-not-allowed))
|
|
815
|
|
816 (defun rng-c-parse-string ()
|
|
817 (rng-c-parse-data rng-string-datatype))
|
|
818
|
|
819 (defun rng-c-parse-token ()
|
|
820 (rng-c-parse-data rng-token-datatype))
|
|
821
|
|
822 (defun rng-c-parse-data (name)
|
|
823 (if (rng-c-current-token-literal-p)
|
|
824 (rng-make-value name
|
|
825 (rng-c-parse-literal)
|
|
826 (and (car name)
|
|
827 (rng-c-make-context)))
|
|
828 (let ((params (rng-c-parse-optional-params)))
|
|
829 (if (string-equal rng-c-current-token "-")
|
|
830 (progn
|
|
831 (if rng-c-had-except
|
|
832 (rng-c-error "Parentheses required around pattern using -")
|
|
833 (setq rng-c-had-except t))
|
|
834 (rng-c-advance)
|
|
835 (rng-make-data-except name
|
|
836 params
|
|
837 (rng-c-parse-primary)))
|
|
838 (rng-make-data name params)))))
|
|
839
|
|
840 (defun rng-c-parse-optional-params ()
|
|
841 (and (string-equal rng-c-current-token "{")
|
|
842 (let* ((head (cons nil nil))
|
|
843 (tail head))
|
|
844 (rng-c-advance)
|
|
845 (while (not (string-equal rng-c-current-token "}"))
|
|
846 (and (string-equal rng-c-current-token "[")
|
|
847 (rng-c-parse-lead-annotation))
|
|
848 (let ((name (rng-c-parse-identifier-or-keyword)))
|
|
849 (rng-c-expect "=")
|
|
850 (let ((newcdr (cons (cons (intern name)
|
|
851 (rng-c-parse-literal))
|
|
852 nil)))
|
|
853 (setcdr tail newcdr)
|
|
854 (setq tail newcdr))))
|
|
855 (rng-c-advance)
|
|
856 (cdr head))))
|
|
857
|
|
858 (defun rng-c-parse-external ()
|
|
859 (let* ((filename (rng-c-expand-file (rng-c-parse-literal)))
|
|
860 (rng-c-inherit-namespace (rng-c-parse-opt-inherit)))
|
|
861 (rng-c-parse-file filename 'external)))
|
|
862
|
|
863 (defun rng-c-expand-file (uri)
|
|
864 (condition-case err
|
|
865 (rng-uri-file-name (rng-uri-resolve uri
|
|
866 (rng-file-name-uri rng-c-file-name)))
|
|
867 (rng-uri-error
|
|
868 (rng-c-error (cadr err)))))
|
|
869
|
|
870 (defun rng-c-parse-opt-inherit ()
|
|
871 (cond ((string-equal rng-c-current-token "inherit")
|
|
872 (rng-c-advance)
|
|
873 (rng-c-expect "=")
|
|
874 (rng-c-lookup-prefix (rng-c-parse-identifier-or-keyword)))
|
|
875 (t rng-c-default-namespace)))
|
|
876
|
|
877 (defun rng-c-parse-grammar ()
|
|
878 (rng-c-expect "{")
|
|
879 (let* ((rng-c-parent-grammar rng-c-current-grammar)
|
|
880 (rng-c-current-grammar (rng-c-make-grammar)))
|
|
881 (rng-c-parse-grammar-body "}")
|
|
882 (rng-c-finish-grammar)))
|
|
883
|
|
884 (defun rng-c-parse-lead-annotation ()
|
|
885 (rng-c-parse-annotation-body)
|
|
886 (and (string-equal rng-c-current-token "[")
|
|
887 (rng-c-error "Multiple leading annotations")))
|
|
888
|
|
889 (defun rng-c-parse-follow-annotations (obj)
|
|
890 (while (string-equal rng-c-current-token ">>")
|
|
891 (rng-c-advance)
|
|
892 (if (rng-c-current-token-prefixed-name-p)
|
|
893 (rng-c-advance)
|
|
894 (rng-c-parse-identifier-or-keyword))
|
|
895 (rng-c-parse-annotation-body t))
|
|
896 obj)
|
|
897
|
|
898 (defun rng-c-parse-annotation-element ()
|
|
899 (rng-c-advance)
|
|
900 (rng-c-parse-annotation-body t))
|
|
901
|
|
902 ;; XXX need stricter checking of attribute names
|
|
903 ;; XXX don't allow attributes after text
|
|
904
|
|
905 (defun rng-c-parse-annotation-body (&optional allow-text)
|
|
906 "Current token is [. Parse up to matching ]. Current token after
|
|
907 parse is token following ]."
|
|
908 (or (string-equal rng-c-current-token "[")
|
|
909 (rng-c-error "Expected ["))
|
|
910 (rng-c-advance)
|
|
911 (while (not (string-equal rng-c-current-token "]"))
|
|
912 (cond ((rng-c-current-token-literal-p)
|
|
913 (or allow-text
|
|
914 (rng-c-error "Out of place text within annotation"))
|
|
915 (rng-c-parse-literal))
|
|
916 (t
|
|
917 (if (rng-c-current-token-prefixed-name-p)
|
|
918 (rng-c-advance)
|
|
919 (rng-c-parse-identifier-or-keyword))
|
|
920 (cond ((string-equal rng-c-current-token "[")
|
|
921 (rng-c-parse-annotation-body t))
|
|
922 ((string-equal rng-c-current-token "=")
|
|
923 (rng-c-advance)
|
|
924 (rng-c-parse-literal))
|
|
925 (t (rng-c-error "Expected = or ["))))))
|
|
926 (rng-c-advance))
|
|
927
|
|
928 (defun rng-c-advance-with (pattern)
|
|
929 (rng-c-advance)
|
|
930 pattern)
|
|
931
|
|
932 (defun rng-c-expect (str)
|
|
933 (or (string-equal rng-c-current-token str)
|
|
934 (rng-c-error "Expected `%s' but got `%s'" str rng-c-current-token))
|
|
935 (rng-c-advance))
|
|
936
|
|
937 (provide 'rng-cmpct)
|
|
938
|
|
939 ;;; rng-cmpct.el
|
86379
|
940
|
|
941 ;; arch-tag: 90395eb1-283b-4146-bbc1-6d6ef1704e57
|