diff lisp/progmodes/ebnf-bnf.el @ 27451:f062cc830f07

*** empty log message ***
author Gerd Moellmann <gerd@gnu.org>
date Thu, 27 Jan 2000 14:31:16 +0000
parents
children 9299c470e566
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/progmodes/ebnf-bnf.el	Thu Jan 27 14:31:16 2000 +0000
@@ -0,0 +1,583 @@
+;;; ebnf-bnf --- Parser for EBNF
+
+;; Copyright (C) 1999 Vinicius Jose Latorre
+
+;; Author:     Vinicius Jose Latorre <vinicius@cpqd.com.br>
+;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
+;; Keywords:   wp, ebnf, PostScript
+;; Time-stamp: <99/11/20 18:05:05 vinicius>
+;; Version:    1.4
+
+;; This file is *NOT* (yet?) part of GNU Emacs.
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 2, or (at your option)
+;; any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs; see the file COPYING.  If not, write to the
+;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;; Boston, MA 02111-1307, USA.
+
+;;; Commentary:
+
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;;
+;; This is part of ebnf2ps package.
+;;
+;; This package defines a parser for EBNF.
+;;
+;; See ebnf2ps.el for documentation.
+;;
+;;
+;; EBNF Syntax
+;; -----------
+;;
+;; The current EBNF that ebnf2ps accepts has the following constructions:
+;;
+;;    ;			comment (until end of line)
+;;    A			non-terminal
+;;    "C"		terminal
+;;    ?C?		special
+;;    $A		default non-terminal
+;;    $"C"		default terminal
+;;    $?C?		default special
+;;    A = B.		production (A is the header and B the body)
+;;    C D		sequence (C occurs before D)
+;;    C | D		alternative (C or D occurs)
+;;    A - B		exception (A excluding B, B without any non-terminal)
+;;    n * A		repetition (A repeats n (integer) times)
+;;    (C)		group (expression C is grouped together)
+;;    [C]		optional (C may or not occurs)
+;;    C+		one or more occurrences of C
+;;    {C}+		one or more occurrences of C
+;;    {C}*		zero or more occurrences of C
+;;    {C}		zero or more occurrences of C
+;;    C / D		equivalent to: C {D C}*
+;;    {C || D}+		equivalent to: C {D C}*
+;;    {C || D}*		equivalent to: [C {D C}*]
+;;    {C || D}		equivalent to: [C {D C}*]
+;;
+;; The EBNF syntax written using the notation above is:
+;;
+;;    EBNF = {production}+.
+;;
+;;    production = non_terminal "=" body ".".   ;; production
+;;
+;;    body = {sequence || "|"}*.                ;; alternative
+;;
+;;    sequence = {exception}*.                  ;; sequence
+;;
+;;    exception = repeat [ "-" repeat].         ;; exception
+;;
+;;    repeat = [ integer "*" ] term.            ;; repetition
+;;
+;;    term = factor
+;;         | [factor] "+"                       ;; one-or-more
+;;         | [factor] "/" [factor]              ;; one-or-more
+;;         .
+;;
+;;    factor = [ "$" ] "\"" terminal "\""       ;; terminal
+;;           | [ "$" ] non_terminal             ;; non-terminal
+;;           | [ "$" ] "?" special "?"          ;; special
+;;           | "(" body ")"                     ;; group
+;;           | "[" body "]"                     ;; zero-or-one
+;;           | "{" body [ "||" body ] "}+"      ;; one-or-more
+;;           | "{" body [ "||" body ] "}*"      ;; zero-or-more
+;;           | "{" body [ "||" body ] "}"       ;; zero-or-more
+;;           .
+;;
+;;    non_terminal = "[A-Za-z\\240-\\377][!#%&'*-,0-:<>@-Z\\^-z~\\240-\\377]*".
+;;
+;;    terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+".
+;;
+;;    special = "[^?\\n\\000-\\010\\016-\\037\\177-\\237]*".
+;;
+;;    integer = "[0-9]+".
+;;
+;;    comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n".
+;;
+;;
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;; code:
+
+
+(require 'ebnf-otz)
+
+
+(defvar ebnf-bnf-lex nil
+  "Value returned by `ebnf-bnf-lex' function.")
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Syntatic analyzer
+
+
+;;; EBNF = {production}+.
+
+(defun ebnf-bnf-parser (start)
+  "EBNF parser."
+  (let ((total (+ (- ebnf-limit start) 1))
+	(bias (1- start))
+	(origin (point))
+	prod-list token rule)
+    (goto-char start)
+    (setq token (ebnf-bnf-lex))
+    (and (eq token 'end-of-input)
+	 (error "Invalid EBNF file format."))
+    (while (not (eq token 'end-of-input))
+      (ebnf-message-float
+       "Parsing...%s%%"
+       (/ (* (- (point) bias) 100.0) total))
+      (setq token (ebnf-production token)
+	    rule  (cdr token)
+	    token (car token))
+      (or (ebnf-add-empty-rule-list rule)
+	  (setq prod-list (cons rule prod-list))))
+    (goto-char origin)
+    prod-list))
+
+
+;;; production = non-terminal "=" body ".".
+
+(defun ebnf-production (token)
+  (let ((header ebnf-bnf-lex)
+	(action ebnf-action)
+	body)
+    (setq ebnf-action nil)
+    (or (eq token 'non-terminal)
+	(error "Invalid header production."))
+    (or (eq (ebnf-bnf-lex) 'equal)
+	(error "Invalid production: missing `='."))
+    (setq body (ebnf-body))
+    (or (eq (car body) 'period)
+	(error "Invalid production: missing `.'."))
+    (setq body (cdr body))
+    (ebnf-eps-add-production header)
+    (cons (ebnf-bnf-lex)
+	  (ebnf-make-production header body action))))
+
+
+;;; body = {sequence || "|"}*.
+
+(defun ebnf-body ()
+  (let (body sequence)
+    (while (eq (car (setq sequence (ebnf-sequence))) 'alternative)
+      (setq sequence (cdr sequence)
+	    body     (cons sequence body)))
+    (ebnf-token-alternative body sequence)))
+
+
+;;; sequence = {exception}*.
+
+(defun ebnf-sequence ()
+  (let ((token (ebnf-bnf-lex))
+	seq term)
+    (while (setq term  (ebnf-exception token)
+		 token (car term)
+		 term  (cdr term))
+      (setq seq (cons term seq)))
+    (cons token
+	  (cond
+	   ;; null sequence
+	   ((null seq)
+	    (ebnf-make-empty))
+	   ;; sequence with only one element
+	   ((= (length seq) 1)
+	    (car seq))
+	   ;; a real sequence
+	   (t
+	    (ebnf-make-sequence (nreverse seq)))
+	   ))))
+
+
+;;; exception = repeat [ "-" repeat].
+
+(defun ebnf-exception (token)
+  (let ((term (ebnf-repeat token)))
+    (if (not (eq (car term) 'except))
+	;; repeat
+	term
+      ;; repeat - repeat
+      (let ((exception (ebnf-repeat (ebnf-bnf-lex))))
+	(ebnf-no-non-terminal (cdr exception))
+	(ebnf-token-except (cdr term) exception)))))
+
+
+(defun ebnf-no-non-terminal (node)
+  (and (vectorp node)
+       (let ((kind (ebnf-node-kind node)))
+	 (cond
+	  ((eq kind 'ebnf-generate-non-terminal)
+	   (error "Exception sequence should not contain a non-terminal."))
+	  ((eq kind 'ebnf-generate-repeat)
+	   (ebnf-no-non-terminal (ebnf-node-separator node)))
+	  ((memq kind '(ebnf-generate-optional ebnf-generate-except))
+	   (ebnf-no-non-terminal (ebnf-node-list node)))
+	  ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more))
+	   (ebnf-no-non-terminal (ebnf-node-list node))
+	   (ebnf-no-non-terminal (ebnf-node-separator node)))
+	  ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence))
+	   (let ((seq (ebnf-node-list node)))
+	     (while seq
+	       (ebnf-no-non-terminal (car seq))
+	       (setq seq (cdr seq)))))
+	  ))))
+
+
+;;; repeat = [ integer "*" ] term.
+
+(defun ebnf-repeat (token)
+  (if (not (eq token 'integer))
+      (ebnf-term token)
+    (let ((times ebnf-bnf-lex))
+      (or (eq (ebnf-bnf-lex) 'repeat)
+	  (error "Missing `*'."))
+      (ebnf-token-repeat times (ebnf-term (ebnf-bnf-lex))))))
+
+
+;;; term = factor
+;;;      | [factor] "+"                       ;; one-or-more
+;;;      | [factor] "/" [factor]              ;; one-or-more
+;;;      .
+
+(defun ebnf-term (token)
+  (let ((factor (ebnf-factor token)))
+    (and factor
+	 (setq token (ebnf-bnf-lex)))
+    (cond
+     ;; [factor] +
+     ((eq token 'one-or-more)
+      (cons (ebnf-bnf-lex)
+	    (and factor
+		 (let ((kind (ebnf-node-kind factor)))
+		   (cond
+		    ;; { A }+ + ==> { A }+
+		    ;; { A }* + ==> { A }*
+		    ((memq kind '(ebnf-generate-zero-or-more
+				  ebnf-generate-one-or-more))
+		     factor)
+		    ;; [ A ] + ==> { A }*
+		    ((eq kind 'ebnf-generate-optional)
+		     (ebnf-make-zero-or-more (list factor)))
+		    ;; A +
+		    (t
+		     (ebnf-make-one-or-more (list factor)))
+		    )))))
+     ;; [factor] / [factor]
+     ((eq token 'list)
+      (setq token (ebnf-bnf-lex))
+      (let ((sep (ebnf-factor token)))
+	(and sep
+	     (setq factor (or factor (ebnf-make-empty))))
+	(cons (if sep
+		  (ebnf-bnf-lex)
+		token)
+	      (and factor
+		   (ebnf-make-one-or-more factor sep)))))
+     ;; factor
+     (t
+      (cons token factor))
+     )))
+
+
+;;; factor = [ "$" ] "\"" terminal "\""         ;; terminal
+;;;        | [ "$" ] non_terminal               ;; non-terminal
+;;;        | [ "$" ] "?" special "?"            ;; special
+;;;        | "(" body ")"                       ;; group
+;;;        | "[" body "]"                       ;; zero-or-one
+;;;        | "{" body [ "||" body ] "}+"        ;; one-or-more
+;;;        | "{" body [ "||" body ] "}*"        ;; zero-or-more
+;;;        | "{" body [ "||" body ] "}"         ;; zero-or-more
+;;;        .
+
+(defun ebnf-factor (token)
+  (cond
+   ;; terminal
+   ((eq token 'terminal)
+    (ebnf-make-terminal ebnf-bnf-lex))
+   ;; non-terminal
+   ((eq token 'non-terminal)
+    (ebnf-make-non-terminal ebnf-bnf-lex))
+   ;; special
+   ((eq token 'special)
+    (ebnf-make-special ebnf-bnf-lex))
+   ;; group
+   ((eq token 'begin-group)
+    (let ((body (ebnf-body)))
+      (or (eq (car body) 'end-group)
+	  (error "Missing `)'."))
+      (cdr body)))
+   ;; optional
+   ((eq token 'begin-optional)
+    (let ((body (ebnf-body)))
+      (or (eq (car body) 'end-optional)
+	  (error "Missing `]'."))
+      (ebnf-token-optional (cdr body))))
+   ;; list
+   ((eq token 'begin-list)
+    (let* ((body      (ebnf-body))
+	   (token     (car body))
+	   (list-part (cdr body))
+	   sep-part)
+      (and (eq token 'list-separator)
+	   ;; { A || B }
+	   (setq body     (ebnf-body)	; get separator
+		 token    (car body)
+		 sep-part (cdr body)))
+      (cond
+       ;; { A }+
+       ((eq token 'end-one-or-more)
+	(ebnf-make-one-or-more list-part sep-part))
+       ;; { A }*
+       ((eq token 'end-zero-or-more)
+	(ebnf-make-zero-or-more list-part sep-part))
+       (t
+	(error "Missing `}+', `}*' or `}'."))
+       )))
+   ;; no term
+   (t
+    nil)
+   ))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Lexical analyzer
+
+
+(defconst ebnf-bnf-token-table (make-vector 256 'error)
+  "Vector used to map characters to a lexical token.")
+
+
+(defun ebnf-bnf-initialize ()
+  "Initialize EBNF token table."
+  ;; control character & control 8-bit character are set to `error'
+  (let ((char ?\040))
+    ;; printable character:
+    (while (< char ?\060)
+      (aset ebnf-bnf-token-table char 'non-terminal)
+      (setq char (1+ char)))
+    ;; digits:
+    (while (< char ?\072)
+      (aset ebnf-bnf-token-table char 'integer)
+      (setq char (1+ char)))
+    ;; printable character:
+    (while (< char ?\177)
+      (aset ebnf-bnf-token-table char 'non-terminal)
+      (setq char (1+ char)))
+    ;; European 8-bit accentuated characters:
+    (setq char ?\240)
+    (while (< char ?\400)
+      (aset ebnf-bnf-token-table char 'non-terminal)
+      (setq char (1+ char)))
+    ;; Override space characters:
+    (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab
+    (aset ebnf-bnf-token-table ?\n   'space) ; [NL] linefeed
+    (aset ebnf-bnf-token-table ?\r   'space) ; [CR] carriage return
+    (aset ebnf-bnf-token-table ?\t   'space) ; [HT] horizontal tab
+    (aset ebnf-bnf-token-table ?\    'space) ; [SP] space
+    ;; Override form feed character:
+    (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed
+    ;; Override other lexical characters:
+    (aset ebnf-bnf-token-table ?\" 'terminal)
+    (aset ebnf-bnf-token-table ?\? 'special)
+    (aset ebnf-bnf-token-table ?\( 'begin-group)
+    (aset ebnf-bnf-token-table ?\) 'end-group)
+    (aset ebnf-bnf-token-table ?*  'repeat)
+    (aset ebnf-bnf-token-table ?-  'except)
+    (aset ebnf-bnf-token-table ?=  'equal)
+    (aset ebnf-bnf-token-table ?\[ 'begin-optional)
+    (aset ebnf-bnf-token-table ?\] 'end-optional)
+    (aset ebnf-bnf-token-table ?\{ 'begin-list)
+    (aset ebnf-bnf-token-table ?|  'alternative)
+    (aset ebnf-bnf-token-table ?\} 'end-list)
+    (aset ebnf-bnf-token-table ?/  'list)
+    (aset ebnf-bnf-token-table ?+  'one-or-more)
+    (aset ebnf-bnf-token-table ?$  'default)
+    ;; Override comment character:
+    (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment)
+    ;; Override end of production character:
+    (aset ebnf-bnf-token-table ebnf-lex-eop-char     'period)))
+
+
+(defun ebnf-bnf-lex ()
+  "Lexical analyser for EBNF.
+
+Return a lexical token.
+
+See documentation for variable `ebnf-bnf-lex'."
+  (if (>= (point) ebnf-limit)
+      'end-of-input
+    (let (token)
+      ;; skip spaces and comments
+      (while (if (> (following-char) 255)
+		 (progn
+		   (setq token 'error)
+		   nil)
+	       (setq token (aref ebnf-bnf-token-table (following-char)))
+	       (cond
+		((eq token 'space)
+		 (skip-chars-forward " \013\n\r\t" ebnf-limit)
+		 (< (point) ebnf-limit))
+		((eq token 'comment)
+		 (ebnf-bnf-skip-comment))
+		((eq token 'form-feed)
+		 (forward-char)
+		 (setq ebnf-action 'form-feed))
+		(t nil)
+		)))
+      (setq ebnf-default-p nil)
+      (cond
+       ;; end of input
+       ((>= (point) ebnf-limit)
+	'end-of-input)
+       ;; error
+       ((eq token 'error)
+	(error "Illegal character."))
+       ;; default
+       ((eq token 'default)
+	(forward-char)
+	(if (memq (aref ebnf-bnf-token-table (following-char))
+		  '(terminal non-terminal special))
+	    (prog1
+		(ebnf-bnf-lex)
+	      (setq ebnf-default-p t))
+	  (error "Illegal `default' element.")))
+       ;; integer
+       ((eq token 'integer)
+	(setq ebnf-bnf-lex (ebnf-buffer-substring "0-9"))
+	'integer)
+       ;; special: ?special?
+       ((eq token 'special)
+	(setq ebnf-bnf-lex (concat "?"
+				   (ebnf-string " ->@-~" ?\? "special")
+				   "?"))
+	'special)
+       ;; terminal: "string"
+       ((eq token 'terminal)
+	(setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string)))
+	'terminal)
+       ;; non-terminal or terminal
+       ((eq token 'non-terminal)
+	(setq ebnf-bnf-lex (ebnf-buffer-substring
+			    "!#%&'*-,0-:<>@-Z\\^-z~\240-\377"))
+	(let ((case-fold-search ebnf-case-fold-search)
+	      match)
+	  (if (and ebnf-terminal-regexp
+		   (setq match (string-match ebnf-terminal-regexp
+					     ebnf-bnf-lex))
+		   (zerop match)
+		   (= (match-end 0) (length ebnf-bnf-lex)))
+	      'terminal
+	    'non-terminal)))
+       ;; end of list: }+, }*, }
+       ((eq token 'end-list)
+	(forward-char)
+	(cond
+	 ((= (following-char) ?+)
+	  (forward-char)
+	  'end-one-or-more)
+	 ((= (following-char) ?*)
+	  (forward-char)
+	  'end-zero-or-more)
+	 (t
+	  'end-zero-or-more)
+	 ))
+       ;; alternative: |, ||
+       ((eq token 'alternative)
+	(forward-char)
+	(if (/= (following-char) ?|)
+	    'alternative
+	  (forward-char)
+	  'list-separator))
+       ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, *
+       (t
+	(forward-char)
+	token)
+       ))))
+
+
+(defconst ebnf-bnf-comment-chars "^\n\000-\010\016-\037\177-\237")
+
+
+(defun ebnf-bnf-skip-comment ()
+  (forward-char)
+  (cond
+   ;; open EPS file
+   ((and ebnf-eps-executing (= (following-char) ?\[))
+    (ebnf-eps-add-context (ebnf-bnf-eps-filename)))
+   ;; close EPS file
+   ((and ebnf-eps-executing (= (following-char) ?\]))
+    (ebnf-eps-remove-context (ebnf-bnf-eps-filename)))
+   ;; any other action in comment
+   (t
+    (setq ebnf-action (aref ebnf-comment-table (following-char)))
+    (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit))
+   )
+  ;; check for a valid end of comment
+  (cond ((>= (point) ebnf-limit)
+	 nil)
+	((= (following-char) ?\n)
+	 (forward-char)
+	 t)
+	(t
+	 (error "Illegal character."))
+	))
+
+
+(defun ebnf-bnf-eps-filename ()
+  (forward-char)
+  (ebnf-buffer-substring ebnf-bnf-comment-chars))
+
+
+(defun ebnf-unescape-string (str)
+  (let* ((len (length str))
+	 (size (1- len))
+	 (istr 0)
+	 (n-esc 0))
+    ;; count number of escapes
+    (while (< istr size)
+      (setq istr (+ istr
+		    (if (= (aref str istr) ?\\)
+			(progn
+			  (setq n-esc (1+ n-esc))
+			  2)
+		      1))))
+    (if (zerop n-esc)
+	;; no escapes
+	str
+      ;; at least one escape
+      (let ((new (make-string (- len n-esc) ?\ ))
+	    (inew 0))
+	;; eliminate all escapes
+	(setq istr 0)
+	(while (> n-esc 0)
+	  (and (= (aref str istr) ?\\)
+	       (setq istr  (1+ istr)
+		     n-esc (1- n-esc)))
+	  (aset new inew (aref str istr))
+	  (setq inew (1+ inew)
+		istr (1+ istr)))
+	;; remaining string has no escape
+	(while (< istr len)
+	  (aset new inew (aref str istr))
+	  (setq inew (1+ inew)
+		istr (1+ istr)))
+	new))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+(provide 'ebnf-bnf)
+
+
+;;; ebnf-bnf.el ends here