Mercurial > emacs
diff lisp/progmodes/ebnf-bnf.el @ 27451:f062cc830f07
*** empty log message ***
author | Gerd Moellmann <gerd@gnu.org> |
---|---|
date | Thu, 27 Jan 2000 14:31:16 +0000 |
parents | |
children | 9299c470e566 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/progmodes/ebnf-bnf.el Thu Jan 27 14:31:16 2000 +0000 @@ -0,0 +1,583 @@ +;;; ebnf-bnf --- Parser for EBNF + +;; Copyright (C) 1999 Vinicius Jose Latorre + +;; Author: Vinicius Jose Latorre <vinicius@cpqd.com.br> +;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br> +;; Keywords: wp, ebnf, PostScript +;; Time-stamp: <99/11/20 18:05:05 vinicius> +;; Version: 1.4 + +;; This file is *NOT* (yet?) part of GNU Emacs. + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 2, or (at your option) +;; any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs; see the file COPYING. If not, write to the +;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, +;; Boston, MA 02111-1307, USA. + +;;; Commentary: + +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; +;; This is part of ebnf2ps package. +;; +;; This package defines a parser for EBNF. +;; +;; See ebnf2ps.el for documentation. +;; +;; +;; EBNF Syntax +;; ----------- +;; +;; The current EBNF that ebnf2ps accepts has the following constructions: +;; +;; ; comment (until end of line) +;; A non-terminal +;; "C" terminal +;; ?C? special +;; $A default non-terminal +;; $"C" default terminal +;; $?C? default special +;; A = B. production (A is the header and B the body) +;; C D sequence (C occurs before D) +;; C | D alternative (C or D occurs) +;; A - B exception (A excluding B, B without any non-terminal) +;; n * A repetition (A repeats n (integer) times) +;; (C) group (expression C is grouped together) +;; [C] optional (C may or not occurs) +;; C+ one or more occurrences of C +;; {C}+ one or more occurrences of C +;; {C}* zero or more occurrences of C +;; {C} zero or more occurrences of C +;; C / D equivalent to: C {D C}* +;; {C || D}+ equivalent to: C {D C}* +;; {C || D}* equivalent to: [C {D C}*] +;; {C || D} equivalent to: [C {D C}*] +;; +;; The EBNF syntax written using the notation above is: +;; +;; EBNF = {production}+. +;; +;; production = non_terminal "=" body ".". ;; production +;; +;; body = {sequence || "|"}*. ;; alternative +;; +;; sequence = {exception}*. ;; sequence +;; +;; exception = repeat [ "-" repeat]. ;; exception +;; +;; repeat = [ integer "*" ] term. ;; repetition +;; +;; term = factor +;; | [factor] "+" ;; one-or-more +;; | [factor] "/" [factor] ;; one-or-more +;; . +;; +;; factor = [ "$" ] "\"" terminal "\"" ;; terminal +;; | [ "$" ] non_terminal ;; non-terminal +;; | [ "$" ] "?" special "?" ;; special +;; | "(" body ")" ;; group +;; | "[" body "]" ;; zero-or-one +;; | "{" body [ "||" body ] "}+" ;; one-or-more +;; | "{" body [ "||" body ] "}*" ;; zero-or-more +;; | "{" body [ "||" body ] "}" ;; zero-or-more +;; . +;; +;; non_terminal = "[A-Za-z\\240-\\377][!#%&'*-,0-:<>@-Z\\^-z~\\240-\\377]*". +;; +;; terminal = "\\([^\"\\]\\|\\\\[ -~\\240-\\377]\\)+". +;; +;; special = "[^?\\n\\000-\\010\\016-\\037\\177-\\237]*". +;; +;; integer = "[0-9]+". +;; +;; comment = ";" "[^\\n\\000-\\010\\016-\\037\\177-\\237]*" "\\n". +;; +;; +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;; code: + + +(require 'ebnf-otz) + + +(defvar ebnf-bnf-lex nil + "Value returned by `ebnf-bnf-lex' function.") + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Syntatic analyzer + + +;;; EBNF = {production}+. + +(defun ebnf-bnf-parser (start) + "EBNF parser." + (let ((total (+ (- ebnf-limit start) 1)) + (bias (1- start)) + (origin (point)) + prod-list token rule) + (goto-char start) + (setq token (ebnf-bnf-lex)) + (and (eq token 'end-of-input) + (error "Invalid EBNF file format.")) + (while (not (eq token 'end-of-input)) + (ebnf-message-float + "Parsing...%s%%" + (/ (* (- (point) bias) 100.0) total)) + (setq token (ebnf-production token) + rule (cdr token) + token (car token)) + (or (ebnf-add-empty-rule-list rule) + (setq prod-list (cons rule prod-list)))) + (goto-char origin) + prod-list)) + + +;;; production = non-terminal "=" body ".". + +(defun ebnf-production (token) + (let ((header ebnf-bnf-lex) + (action ebnf-action) + body) + (setq ebnf-action nil) + (or (eq token 'non-terminal) + (error "Invalid header production.")) + (or (eq (ebnf-bnf-lex) 'equal) + (error "Invalid production: missing `='.")) + (setq body (ebnf-body)) + (or (eq (car body) 'period) + (error "Invalid production: missing `.'.")) + (setq body (cdr body)) + (ebnf-eps-add-production header) + (cons (ebnf-bnf-lex) + (ebnf-make-production header body action)))) + + +;;; body = {sequence || "|"}*. + +(defun ebnf-body () + (let (body sequence) + (while (eq (car (setq sequence (ebnf-sequence))) 'alternative) + (setq sequence (cdr sequence) + body (cons sequence body))) + (ebnf-token-alternative body sequence))) + + +;;; sequence = {exception}*. + +(defun ebnf-sequence () + (let ((token (ebnf-bnf-lex)) + seq term) + (while (setq term (ebnf-exception token) + token (car term) + term (cdr term)) + (setq seq (cons term seq))) + (cons token + (cond + ;; null sequence + ((null seq) + (ebnf-make-empty)) + ;; sequence with only one element + ((= (length seq) 1) + (car seq)) + ;; a real sequence + (t + (ebnf-make-sequence (nreverse seq))) + )))) + + +;;; exception = repeat [ "-" repeat]. + +(defun ebnf-exception (token) + (let ((term (ebnf-repeat token))) + (if (not (eq (car term) 'except)) + ;; repeat + term + ;; repeat - repeat + (let ((exception (ebnf-repeat (ebnf-bnf-lex)))) + (ebnf-no-non-terminal (cdr exception)) + (ebnf-token-except (cdr term) exception))))) + + +(defun ebnf-no-non-terminal (node) + (and (vectorp node) + (let ((kind (ebnf-node-kind node))) + (cond + ((eq kind 'ebnf-generate-non-terminal) + (error "Exception sequence should not contain a non-terminal.")) + ((eq kind 'ebnf-generate-repeat) + (ebnf-no-non-terminal (ebnf-node-separator node))) + ((memq kind '(ebnf-generate-optional ebnf-generate-except)) + (ebnf-no-non-terminal (ebnf-node-list node))) + ((memq kind '(ebnf-generate-one-or-more ebnf-generate-zero-or-more)) + (ebnf-no-non-terminal (ebnf-node-list node)) + (ebnf-no-non-terminal (ebnf-node-separator node))) + ((memq kind '(ebnf-generate-alternative ebnf-generate-sequence)) + (let ((seq (ebnf-node-list node))) + (while seq + (ebnf-no-non-terminal (car seq)) + (setq seq (cdr seq))))) + )))) + + +;;; repeat = [ integer "*" ] term. + +(defun ebnf-repeat (token) + (if (not (eq token 'integer)) + (ebnf-term token) + (let ((times ebnf-bnf-lex)) + (or (eq (ebnf-bnf-lex) 'repeat) + (error "Missing `*'.")) + (ebnf-token-repeat times (ebnf-term (ebnf-bnf-lex)))))) + + +;;; term = factor +;;; | [factor] "+" ;; one-or-more +;;; | [factor] "/" [factor] ;; one-or-more +;;; . + +(defun ebnf-term (token) + (let ((factor (ebnf-factor token))) + (and factor + (setq token (ebnf-bnf-lex))) + (cond + ;; [factor] + + ((eq token 'one-or-more) + (cons (ebnf-bnf-lex) + (and factor + (let ((kind (ebnf-node-kind factor))) + (cond + ;; { A }+ + ==> { A }+ + ;; { A }* + ==> { A }* + ((memq kind '(ebnf-generate-zero-or-more + ebnf-generate-one-or-more)) + factor) + ;; [ A ] + ==> { A }* + ((eq kind 'ebnf-generate-optional) + (ebnf-make-zero-or-more (list factor))) + ;; A + + (t + (ebnf-make-one-or-more (list factor))) + ))))) + ;; [factor] / [factor] + ((eq token 'list) + (setq token (ebnf-bnf-lex)) + (let ((sep (ebnf-factor token))) + (and sep + (setq factor (or factor (ebnf-make-empty)))) + (cons (if sep + (ebnf-bnf-lex) + token) + (and factor + (ebnf-make-one-or-more factor sep))))) + ;; factor + (t + (cons token factor)) + ))) + + +;;; factor = [ "$" ] "\"" terminal "\"" ;; terminal +;;; | [ "$" ] non_terminal ;; non-terminal +;;; | [ "$" ] "?" special "?" ;; special +;;; | "(" body ")" ;; group +;;; | "[" body "]" ;; zero-or-one +;;; | "{" body [ "||" body ] "}+" ;; one-or-more +;;; | "{" body [ "||" body ] "}*" ;; zero-or-more +;;; | "{" body [ "||" body ] "}" ;; zero-or-more +;;; . + +(defun ebnf-factor (token) + (cond + ;; terminal + ((eq token 'terminal) + (ebnf-make-terminal ebnf-bnf-lex)) + ;; non-terminal + ((eq token 'non-terminal) + (ebnf-make-non-terminal ebnf-bnf-lex)) + ;; special + ((eq token 'special) + (ebnf-make-special ebnf-bnf-lex)) + ;; group + ((eq token 'begin-group) + (let ((body (ebnf-body))) + (or (eq (car body) 'end-group) + (error "Missing `)'.")) + (cdr body))) + ;; optional + ((eq token 'begin-optional) + (let ((body (ebnf-body))) + (or (eq (car body) 'end-optional) + (error "Missing `]'.")) + (ebnf-token-optional (cdr body)))) + ;; list + ((eq token 'begin-list) + (let* ((body (ebnf-body)) + (token (car body)) + (list-part (cdr body)) + sep-part) + (and (eq token 'list-separator) + ;; { A || B } + (setq body (ebnf-body) ; get separator + token (car body) + sep-part (cdr body))) + (cond + ;; { A }+ + ((eq token 'end-one-or-more) + (ebnf-make-one-or-more list-part sep-part)) + ;; { A }* + ((eq token 'end-zero-or-more) + (ebnf-make-zero-or-more list-part sep-part)) + (t + (error "Missing `}+', `}*' or `}'.")) + ))) + ;; no term + (t + nil) + )) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Lexical analyzer + + +(defconst ebnf-bnf-token-table (make-vector 256 'error) + "Vector used to map characters to a lexical token.") + + +(defun ebnf-bnf-initialize () + "Initialize EBNF token table." + ;; control character & control 8-bit character are set to `error' + (let ((char ?\040)) + ;; printable character: + (while (< char ?\060) + (aset ebnf-bnf-token-table char 'non-terminal) + (setq char (1+ char))) + ;; digits: + (while (< char ?\072) + (aset ebnf-bnf-token-table char 'integer) + (setq char (1+ char))) + ;; printable character: + (while (< char ?\177) + (aset ebnf-bnf-token-table char 'non-terminal) + (setq char (1+ char))) + ;; European 8-bit accentuated characters: + (setq char ?\240) + (while (< char ?\400) + (aset ebnf-bnf-token-table char 'non-terminal) + (setq char (1+ char))) + ;; Override space characters: + (aset ebnf-bnf-token-table ?\013 'space) ; [VT] vertical tab + (aset ebnf-bnf-token-table ?\n 'space) ; [NL] linefeed + (aset ebnf-bnf-token-table ?\r 'space) ; [CR] carriage return + (aset ebnf-bnf-token-table ?\t 'space) ; [HT] horizontal tab + (aset ebnf-bnf-token-table ?\ 'space) ; [SP] space + ;; Override form feed character: + (aset ebnf-bnf-token-table ?\f 'form-feed) ; [FF] form feed + ;; Override other lexical characters: + (aset ebnf-bnf-token-table ?\" 'terminal) + (aset ebnf-bnf-token-table ?\? 'special) + (aset ebnf-bnf-token-table ?\( 'begin-group) + (aset ebnf-bnf-token-table ?\) 'end-group) + (aset ebnf-bnf-token-table ?* 'repeat) + (aset ebnf-bnf-token-table ?- 'except) + (aset ebnf-bnf-token-table ?= 'equal) + (aset ebnf-bnf-token-table ?\[ 'begin-optional) + (aset ebnf-bnf-token-table ?\] 'end-optional) + (aset ebnf-bnf-token-table ?\{ 'begin-list) + (aset ebnf-bnf-token-table ?| 'alternative) + (aset ebnf-bnf-token-table ?\} 'end-list) + (aset ebnf-bnf-token-table ?/ 'list) + (aset ebnf-bnf-token-table ?+ 'one-or-more) + (aset ebnf-bnf-token-table ?$ 'default) + ;; Override comment character: + (aset ebnf-bnf-token-table ebnf-lex-comment-char 'comment) + ;; Override end of production character: + (aset ebnf-bnf-token-table ebnf-lex-eop-char 'period))) + + +(defun ebnf-bnf-lex () + "Lexical analyser for EBNF. + +Return a lexical token. + +See documentation for variable `ebnf-bnf-lex'." + (if (>= (point) ebnf-limit) + 'end-of-input + (let (token) + ;; skip spaces and comments + (while (if (> (following-char) 255) + (progn + (setq token 'error) + nil) + (setq token (aref ebnf-bnf-token-table (following-char))) + (cond + ((eq token 'space) + (skip-chars-forward " \013\n\r\t" ebnf-limit) + (< (point) ebnf-limit)) + ((eq token 'comment) + (ebnf-bnf-skip-comment)) + ((eq token 'form-feed) + (forward-char) + (setq ebnf-action 'form-feed)) + (t nil) + ))) + (setq ebnf-default-p nil) + (cond + ;; end of input + ((>= (point) ebnf-limit) + 'end-of-input) + ;; error + ((eq token 'error) + (error "Illegal character.")) + ;; default + ((eq token 'default) + (forward-char) + (if (memq (aref ebnf-bnf-token-table (following-char)) + '(terminal non-terminal special)) + (prog1 + (ebnf-bnf-lex) + (setq ebnf-default-p t)) + (error "Illegal `default' element."))) + ;; integer + ((eq token 'integer) + (setq ebnf-bnf-lex (ebnf-buffer-substring "0-9")) + 'integer) + ;; special: ?special? + ((eq token 'special) + (setq ebnf-bnf-lex (concat "?" + (ebnf-string " ->@-~" ?\? "special") + "?")) + 'special) + ;; terminal: "string" + ((eq token 'terminal) + (setq ebnf-bnf-lex (ebnf-unescape-string (ebnf-get-string))) + 'terminal) + ;; non-terminal or terminal + ((eq token 'non-terminal) + (setq ebnf-bnf-lex (ebnf-buffer-substring + "!#%&'*-,0-:<>@-Z\\^-z~\240-\377")) + (let ((case-fold-search ebnf-case-fold-search) + match) + (if (and ebnf-terminal-regexp + (setq match (string-match ebnf-terminal-regexp + ebnf-bnf-lex)) + (zerop match) + (= (match-end 0) (length ebnf-bnf-lex))) + 'terminal + 'non-terminal))) + ;; end of list: }+, }*, } + ((eq token 'end-list) + (forward-char) + (cond + ((= (following-char) ?+) + (forward-char) + 'end-one-or-more) + ((= (following-char) ?*) + (forward-char) + 'end-zero-or-more) + (t + 'end-zero-or-more) + )) + ;; alternative: |, || + ((eq token 'alternative) + (forward-char) + (if (/= (following-char) ?|) + 'alternative + (forward-char) + 'list-separator)) + ;; miscellaneous: {, (, ), [, ], ., =, /, +, -, * + (t + (forward-char) + token) + )))) + + +(defconst ebnf-bnf-comment-chars "^\n\000-\010\016-\037\177-\237") + + +(defun ebnf-bnf-skip-comment () + (forward-char) + (cond + ;; open EPS file + ((and ebnf-eps-executing (= (following-char) ?\[)) + (ebnf-eps-add-context (ebnf-bnf-eps-filename))) + ;; close EPS file + ((and ebnf-eps-executing (= (following-char) ?\])) + (ebnf-eps-remove-context (ebnf-bnf-eps-filename))) + ;; any other action in comment + (t + (setq ebnf-action (aref ebnf-comment-table (following-char))) + (skip-chars-forward ebnf-bnf-comment-chars ebnf-limit)) + ) + ;; check for a valid end of comment + (cond ((>= (point) ebnf-limit) + nil) + ((= (following-char) ?\n) + (forward-char) + t) + (t + (error "Illegal character.")) + )) + + +(defun ebnf-bnf-eps-filename () + (forward-char) + (ebnf-buffer-substring ebnf-bnf-comment-chars)) + + +(defun ebnf-unescape-string (str) + (let* ((len (length str)) + (size (1- len)) + (istr 0) + (n-esc 0)) + ;; count number of escapes + (while (< istr size) + (setq istr (+ istr + (if (= (aref str istr) ?\\) + (progn + (setq n-esc (1+ n-esc)) + 2) + 1)))) + (if (zerop n-esc) + ;; no escapes + str + ;; at least one escape + (let ((new (make-string (- len n-esc) ?\ )) + (inew 0)) + ;; eliminate all escapes + (setq istr 0) + (while (> n-esc 0) + (and (= (aref str istr) ?\\) + (setq istr (1+ istr) + n-esc (1- n-esc))) + (aset new inew (aref str istr)) + (setq inew (1+ inew) + istr (1+ istr))) + ;; remaining string has no escape + (while (< istr len) + (aset new inew (aref str istr)) + (setq inew (1+ inew) + istr (1+ istr))) + new)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +(provide 'ebnf-bnf) + + +;;; ebnf-bnf.el ends here