Mercurial > emacs
changeset 104479:55deb3a40b66
lisp/cedet/semantic/wisent.el: New file.
author | Chong Yidong <cyd@stupidchicken.com> |
---|---|
date | Mon, 07 Sep 2009 16:38:54 +0000 |
parents | 8b129ef893a2 |
children | f32663a81ad6 |
files | lisp/cedet/semantic/wisent.el |
diffstat | 1 files changed, 346 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lisp/cedet/semantic/wisent.el Mon Sep 07 16:38:54 2009 +0000 @@ -0,0 +1,346 @@ +;;; semantic/wisent.el --- Wisent - Semantic gateway + +;;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 +;;; Free Software Foundation, Inc. + +;; Author: David Ponce <david@dponce.com> +;; Maintainer: David Ponce <david@dponce.com> +;; Created: 30 Aug 2001 +;; Keywords: syntax + +;; This file is part of GNU Emacs. + +;; GNU Emacs is free software: you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; GNU Emacs is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. + +;;; Commentary: +;; +;; Here are functions necessary to use the Wisent LALR parser from +;; Semantic environment. + +;;; History: +;; + +;;; Code: + +(require 'semantic) +(require 'semantic/wisent/wisent) + +;;; Lexical analysis +;; +(defvar wisent-lex-istream nil + "Input stream of `semantic-lex' syntactic tokens.") + +(defvar wisent-lex-lookahead nil + "Extra lookahead token. +When non-nil it is directly returned by `wisent-lex-function'.") + +;; Maintain this alias for compatibility until all WY grammars have +;; been translated again to Elisp code. +(semantic-alias-obsolete 'wisent-lex-make-token-table + 'semantic-lex-make-type-table) + +(defmacro wisent-lex-eoi () + "Return an End-Of-Input lexical token. +The EOI token is like this: ($EOI "" POINT-MAX . POINT-MAX)." + `(cons ',wisent-eoi-term + (cons "" + (cons (point-max) (point-max))))) + +(defmacro define-wisent-lexer (name doc &rest body) + "Create a new lexical analyzer with NAME. +DOC is a documentation string describing this analyzer. +When a token is available in `wisent-lex-istream', eval BODY forms +sequentially. BODY must return a lexical token for the LALR parser. + +Each token in input was produced by `semantic-lex', it is a list: + + (TOKSYM START . END) + +TOKSYM is a terminal symbol used in the grammar. +START and END mark boundary in the current buffer of that token's +value. + +Returned tokens must have the form: + + (TOKSYM VALUE START . END) + +where VALUE is the buffer substring between START and END positions." + `(defun + ,name () ,doc + (cond + (wisent-lex-lookahead + (prog1 wisent-lex-lookahead + (setq wisent-lex-lookahead nil))) + (wisent-lex-istream + ,@body) + ((wisent-lex-eoi))))) + +(define-wisent-lexer wisent-lex + "Return the next available lexical token in Wisent's form. +The variable `wisent-lex-istream' contains the list of lexical tokens +produced by `semantic-lex'. Pop the next token available and convert +it to a form suitable for the Wisent's parser." + (let* ((tk (car wisent-lex-istream))) + ;; Eat input stream + (setq wisent-lex-istream (cdr wisent-lex-istream)) + (cons (semantic-lex-token-class tk) + (cons (semantic-lex-token-text tk) + (semantic-lex-token-bounds tk))))) + +;;; Syntax analysis +;; +(defvar wisent-error-function nil + "Function used to report parse error. +By default use the function `wisent-message'.") +(make-variable-buffer-local 'wisent-error-function) + +(defvar wisent-lexer-function 'wisent-lex + "Function used to obtain the next lexical token in input. +Should be a lexical analyzer created with `define-wisent-lexer'.") +(make-variable-buffer-local 'wisent-lexer-function) + +;; Tag production +;; +(defsubst wisent-raw-tag (semantic-tag) + "Return raw form of given Semantic tag SEMANTIC-TAG. +Should be used in semantic actions, in grammars, to build a Semantic +parse tree." + (nconc semantic-tag + (if (or $region + (setq $region (nthcdr 2 wisent-input))) + (list (car $region) (cdr $region)) + (list (point-max) (point-max))))) + +(defsubst wisent-cook-tag (raw-tag) + "From raw form of Semantic tag RAW-TAG, return a list of cooked tags. +Should be used in semantic actions, in grammars, to build a Semantic +parse tree." + (let* ((cooked (semantic--tag-expand raw-tag)) + (l cooked)) + (while l + (semantic--tag-put-property (car l) 'reparse-symbol $nterm) + (setq l (cdr l))) + cooked)) + +;; Unmatched syntax collector +;; +(defun wisent-collect-unmatched-syntax (nomatch) + "Add lexical token NOMATCH to the cache of unmatched tokens. +See also the variable `semantic-unmatched-syntax-cache'. + +NOMATCH is in Wisent's form: (SYMBOL VALUE START . END) +and will be collected in `semantic-lex' form: (SYMBOL START . END)." + (let ((region (cddr nomatch))) + (and (number-or-marker-p (car region)) + (number-or-marker-p (cdr region)) + (setq semantic-unmatched-syntax-cache + (cons (cons (car nomatch) region) + semantic-unmatched-syntax-cache))))) + +;; Parser plug-ins +;; +;; The following functions permit to plug the Wisent LALR parser in +;; Semantic toolkit. They use the standard API provided by Semantic +;; to plug parsers in. +;; +;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME: +;; +;; - `wisent-parse-stream' designed to override the standard function +;; `semantic-parse-stream'. +;; +;; - `wisent-parse-region' designed to override the standard function +;; `semantic-parse-region'. +;; +;; Maybe the latter is faster because it eliminates a lot of function +;; call. +;; +(defun wisent-parse-stream (stream goal) + "Parse STREAM using the Wisent LALR parser. +GOAL is a nonterminal symbol to start parsing at. +Return the list (STREAM SEMANTIC-STREAM) where STREAM are those +elements of STREAM that have not been used. SEMANTIC-STREAM is the +list of semantic tags found. +The LALR parser automaton must be available in buffer local variable +`semantic--parse-table'. + +Must be installed by `semantic-install-function-overrides' to override +the standard function `semantic-parse-stream'." + (let (wisent-lex-istream wisent-lex-lookahead la-elt cache) + + ;; IMPLEMENTATION NOTES: + ;; `wisent-parse' returns a lookahead token when it stopped + ;; parsing before encountering the end of input. To re-enter the + ;; parser it is necessary to push back in the lexical input stream + ;; the last lookahead token issued. Because the format of + ;; lookahead tokens and tokens in STREAM can be different the + ;; lookahead token is put in the variable `wisent-lex-lookahead' + ;; before calling `wisent-parse'. Wisent's lexers always pop the + ;; next lexical token from that variable when non nil, then from + ;; the lexical input stream. + ;; + ;; The first element of STREAM is used to keep lookahead tokens + ;; across successive calls to `wisent-parse-stream'. In fact + ;; what is kept is a stack of lookaheads encountered so far. It + ;; is cleared when `wisent-parse' returns a valid semantic tag, + ;; or twice the same lookahead token! The latter indicates that + ;; there is a syntax error on that token. If so, tokens currently + ;; in the lookahead stack have not been used, and are moved into + ;; `semantic-unmatched-syntax-cache'. When the parser will be + ;; re-entered, a new lexical token will be read from STREAM. + ;; + ;; The first element of STREAM that contains the lookahead stack + ;; has this format (compatible with the format of `semantic-lex' + ;; tokens): + ;; + ;; (LOOKAHEAD-STACK START . END) + ;; + ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And + ;; START/END are the bounds of the lookahead at top of stack. + + ;; Retrieve lookahead token from stack + (setq la-elt (car stream)) + (if (consp (car la-elt)) + ;; The first elt of STREAM contains a lookahead stack + (setq wisent-lex-lookahead (caar la-elt) + stream (cdr stream)) + (setq la-elt nil)) + ;; Parse + (setq wisent-lex-istream stream + cache (semantic-safe "wisent-parse-stream: %s" + (condition-case error-to-filter + (wisent-parse semantic--parse-table + wisent-lexer-function + wisent-error-function + goal) + (args-out-of-range + (if (and (not debug-on-error) + (= wisent-parse-max-stack-size + (nth 2 error-to-filter))) + (progn + (message "wisent-parse-stream: %s" + (error-message-string error-to-filter)) + (message "wisent-parse-max-stack-size \ +might need to be increased")) + (apply 'signal error-to-filter)))))) + ;; Manage returned lookahead token + (if wisent-lookahead + (if (eq (caar la-elt) wisent-lookahead) + ;; It is already at top of lookahead stack + (progn + (setq cache nil + la-elt (car la-elt)) + (while la-elt + ;; Collect unmatched tokens from the stack + (run-hook-with-args + 'wisent-discarding-token-functions (car la-elt)) + (setq la-elt (cdr la-elt)))) + ;; New lookahead token + (if (or (consp cache) ;; Clear the stack if parse succeeded + (null la-elt)) + (setq la-elt (cons nil nil))) + ;; Push it into the stack + (setcar la-elt (cons wisent-lookahead (car la-elt))) + ;; Update START/END + (setcdr la-elt (cddr wisent-lookahead)) + ;; Push (LOOKAHEAD-STACK START . END) in STREAM + (setq wisent-lex-istream (cons la-elt wisent-lex-istream)))) + ;; Return (STREAM SEMANTIC-STREAM) + (list wisent-lex-istream + (if (consp cache) cache '(nil)) + ))) + +(defun wisent-parse-region (start end &optional goal depth returnonerror) + "Parse the area between START and END using the Wisent LALR parser. +Return the list of semantic tags found. +Optional arguments GOAL is a nonterminal symbol to start parsing at, +DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to +stop parsing on syntax error, when non-nil. +The LALR parser automaton must be available in buffer local variable +`semantic--parse-table'. + +Must be installed by `semantic-install-function-overrides' to override +the standard function `semantic-parse-region'." + (if (or (< start (point-min)) (> end (point-max)) (< end start)) + (error "Invalid bounds [%s %s] passed to `wisent-parse-region'" + start end)) + (let* ((case-fold-search semantic-case-fold) + (wisent-lex-istream (semantic-lex start end depth)) + ptree tag cooked lstack wisent-lex-lookahead) + ;; Loop while there are lexical tokens available + (while wisent-lex-istream + ;; Parse + (setq wisent-lex-lookahead (car lstack) + tag (semantic-safe "wisent-parse-region: %s" + (wisent-parse semantic--parse-table + wisent-lexer-function + wisent-error-function + goal))) + ;; Manage returned lookahead token + (if wisent-lookahead + (if (eq (car lstack) wisent-lookahead) + ;; It is already at top of lookahead stack + (progn + (setq tag nil) + (while lstack + ;; Collect unmatched tokens from lookahead stack + (run-hook-with-args + 'wisent-discarding-token-functions (car lstack)) + (setq lstack (cdr lstack)))) + ;; Push new lookahead token into the stack + (setq lstack (cons wisent-lookahead lstack)))) + ;; Manage the parser result + (cond + ;; Parse succeeded, cook result + ((consp tag) + (setq lstack nil ;; Clear the lookahead stack + cooked (semantic--tag-expand tag) + ptree (append cooked ptree)) + (while cooked + (setq tag (car cooked) + cooked (cdr cooked)) + (or (semantic--tag-get-property tag 'reparse-symbol) + (semantic--tag-put-property tag 'reparse-symbol goal))) + ) + ;; Return on error if requested + (returnonerror + (setq wisent-lex-istream nil) + )) + ;; Work in progress... + (if wisent-lex-istream + (and (eq semantic-working-type 'percent) + (boundp 'semantic--progress-reporter) + semantic--progress-reporter + (progress-reporter-update + semantic--progress-reporter + (/ (* 100 (semantic-lex-token-start + (car wisent-lex-istream))) + (point-max)))))) + ;; Return parse tree + (nreverse ptree))) + +;;; Interfacing with edebug +;; +(add-hook + 'edebug-setup-hook + #'(lambda () + + (def-edebug-spec define-wisent-lexer + (&define name stringp def-body) + ) + + )) + +(provide 'semantic/wisent) + +;;; semantic/wisent.el ends here