changeset 104479:55deb3a40b66

lisp/cedet/semantic/wisent.el: New file.
author Chong Yidong <cyd@stupidchicken.com>
date Mon, 07 Sep 2009 16:38:54 +0000
parents 8b129ef893a2
children f32663a81ad6
files lisp/cedet/semantic/wisent.el
diffstat 1 files changed, 346 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lisp/cedet/semantic/wisent.el	Mon Sep 07 16:38:54 2009 +0000
@@ -0,0 +1,346 @@
+;;; semantic/wisent.el --- Wisent - Semantic gateway
+
+;;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007
+;;; Free Software Foundation, Inc.
+
+;; Author: David Ponce <david@dponce.com>
+;; Maintainer: David Ponce <david@dponce.com>
+;; Created: 30 Aug 2001
+;; Keywords: syntax
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+;;
+;; Here are functions necessary to use the Wisent LALR parser from
+;; Semantic environment.
+
+;;; History:
+;;
+
+;;; Code:
+
+(require 'semantic)
+(require 'semantic/wisent/wisent)
+
+;;; Lexical analysis
+;;
+(defvar wisent-lex-istream nil
+  "Input stream of `semantic-lex' syntactic tokens.")
+
+(defvar wisent-lex-lookahead nil
+  "Extra lookahead token.
+When non-nil it is directly returned by `wisent-lex-function'.")
+
+;; Maintain this alias for compatibility until all WY grammars have
+;; been translated again to Elisp code.
+(semantic-alias-obsolete 'wisent-lex-make-token-table
+                         'semantic-lex-make-type-table)
+
+(defmacro wisent-lex-eoi ()
+  "Return an End-Of-Input lexical token.
+The EOI token is like this: ($EOI "" POINT-MAX . POINT-MAX)."
+  `(cons ',wisent-eoi-term
+         (cons ""
+               (cons (point-max) (point-max)))))
+
+(defmacro define-wisent-lexer (name doc &rest body)
+  "Create a new lexical analyzer with NAME.
+DOC is a documentation string describing this analyzer.
+When a token is available in `wisent-lex-istream', eval BODY forms
+sequentially.  BODY must return a lexical token for the LALR parser.
+
+Each token in input was produced by `semantic-lex', it is a list:
+
+  (TOKSYM START . END)
+
+TOKSYM is a terminal symbol used in the grammar.
+START and END mark boundary in the current buffer of that token's
+value.
+
+Returned tokens must have the form:
+
+  (TOKSYM VALUE START . END)
+
+where VALUE is the buffer substring between START and END positions."
+  `(defun
+     ,name () ,doc
+     (cond
+      (wisent-lex-lookahead
+       (prog1 wisent-lex-lookahead
+         (setq wisent-lex-lookahead nil)))
+      (wisent-lex-istream
+       ,@body)
+      ((wisent-lex-eoi)))))
+
+(define-wisent-lexer wisent-lex
+  "Return the next available lexical token in Wisent's form.
+The variable `wisent-lex-istream' contains the list of lexical tokens
+produced by `semantic-lex'.  Pop the next token available and convert
+it to a form suitable for the Wisent's parser."
+  (let* ((tk (car wisent-lex-istream)))
+    ;; Eat input stream
+    (setq wisent-lex-istream (cdr wisent-lex-istream))
+    (cons (semantic-lex-token-class tk)
+          (cons (semantic-lex-token-text tk)
+                (semantic-lex-token-bounds tk)))))
+
+;;; Syntax analysis
+;;
+(defvar wisent-error-function nil
+  "Function used to report parse error.
+By default use the function `wisent-message'.")
+(make-variable-buffer-local 'wisent-error-function)
+
+(defvar wisent-lexer-function 'wisent-lex
+  "Function used to obtain the next lexical token in input.
+Should be a lexical analyzer created with `define-wisent-lexer'.")
+(make-variable-buffer-local 'wisent-lexer-function)
+
+;; Tag production
+;;
+(defsubst wisent-raw-tag (semantic-tag)
+  "Return raw form of given Semantic tag SEMANTIC-TAG.
+Should be used in semantic actions, in grammars, to build a Semantic
+parse tree."
+  (nconc semantic-tag
+         (if (or $region
+                 (setq $region (nthcdr 2 wisent-input)))
+             (list (car $region) (cdr $region))
+           (list (point-max) (point-max)))))
+
+(defsubst wisent-cook-tag (raw-tag)
+  "From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
+Should be used in semantic actions, in grammars, to build a Semantic
+parse tree."
+  (let* ((cooked (semantic--tag-expand raw-tag))
+         (l cooked))
+    (while l
+      (semantic--tag-put-property (car l) 'reparse-symbol $nterm)
+      (setq l (cdr l)))
+    cooked))
+
+;; Unmatched syntax collector
+;;
+(defun wisent-collect-unmatched-syntax (nomatch)
+  "Add lexical token NOMATCH to the cache of unmatched tokens.
+See also the variable `semantic-unmatched-syntax-cache'.
+
+NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
+and will be collected in `semantic-lex' form: (SYMBOL START . END)."
+  (let ((region (cddr nomatch)))
+    (and (number-or-marker-p (car region))
+         (number-or-marker-p (cdr region))
+         (setq semantic-unmatched-syntax-cache
+               (cons (cons (car nomatch) region)
+                     semantic-unmatched-syntax-cache)))))
+
+;; Parser plug-ins
+;;
+;; The following functions permit to plug the Wisent LALR parser in
+;; Semantic toolkit.  They use the standard API provided by Semantic
+;; to plug parsers in.
+;;
+;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
+;;
+;; - `wisent-parse-stream' designed to override the standard function
+;;   `semantic-parse-stream'.
+;;
+;; - `wisent-parse-region' designed to override the standard function
+;;   `semantic-parse-region'.
+;;
+;; Maybe the latter is faster because it eliminates a lot of function
+;; call.
+;;
+(defun wisent-parse-stream (stream goal)
+  "Parse STREAM using the Wisent LALR parser.
+GOAL is a nonterminal symbol to start parsing at.
+Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
+elements of STREAM that have not been used.  SEMANTIC-STREAM is the
+list of semantic tags found.
+The LALR parser automaton must be available in buffer local variable
+`semantic--parse-table'.
+
+Must be installed by `semantic-install-function-overrides' to override
+the standard function `semantic-parse-stream'."
+  (let (wisent-lex-istream wisent-lex-lookahead la-elt cache)
+
+    ;; IMPLEMENTATION NOTES:
+    ;; `wisent-parse' returns a lookahead token when it stopped
+    ;; parsing before encountering the end of input.  To re-enter the
+    ;; parser it is necessary to push back in the lexical input stream
+    ;; the last lookahead token issued.  Because the format of
+    ;; lookahead tokens and tokens in STREAM can be different the
+    ;; lookahead token is put in the variable `wisent-lex-lookahead'
+    ;; before calling `wisent-parse'.  Wisent's lexers always pop the
+    ;; next lexical token from that variable when non nil, then from
+    ;; the lexical input stream.
+    ;;
+    ;; The first element of STREAM is used to keep lookahead tokens
+    ;; across successive calls to `wisent-parse-stream'.  In fact
+    ;; what is kept is a stack of lookaheads encountered so far.  It
+    ;; is cleared when `wisent-parse' returns a valid semantic tag,
+    ;; or twice the same lookahead token!  The latter indicates that
+    ;; there is a syntax error on that token.  If so, tokens currently
+    ;; in the lookahead stack have not been used, and are moved into
+    ;; `semantic-unmatched-syntax-cache'.  When the parser will be
+    ;; re-entered, a new lexical token will be read from STREAM.
+    ;;
+    ;; The first element of STREAM that contains the lookahead stack
+    ;; has this format (compatible with the format of `semantic-lex'
+    ;; tokens):
+    ;;
+    ;; (LOOKAHEAD-STACK START . END)
+    ;;
+    ;; where LOOKAHEAD-STACK is a list of lookahead tokens.  And
+    ;; START/END are the bounds of the lookahead at top of stack.
+
+    ;; Retrieve lookahead token from stack
+    (setq la-elt (car stream))
+    (if (consp (car la-elt))
+        ;; The first elt of STREAM contains a lookahead stack
+        (setq wisent-lex-lookahead (caar la-elt)
+              stream (cdr stream))
+      (setq la-elt nil))
+    ;; Parse
+    (setq wisent-lex-istream stream
+          cache (semantic-safe "wisent-parse-stream: %s"
+                  (condition-case error-to-filter
+                      (wisent-parse semantic--parse-table
+                                    wisent-lexer-function
+                                    wisent-error-function
+                                    goal)
+                    (args-out-of-range
+                     (if (and (not debug-on-error)
+                              (= wisent-parse-max-stack-size
+                                 (nth 2 error-to-filter)))
+                         (progn
+                           (message "wisent-parse-stream: %s"
+                                    (error-message-string error-to-filter))
+                           (message "wisent-parse-max-stack-size \
+might need to be increased"))
+                       (apply 'signal error-to-filter))))))
+    ;; Manage returned lookahead token
+    (if wisent-lookahead
+        (if (eq (caar la-elt) wisent-lookahead)
+            ;; It is already at top of lookahead stack
+            (progn
+              (setq cache nil
+                    la-elt (car la-elt))
+              (while la-elt
+                ;; Collect unmatched tokens from the stack
+                (run-hook-with-args
+                 'wisent-discarding-token-functions (car la-elt))
+                (setq la-elt (cdr la-elt))))
+          ;; New lookahead token
+          (if (or (consp cache) ;; Clear the stack if parse succeeded
+                  (null la-elt))
+              (setq la-elt (cons nil nil)))
+          ;; Push it into the stack
+          (setcar la-elt (cons wisent-lookahead (car la-elt)))
+          ;; Update START/END
+          (setcdr la-elt (cddr wisent-lookahead))
+          ;; Push (LOOKAHEAD-STACK START . END) in STREAM
+          (setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
+    ;; Return (STREAM SEMANTIC-STREAM)
+    (list wisent-lex-istream
+          (if (consp cache) cache '(nil))
+          )))
+
+(defun wisent-parse-region (start end &optional goal depth returnonerror)
+  "Parse the area between START and END using the Wisent LALR parser.
+Return the list of semantic tags found.
+Optional arguments GOAL is a nonterminal symbol to start parsing at,
+DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
+stop parsing on syntax error, when non-nil.
+The LALR parser automaton must be available in buffer local variable
+`semantic--parse-table'.
+
+Must be installed by `semantic-install-function-overrides' to override
+the standard function `semantic-parse-region'."
+  (if (or (< start (point-min)) (> end (point-max)) (< end start))
+      (error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
+             start end))
+  (let* ((case-fold-search semantic-case-fold)
+         (wisent-lex-istream (semantic-lex start end depth))
+         ptree tag cooked lstack wisent-lex-lookahead)
+    ;; Loop while there are lexical tokens available
+    (while wisent-lex-istream
+      ;; Parse
+      (setq wisent-lex-lookahead (car lstack)
+            tag (semantic-safe "wisent-parse-region: %s"
+                    (wisent-parse semantic--parse-table
+                                  wisent-lexer-function
+                                  wisent-error-function
+                                  goal)))
+      ;; Manage returned lookahead token
+      (if wisent-lookahead
+          (if (eq (car lstack) wisent-lookahead)
+              ;; It is already at top of lookahead stack
+              (progn
+                (setq tag nil)
+                (while lstack
+                  ;; Collect unmatched tokens from lookahead stack
+                  (run-hook-with-args
+                   'wisent-discarding-token-functions (car lstack))
+                  (setq lstack (cdr lstack))))
+            ;; Push new lookahead token into the stack
+            (setq lstack (cons wisent-lookahead lstack))))
+      ;; Manage the parser result
+      (cond
+       ;; Parse succeeded, cook result
+       ((consp tag)
+        (setq lstack nil ;; Clear the lookahead stack
+              cooked (semantic--tag-expand tag)
+              ptree (append cooked ptree))
+        (while cooked
+          (setq tag    (car cooked)
+                cooked (cdr cooked))
+          (or (semantic--tag-get-property tag 'reparse-symbol)
+              (semantic--tag-put-property tag 'reparse-symbol goal)))
+        )
+       ;; Return on error if requested
+       (returnonerror
+        (setq wisent-lex-istream nil)
+        ))
+      ;; Work in progress...
+      (if wisent-lex-istream
+	  (and (eq semantic-working-type 'percent)
+	       (boundp 'semantic--progress-reporter)
+	       semantic--progress-reporter
+	       (progress-reporter-update
+		semantic--progress-reporter
+		(/ (* 100 (semantic-lex-token-start
+			   (car wisent-lex-istream)))
+		   (point-max))))))
+    ;; Return parse tree
+    (nreverse ptree)))
+
+;;; Interfacing with edebug
+;;
+(add-hook
+ 'edebug-setup-hook
+ #'(lambda ()
+
+     (def-edebug-spec define-wisent-lexer
+       (&define name stringp def-body)
+       )
+
+     ))
+
+(provide 'semantic/wisent)
+
+;;; semantic/wisent.el ends here