view lisp/cedet/semantic/wisent.el @ 110305:b10051866f51

New syntax-propertize functionality. * lisp/font-lock.el (font-lock-syntactic-keywords): Make obsolete. (font-lock-fontify-syntactic-keywords-region): Move handling of font-lock-syntactically-fontified to... (font-lock-default-fontify-region): ...here. Let syntax-propertize-function take precedence. (font-lock-fontify-syntactically-region): Cal syntax-propertize. * lisp/emacs-lisp/regexp-opt.el (regexp-opt-depth): Skip named groups. * lisp/emacs-lisp/syntax.el (syntax-propertize-function) (syntax-propertize-chunk-size, syntax-propertize--done) (syntax-propertize-extend-region-functions): New vars. (syntax-propertize-wholelines, syntax-propertize-multiline) (syntax-propertize--shift-groups, syntax-propertize-via-font-lock) (syntax-propertize): New functions. (syntax-propertize-rules): New macro. (syntax-ppss-flush-cache): Set syntax-propertize--done. (syntax-ppss): Call syntax-propertize. * lisp/progmodes/ada-mode.el (ada-set-syntax-table-properties) (ada-after-change-function, ada-initialize-syntax-table-properties) (ada-handle-syntax-table-properties): Only define when syntax-propertize is not available. (ada-mode): Use syntax-propertize-function. * lisp/progmodes/autoconf.el (autoconf-mode): Use syntax-propertize-function. (autoconf-font-lock-syntactic-keywords): Remove. * lisp/progmodes/cfengine.el (cfengine-mode): Use syntax-propertize-function. (cfengine-font-lock-syntactic-keywords): Remove. * lisp/progmodes/cperl-mode.el (cperl-mode): Use syntax-propertize-function. * lisp/progmodes/fortran.el (fortran-mode): Use syntax-propertize-function. (fortran--font-lock-syntactic-keywords): New var. (fortran-line-length): Update syntax-propertize-function and fortran--font-lock-syntactic-keywords. * lisp/progmodes/gud.el (gdb-script-syntax-propertize-function): New var; replaces gdb-script-font-lock-syntactic-keywords. (gdb-script-mode): Use it. * lisp/progmodes/js.el (js--regexp-literal): Define while compiling. (js-syntax-propertize-function): New var; replaces js-font-lock-syntactic-keywords. (js-mode): Use it. * lisp/progmodes/make-mode.el (makefile-syntax-propertize-function): New var; replaces makefile-font-lock-syntactic-keywords. (makefile-mode): Use it. (makefile-imake-mode): Adjust. * lisp/progmodes/mixal-mode.el (mixal-syntax-propertize-function): New var; replaces mixal-font-lock-syntactic-keywords. (mixal-mode): Use it. * lisp/progmodes/octave-mod.el (octave-syntax-propertize-sqs): New function to replace octave-font-lock-close-quotes. (octave-syntax-propertize-function): New function to replace octave-font-lock-syntactic-keywords. (octave-mode): Use it. * lisp/progmodes/perl-mode.el (perl-syntax-propertize-function): New fun to replace perl-font-lock-syntactic-keywords. (perl-syntax-propertize-special-constructs): New fun to replace perl-font-lock-special-syntactic-constructs. (perl-font-lock-syntactic-face-function): New fun. (perl-mode): Use it. * lisp/progmodes/python.el (python-syntax-propertize-function): New var to replace python-font-lock-syntactic-keywords. (python-mode): Use it. (python-quote-syntax): Simplify and adjust to new use. * lisp/progmodes/ruby-mode.el (ruby-here-doc-beg-re): Define while compiling. (ruby-here-doc-end-re, ruby-here-doc-beg-match) (ruby-font-lock-syntactic-keywords, ruby-comment-beg-syntax) (syntax-ppss, ruby-in-ppss-context-p, ruby-in-here-doc-p) (ruby-here-doc-find-end, ruby-here-doc-beg-syntax) (ruby-here-doc-end-syntax): Only define when syntax-propertize is not available. (ruby-syntax-propertize-function, ruby-syntax-propertize-heredoc): New functions. (ruby-in-ppss-context-p): Update to new syntax of heredocs. (electric-indent-chars): Silence bytecompiler. (ruby-mode): Use prog-mode, syntax-propertize-function, and electric-indent-chars. * lisp/progmodes/sh-script.el (sh-st-symbol): Remove. (sh-font-lock-close-heredoc, sh-font-lock-open-heredoc): Add eol arg. (sh-font-lock-flush-syntax-ppss-cache, sh-font-lock-here-doc): Remove. (sh-font-lock-quoted-subshell): Assume we've already matched $(. (sh-font-lock-paren): Set syntax-multiline. (sh-font-lock-syntactic-keywords): Remove. (sh-syntax-propertize-function): New function to replace it. (sh-mode): Use it. * lisp/progmodes/simula.el (simula-syntax-propertize-function): New var to replace simula-font-lock-syntactic-keywords. (simula-mode): Use it. * lisp/progmodes/tcl.el (tcl-syntax-propertize-function): New var to replace tcl-font-lock-syntactic-keywords. (tcl-mode): Use it. * lisp/progmodes/vhdl-mode.el (vhdl-mode): Use syntax-propertize-function if available. (vhdl-fontify-buffer): Adjust. * lisp/textmodes/bibtex.el (bibtex-mode): Use syntax-propertize-function. * lisp/textmodes/reftex.el (font-lock-syntactic-keywords): Don't declare since we don't use it. * lisp/textmodes/sgml-mode.el (sgml-syntax-propertize-function): New var to replace sgml-font-lock-syntactic-keywords. (sgml-mode): Use it. * lisp/textmodes/tex-mode.el (tex-common-initialization, doctex-mode): Use syntax-propertize-function. * lisp/textmodes/texinfo.el (texinfo-syntax-propertize-function): New fun to replace texinfo-font-lock-syntactic-keywords. (texinfo-mode): Use it. * test/indent/octave.m: Remove some `fixindent' not needed any more.
author Stefan Monnier <monnier@iro.umontreal.ca>
date Sat, 11 Sep 2010 01:13:42 +0200
parents 1d1d5d9bd884
children 376148b31b5e
line wrap: on
line source

;;; semantic/wisent.el --- Wisent - Semantic gateway

;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009, 2010
;;   Free Software Foundation, Inc.

;; Author: David Ponce <david@dponce.com>
;; Maintainer: David Ponce <david@dponce.com>
;; Created: 30 Aug 2001
;; Keywords: syntax

;; This file is part of GNU Emacs.

;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, either version 3 of the License, or
;; (at your option) any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.

;;; Commentary:
;;
;; Here are functions necessary to use the Wisent LALR parser from
;; Semantic environment.

;;; History:
;;

;;; Code:

(require 'semantic)
(require 'semantic/wisent/wisent)

;;; Lexical analysis
;;
(defvar wisent-lex-istream nil
  "Input stream of `semantic-lex' syntactic tokens.")

(defvar wisent-lex-lookahead nil
  "Extra lookahead token.
When non-nil it is directly returned by `wisent-lex-function'.")

;; Maintain this alias for compatibility until all WY grammars have
;; been translated again to Elisp code.
(semantic-alias-obsolete 'wisent-lex-make-token-table
                         'semantic-lex-make-type-table "23.2")

(defmacro wisent-lex-eoi ()
  "Return an End-Of-Input lexical token.
The EOI token is like this: ($EOI \"\" POINT-MAX . POINT-MAX)."
  `(cons ',wisent-eoi-term
         (cons ""
               (cons (point-max) (point-max)))))

(defmacro define-wisent-lexer (name doc &rest body)
  "Create a new lexical analyzer with NAME.
DOC is a documentation string describing this analyzer.
When a token is available in `wisent-lex-istream', eval BODY forms
sequentially.  BODY must return a lexical token for the LALR parser.

Each token in input was produced by `semantic-lex', it is a list:

  (TOKSYM START . END)

TOKSYM is a terminal symbol used in the grammar.
START and END mark boundary in the current buffer of that token's
value.

Returned tokens must have the form:

  (TOKSYM VALUE START . END)

where VALUE is the buffer substring between START and END positions."
  `(defun
     ,name () ,doc
     (cond
      (wisent-lex-lookahead
       (prog1 wisent-lex-lookahead
         (setq wisent-lex-lookahead nil)))
      (wisent-lex-istream
       ,@body)
      ((wisent-lex-eoi)))))

(define-wisent-lexer wisent-lex
  "Return the next available lexical token in Wisent's form.
The variable `wisent-lex-istream' contains the list of lexical tokens
produced by `semantic-lex'.  Pop the next token available and convert
it to a form suitable for the Wisent's parser."
  (let* ((tk (car wisent-lex-istream)))
    ;; Eat input stream
    (setq wisent-lex-istream (cdr wisent-lex-istream))
    (cons (semantic-lex-token-class tk)
          (cons (semantic-lex-token-text tk)
                (semantic-lex-token-bounds tk)))))

;;; Syntax analysis
;;
(defvar wisent-error-function nil
  "Function used to report parse error.
By default use the function `wisent-message'.")
(make-variable-buffer-local 'wisent-error-function)

(defvar wisent-lexer-function 'wisent-lex
  "Function used to obtain the next lexical token in input.
Should be a lexical analyzer created with `define-wisent-lexer'.")
(make-variable-buffer-local 'wisent-lexer-function)

;; Tag production
;;
(defsubst wisent-raw-tag (semantic-tag)
  "Return raw form of given Semantic tag SEMANTIC-TAG.
Should be used in semantic actions, in grammars, to build a Semantic
parse tree."
  (nconc semantic-tag
         (if (or $region
                 (setq $region (nthcdr 2 wisent-input)))
             (list (car $region) (cdr $region))
           (list (point-max) (point-max)))))

(defsubst wisent-cook-tag (raw-tag)
  "From raw form of Semantic tag RAW-TAG, return a list of cooked tags.
Should be used in semantic actions, in grammars, to build a Semantic
parse tree."
  (let* ((cooked (semantic--tag-expand raw-tag))
         (l cooked))
    (while l
      (semantic--tag-put-property (car l) 'reparse-symbol $nterm)
      (setq l (cdr l)))
    cooked))

;; Unmatched syntax collector
;;
(defun wisent-collect-unmatched-syntax (nomatch)
  "Add lexical token NOMATCH to the cache of unmatched tokens.
See also the variable `semantic-unmatched-syntax-cache'.

NOMATCH is in Wisent's form: (SYMBOL VALUE START . END)
and will be collected in `semantic-lex' form: (SYMBOL START . END)."
  (let ((region (cddr nomatch)))
    (and (number-or-marker-p (car region))
         (number-or-marker-p (cdr region))
         (setq semantic-unmatched-syntax-cache
               (cons (cons (car nomatch) region)
                     semantic-unmatched-syntax-cache)))))

;; Parser plug-ins
;;
;; The following functions permit to plug the Wisent LALR parser in
;; Semantic toolkit.  They use the standard API provided by Semantic
;; to plug parsers in.
;;
;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME:
;;
;; - `wisent-parse-stream' designed to override the standard function
;;   `semantic-parse-stream'.
;;
;; - `wisent-parse-region' designed to override the standard function
;;   `semantic-parse-region'.
;;
;; Maybe the latter is faster because it eliminates a lot of function
;; call.
;;
(defun wisent-parse-stream (stream goal)
  "Parse STREAM using the Wisent LALR parser.
GOAL is a nonterminal symbol to start parsing at.
Return the list (STREAM SEMANTIC-STREAM) where STREAM are those
elements of STREAM that have not been used.  SEMANTIC-STREAM is the
list of semantic tags found.
The LALR parser automaton must be available in buffer local variable
`semantic--parse-table'.

Must be installed by `semantic-install-function-overrides' to override
the standard function `semantic-parse-stream'."
  (let (wisent-lex-istream wisent-lex-lookahead la-elt cache)

    ;; IMPLEMENTATION NOTES:
    ;; `wisent-parse' returns a lookahead token when it stopped
    ;; parsing before encountering the end of input.  To re-enter the
    ;; parser it is necessary to push back in the lexical input stream
    ;; the last lookahead token issued.  Because the format of
    ;; lookahead tokens and tokens in STREAM can be different the
    ;; lookahead token is put in the variable `wisent-lex-lookahead'
    ;; before calling `wisent-parse'.  Wisent's lexers always pop the
    ;; next lexical token from that variable when non nil, then from
    ;; the lexical input stream.
    ;;
    ;; The first element of STREAM is used to keep lookahead tokens
    ;; across successive calls to `wisent-parse-stream'.  In fact
    ;; what is kept is a stack of lookaheads encountered so far.  It
    ;; is cleared when `wisent-parse' returns a valid semantic tag,
    ;; or twice the same lookahead token!  The latter indicates that
    ;; there is a syntax error on that token.  If so, tokens currently
    ;; in the lookahead stack have not been used, and are moved into
    ;; `semantic-unmatched-syntax-cache'.  When the parser will be
    ;; re-entered, a new lexical token will be read from STREAM.
    ;;
    ;; The first element of STREAM that contains the lookahead stack
    ;; has this format (compatible with the format of `semantic-lex'
    ;; tokens):
    ;;
    ;; (LOOKAHEAD-STACK START . END)
    ;;
    ;; where LOOKAHEAD-STACK is a list of lookahead tokens.  And
    ;; START/END are the bounds of the lookahead at top of stack.

    ;; Retrieve lookahead token from stack
    (setq la-elt (car stream))
    (if (consp (car la-elt))
        ;; The first elt of STREAM contains a lookahead stack
        (setq wisent-lex-lookahead (caar la-elt)
              stream (cdr stream))
      (setq la-elt nil))
    ;; Parse
    (setq wisent-lex-istream stream
          cache (semantic-safe "wisent-parse-stream: %s"
                  (condition-case error-to-filter
                      (wisent-parse semantic--parse-table
                                    wisent-lexer-function
                                    wisent-error-function
                                    goal)
                    (args-out-of-range
                     (if (and (not debug-on-error)
                              (= wisent-parse-max-stack-size
                                 (nth 2 error-to-filter)))
                         (progn
                           (message "wisent-parse-stream: %s"
                                    (error-message-string error-to-filter))
                           (message "wisent-parse-max-stack-size \
might need to be increased"))
                       (apply 'signal error-to-filter))))))
    ;; Manage returned lookahead token
    (if wisent-lookahead
        (if (eq (caar la-elt) wisent-lookahead)
            ;; It is already at top of lookahead stack
            (progn
              (setq cache nil
                    la-elt (car la-elt))
              (while la-elt
                ;; Collect unmatched tokens from the stack
                (run-hook-with-args
                 'wisent-discarding-token-functions (car la-elt))
                (setq la-elt (cdr la-elt))))
          ;; New lookahead token
          (if (or (consp cache) ;; Clear the stack if parse succeeded
                  (null la-elt))
              (setq la-elt (cons nil nil)))
          ;; Push it into the stack
          (setcar la-elt (cons wisent-lookahead (car la-elt)))
          ;; Update START/END
          (setcdr la-elt (cddr wisent-lookahead))
          ;; Push (LOOKAHEAD-STACK START . END) in STREAM
          (setq wisent-lex-istream (cons la-elt wisent-lex-istream))))
    ;; Return (STREAM SEMANTIC-STREAM)
    (list wisent-lex-istream
          (if (consp cache) cache '(nil))
          )))

(defun wisent-parse-region (start end &optional goal depth returnonerror)
  "Parse the area between START and END using the Wisent LALR parser.
Return the list of semantic tags found.
Optional arguments GOAL is a nonterminal symbol to start parsing at,
DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to
stop parsing on syntax error, when non-nil.
The LALR parser automaton must be available in buffer local variable
`semantic--parse-table'.

Must be installed by `semantic-install-function-overrides' to override
the standard function `semantic-parse-region'."
  (if (or (< start (point-min)) (> end (point-max)) (< end start))
      (error "Invalid bounds [%s %s] passed to `wisent-parse-region'"
             start end))
  (let* ((case-fold-search semantic-case-fold)
         (wisent-lex-istream (semantic-lex start end depth))
         ptree tag cooked lstack wisent-lex-lookahead)
    ;; Loop while there are lexical tokens available
    (while wisent-lex-istream
      ;; Parse
      (setq wisent-lex-lookahead (car lstack)
            tag (semantic-safe "wisent-parse-region: %s"
                    (wisent-parse semantic--parse-table
                                  wisent-lexer-function
                                  wisent-error-function
                                  goal)))
      ;; Manage returned lookahead token
      (if wisent-lookahead
          (if (eq (car lstack) wisent-lookahead)
              ;; It is already at top of lookahead stack
              (progn
                (setq tag nil)
                (while lstack
                  ;; Collect unmatched tokens from lookahead stack
                  (run-hook-with-args
                   'wisent-discarding-token-functions (car lstack))
                  (setq lstack (cdr lstack))))
            ;; Push new lookahead token into the stack
            (setq lstack (cons wisent-lookahead lstack))))
      ;; Manage the parser result
      (cond
       ;; Parse succeeded, cook result
       ((consp tag)
        (setq lstack nil ;; Clear the lookahead stack
              cooked (semantic--tag-expand tag)
              ptree (append cooked ptree))
        (while cooked
          (setq tag    (car cooked)
                cooked (cdr cooked))
          (or (semantic--tag-get-property tag 'reparse-symbol)
              (semantic--tag-put-property tag 'reparse-symbol goal)))
        )
       ;; Return on error if requested
       (returnonerror
        (setq wisent-lex-istream nil)
        ))
      ;; Work in progress...
      (if wisent-lex-istream
	  (and (eq semantic-working-type 'percent)
	       (boundp 'semantic--progress-reporter)
	       semantic--progress-reporter
	       (progress-reporter-update
		semantic--progress-reporter
		(/ (* 100 (semantic-lex-token-start
			   (car wisent-lex-istream)))
		   (point-max))))))
    ;; Return parse tree
    (nreverse ptree)))

;;; Interfacing with edebug
;;
(add-hook
 'edebug-setup-hook
 #'(lambda ()

     (def-edebug-spec define-wisent-lexer
       (&define name stringp def-body)
       )

     ))

(provide 'semantic/wisent)

;; arch-tag: c24ddd69-d41c-4604-8221-29a93fc4fa79
;;; semantic/wisent.el ends here