view lisp/progmodes/ebnf-yac.el @ 29005:b396df3a5181

(ONE_MORE_BYTE, TWO_MORE_BYTES): Set coding->resutl to CODING_FINISH_INSUFFICIENT_SRC if there's not enough source. (ONE_MORE_CHAR, EMIT_CHAR, EMIT_ONE_BYTE, EMIT_TWO_BYTE, EMIT_BYTES): New macros. (THREE_MORE_BYTES, DECODE_CHARACTER_ASCII, DECODE_CHARACTER_DIMENSION1, DECODE_CHARACTER_DIMENSION2): These macros deleted. (CHECK_CODE_RANGE_A0_FF): This macro deleted. (detect_coding_emacs_mule): Use UNIBYTE_STR_AS_MULTIBYTE_P to check the validity of multibyte sequence. (decode_coding_emacs_mule): New function. (encode_coding_emacs_mule): New macro. (detect_coding_iso2022): Use ONE_MORE_BYTE to fetch a byte from the source. (DECODE_ISO_CHARACTER): Just return a character code. (DECODE_COMPOSITION_START): Set coding->result instead of result. (decode_coding_iso2022, decode_coding_sjis_big5, decode_eol): Use EMIT_CHAR to produced decoded characters. Exit the loop only by macros ONE_MORE_BYTE or EMIT_CHAR. Don't handle the case of last block here. (ENCODE_ISO_CHARACTER): Don't translate character here. Produce only position codes for an invalid character. (encode_designation_at_bol): Return new destination pointer. 5th arg DSTP is changed to DST. (encode_coding_iso2022, decode_coding_sjis_big5): Get a character from the source by ONE_MORE_CHAR. Don't handle the case of last block here. (DECODE_SJIS_BIG5_CHARACTER, ENCODE_SJIS_BIG5_CHARACTER): These macros deleted. (detect_coding_sjis, detect_coding_big5, detect_coding_utf_8, detect_coding_utf_16, detect_coding_ccl): Use ONE_MORE_BYTE and TWO_MORE_BYTES to fetch a byte from the source. (encode_eol): Pay attention to coding->src_multibyte. (detect_coding, detect_eol): Preserve members src_multibyte and dst_multibyte. (DECODING_BUFFER_MAG): Return 2 even for coding_type_raw_text. (encoding_buffer_size): Set magnification to 3 for all coding systems that require encoding. (ccl_coding_driver): For decoding, be sure that the result is valid multibyte sequence. (decode_coding): Initialize coding->errors and coding->result. For emacs-mule, call decode_coding_emacs_mule. For no-conversion and raw-text, always call decode_eol. Handle the case of last block here. If not coding->dst_multibyte, convert the resulting sequence to unibyte. (encode_coding): Initialize coding->errors and coding->result. For emacs-mule, call encode_coding_emacs_mule. For no-conversion and raw-text, always call encode_eol. Handle the case of last block here. (shrink_decoding_region, shrink_encoding_region): Detect cases that we can't skip data more rigidly. (code_convert_region): Setup src_multibyte and dst_multibyte members of coding. For decoding, if the buffer is multibyte, convert the source sequence to unibyte in advance. For encoding, if the buffer is multibyte, convert the resulting sequence to multibyte afterward. (run_pre_post_conversion_on_str): New function. (code_convert_string): Deleted and divided into the following two. (decode_coding_string, encode_coding_string): New functions. (code_convert_string1, code_convert_string_norecord): Call one of above. (Fdecode_sjis_char, Fdecode_big5_char): Use MAKE_CHAR instead of MAKE_NON_ASCII_CHAR. (Fset_terminal_coding_system_internal, Fset_safe_terminal_coding_system_internal): Setup src_multibyte and dst_multibyte members. (init_coding_once): Initialize iso_code_class with new enum ISO_control_0 and ISO_control_1.
author Kenichi Handa <handa@m17n.org>
date Fri, 19 May 2000 23:54:56 +0000
parents 9299c470e566
children 05c7152c7aeb
line wrap: on
line source

;;; ebnf-yac --- Parser for Yacc/Bison

;; Copyright (C) 1999, 2000 Free Sofware Foundation, Inc.

;; Author:     Vinicius Jose Latorre <vinicius@cpqd.com.br>
;; Maintainer: Vinicius Jose Latorre <vinicius@cpqd.com.br>
;; Keywords:   wp, ebnf, PostScript
;; Time-stamp: <99/11/20 18:02:43 vinicius>
;; Version:    1.0

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;;
;; This is part of ebnf2ps package.
;;
;; This package defines a parser for Yacc/Bison.
;;
;; See ebnf2ps.el for documentation.
;;
;;
;; Yacc/Bison Syntax
;; -----------------
;;
;; YACC = { YACC-Definitions }* "%%" { YACC-Rule }* [ "%%" [ YACC-Code ] ].
;;
;; YACC-Definitions = "%token" [ "<" Name ">" ] Name-List
;;                  | "any other Yacc definition"
;;                  .
;;
;; YACC-Code = "any C definition".
;;
;; YACC-Rule = Name ":" Alternative ";".
;;
;; Alternative = { Sequence || "|" }*.
;;
;; Sequence = { Factor }*.
;;
;; Factor = Name
;;        | "'" "character" "'"
;;        | "error"
;;        | "{" "C like commands" "}"
;;        .
;;
;; Name-List = { Name || "," }*.
;;
;; Name = "[A-Za-z][A-Za-z0-9_.]*".
;;
;; Comment = "/*" "any character, but the sequence \"*/\"" "*/"
;;         | "//" "any character" "\\n".
;;
;;
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;;; code:


(require 'ebnf-otz)


(defvar ebnf-yac-lex nil
  "Value returned by `ebnf-yac-lex' function.")


(defvar ebnf-yac-token-list nil
  "List of `%TOKEN' names.")


(defvar ebnf-yac-skip-char nil
  "Non-nil means skip printable characters with no grammatical meaning.")


(defvar ebnf-yac-error nil
  "Non-nil means \"error\" occured.")


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Syntatic analyzer


;;; YACC = { YACC-Definitions }* "%%" { YACC-Rule }* [ "%%" [ YACC-Code ] ].
;;;
;;; YACC-Code = "any C definition".

(defun ebnf-yac-parser (start)
  "yacc/Bison parser."
  (let ((total (+ (- ebnf-limit start) 1))
	(bias (1- start))
	(origin (point))
	syntax-list token rule)
    (goto-char start)
    (setq token (ebnf-yac-lex))
    (and (eq token 'end-of-input)
	 (error "Invalid Yacc/Bison file format."))
    (or (eq (ebnf-yac-definitions token) 'yac-separator)
	(error "Missing `%%%%'."))
    (setq token (ebnf-yac-lex))
    (while (not (memq token '(end-of-input yac-separator)))
      (ebnf-message-float
       "Parsing...%s%%"
       (/ (* (- (point) bias) 100.0) total))
      (setq token (ebnf-yac-rule token)
	    rule  (cdr token)
	    token (car token))
      (or (ebnf-add-empty-rule-list rule)
	  (setq syntax-list (cons rule syntax-list))))
    (goto-char origin)
    syntax-list))


;;; YACC-Definitions = "%token" [ "<" Name ">" ] Name-List
;;;                  | "any other Yacc definition"
;;;                  .

(defun ebnf-yac-definitions (token)
  (let ((ebnf-yac-skip-char t))
    (while (not (memq token '(yac-separator end-of-input)))
      (setq token
	    (cond
	     ;; "%token" [ "<" Name ">" ] Name-List
	     ((eq token 'yac-token)
	      (setq token (ebnf-yac-lex))
	      (when (eq token 'open-angle)
		(or (eq (ebnf-yac-lex) 'non-terminal)
		    (error "Missing type name."))
		(or (eq (ebnf-yac-lex) 'close-angle)
		    (error "Missing `>'."))
		(setq token (ebnf-yac-lex)))
	      (setq token               (ebnf-yac-name-list token)
		    ebnf-yac-token-list (nconc (cdr token)
					       ebnf-yac-token-list))
	      (car token))
	     ;; "any other Yacc definition"
	     (t
	      (ebnf-yac-lex))
	     )))
    token))


;;; YACC-Rule = Name ":" Alternative ";".

(defun ebnf-yac-rule (token)
  (let ((header ebnf-yac-lex)
	(action ebnf-action)
	body)
    (setq ebnf-action nil)
    (or (eq token 'non-terminal)
	(error "Invalid rule name."))
    (or (eq (ebnf-yac-lex) 'colon)
	(error "Invalid rule: missing `:'."))
    (setq body (ebnf-yac-alternative))
    (or (eq (car body) 'period)
	(error "Invalid rule: missing `;'."))
    (setq body (cdr body))
    (ebnf-eps-add-production header)
    (cons (ebnf-yac-lex)
	  (ebnf-make-production header body action))))


;;; Alternative = { Sequence || "|" }*.

(defun ebnf-yac-alternative ()
  (let (body sequence)
    (while (eq (car (setq sequence (ebnf-yac-sequence)))
	       'alternative)
      (and (setq sequence (cdr sequence))
	   (setq body     (cons sequence body))))
    (ebnf-token-alternative body sequence)))


;;; Sequence = { Factor }*.

(defun ebnf-yac-sequence ()
  (let (ebnf-yac-error token seq factor)
    (while (setq token  (ebnf-yac-lex)
		 factor (ebnf-yac-factor token))
      (setq seq (cons factor seq)))
    (cons token
	  (cond
	   ;; ignore error recovery
	   ((and ebnf-yac-ignore-error-recovery ebnf-yac-error)
	    nil)
	   ;; null sequence
	   ((null seq)
	    (ebnf-make-empty))
	   ;; sequence with only one element
	   ((= (length seq) 1)
	    (car seq))
	   ;; a real sequence
	   (t
	    (ebnf-make-sequence (nreverse seq)))
	   ))))


;;; Factor = Name
;;;        | "'" "character" "'"
;;;        | "error"
;;;        | "{" "C like commands" "}"
;;;        .

(defun ebnf-yac-factor (token)
  (cond
   ;; 'character'
   ((eq token 'terminal)
    (ebnf-make-terminal ebnf-yac-lex))
   ;; Name
   ((eq token 'non-terminal)
    (ebnf-make-non-terminal ebnf-yac-lex))
   ;; "error"
   ((eq token 'yac-error)
    (ebnf-make-special ebnf-yac-lex))
   ;; not a factor
   (t
    nil)
   ))


;;; Name-List = { Name || "," }*.

(defun ebnf-yac-name-list (token)
  (let (names)
    (when (eq token 'non-terminal)
      (while (progn
	       (setq names (cons ebnf-yac-lex names)
		     token (ebnf-yac-lex))
	       (eq token 'comma))
	(or (eq (ebnf-yac-lex) 'non-terminal)
	    (error "Missing token name."))))
    (cons token names)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Lexical analyzer


;;; Name = "[A-Za-z][A-Za-z0-9_.]*".
;;;
;;; Comment = "/*" "any character, but the sequence \"*/\"" "*/"
;;;         | "//" "any character" "\\n".

(defconst ebnf-yac-token-table
  ;; control character & 8-bit character are set to `error'
  (let ((table (make-vector 256 'error)))
    ;; upper & lower case letters:
    (mapcar
     #'(lambda (char)
	 (aset table char 'non-terminal))
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
    ;; printable characters:
    (mapcar
     #'(lambda (char)
	 (aset table char 'character))
     "!#$&()*+-.0123456789=?@[\\]^_`~")
    ;; Override space characters:
    (aset table ?\n 'space)		; [NL] linefeed
    (aset table ?\r 'space)		; [CR] carriage return
    (aset table ?\t 'space)		; [HT] horizontal tab
    (aset table ?\  'space)		; [SP] space
    ;; Override form feed character:
    (aset table ?\f 'form-feed)		; [FF] form feed
    ;; Override other lexical characters:
    (aset table ?<  'open-angle)
    (aset table ?>  'close-angle)
    (aset table ?,  'comma)
    (aset table ?%  'yac-pragma)
    (aset table ?/  'slash)
    (aset table ?\{ 'yac-code)
    (aset table ?\" 'string)
    (aset table ?\' 'terminal)
    (aset table ?:  'colon)
    (aset table ?|  'alternative)
    (aset table ?\; 'period)
    table)
  "Vector used to map characters to a lexical token.")


(defun ebnf-yac-initialize ()
  "Initializations for Yacc/Bison parser."
  (setq ebnf-yac-token-list nil))


(defun ebnf-yac-lex ()
  "Lexical analyser for Yacc/Bison.

Return a lexical token.

See documentation for variable `ebnf-yac-lex'."
  (if (>= (point) ebnf-limit)
      'end-of-input
    (let (token)
      ;; skip spaces, code blocks and comments
      (while (if (> (following-char) 255)
		 (progn
		   (setq token 'error)
		   nil)
	       (setq token (aref ebnf-yac-token-table (following-char)))
	       (cond
		((or (eq token 'space)
		     (and ebnf-yac-skip-char
			  (eq token 'character)))
		 (ebnf-yac-skip-spaces))
		((eq token 'yac-code)
		 (ebnf-yac-skip-code))
		((eq token 'slash)
		 (ebnf-yac-handle-comment))
		((eq token 'form-feed)
		 (forward-char)
		 (setq ebnf-action 'form-feed))
		(t nil)
		)))
      (cond
       ;; end of input
       ((>= (point) ebnf-limit)
	'end-of-input)
       ;; error
       ((eq token 'error)
	(error "Illegal character."))
       ;; "string"
       ((eq token 'string)
	(setq ebnf-yac-lex (ebnf-get-string))
	'string)
       ;; terminal: 'char'
       ((eq token 'terminal)
	(setq ebnf-yac-lex (ebnf-string " -&(-~" ?\' "terminal"))
	'terminal)
       ;; non-terminal, terminal or "error"
       ((eq token 'non-terminal)
	(setq ebnf-yac-lex (ebnf-buffer-substring "0-9A-Za-z_."))
	(cond ((member ebnf-yac-lex ebnf-yac-token-list)
	       'terminal)
	      ((string= ebnf-yac-lex "error")
	       (setq ebnf-yac-error t)
	       'yac-error)
	      (t
	       'non-terminal)
	      ))
       ;; %% and Yacc pragmas (%TOKEN, %START, etc).
       ((eq token 'yac-pragma)
	(forward-char)
	(cond
	 ;; Yacc separator
	 ((eq (following-char) ?%)
	  (forward-char)
	  'yac-separator)
	 ;; %TOKEN
	 ((string= (upcase (ebnf-buffer-substring "0-9A-Za-z_")) "TOKEN")
	  'yac-token)
	 ;; other Yacc pragmas
	 (t
	  'yac-pragma)
	 ))
       ;; miscellaneous
       (t
	(forward-char)
	token)
       ))))


(defun ebnf-yac-skip-spaces ()
  (skip-chars-forward
   (if ebnf-yac-skip-char
       "\n\r\t !#$&()*+-.0123456789=?@[\\\\]^_`~"
     "\n\r\t ")
   ebnf-limit)
  (< (point) ebnf-limit))


(defun ebnf-yac-skip-code ()
  (forward-char)
  (let ((pair 1))
    (while (> pair 0)
      (skip-chars-forward "^{}/'\"\000-\010\013\016-\037\177-\377" ebnf-limit)
      (cond
       ((= (following-char) ?{)
	(forward-char)
	(setq pair (1+ pair)))
       ((= (following-char) ?})
	(forward-char)
	(setq pair (1- pair)))
       ((= (following-char) ?/)
	(ebnf-yac-handle-comment))
       ((= (following-char) ?\")
	(ebnf-get-string))
       ((= (following-char) ?\')
	(ebnf-string " -&(-~" ?\' "character"))
       (t
	(error "Illegal character."))
       )))
  (ebnf-yac-skip-spaces))


(defun ebnf-yac-handle-comment ()
  (forward-char)
  (cond
   ;; begin comment
   ((= (following-char) ?*)
    (ebnf-yac-skip-comment)
    (ebnf-yac-skip-spaces))
   ;; line comment
   ((= (following-char) ?/)
    (end-of-line)
    (ebnf-yac-skip-spaces))
   ;; no comment
   (t nil)
   ))


(defconst ebnf-yac-comment-chars "^*\000-\010\013\016-\037\177-\237")


(defun ebnf-yac-skip-comment ()
  (forward-char)
  (cond
   ;; open EPS file
   ((and ebnf-eps-executing (= (following-char) ?\[))
    (ebnf-eps-add-context (ebnf-yac-eps-filename)))
   ;; close EPS file
   ((and ebnf-eps-executing (= (following-char) ?\]))
    (ebnf-eps-remove-context (ebnf-yac-eps-filename)))
   ;; any other action in comment
   (t
    (setq ebnf-action (aref ebnf-comment-table (following-char))))
   )
  (let ((not-end t))
    (while not-end
      (skip-chars-forward ebnf-yac-comment-chars ebnf-limit)
      (cond ((>= (point) ebnf-limit)
	     (error "Missing end of comment: `*/'."))
	    ((= (following-char) ?*)
	     (skip-chars-forward "*" ebnf-limit)
	     (when (= (following-char) ?/)
	       ;; end of comment
	       (forward-char)
	       (setq not-end nil)))
	    (t
	     (error "Illegal character."))
	    ))))


(defun ebnf-yac-eps-filename ()
  (forward-char)
  (buffer-substring-no-properties
   (point)
   (let ((chars (concat ebnf-yac-comment-chars "\n"))
	 found)
     (while (not found)
       (skip-chars-forward chars ebnf-limit)
       (setq found
	     (cond ((>= (point) ebnf-limit)
		    (point))
		   ((= (following-char) ?*)
		    (skip-chars-forward "*" ebnf-limit)
		    (if (/= (following-char) ?\/)
			nil
		      (backward-char)
		      (point)))
		   (t
		    (point))
		   )))
     found)))


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;


(provide 'ebnf-yac)


;;; ebnf-yac.el ends here