Mercurial > emacs
comparison lisp/cedet/semantic/wisent.el @ 104479:55deb3a40b66
lisp/cedet/semantic/wisent.el: New file.
author | Chong Yidong <cyd@stupidchicken.com> |
---|---|
date | Mon, 07 Sep 2009 16:38:54 +0000 |
parents | |
children | 108a3a6d8be0 |
comparison
equal
deleted
inserted
replaced
104478:8b129ef893a2 | 104479:55deb3a40b66 |
---|---|
1 ;;; semantic/wisent.el --- Wisent - Semantic gateway | |
2 | |
3 ;;; Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007 | |
4 ;;; Free Software Foundation, Inc. | |
5 | |
6 ;; Author: David Ponce <david@dponce.com> | |
7 ;; Maintainer: David Ponce <david@dponce.com> | |
8 ;; Created: 30 Aug 2001 | |
9 ;; Keywords: syntax | |
10 | |
11 ;; This file is part of GNU Emacs. | |
12 | |
13 ;; GNU Emacs is free software: you can redistribute it and/or modify | |
14 ;; it under the terms of the GNU General Public License as published by | |
15 ;; the Free Software Foundation, either version 3 of the License, or | |
16 ;; (at your option) any later version. | |
17 | |
18 ;; GNU Emacs is distributed in the hope that it will be useful, | |
19 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 ;; GNU General Public License for more details. | |
22 | |
23 ;; You should have received a copy of the GNU General Public License | |
24 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. | |
25 | |
26 ;;; Commentary: | |
27 ;; | |
28 ;; Here are functions necessary to use the Wisent LALR parser from | |
29 ;; Semantic environment. | |
30 | |
31 ;;; History: | |
32 ;; | |
33 | |
34 ;;; Code: | |
35 | |
36 (require 'semantic) | |
37 (require 'semantic/wisent/wisent) | |
38 | |
39 ;;; Lexical analysis | |
40 ;; | |
41 (defvar wisent-lex-istream nil | |
42 "Input stream of `semantic-lex' syntactic tokens.") | |
43 | |
44 (defvar wisent-lex-lookahead nil | |
45 "Extra lookahead token. | |
46 When non-nil it is directly returned by `wisent-lex-function'.") | |
47 | |
48 ;; Maintain this alias for compatibility until all WY grammars have | |
49 ;; been translated again to Elisp code. | |
50 (semantic-alias-obsolete 'wisent-lex-make-token-table | |
51 'semantic-lex-make-type-table) | |
52 | |
53 (defmacro wisent-lex-eoi () | |
54 "Return an End-Of-Input lexical token. | |
55 The EOI token is like this: ($EOI "" POINT-MAX . POINT-MAX)." | |
56 `(cons ',wisent-eoi-term | |
57 (cons "" | |
58 (cons (point-max) (point-max))))) | |
59 | |
60 (defmacro define-wisent-lexer (name doc &rest body) | |
61 "Create a new lexical analyzer with NAME. | |
62 DOC is a documentation string describing this analyzer. | |
63 When a token is available in `wisent-lex-istream', eval BODY forms | |
64 sequentially. BODY must return a lexical token for the LALR parser. | |
65 | |
66 Each token in input was produced by `semantic-lex', it is a list: | |
67 | |
68 (TOKSYM START . END) | |
69 | |
70 TOKSYM is a terminal symbol used in the grammar. | |
71 START and END mark boundary in the current buffer of that token's | |
72 value. | |
73 | |
74 Returned tokens must have the form: | |
75 | |
76 (TOKSYM VALUE START . END) | |
77 | |
78 where VALUE is the buffer substring between START and END positions." | |
79 `(defun | |
80 ,name () ,doc | |
81 (cond | |
82 (wisent-lex-lookahead | |
83 (prog1 wisent-lex-lookahead | |
84 (setq wisent-lex-lookahead nil))) | |
85 (wisent-lex-istream | |
86 ,@body) | |
87 ((wisent-lex-eoi))))) | |
88 | |
89 (define-wisent-lexer wisent-lex | |
90 "Return the next available lexical token in Wisent's form. | |
91 The variable `wisent-lex-istream' contains the list of lexical tokens | |
92 produced by `semantic-lex'. Pop the next token available and convert | |
93 it to a form suitable for the Wisent's parser." | |
94 (let* ((tk (car wisent-lex-istream))) | |
95 ;; Eat input stream | |
96 (setq wisent-lex-istream (cdr wisent-lex-istream)) | |
97 (cons (semantic-lex-token-class tk) | |
98 (cons (semantic-lex-token-text tk) | |
99 (semantic-lex-token-bounds tk))))) | |
100 | |
101 ;;; Syntax analysis | |
102 ;; | |
103 (defvar wisent-error-function nil | |
104 "Function used to report parse error. | |
105 By default use the function `wisent-message'.") | |
106 (make-variable-buffer-local 'wisent-error-function) | |
107 | |
108 (defvar wisent-lexer-function 'wisent-lex | |
109 "Function used to obtain the next lexical token in input. | |
110 Should be a lexical analyzer created with `define-wisent-lexer'.") | |
111 (make-variable-buffer-local 'wisent-lexer-function) | |
112 | |
113 ;; Tag production | |
114 ;; | |
115 (defsubst wisent-raw-tag (semantic-tag) | |
116 "Return raw form of given Semantic tag SEMANTIC-TAG. | |
117 Should be used in semantic actions, in grammars, to build a Semantic | |
118 parse tree." | |
119 (nconc semantic-tag | |
120 (if (or $region | |
121 (setq $region (nthcdr 2 wisent-input))) | |
122 (list (car $region) (cdr $region)) | |
123 (list (point-max) (point-max))))) | |
124 | |
125 (defsubst wisent-cook-tag (raw-tag) | |
126 "From raw form of Semantic tag RAW-TAG, return a list of cooked tags. | |
127 Should be used in semantic actions, in grammars, to build a Semantic | |
128 parse tree." | |
129 (let* ((cooked (semantic--tag-expand raw-tag)) | |
130 (l cooked)) | |
131 (while l | |
132 (semantic--tag-put-property (car l) 'reparse-symbol $nterm) | |
133 (setq l (cdr l))) | |
134 cooked)) | |
135 | |
136 ;; Unmatched syntax collector | |
137 ;; | |
138 (defun wisent-collect-unmatched-syntax (nomatch) | |
139 "Add lexical token NOMATCH to the cache of unmatched tokens. | |
140 See also the variable `semantic-unmatched-syntax-cache'. | |
141 | |
142 NOMATCH is in Wisent's form: (SYMBOL VALUE START . END) | |
143 and will be collected in `semantic-lex' form: (SYMBOL START . END)." | |
144 (let ((region (cddr nomatch))) | |
145 (and (number-or-marker-p (car region)) | |
146 (number-or-marker-p (cdr region)) | |
147 (setq semantic-unmatched-syntax-cache | |
148 (cons (cons (car nomatch) region) | |
149 semantic-unmatched-syntax-cache))))) | |
150 | |
151 ;; Parser plug-ins | |
152 ;; | |
153 ;; The following functions permit to plug the Wisent LALR parser in | |
154 ;; Semantic toolkit. They use the standard API provided by Semantic | |
155 ;; to plug parsers in. | |
156 ;; | |
157 ;; Two plug-ins are available, BUT ONLY ONE MUST BE USED AT A TIME: | |
158 ;; | |
159 ;; - `wisent-parse-stream' designed to override the standard function | |
160 ;; `semantic-parse-stream'. | |
161 ;; | |
162 ;; - `wisent-parse-region' designed to override the standard function | |
163 ;; `semantic-parse-region'. | |
164 ;; | |
165 ;; Maybe the latter is faster because it eliminates a lot of function | |
166 ;; call. | |
167 ;; | |
168 (defun wisent-parse-stream (stream goal) | |
169 "Parse STREAM using the Wisent LALR parser. | |
170 GOAL is a nonterminal symbol to start parsing at. | |
171 Return the list (STREAM SEMANTIC-STREAM) where STREAM are those | |
172 elements of STREAM that have not been used. SEMANTIC-STREAM is the | |
173 list of semantic tags found. | |
174 The LALR parser automaton must be available in buffer local variable | |
175 `semantic--parse-table'. | |
176 | |
177 Must be installed by `semantic-install-function-overrides' to override | |
178 the standard function `semantic-parse-stream'." | |
179 (let (wisent-lex-istream wisent-lex-lookahead la-elt cache) | |
180 | |
181 ;; IMPLEMENTATION NOTES: | |
182 ;; `wisent-parse' returns a lookahead token when it stopped | |
183 ;; parsing before encountering the end of input. To re-enter the | |
184 ;; parser it is necessary to push back in the lexical input stream | |
185 ;; the last lookahead token issued. Because the format of | |
186 ;; lookahead tokens and tokens in STREAM can be different the | |
187 ;; lookahead token is put in the variable `wisent-lex-lookahead' | |
188 ;; before calling `wisent-parse'. Wisent's lexers always pop the | |
189 ;; next lexical token from that variable when non nil, then from | |
190 ;; the lexical input stream. | |
191 ;; | |
192 ;; The first element of STREAM is used to keep lookahead tokens | |
193 ;; across successive calls to `wisent-parse-stream'. In fact | |
194 ;; what is kept is a stack of lookaheads encountered so far. It | |
195 ;; is cleared when `wisent-parse' returns a valid semantic tag, | |
196 ;; or twice the same lookahead token! The latter indicates that | |
197 ;; there is a syntax error on that token. If so, tokens currently | |
198 ;; in the lookahead stack have not been used, and are moved into | |
199 ;; `semantic-unmatched-syntax-cache'. When the parser will be | |
200 ;; re-entered, a new lexical token will be read from STREAM. | |
201 ;; | |
202 ;; The first element of STREAM that contains the lookahead stack | |
203 ;; has this format (compatible with the format of `semantic-lex' | |
204 ;; tokens): | |
205 ;; | |
206 ;; (LOOKAHEAD-STACK START . END) | |
207 ;; | |
208 ;; where LOOKAHEAD-STACK is a list of lookahead tokens. And | |
209 ;; START/END are the bounds of the lookahead at top of stack. | |
210 | |
211 ;; Retrieve lookahead token from stack | |
212 (setq la-elt (car stream)) | |
213 (if (consp (car la-elt)) | |
214 ;; The first elt of STREAM contains a lookahead stack | |
215 (setq wisent-lex-lookahead (caar la-elt) | |
216 stream (cdr stream)) | |
217 (setq la-elt nil)) | |
218 ;; Parse | |
219 (setq wisent-lex-istream stream | |
220 cache (semantic-safe "wisent-parse-stream: %s" | |
221 (condition-case error-to-filter | |
222 (wisent-parse semantic--parse-table | |
223 wisent-lexer-function | |
224 wisent-error-function | |
225 goal) | |
226 (args-out-of-range | |
227 (if (and (not debug-on-error) | |
228 (= wisent-parse-max-stack-size | |
229 (nth 2 error-to-filter))) | |
230 (progn | |
231 (message "wisent-parse-stream: %s" | |
232 (error-message-string error-to-filter)) | |
233 (message "wisent-parse-max-stack-size \ | |
234 might need to be increased")) | |
235 (apply 'signal error-to-filter)))))) | |
236 ;; Manage returned lookahead token | |
237 (if wisent-lookahead | |
238 (if (eq (caar la-elt) wisent-lookahead) | |
239 ;; It is already at top of lookahead stack | |
240 (progn | |
241 (setq cache nil | |
242 la-elt (car la-elt)) | |
243 (while la-elt | |
244 ;; Collect unmatched tokens from the stack | |
245 (run-hook-with-args | |
246 'wisent-discarding-token-functions (car la-elt)) | |
247 (setq la-elt (cdr la-elt)))) | |
248 ;; New lookahead token | |
249 (if (or (consp cache) ;; Clear the stack if parse succeeded | |
250 (null la-elt)) | |
251 (setq la-elt (cons nil nil))) | |
252 ;; Push it into the stack | |
253 (setcar la-elt (cons wisent-lookahead (car la-elt))) | |
254 ;; Update START/END | |
255 (setcdr la-elt (cddr wisent-lookahead)) | |
256 ;; Push (LOOKAHEAD-STACK START . END) in STREAM | |
257 (setq wisent-lex-istream (cons la-elt wisent-lex-istream)))) | |
258 ;; Return (STREAM SEMANTIC-STREAM) | |
259 (list wisent-lex-istream | |
260 (if (consp cache) cache '(nil)) | |
261 ))) | |
262 | |
263 (defun wisent-parse-region (start end &optional goal depth returnonerror) | |
264 "Parse the area between START and END using the Wisent LALR parser. | |
265 Return the list of semantic tags found. | |
266 Optional arguments GOAL is a nonterminal symbol to start parsing at, | |
267 DEPTH is the lexical depth to scan, and RETURNONERROR is a flag to | |
268 stop parsing on syntax error, when non-nil. | |
269 The LALR parser automaton must be available in buffer local variable | |
270 `semantic--parse-table'. | |
271 | |
272 Must be installed by `semantic-install-function-overrides' to override | |
273 the standard function `semantic-parse-region'." | |
274 (if (or (< start (point-min)) (> end (point-max)) (< end start)) | |
275 (error "Invalid bounds [%s %s] passed to `wisent-parse-region'" | |
276 start end)) | |
277 (let* ((case-fold-search semantic-case-fold) | |
278 (wisent-lex-istream (semantic-lex start end depth)) | |
279 ptree tag cooked lstack wisent-lex-lookahead) | |
280 ;; Loop while there are lexical tokens available | |
281 (while wisent-lex-istream | |
282 ;; Parse | |
283 (setq wisent-lex-lookahead (car lstack) | |
284 tag (semantic-safe "wisent-parse-region: %s" | |
285 (wisent-parse semantic--parse-table | |
286 wisent-lexer-function | |
287 wisent-error-function | |
288 goal))) | |
289 ;; Manage returned lookahead token | |
290 (if wisent-lookahead | |
291 (if (eq (car lstack) wisent-lookahead) | |
292 ;; It is already at top of lookahead stack | |
293 (progn | |
294 (setq tag nil) | |
295 (while lstack | |
296 ;; Collect unmatched tokens from lookahead stack | |
297 (run-hook-with-args | |
298 'wisent-discarding-token-functions (car lstack)) | |
299 (setq lstack (cdr lstack)))) | |
300 ;; Push new lookahead token into the stack | |
301 (setq lstack (cons wisent-lookahead lstack)))) | |
302 ;; Manage the parser result | |
303 (cond | |
304 ;; Parse succeeded, cook result | |
305 ((consp tag) | |
306 (setq lstack nil ;; Clear the lookahead stack | |
307 cooked (semantic--tag-expand tag) | |
308 ptree (append cooked ptree)) | |
309 (while cooked | |
310 (setq tag (car cooked) | |
311 cooked (cdr cooked)) | |
312 (or (semantic--tag-get-property tag 'reparse-symbol) | |
313 (semantic--tag-put-property tag 'reparse-symbol goal))) | |
314 ) | |
315 ;; Return on error if requested | |
316 (returnonerror | |
317 (setq wisent-lex-istream nil) | |
318 )) | |
319 ;; Work in progress... | |
320 (if wisent-lex-istream | |
321 (and (eq semantic-working-type 'percent) | |
322 (boundp 'semantic--progress-reporter) | |
323 semantic--progress-reporter | |
324 (progress-reporter-update | |
325 semantic--progress-reporter | |
326 (/ (* 100 (semantic-lex-token-start | |
327 (car wisent-lex-istream))) | |
328 (point-max)))))) | |
329 ;; Return parse tree | |
330 (nreverse ptree))) | |
331 | |
332 ;;; Interfacing with edebug | |
333 ;; | |
334 (add-hook | |
335 'edebug-setup-hook | |
336 #'(lambda () | |
337 | |
338 (def-edebug-spec define-wisent-lexer | |
339 (&define name stringp def-body) | |
340 ) | |
341 | |
342 )) | |
343 | |
344 (provide 'semantic/wisent) | |
345 | |
346 ;;; semantic/wisent.el ends here |