26187
|
1 ;;; tildify.el --- adding hard spaces into texts
|
|
2
|
33949
|
3 ;; Copyright (C) 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
|
26187
|
4
|
|
5 ;; Author: Milan Zamazal <pdm@freesoft.cz>
|
|
6 ;; Version: 4.4
|
31529
|
7 ;; Keywords: text, TeX, SGML, wp
|
26187
|
8
|
|
9 ;; This file is part of GNU Emacs.
|
|
10
|
|
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
12 ;; it under the terms of the GNU General Public License as published by
|
|
13 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
14 ;; any later version.
|
|
15
|
|
16 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 ;; GNU General Public License for more details.
|
|
20
|
|
21 ;; You should have received a copy of the GNU General Public License
|
|
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
24 ;; Boston, MA 02111-1307, USA.
|
|
25
|
|
26 ;;; Commentary:
|
|
27
|
|
28 ;; This package can be typically used for adding forgotten tildes in TeX
|
|
29 ;; sources or adding `&nbsp;' sequences in SGML (e.g. HTML) texts.
|
|
30 ;;
|
31529
|
31 ;; For example, the Czech orthography requires avoiding one-letter
|
|
32 ;; prepositions at line endings. So they should be connected with the
|
|
33 ;; following words by a tilde. Some users forget to do this all the
|
|
34 ;; time. The purpose of this program is to check the text and suggest
|
|
35 ;; adding missing tildes in some places. It works in a similar
|
|
36 ;; manner to `query-replace-regexp'.
|
26187
|
37 ;;
|
31529
|
38 ;; The functionality of this program is actually performing query
|
|
39 ;; replace on certain regions, but for historical reasons explained
|
|
40 ;; above it is called `tildify'.
|
26187
|
41 ;;
|
|
42 ;; The default variable settings are suited for Czech, so do not try to
|
|
43 ;; understand them if you are not familiar with Czech grammar and spelling.
|
|
44 ;;
|
|
45 ;; The algorithm was inspired by Petr Olšák's program `vlna'. Abilities of
|
|
46 ;; `tildify.el' are a little limited; if you have improvement suggestions, let
|
|
47 ;; me know.
|
|
48
|
|
49 ;;; Code:
|
|
50
|
|
51
|
|
52 ;;; *** User configuration variables ***
|
|
53
|
|
54
|
|
;; Customization group holding every user option of this package.
(defgroup tildify nil
  "Adding missing hard spaces or other text fragments into texts."
  :group 'wp
  :version "21.1")
|
|
59
|
|
(defcustom tildify-pattern-alist
  '((t
     "\\([,:;(][ \t]*[a]\\|\\<[AIKOSUVZikosuvz]\\)\\([ \t]+\\|[ \t]*\n[ \t]*\\)\\(\\w\\|[([{\\]\\|<[a-zA-Z]\\)"
     2))
  "Alist describing the places where a hard space should be inserted.

Every element has one of these two shapes:

  (MAJOR-MODE REGEXP NUMBER)
  (MAJOR-MODE . SYMBOL)

MAJOR-MODE selects the major mode the element applies to.  It is either
- a symbol equal to the major mode of the buffer being processed, or
- t, marking the default element used for every major mode that has no
  element of its own.

REGEXP is a regular expression matching a piece of text in which a hard
space is missing.  Matching is always case sensitive, whatever the current
value of `case-fold-search'.

NUMBER is the number of the REGEXP subexpression that gets replaced by the
hard space string.

An element of the shape (MAJOR-MODE . SYMBOL) is an alias: for MAJOR-MODE
the element belonging to the mode SYMBOL is looked up instead."
  :type '(repeat (choice (list symbol regexp integer) (cons symbol symbol)))
  :group 'tildify)
|
|
83
|
|
(defcustom tildify-string-alist
  '((latex-mode . "~")
    (tex-mode . latex-mode)
    (plain-tex-mode . latex-mode)
    ;; SGML has no literal hard space; the entity reference is what must be
    ;; inserted.  A plain " " here would replace a space with a space.
    (sgml-mode . "&nbsp;")
    (html-mode . sgml-mode)
    (t . " "))
  "Alist specifying what is a hard space in the current major mode.

Each alist item is of the form (MAJOR-MODE . STRING) or
\(MAJOR-MODE . SYMBOL).

MAJOR-MODE defines major mode, for which the item applies.  It can be either:
- a symbol equal to the major mode of the buffer to be fixed
- t for default item, this applies to all major modes not defined in another
  alist item

STRING defines the hard space, which is inserted at places defined by
`tildify-pattern-alist'.  For example it can be \"~\" for TeX or \"&nbsp;\"
for SGML.

The form (MAJOR-MODE . SYMBOL) defines alias item for MAJOR-MODE.  For this
mode, the item for the mode SYMBOL is looked up in the alist instead."
  :group 'tildify
  :type '(repeat (cons symbol (choice string symbol))))
|
|
109
|
|
(defcustom tildify-ignored-environments-alist
  '((latex-mode
     ("\\\\\\\\" . "")                  ; do not remove this
     ("\\\\begin{verbatim}" . "\\\\end{verbatim}")
     ("\\\\verb\\(.\\)" . (1))
     ("\\$\\$" . "\\$\\$")
     ("\\$" . "\\$")
     ("\\\\(" . "\\\\)")
     ("\\\\[[]" . "\\\\[]]")
     ("\\\\begin{math}" . "\\\\end{math}")
     ("\\\\begin{displaymath}" . "\\\\end{displaymath}")
     ("\\\\begin{equation}" . "\\\\end{equation}")
     ("\\\\begin{eqnarray\\*?}" . "\\\\end{eqnarray\\*?}")
     ("\\\\[a-zA-Z]+\\( +\\|{}\\)[a-zA-Z]*" . "")
     ("%" . "$"))
    (plain-tex-mode . latex-mode)
    (html-mode
     ;; Lower-case tags
     ("<pre[^>]*>" . "</pre>")
     ("<dfn>" . "</dfn>")
     ("<code>" . "</code>")
     ("<samp>" . "</samp>")
     ("<kbd>" . "</kbd>")
     ("<var>" . "</var>")
     ;; Upper-case tags (matching is case sensitive)
     ("<PRE[^>]*>" . "</PRE>")
     ("<DFN>" . "</DFN>")
     ("<CODE>" . "</CODE>")
     ("<SAMP>" . "</SAMP>")
     ("<KBD>" . "</KBD>")
     ("<VAR>" . "</VAR>")
     ;; Comments, then any other tag
     ("<! *--" . "-- *>")
     ("<" . ">"))
    (sgml-mode . html-mode)
    (t nil))
  "Alist of structured text environments that are left untouched.
Text inside the environments listed here is skipped; no hard spaces are
inserted into it.  These settings allow skipping text parts like verbatim
or math environments in TeX or preformatted text in SGML.

Every element looks like

  (MAJOR-MODE (BEG-REGEX . END-REGEX) (BEG-REGEX . END-REGEX) ... )

MAJOR-MODE selects the major mode the element applies to.  It is either
- a symbol equal to the major mode of the buffer being processed, or
- t, marking the default element used for every major mode that has no
  element of its own.

BEG-REGEX is a regexp matching the beginning of a text part to skip.
END-REGEX determines where that text part ends and is either
- a regexp matching the end of the skipped text part, or
- a list of regexps and numbers; the ending regexp is built by
  concatenating its members, each number being replaced with the
  corresponding subexpression of BEG-REGEX (this handles cases such as
  \\verb<character> in TeX)."
  :type '(repeat (cons symbol (choice symbol (repeat sexp))))
  :group 'tildify)
|
|
165
|
|
166
|
|
167 ;;; *** Internal variables ***
|
|
168
|
|
;; Reset to 0 by `tildify-region' and incremented by `tildify-tildify'.
(defvar tildify-count nil
  "Number of replacements made so far by the running tildify command.")
|
|
171
|
|
172
|
|
173 ;;; *** Interactive functions ***
|
|
174
|
|
175 ;;;###autoload
|
|
(defun tildify-region (beg end)
  "Add hard spaces in the region between BEG and END.
See variables `tildify-pattern-alist', `tildify-string-alist', and
`tildify-ignored-environments-alist' for information about configuration
parameters.
This function performs no refilling of the changed text.

When ignored environments are defined for the current major mode, the
buffer is scanned from its beginning so that environments opened before
BEG are recognized; only the text between them and within BEG..END is
passed to `tildify-tildify'.  The number of replacements is reported in
the echo area."
  (interactive "*r")
  (setq tildify-count 0)
  (let (a                               ; start of the current text chunk
        z                               ; marker at the end of the chunk
        (marker-end (copy-marker end))  ; END, tracking inserted text
        end-env                         ; regexp ending the environment found
        finish
        (ask t)
        (case-fold-search nil)
        (regexp (tildify-build-regexp)) ; beginnings of environments
        aux)
    (if regexp
        ;; Yes, ignored environments exist for the current major mode,
        ;; tildify just texts outside them
        (save-excursion
          (save-restriction
            (widen)
            (goto-char (point-min))
            (while (not finish)
              (setq a (point))
              (setq end-env (tildify-find-env regexp))
              (setq z (copy-marker (if end-env (1- (point)) (point-max))))
              (if (>= (marker-position z) beg)
                  (progn
                    (or (>= a beg) (setq a beg))
                    (unless (<= (marker-position z)
                                (marker-position marker-end))
                      ;; Clip the chunk to the region; the marker just
                      ;; created is no longer needed.
                      (set-marker z nil)
                      (setq z marker-end))
                    (setq aux (tildify-tildify a (marker-position z) ask))
                    (if (eq aux 'force)
                        (setq ask nil)
                      (if (eq aux nil)
                          (setq finish t)))))
              (if (>= (marker-position z) (marker-position marker-end))
                  (setq finish t))
              (or (>= (point) (marker-position z))
                  (goto-char (marker-position z)))
              (if (not finish)
                  (if (re-search-forward end-env nil t)
                      (if (> (point) (marker-position marker-end))
                          (setq finish t))
                    ;; Pass END-ENV as an argument: it may contain `%'
                    ;; (e.g. from \\verb%), which must not be taken as a
                    ;; format directive.
                    (message "End of environment not found: %s" end-env)
                    (setq finish t)))
              ;; Z is rebuilt on every iteration; detach the old marker so
              ;; it does not slow down later buffer modifications.
              (or (eq z marker-end) (set-marker z nil)))))
      ;; No ignored environments, tildify directly
      (tildify-tildify beg end ask))
    (set-marker marker-end nil))
  (message "%d spaces replaced." tildify-count))
|
|
228
|
|
229 ;;;###autoload
|
|
(defun tildify-buffer ()
  "Add hard spaces throughout the accessible portion of the current buffer.
This simply runs `tildify-region' from `point-min' to `point-max'.
The variables `tildify-pattern-alist', `tildify-string-alist', and
`tildify-ignored-environments-alist' describe the configuration
parameters.
The changed text is not refilled."
  (interactive "*")
  (let ((start (point-min))
        (stop (point-max)))
    (tildify-region start stop)))
|
|
238
|
|
239
|
|
240 ;;; *** Auxiliary functions ***
|
|
241
|
|
(defun tildify-build-regexp ()
  "Build the regexp matching the start of any ignored environment.
The BEG-REGEX parts of the `tildify-ignored-environments-alist' item for
the current major mode are joined with \"\\\\|\".  Return nil when the item
is empty or its first pair has no BEG-REGEX."
  (let ((pairs (tildify-mode-alist tildify-ignored-environments-alist)))
    (when pairs
      ;; Seed with the first BEG-REGEX, then append the remaining ones as
      ;; alternatives.
      (let ((joined (car (car pairs))))
        (dolist (pair (cdr pairs))
          (setq joined (concat joined "\\|" (car pair))))
        joined))))
|
|
253
|
|
(defun tildify-mode-alist (mode-alist &optional mode)
  "Return the value stored in MODE-ALIST for the major mode MODE.
MODE defaults to the value of `major-mode'.  When MODE has no item in
MODE-ALIST, the item for t is used.  A non-nil symbol value is an alias:
the lookup is repeated with that symbol as the mode until a non-symbol
value (or nil) is reached, which is returned."
  (let ((current (or mode major-mode))
        (value nil)
        (resolved nil))
    (while (not resolved)
      (setq value (cdr (or (assoc current mode-alist)
                           (assoc t mode-alist))))
      (if (and value (symbolp value))
          ;; Alias item, follow it
          (setq current value)
        (setq resolved t)))
    value))
|
|
264
|
|
(defun tildify-find-env (regexp)
  "Find the next ignored environment after point using REGEXP.
REGEXP is searched forward from point; on success point is left after
the match.  Return the regexp for the end of the environment found, or
nil if no environment was found.

The end regexp comes from the item of `tildify-ignored-environments-alist'
for the current major mode: the first pair whose BEG-REGEX matches at the
start of the text just found supplies END-REGEX.  When END-REGEX is a
list, its strings are concatenated and each number is replaced by the
quoted text of the corresponding subexpression of BEG-REGEX."
  ;; Find environment
  (when (re-search-forward regexp nil t)
    ;; Build end-env regexp
    (let ((match (match-string 0))
          (alist (tildify-mode-alist tildify-ignored-environments-alist)))
      ;; Everything below matches against the string MATCH; the match data
      ;; of the buffer search above is restored afterwards.
      (save-match-data
        ;; Identify the pair that produced the match
        (while (not (eq (string-match (caar alist) match) 0))
          (setq alist (cdr alist)))
        (let ((expression (cdar alist)))
          (if (stringp expression)
              expression
            ;; List form.  Numbers refer to subexpressions of BEG-REGEX, so
            ;; they are resolved against the `string-match' on MATCH done
            ;; just above, not against the combined REGEXP whose group
            ;; numbering depends on the other alternatives.
            (mapconcat
             (lambda (item)
               (if (stringp item)
                   item
                 (regexp-quote (match-string item match))))
             expression
             "")))))))
|
|
291
|
|
(defun tildify-tildify (beg end ask)
  "Add tilde characters in the region between BEG and END.
Every match of the pattern from `tildify-pattern-alist' for the current
major mode is a candidate; the subexpression named there is replaced by
the string from `tildify-string-alist'.  Ignored environments are not
checked here.  `tildify-count' is incremented for each replacement.

If ASK is nil, perform replace without asking user for confirmation.
Otherwise each candidate is confirmed with a key: y or SPC replaces, n
skips, ! replaces this and all remaining candidates without asking, q
quits.  Any other key repeats the question.

Returns one of symbols: t (all right), nil (quit), force (replace without
further questions)."
  (save-excursion
    (goto-char beg)
    (let* ((alist (tildify-mode-alist tildify-pattern-alist))
           (regexp (car alist))
           (match-number (cadr alist))
           (tilde (tildify-mode-alist tildify-string-alist))
           ;; Replacements change the text length, so track END by marker
           (end-marker (copy-marker end))
           answer
           bad-answer
           prompt
           replace
           quit
           ;; Keep the prompts out of the *Messages* buffer
           (message-log-max nil))
      (while (and (not quit)
                  (re-search-forward regexp (marker-position end-marker) t))
        (when (or (not ask)
                  (progn
                    ;; Show the user where the replacement would happen
                    (goto-char (match-beginning match-number))
                    (setq bad-answer t
                          prompt "Replace? (yn!q) ")
                    ;; Ask until a valid key is pressed.  The key must be
                    ;; interpreted inside the loop, otherwise an invalid
                    ;; key would silently reuse the previous decision.
                    (while bad-answer
                      (setq bad-answer nil)
                      (message "%s" prompt)
                      (setq answer (read-event))
                      (cond
                       ((or (eq answer ?y) (eq answer ? ) (eq answer 'space))
                        (setq replace t))
                       ((eq answer ?n)
                        (setq replace nil))
                       ((eq answer ?!)
                        (setq replace t
                              ask nil))
                       ((eq answer ?q)
                        (setq replace nil
                              quit t))
                       (t
                        ;; Use the hint as the next prompt so that it
                        ;; stays visible while waiting for the key.
                        (setq prompt "Press y, n, !, or q."
                              bad-answer t))))
                    replace))
          (replace-match tilde t t nil match-number)
          (setq tildify-count (1+ tildify-count))))
      ;; The marker is not needed any more, detach it from the buffer
      (set-marker end-marker nil)
      ;; Return value
      (cond
       (quit nil)
       ((not ask) 'force)
       (t t)))))
|
|
345
|
|
346
|
|
347 ;;; *** Announce ***
|
|
348
|
|
;; Make `(require 'tildify)' succeed once this file is loaded.
(provide (quote tildify))
|
|
350
|
|
351
|
|
352 ;; Local variables:
|
|
353 ;; coding: iso-latin-2
|
|
354 ;; End:
|
|
355
|
|
356 ;;; tildify.el ends here
|