Mercurial > emacs
annotate lisp/international/ogonek.el @ 88364:f6557aebe110
Completely re-written. Character and multibyte
sequence handling codes are moved to character.c.
author | Kenichi Handa <handa@m17n.org> |
---|---|
date | Fri, 01 Mar 2002 01:16:34 +0000 |
parents | 64274328203b |
children | 0d8b17d428b5 |
rev | line source |
---|---|
21701 | 1 ;;; ogonek.el --- change the encoding of Polish diacritics |
18264 | 2 |
3 ;; Copyright (C) 1997 Free Software Foundation, Inc. | |
4 | |
5 ;; Author: W{\l}odek Bzyl, Ryszard Kubiak | |
6 ;; Maintainer: rysiek@ipipan.gda.pl (Ryszard Kubiak) | |
7 ;; Keywords: i18n | |
8 | |
9 ;; This file is part of GNU Emacs. | |
10 | |
11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 ;; it under the terms of the GNU General Public License as published by | |
13 ;; the Free Software Foundation; either version 2, or (at your option) | |
14 ;; any later version. | |
15 | |
16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 ;; GNU General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 ;; Boston, MA 02111-1307, USA. | |
25 | |
26 ;;; Commentary: | |
27 | |
28 ;; To use this library load it using | |
29 ;; M-x load-library [enter] ogonek | |
30 ;; Then, you may get a short info by calling one of | |
31 ;; M-x ogonek-jak -- in Polish | |
32 ;; M-x ogonek-how -- in English " | |
33 | |
21646
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
34 ;;; Code: |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
35 |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
36 (defgroup ogonek nil |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
37 "Change the encoding of Polish diacritic characters." |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
38 :prefix "ogonek-" |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
39 :group 'i18n) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
40 |
18264 | 41 (defconst ogonek-name-encoding-alist |
42 '(("ascii" . (?A ?C ?E ?L ?N ?O ?S ?Z ?Z | |
43 ?a ?c ?e ?l ?n ?o ?s ?z ?z)) | |
44 ("iso8859-2" . (161 198 202 163 209 211 166 172 175 | |
45 177 230 234 179 241 243 182 188 191)) | |
46 ("mazovia" . (143 149 144 156 165 163 152 160 161 | |
47 134 141 145 146 164 162 158 166 167)) | |
48 ("windows-EE" . (165 198 202 163 209 211 140 143 175 | |
49 185 230 234 179 241 243 156 159 191)) | |
50 ("windows-PL" . (165 198 202 163 209 211 140 143 175 | |
51 185 230 234 179 241 243 156 159 191)) | |
52 ("latin-2" . (164 143 168 157 227 224 151 141 189 | |
53 165 134 169 136 228 162 152 171 190)) | |
54 ("CP852" . (164 143 168 157 227 224 151 141 189 | |
55 165 134 169 136 228 162 152 171 190)) | |
56 ("MeX" . (129 130 134 138 139 211 145 153 155 | |
57 161 162 166 170 171 243 177 185 187)) | |
58 ("CorelDraw" . (197 242 201 163 209 211 255 225 237 | |
59 229 236 230 198 241 243 165 170 186)) | |
60 ("Amiga" . (194 202 203 206 207 211 212 218 219 | |
61 226 234 235 238 239 243 244 250 251)) | |
62 ("Mac" . (132 140 162 252 193 238 229 143 251 | |
63 136 141 171 184 196 151 230 144 253)) | |
64 ) | |
65 "The constant `ogonek-name-encoding-alist' is a list of (NAME.LIST) pairs. | |
18363 | 66 Each LIST contains codes for 18 Polish diacritic characters. The codes |
67 are given in the following order: | |
18264 | 68 Aogonek Cacute Eogonek Lslash Nacute Oacute Sacute Zacute Zdotaccent |
69 aogonek cacute eogonek lslash nacute oacute sacute zacute zdotaccent.") | |
70 | |
18363 | 71 ;; ------ A Little Info in Polish --------------- |
18264 | 72 |
73 (defconst ogonek-informacja | |
18363 | 74 " FUNKCJE INTERAKCYJNE UDOST/EPNIANE PRZEZ BIBLIOTEK/E `ogonek'. |
18264 | 75 |
18363 | 76 Je/sli czytasz ten tekst, to albo przegl/adasz plik /xr/od/lowy |
18264 | 77 biblioteki `ogonek.el', albo wywo/la/le/s polecenie `ogonek-jak'. |
78 W drugim przypadku mo/zesz usun/a/c tekst z ekranu, stosuj/ac | |
79 polecenie `M-x kill-buffer'. | |
80 | |
18363 | 81 Niniejsza biblioteka dostarcza funkcji do zmiany kodowania polskich |
18264 | 82 znak/ow diakrytycznych. Funkcje te mo/zna pogrupowa/c nast/epuj/aco. |
83 | |
18363 | 84 1. `ogonek-recode-region' oraz `ogonek-recode-buffer' |
85 przekodowuj/a zaznaczony fragment wzgl/ednie ca/ly buffor. | |
18264 | 86 Po wywo/laniu interakcyjnym funkcji zadawane s/a |
18363 | 87 pytania o parametry przekodowania: nazw/e kodowania |
88 w tek/scie /xr/od/lowym i nazw/e kodowania docelowego. | |
89 Poni/zsze przyk/lady powinny wyja/sni/c, jakich parametr/ow | |
18264 | 90 oczekuj/a wymienione funkcje: |
91 | |
18363 | 92 (ogonek-recode-region (poczatek-fragmentu) (koniec-fragmentu) |
18264 | 93 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa) |
94 (ogonek-recode-buffer | |
95 nazwa-kodowania-w-tekscie-zrodlowym nazwa-kodowania-docelowa) | |
96 | |
18363 | 97 2. `ogonek-prefixify-region' oraz `ogonek-prefixify-buffer' |
98 s/lu/z/a do wprowadzania notacji prefiksowej. | |
18264 | 99 |
18363 | 100 (ogonek-prefixify-region (poczatek-fragmentu) (koniec-fragmentu) |
18264 | 101 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu) |
102 (ogonek-prefixify-buffer | |
103 nazwa-kodowania-w-tekscie-zrodlowym znak-prefiksu) | |
104 | |
18363 | 105 3. `ogonek-deprefixify-region' oraz `ogonek-deprefixify-buffer' |
106 s/lu/z/a do usuwania notacji prefiksowej. | |
18264 | 107 |
18363 | 108 (ogonek-deprefixify-region (poczatek-fragmentu) (koniec-fragmentu) |
18264 | 109 znak-prefiksu nazwa-kodowania-docelowa) |
110 (ogonek-prefixify-buffer | |
111 znak-prefiksu nazwa-kodowania-docelowa) | |
112 | |
113 U/zycie klawisza TAB w trybie interakcyjnym powoduje wy/swietlenie | |
18363 | 114 listy dopuszczalnych nazw kod/ow, pami/etanych w sta/lej |
18264 | 115 `ogonek-name-encoding-alist'. |
116 | |
117 Funkcje biblioteki odwo/luj/a si/e do pi/eciu zmiennych, kt/ore | |
118 przechowuj/a podpowiedzi do zadawanych pyta/n. Nazwy tych zmiennych | |
119 oraz ich warto/sci domy/slne s/a nast/epuj/ace: | |
120 | |
121 ogonek-from-encoding iso8859-2 | |
18363 | 122 ogonek-to-encoding ascii |
18264 | 123 ogonek-prefix-char / |
124 ogonek-prefix-from-encoding iso8859-2 | |
125 ogonek-prefix-to-encoding iso8859-2 | |
126 | |
127 Powy/zsze warto/sci domy/slne mo/zna zmieni/c przez umieszczenie w pliku | |
128 konfiguracyjnym `~/.emacs' odpowiednich przypisa/n, na przyk/lad: | |
129 | |
18363 | 130 (setq ogonek-prefix-char ?/) |
131 (setq ogonek-prefix-to-encoding \"iso8859-2\") | |
18264 | 132 |
133 Zamiast wczytywania ca/lej biblioteki `ogonek.el' mo/zna w pliku | |
18363 | 134 `~/.emacs' za/z/ada/c wczytania wybranych funkcji, na dodatek dopiero |
135 w chwili ich rzeczywistego u/zycia: | |
18264 | 136 |
18363 | 137 (autoload 'ogonek-jak \"ogonek\") |
138 (autoload 'ogonek-recode-region \"ogonek\") | |
139 (autoload 'ogonek-prefixify-region \"ogonek\") | |
140 (autoload 'ogonek-deprefixify-region \"ogonek\") | |
18264 | 141 |
142 Cz/esto wyst/epuj/ace kombinacje wywo/la/n funkcji mo/zna dla wygody | |
143 skr/oci/c i przypisa/c klawiszom. Oto praktyczne przyk/lady: | |
144 | |
18363 | 145 (defun deprefixify-iso8859-2-region (start end) |
146 (interactive \"*r\") | |
147 (ogonek-deprefixify-region start end ?/ \"iso8859-2\")) | |
148 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d | |
18264 | 149 |
18363 | 150 (defun mazovia-to-iso8859-2 (start end) |
151 (interactive \"*r\") | |
152 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\")) | |
153 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r | |
18264 | 154 |
18363 | 155 (defun prefixify-iso8859-2-region (start end) |
156 (interactive \"*r\") | |
157 (ogonek-prefixify-region start end \"iso8859-2\" ?/)) | |
158 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p | |
18264 | 159 |
160 Ka/zd/a operacj/e przekodowania mo/zna w ca/lo/sci odwo/la/c | |
161 przez wykonanie polecenia `undo'.") | |
162 | |
163 (defun ogonek-jak () | |
18363 | 164 "Display `ogonek-informacja' in an auxiliary *ogonek-jak* buffer." |
18264 | 165 (interactive) |
166 (set-buffer (get-buffer-create " *ogonek-jak*")) | |
167 (insert ogonek-informacja) | |
168 (switch-to-buffer " *ogonek-jak*") | |
169 (beginning-of-buffer)) | |
170 | |
18363 | 171 ;; ------ A Little Info in English -------- |
18264 | 172 |
173 (defconst ogonek-information | |
18363 | 174 " THE INTERACTIVE FUNCTIONS PROVIDED BY THE LIBRARY `ogonek'. |
18264 | 175 |
176 If you read this text then you are either looking at the library's | |
18363 | 177 source text or you have called the `ogonek-how' command. In the |
18264 | 178 latter case you may remove this text using `M-x kill-buffer'. |
179 | |
180 The library provides functions for changing the encoding of Polish | |
181 diacritic characters, the ones with an `ogonek' below or above them. | |
18363 | 182 The functions come in the following groups. |
18264 | 183 |
18363 | 184 1. `ogonek-recode-region' and `ogonek-recode-buffer' to change |
185 between one-character encodings, such as `iso-8859-2', `mazovia', | |
186 plain `ascii' or `TeX'. As the names suggest you may recode | |
187 either the entire current buffer or just a marked region | |
188 in it. You may use the functions interactively as commands. | |
189 Once you call a command you will be asked about the code | |
190 currently used in your text and the target encoding, the one | |
191 you want to get. The following example shows a non-interactive | |
192 use of the functions in a program. This also illustrates what | |
193 type of parameters the functions expect to be called with: | |
18264 | 194 |
18363 | 195 (ogonek-recode-region |
196 (region-beginning) (region-end) from-code-name to-code-name) | |
18264 | 197 (ogonek-recode-buffer from-code-name to-code-name) |
198 | |
18363 | 199 2. `ogonek-prefixify-region' and `ogonek-prefixify-buffer' for |
200 introducing prefix notation: | |
18264 | 201 |
18363 | 202 (ogonek-prefixify-region |
203 (region-beginning) (region-end) from-code-name prefix-char) | |
18264 | 204 (ogonek-prefixify-buffer from-code-name prefix-char) |
205 | |
18363 | 206 3. `ogonek-deprefixify-region' and `ogonek-deprefixify-buffer' for |
207 removing prefix notation: | |
18264 | 208 |
18363 | 209 (ogonek-deprefixify-region |
210 (region-beginning) (region-end) prefix-char to-code-name) | |
18264 | 211 (ogonek-prefixify-buffer prefix-char to-code-name) |
212 | |
18363 | 213 The TAB character used in interactive mode makes `emacs' |
214 display the list of encodings recognized by the library. The list | |
215 is stored in the constant `ogonek-name-encoding-alist'. | |
18264 | 216 |
18363 | 217 The `ogonek' functions refer to five variables in which the suggested |
218 answers to dialogue questions are stored. The variables and their | |
219 default values are: | |
18264 | 220 |
221 ogonek-from-encoding iso8859-2 | |
18363 | 222 ogonek-to-encoding ascii |
223 ogonek-prefix-char / | |
18264 | 224 ogonek-prefix-from-encoding iso8859-2 |
225 ogonek-prefix-to-encoding iso8859-2 | |
226 | |
227 The above default values can be changed by placing appropriate settings | |
228 in the '~/.emacs' file: | |
229 | |
18363 | 230 (setq ogonek-prefix-char ?/) |
231 (setq ogonek-prefix-to-encoding \"iso8859-2\") | |
18264 | 232 |
18363 | 233 Instead of loading the whole library `ogonek' it may be better to |
234 autoload the needed functions, for example by placing in `~/.emacs': | |
18264 | 235 |
18363 | 236 (autoload 'ogonek-how \"ogonek\") |
237 (autoload 'ogonek-recode-region \"ogonek\") | |
238 (autoload 'ogonek-prefixify-region \"ogonek\") | |
239 (autoload 'ogonek-deprefixify-region \"ogonek\") | |
18264 | 240 |
241 The most frequent function calls can be abbreviated and assigned to | |
242 keyboard keys. Here are a few practical examples: | |
243 | |
18363 | 244 (defun deprefixify-iso8859-2-region (start end) |
245 (interactive \"*r\") | |
246 (ogonek-deprefixify-region start end ?/ \"iso8859-2\")) | |
247 (global-set-key \"\\C-cd\" 'deprefixify-iso8859-2-region) ; ctrl-c d | |
18264 | 248 |
18363 | 249 (defun mazovia-to-iso8859-2 (start end) |
250 (interactive \"*r\") | |
251 (ogonek-recode-region start end \"mazovia\" \"iso8859-2\")) | |
252 (global-set-key \"\\C-cr\" 'mazovia-to-iso8859-2) ; ctrl-c r | |
18264 | 253 |
18363 | 254 (defun prefixify-iso8859-2-region (start end) |
255 (interactive \"*r\") | |
256 (ogonek-prefixify-region start end \"iso8859-2\" ?/)) | |
257 (global-set-key \"\\C-cp\" 'prefixify-iso8859-2-region) ; ctrl-c p | |
18264 | 258 |
18363 | 259 Each recoding operation can be called off using the `undo' command.") |
18264 | 260 |
261 (defun ogonek-how () | |
18363 | 262 "Display `ogonek-information' in an auxiliary *recode-how* buffer." |
18264 | 263 (interactive "*") |
18363 | 264 (set-buffer (get-buffer-create " *ogonek-how*")) |
18264 | 265 (insert ogonek-information) |
18363 | 266 (switch-to-buffer " *ogonek-how*") |
18264 | 267 (beginning-of-buffer)) |
268 | |
18363 | 269 ;; ---- Variables keeping the suggested answers to dialogue questions ----- |
21646
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
270 (defvar ogonek-encoding-choices |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
271 (cons 'choice |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
272 (mapcar (lambda (x) (list 'const (car x))) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
273 ogonek-name-encoding-alist)) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
274 "List of ogonek encodings. Used only for customization.") |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
275 (defcustom ogonek-from-encoding "iso8859-2" |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
276 "*Encoding in the source file of recoding." |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
277 :type ogonek-encoding-choices |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
278 :group 'ogonek) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
279 (defcustom ogonek-to-encoding "ascii" |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
280 "*Encoding in the target file of recoding." |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
281 :type ogonek-encoding-choices |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
282 :group 'ogonek) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
283 (defcustom ogonek-prefix-char ?/ |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
284 "*Prefix character for prefix encodings." |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
285 :type 'character |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
286 :group 'ogonek) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
287 (defcustom ogonek-prefix-from-encoding "iso8859-2" |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
288 "*Encoding in the source file subject to prefixifation." |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
289 :type ogonek-encoding-choices |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
290 :group 'ogonek) |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
291 (defcustom ogonek-prefix-to-encoding "iso8859-2" |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
292 "*Encoding in the target file subject to deprefixifation." |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
293 :type ogonek-encoding-choices |
ec243644b3fb
Customize. Add `;;; Code' line.
Stephen Eglen <stephen@gnu.org>
parents:
18363
diff
changeset
|
294 :group 'ogonek) |
18264 | 295 |
18363 | 296 ;; ---- Auxiliary functions for reading parameters in interactive mode ---- |
18264 | 297 |
298 (defun ogonek-read-encoding (prompt default-name-var) | |
18363 | 299 "Read encoding name with completion based on `ogonek-name-encoding-alist'. |
42686 | 300 Store the name in the parameter-variable DEFAULT-NAME-VAR. |
18363 | 301 PROMPT is a string to be shown when the user is asked for a name." |
18264 | 302 (let ((encoding |
303 (completing-read | |
304 (format "%s (default %s): " prompt (eval default-name-var)) | |
305 ogonek-name-encoding-alist nil t))) | |
18363 | 306 ;; change the default name to the one just read |
307 (set default-name-var | |
308 (if (string= encoding "") (eval default-name-var) encoding)) | |
309 ;; return the new default as the name you read | |
18264 | 310 (eval default-name-var))) |
311 | |
312 (defun ogonek-read-prefix (prompt default-prefix-var) | |
18363 | 313 "Read a prefix character for prefix notation. |
314 The result is stored in the variable DEFAULT-PREFIX-VAR. | |
315 PROMPT is a string to be shown when the user is asked for a new prefix." | |
18264 | 316 (let ((prefix-string |
317 (read-string | |
318 (format "%s (default %s): " prompt | |
319 (char-to-string (eval default-prefix-var)))))) | |
320 (if (> (length prefix-string) 1) | |
38436
b174db545cfd
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
21701
diff
changeset
|
321 (error "! Only one character expected") |
18363 | 322 ;; set the default prefix character to the one just read |
18264 | 323 (set default-prefix-var |
324 (if (string= prefix-string "") | |
325 (eval default-prefix-var) | |
326 (string-to-char prefix-string))) | |
18363 | 327 ;; the new default prefix is the function's result: |
18264 | 328 (eval default-prefix-var)))) |
329 | |
330 (defun ogonek-lookup-encoding (encoding) | |
18363 | 331 "Pick up an association for ENCODING in `ogonek-name-encoding-alist'. |
332 Before returning a result test whether the string ENCODING is in | |
333 the list `ogonek-name-encoding-alist'" | |
18264 | 334 (let ((code-list (assoc encoding ogonek-name-encoding-alist))) |
335 (if (null code-list) | |
38436
b174db545cfd
Some fixes to follow coding conventions.
Pavel Janík <Pavel@Janik.cz>
parents:
21701
diff
changeset
|
336 (error "! Name `%s' not known in `ogonek-name-encoding-alist'" |
18264 | 337 encoding) |
338 (cdr code-list)))) | |
339 | |
18363 | 340 ;; ---- An auxiliary function for zipping two lists of equal length ---- |
18264 | 341 |
342 (defun ogonek-zip-lists (xs ys) | |
18363 | 343 "Build a list of pairs from lists XS and YS of the same length." |
18264 | 344 (let ((pairs nil)) |
345 (while xs | |
346 (setq pairs (cons (cons (car xs) (car ys)) pairs)) | |
347 (setq xs (cdr xs)) | |
348 (setq ys (cdr ys))) | |
18363 | 349 ;; `pairs' are the function's result |
18264 | 350 pairs)) |
351 | |
18363 | 352 ;; ---- An auxiliary function building a one-to-one recoding table ----- |
18264 | 353 |
354 (defun ogonek-build-table (recoding-pairs) | |
18363 | 355 "Build a table required by Emacs's `translate-region' function. |
356 RECODING-PAIRS is a list of character pairs for which recoding | |
357 is not an identity. | |
18264 | 358 By using the built-in `translate-region' function |
359 we gain better performance compared to converting characters | |
360 by a hand-written routine as it is done for prefix encodings." | |
361 (let ((table (make-string 256 0)) | |
362 (i 0)) | |
363 (while (< i 256) | |
364 (aset table i i) | |
365 (setq i (1+ i))) | |
18363 | 366 ;; make changes in `table' according to `recoding-pairs' |
18264 | 367 (while recoding-pairs |
368 (aset table (car (car recoding-pairs)) (cdr (car recoding-pairs))) | |
369 (setq recoding-pairs (cdr recoding-pairs))) | |
18363 | 370 ;; return the table just built |
18264 | 371 table)) |
372 | |
18363 | 373 ;; ---- Commands for one-to-one recoding ------------------------------- |
374 | |
18264 | 375 (defun ogonek-recode-region (start end from-encoding to-encoding) |
18363 | 376 "Recode text in a marked region in one-to-one manner. |
377 When called interactively ask the user for the names of the FROM- | |
378 and TO- encodings." | |
18264 | 379 (interactive (progn (barf-if-buffer-read-only) |
380 (list | |
381 (region-beginning) | |
382 (region-end) | |
383 (ogonek-read-encoding "From code" 'ogonek-from-encoding) | |
384 (ogonek-read-encoding "To code" 'ogonek-to-encoding)))) | |
385 (save-excursion | |
386 (translate-region | |
387 start end | |
388 (ogonek-build-table | |
389 (ogonek-zip-lists | |
390 (ogonek-lookup-encoding from-encoding) | |
391 (ogonek-lookup-encoding to-encoding)))))) | |
392 | |
393 (defun ogonek-recode-buffer (from-encoding to-encoding) | |
18363 | 394 "Call `ogonek-recode-region' on the entire buffer. |
395 When called interactively ask the user for the names of the FROM- | |
396 and TO- encodings." | |
18264 | 397 (interactive (progn (barf-if-buffer-read-only) |
398 (list | |
399 (ogonek-read-encoding "From code" 'ogonek-from-encoding) | |
400 (ogonek-read-encoding "To code" 'ogonek-to-encoding)))) | |
401 (ogonek-recode-region | |
402 (point-min) (point-max) from-encoding to-encoding)) | |
403 | |
18363 | 404 ;; ---- Recoding with prefix notation ------------------------------- |
18264 | 405 |
18363 | 406 (defconst ogonek-prefix-code '(?A ?C ?E ?L ?N ?O ?S ?X ?Z |
407 ?a ?c ?e ?l ?n ?o ?s ?x ?z)) | |
18264 | 408 |
409 (defun ogonek-prefixify-region (start end from-encoding prefix-char) | |
18363 | 410 "In a region, replace FROM-encoded Polish characters with PREFIX pairs. |
411 A PREFIX pair generated consists of PREFIX-CHAR and the respective | |
412 character listed in the `ogonek-prefix-code' constant. | |
413 PREFIX-CHAR itself gets doubled." | |
18264 | 414 (interactive (progn (barf-if-buffer-read-only) |
415 (list | |
416 (region-beginning) | |
417 (region-end) | |
418 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding) | |
419 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)))) | |
420 (let* | |
421 ((from-code (ogonek-lookup-encoding from-encoding)) | |
18363 | 422 (to-code ogonek-prefix-code) |
423 (recoding-pairs ; `ogonek-prefix-char' added for doubling | |
18264 | 424 (ogonek-zip-lists |
425 (cons prefix-char from-code) | |
426 (cons prefix-char to-code)))) | |
427 (save-excursion | |
428 (goto-char start) | |
429 (while (< (point) end) | |
430 (let ((pair (assoc (following-char) recoding-pairs))) | |
431 (if (null pair) | |
18363 | 432 ;; not a Polish character -- skip it |
18264 | 433 (forward-char 1) |
18363 | 434 ;; Polish character -- insert a prefix pair instead |
18264 | 435 (delete-char 1) |
436 (insert ogonek-prefix-char) | |
437 (insert (cdr pair)) | |
18363 | 438 ;; the region is now one character longer |
18264 | 439 (setq end (1+ end)))))))) |
440 | |
441 (defun ogonek-prefixify-buffer (from-encoding prefix-char) | |
442 "Call `ogonek-prefixify-region' on the entire buffer." | |
443 (interactive (progn (barf-if-buffer-read-only) | |
444 (list | |
445 (ogonek-read-encoding "From code" 'ogonek-prefix-from-encoding) | |
446 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char)))) | |
447 (ogonek-prefixify-region | |
448 (point-min) (point-max) from-encoding prefix-char)) | |
449 | |
450 (defun ogonek-deprefixify-region (start end prefix-char to-encoding) | |
18363 | 451 "In a region, replace PREFIX pairs with their corresponding TO-encodings. |
452 PREFIX-CHAR followed by a Polish character from the `ogonek-prefix-code' | |
453 list is replaced with the corresponding TO-encoded character. A doubled | |
454 PREFIX-CHAR gets replaced with a single one. A combination of PREFIX-CHAR | |
455 followed by a non-Polish character, that is one not listed in the | |
456 `ogonek-prefix-code' constant, is left unchanged." | |
18264 | 457 (interactive (progn (barf-if-buffer-read-only) |
458 (list (region-beginning) | |
459 (region-end) | |
460 (ogonek-read-prefix | |
461 "Prefix character" 'ogonek-prefix-char) | |
462 (ogonek-read-encoding | |
463 "To code" 'ogonek-prefix-to-encoding)))) | |
464 (let* | |
18363 | 465 ((from-code ogonek-prefix-code) |
18264 | 466 (to-code (ogonek-lookup-encoding to-encoding)) |
467 (recoding-pairs | |
468 (ogonek-zip-lists | |
469 (cons prefix-char from-code) | |
470 (cons prefix-char to-code)))) | |
471 (save-excursion | |
472 (goto-char start) | |
473 (while (< (point) end) | |
474 (forward-char 1) | |
475 (if (or (not (= (preceding-char) prefix-char)) (= (point) end)) | |
18363 | 476 ;; non-prefix character or the end-of-region -- do nothing |
18264 | 477 () |
18363 | 478 ;; now, we can check the next character |
18264 | 479 (let ((pair (assoc (following-char) recoding-pairs))) |
480 (if (null pair) | |
18363 | 481 ;; `following-char' is not a Polish character nor it is |
482 ;; `prefix-char' since the one is among `recoding-pairs' | |
18264 | 483 (forward-char 1) |
18363 | 484 ;; else prefix followed by a Polish character has been found |
485 ;; replace it by the corresponding Polish character | |
18264 | 486 (backward-char 1) |
487 (delete-char 2) | |
488 (insert (cdr pair)) | |
18363 | 489 ;; the region got shorter by one character |
18264 | 490 (setq end (1- end))))))))) |
491 | |
492 (defun ogonek-deprefixify-buffer (prefix-char to-encoding) | |
493 "Call `ogonek-deprefixify-region' on the entire buffer." | |
494 (interactive (progn (barf-if-buffer-read-only) | |
495 (list | |
496 (ogonek-read-prefix "Prefix character" 'ogonek-prefix-char) | |
497 (ogonek-read-encoding "To code" 'ogonek-prefix-to-encoding)))) | |
498 (ogonek-deprefixify-region | |
499 (point-min) (point-max) prefix-char to-encoding)) | |
500 | |
501 (provide 'ogonek) | |
502 | |
503 ;;; ogonek.el ends here |