comparison lisp/international/codepage.el @ 23915:7ce49fb0dfbc

Initial revision
author Eli Zaretskii <eliz@gnu.org>
date Thu, 17 Dec 1998 17:01:11 +0000
parents
children efcf2fcda617
comparison
equal deleted inserted replaced
23914:17d161505a18 23915:7ce49fb0dfbc
1 ;;; codepage.el --- MS-DOS specific coding systems.
2
3 ;; Copyright (C) 1998 Free Software Foundation, Inc.
4
5 ;; Author: Eli Zaretskii
6 ;; Maintainer: FSF
7 ;; Keywords: i18n ms-dos codepage
8
9 ;; This file is part of GNU Emacs.
10
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
12 ;; it under the terms of the GNU General Public License as published by
13 ;; the Free Software Foundation; either version 2, or (at your option)
14 ;; any later version.
15
16 ;; GNU Emacs is distributed in the hope that it will be useful,
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 ;; GNU General Public License for more details.
20
21 ;; You should have received a copy of the GNU General Public License
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 ;; Boston, MA 02111-1307, USA.
25
26 ;;; Commentary:
27
28 ;; Special coding systems for DOS codepage support.
29 ;;
30 ;; These coding systems perform conversion from the DOS codepage encoding
31 ;; to one of the ISO-8859 character sets. Each codepage has its corresponding
32 ;; ISO-8859 charset, chosen so as to be able to convert all (or most) of the
33 ;; characters. The idea is that Emacs internally works with the usual MULE
34 ;; charsets, and the conversion to and from the DOS codepage is performed
35 ;; on I/O only.
36 ;; See term/internal.el for the complementary setup of the DOS terminal
37 ;; display and input methods.
38 ;;
39 ;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL
40 ;; encoders/decoders, and for help in debugging this code.
41
42 ;;; Code:
43
(defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name
                                               decoder encoder)
  "Define the CCL-based coding system CODING for a DOS codepage.
CODING is the symbol naming the new coding system.
MNEMONIC is the character shown on the mode line for the coding system.
ISO-NAME is the symbol of the ISO-8859 charset corresponding to the
codepage; it is recorded, together with `ascii', as a safe charset.
DECODER is a translation table mapping DOS codepage codes to Emacs
multibyte characters.
ENCODER is a translation table mapping Emacs multibyte characters to
external DOS codepage codes.

Coding systems made here never auto-detect the EOL format.  When the
name of CODING ends in \"-dos\", the decoder drops every CR it reads and
the encoder writes a CR before each LF; otherwise (including names that
end in \"-unix\") no EOL conversion is done.  Mac-style EOLs are not
supported.

The text after the first two characters of the name (less any
\"-dos\"/\"-unix\" suffix) is used as the codepage number in the
coding system's description."
  (save-match-data
    (let* ((coding-str (symbol-name coding))
           ;; Position of a trailing "-dos"/"-unix", or nil if there is none.
           (suffix-pos (string-match "-\\(dos\\|unix\\)\\'" coding-str))
           (dos-eol (and suffix-pos
                         (string= "-dos" (substring coding-str suffix-pos))))
           (base-str (if suffix-pos
                         (substring coding-str 0 suffix-pos)
                       coding-str))
           (ascii-id (charset-id 'ascii))
           (jisx0208-id (charset-id 'japanese-jisx0208))
           ;; CCL fragment: emit the byte in r1.  Bytes below 128 pass
           ;; through; others go through DECODER and are written as a
           ;; multibyte character unless the charset stayed ASCII.
           (decode-byte
            `(if (r1 >= 128)
                 ((r0 = ,ascii-id)
                  (translate-character ,decoder r0 r1)
                  (if (r0 == ,ascii-id)
                      (write r1)
                    (write-multibyte-character r0 r1)))
               (write r1)))
           ;; CCL fragment: run a non-ASCII character through ENCODER.
           ;; If the result is in japanese-jisx0208, r1 becomes `?' and
           ;; is written here, before the loop's own write of r1.
           (encode-char
            `(if (r0 != ,ascii-id)
                 ((translate-character ,encoder r0 r1)
                  (if (r0 == ,jisx0208-id)
                      ((r1 = ??)
                       (write r1))))))
           (ccl-decoder
            (ccl-compile
             `(4 (loop (read r1)
                       ,(if dos-eol
                            ;; DOS variant: swallow every CR.
                            `(if (r1 != ?\r) ,decode-byte)
                          decode-byte)
                       (repeat)))))
           (ccl-encoder
            (ccl-compile
             `(1 (loop (read-multibyte-character r0 r1)
                       ,(if dos-eol
                            ;; DOS variant: put a CR in front of each LF.
                            `(if (r1 == ?\n)
                                 (write ?\r)
                               ,encode-char)
                          encode-char)
                       (write-repeat r1))))))
      ;; Take any earlier entry for CODING off the list before defining it.
      (and (memq coding coding-system-list)
           (setq coding-system-list (delq coding coding-system-list)))
      (make-coding-system
       coding 4 mnemonic
       (concat "8-bit encoding of " (symbol-name iso-name)
               " characters using IBM codepage " (substring base-str 2))
       (cons ccl-decoder ccl-encoder)
       (list (list 'safe-charsets 'ascii iso-name)))
      ;; Fixed EOL type: 1 for the -dos variant, 0 otherwise.
      (put coding 'eol-type (if dos-eol 1 0)))))
124
(defun cp-decoding-vector-for-codepage (table charset offset)
  "Return a 256-element vector for decoding IBM PC characters.
TABLE is a conversion table: its Nth element is either nil or the
codepage code that stands for the character produced by
\(make-char CHARSET (+ N OFFSET)).  CHARSET is an ISO-8859 character set
whose first non-ASCII character is generated by (make-char CHARSET OFFSET).

In the returned vector, every index below OFFSET maps to itself, each
code listed in TABLE maps to its CHARSET character, and all remaining
slots hold a filler: 32 on systems other than MS-DOS; on MS-DOS the low
8 bits of `dos-unsupported-char-glyph', or 127 if that variable is nil."
  (let ((filler (cond ((not (eq system-type 'ms-dos)) 32)
                      (dos-unsupported-char-glyph
                       (logand dos-unsupported-char-glyph 255))
                      (t 127)))
        (count (length table))
        (n 0)
        result code)
    (setq result (make-vector 256 filler))
    ;; Codes below OFFSET are passed through unchanged.
    (while (< n offset)
      (aset result n n)
      (setq n (1+ n)))
    ;; Invert TABLE: index by codepage code, store the CHARSET character.
    (setq n 0)
    (while (< n count)
      (setq code (aref table n))
      (and code
           (aset result code (make-char charset (+ n offset))))
      (setq n (1+ n)))
    result))
147
148 ;;; You don't think I created all these tables below by hand, do you?
149 ;;; The following Awk script will create the table for cp850-to-Latin-1
150 ;;; conversion from the RFC 1345 file (the other tables are left as an
151 ;;; exercise):
152 ;;; BEGIN { n_pages = 11;
153 ;;; pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2;
154 ;;; pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5;
155 ;;; pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8;
156 ;;; pn["IBM864"] = 9; pn["IBM865"] = 10;
157 ;;; }
158 ;;; $1 == "&charset" { charset = $2; }
159 ;;; $1 == "&code" { code = $2; }
160 ;;; /^ [^&]/ {
161 ;;; if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/))
162 ;;; {
163 ;;; for (i = 1; i <= NF; i++)
164 ;;; chars[charset,code++] = $i;
165 ;;; }
166 ;;; }
167 ;;;
168 ;;; END {
169 ;;; for (i = 160; i < 256; i++)
170 ;;; {
171 ;;; c = chars["ISO_8859-1:1987",i];
172 ;;; if (c == "??") # skip unused positions
173 ;;; {
174 ;;; printf " nil";
175 ;;; if ((i - 159)%16 == 0)
176 ;;; printf "\n";
177 ;;; continue;
178 ;;; }
179 ;;; found = 0;
180 ;;; for (j in pn)
181 ;;; map[j] = "nil";
182 ;;; for (combined in chars)
183 ;;; {
184 ;;; candidate = chars[combined];
185 ;;; split (combined, separate, SUBSEP);
186 ;;; if (separate[1] == "IBM850" && candidate == c)
187 ;;; {
188 ;;; found = 1;
189 ;;; map[separate[1]] = separate[2];
190 ;;; }
191 ;;; }
192 ;;; printf " %s", map["IBM850"];
193 ;;; if ((i - 159)%16 == 0)
194 ;;; printf "\n";
195 ;;; }
196 ;;; }
197
198 ;;; WARNING WARNING WARNING!!!
199 ;;;
200 ;;; If you want to get fancy with these tables, remember that the inverse
201 ;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed
202 ;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' in
203 ;;; internal.el). Therefore, you should NOT put any codes below 128 in
204 ;;; these tables! Otherwise, various Emacs commands and functions will
205 ;;; mysteriously fail! For example, a typical screwup is to map the Latin-N
206 ;;; acute accent character to the apostrophe, and have all regexps which
207 ;;; end with "\\'" begin to fail (e.g., the automatic setting of the major
208 ;;; mode by file name extension will stop working).
209 ;;;
210 ;;; You HAVE BEEN warned!
211
212 ;; US/English/PC-8/IBM-2. This doesn't support Latin-1 characters very
213 ;; well, but why not use what we can salvage?
(defvar cp437-decode-table
  ;; Nth element is the code of a cp437 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp437 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil
   248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 437 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp437-decode-table 'charset 'latin-iso8859-1)
(put 'cp437-decode-table 'language "Latin-1")
(put 'cp437-decode-table 'offset 160)
228
229 ;; Multilingual (Latin-1)
(defvar cp850-decode-table
  ;; Nth element is the code of a cp850 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp850 glyph.
  [
   255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 nil
   248 241 253 252 239 nil 244 nil nil 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 231 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 232 152]
  "Table for converting ISO-8859-1 characters into codepage 850 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp850-decode-table 'charset 'latin-iso8859-1)
(put 'cp850-decode-table 'language "Latin-1")
(put 'cp850-decode-table 'offset 160)
244
245 ;; Greek
(defvar cp851-decode-table
  ;; Nth element is the code of a cp851 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp851 glyph.
  [
   255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil
   248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190
   198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 851 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp851-decode-table 'charset 'greek-iso8859-7)
(put 'cp851-decode-table 'language "Greek")
(put 'cp851-decode-table 'offset 160)
257
258 ;; Slavic/Eastern Europe (Latin-2)
(defvar cp852-decode-table
  ;; Nth element is the code of a cp852 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-2 (+ N 160)).
  ;; The element nil means there's no corresponding cp852 glyph.
  [
   255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189
   248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190
   232 181 182 198 142 145 143 128 172 144 168 211 183 214 nil 210
   209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225
   234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212
   208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250]
  "Table for converting ISO-8859-2 characters into codepage 852 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp852-decode-table 'charset 'latin-iso8859-2)
(put 'cp852-decode-table 'language "Latin-2")
(put 'cp852-decode-table 'offset 160)
270
271 ;; Russian
(defvar cp855-decode-table
  ;; Nth element is the code of a cp855 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp855 glyph.
  [
   255 133 129 131 135 137 139 141 143 145 147 nil 151 240 153 155
   161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221
   226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224
   160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216
   225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222
   nil 132 128 130 134 136 138 140 142 144 146 148 150 nil 152 154]
  "Table for converting ISO-8859-5 characters into codepage 855 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp855-decode-table 'charset 'cyrillic-iso8859-5)
(put 'cp855-decode-table 'language "Cyrillic-ISO")
(put 'cp855-decode-table 'offset 160)
283
284 ;; Turkish
(defvar cp857-decode-table
  ;; Nth element is the code of a cp857 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-3 (+ N 160)).
  ;; The element nil means there's no corresponding cp857 glyph.
  ;;
  ;; ISO-8859-3 leaves positions 0xA5, 0xAE, 0xBE, 0xC3, 0xD0, 0xE3 and
  ;; 0xF0 unassigned.  Each of them still needs an explicit nil here so
  ;; that the table has 96 elements and every later entry stays at
  ;; index (code - 160).
  [
   255 nil nil 156 207 nil nil 245 249 152 158 166 nil 240 nil nil
   248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil nil
   183 181 182 nil 142 nil nil 128 212 144 210 211 222 214 215 216
   nil 165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225
   133 160 131 nil 132 nil nil 135 138 130 136 137 236 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250]
  "Table for converting ISO-8859-3 characters into codepage 857 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp857-decode-table 'charset 'latin-iso8859-3)
(put 'cp857-decode-table 'language "Latin-3")
(put 'cp857-decode-table 'offset 160)
296
297 ;; Portuguese
(defvar cp860-decode-table
  ;; Nth element is the code of a cp860 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp860 glyph.
  [
   255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil
   nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil
   133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil
   nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 860 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp860-decode-table 'charset 'latin-iso8859-1)
(put 'cp860-decode-table 'language "Latin-1")
(put 'cp860-decode-table 'offset 160)
309
310 ;; Icelandic
(defvar cp861-decode-table
  ;; Nth element is the code of a cp861 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp861 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168
   nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil
   139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil
   133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil
   140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil]
  "Table for converting ISO-8859-1 characters into codepage 861 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp861-decode-table 'charset 'latin-iso8859-1)
(put 'cp861-decode-table 'language "Latin-1")
(put 'cp861-decode-table 'offset 160)
322
323 ;; Hebrew
(defvar cp862-decode-table
  ;; Nth element is the code of a cp862 glyph for the multibyte
  ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)).
  ;; The element nil means there's no corresponding cp862 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil
   248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil]
  "Table for converting ISO-8859-8 characters into codepage 862 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp862-decode-table 'charset 'hebrew-iso8859-8)
(put 'cp862-decode-table 'language "Hebrew")
(put 'cp862-decode-table 'offset 160)
338
339 ;; French Canadian
(defvar cp863-decode-table
  ;; Nth element is the code of a cp863 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp863 glyph.
  [
   255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167
   nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil
   142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149
   nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil
   133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139
   nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 863 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp863-decode-table 'charset 'latin-iso8859-1)
(put 'cp863-decode-table 'language "Latin-1")
(put 'cp863-decode-table 'offset 160)
351
352 ;; Arabic
353 ;; FIXME: Emacs doesn't seem to support the "Arabic" language
354 ;; environment yet. So this is only partially usable, for now
(defvar cp864-decode-table
  ;; Nth element is the code of a cp864 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp864 glyph.
  [
   255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil
   nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191
   nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207
   208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil
   224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil
   nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 864 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp864-decode-table 'charset 'arabic-iso8859-6)
;; No language environment is associated with this table.
(put 'cp864-decode-table 'language nil)
(put 'cp864-decode-table 'offset 160)
366
367 ;; Nordic (Norwegian/Danish)
(defvar cp865-decode-table
  ;; Nth element is the code of a cp865 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp865 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 865 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp865-decode-table 'charset 'latin-iso8859-1)
(put 'cp865-decode-table 'language "Latin-1")
(put 'cp865-decode-table 'offset 160)
379
380 ;; Greek (yes, another one!)
(defvar cp869-decode-table
  ;; Nth element is the code of a cp869 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp869 glyph.
  [
   255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142
   248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190
   198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 869 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp869-decode-table 'charset 'greek-iso8859-7)
(put 'cp869-decode-table 'language "Greek")
(put 'cp869-decode-table 'offset 160)
392
393 ;; Conversion from codepage 775 to Latin-4 for Baltic countries.
(defvar cp775-decode-table
  ;; Nth element is the code of a cp775 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp775 glyph.
  [
   255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil
   248 208 nil 139 239 nil 235 nil nil 213 137 133 nil nil 216 nil
   160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161
   nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225
   131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140
   nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil]
  "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
;; Attach the lookup properties with `put' rather than `setplist':
;; `setplist' replaces the whole property list and would discard the
;; `variable-documentation' property that `defvar' has just stored.
(put 'cp775-decode-table 'charset 'latin-iso8859-4)
(put 'cp775-decode-table 'language "Latin-4")
(put 'cp775-decode-table 'offset 160)
405
406 ;;;###autoload
(defun cp-make-coding-systems-for-codepage (codepage iso-name offset)
  "Create the coding systems CODEPAGE-dos and CODEPAGE-unix.
CODEPAGE is the name of an IBM codepage, e.g. \"cp850\"; the variable
`CODEPAGE-decode-table' must hold its conversion table.  ISO-NAME is the
charset the codepage is converted into, and OFFSET is the position of
that charset's first character within the 8-bit table.

The two coding systems differ only in EOL handling: CODEPAGE-dos
converts DOS-style EOLs, CODEPAGE-unix performs no EOL conversion.

As side effects, the variable `CODEPAGE-nonascii-translation-table' is
set to the decoding char-table, and the translation tables
`CODEPAGE-decode-translation-table' and
`CODEPAGE-encode-translation-table' are defined."
  (let* ((table-var (intern (format "%s-decode-table" codepage)))
         (nonascii-var
          (intern (format "%s-nonascii-translation-table" codepage)))
         (decode-name
          (intern (format "%s-decode-translation-table" codepage)))
         (encode-name
          (intern (format "%s-encode-translation-table" codepage)))
         (dos-coding (intern (format "%s-dos" codepage)))
         (unix-coding (intern (format "%s-unix" codepage)))
         ;; Char-table built from the inverted decode table.
         (decode-map
          (make-translation-table-from-vector
           (cp-decoding-vector-for-codepage
            (symbol-value table-var) iso-name offset)))
         ;; Extra slot 0 of that char-table serves as the encoding table.
         (encode-map (char-table-extra-slot decode-map 0)))
    (set nonascii-var decode-map)
    (define-translation-table encode-name encode-map)
    ;; Every charset other than ascii and ISO-NAME encodes to a
    ;; placeholder: `?' for one-column charsets, a Japanese character
    ;; for wide ones.  The CCL encoder turns the latter into "??".
    (let ((wide-filler (make-char 'japanese-jisx0208 32 32))
          (others (delq 'ascii
                        (delq iso-name (copy-sequence charset-list))))
          this)
      (while others
        (setq this (car others)
              others (cdr others))
        (aset encode-map (make-char this)
              (if (= (charset-width this) 1) ?? wide-filler))))
    (define-translation-table decode-name decode-map)
    (cp-coding-system-for-codepage-1
     dos-coding ?D iso-name decode-name encode-name)
    (cp-coding-system-for-codepage-1
     unix-coding ?D iso-name decode-name encode-name)))
450
(defun cp-codepage-decoder (codepage)
  "Return the symbol naming the decode table for CODEPAGE, or nil.
CODEPAGE is a codepage name such as \"cp850\", as a string or a symbol.

The value is the symbol `CODEPAGE-decode-table', but only if that symbol
is bound to a vector and has a `charset' property -- the conditions
under which `cp-supported-codepages' lists a codepage.  A symbol that
merely happens to be interned does not count as a decode table.
Return nil for an unsupported codepage or when CODEPAGE is neither a
string nor a symbol."
  (let* ((cp (if (symbolp codepage) (symbol-name codepage) codepage))
         ;; `intern-soft' finds the symbol without creating a new one.
         (table (and (stringp cp)
                     (intern-soft (format "%s-decode-table" cp)))))
    (and table
         (boundp table)
         (vectorp (symbol-value table))
         (get table 'charset)
         table)))
459
460 ;;;###autoload
(defun cp-charset-for-codepage (codepage)
  "Return the charset that has a translation table to DOS CODEPAGE.
CODEPAGE is the name of a DOS codepage.  The value is the `charset'
property of the symbol returned by `cp-codepage-decoder'.
Signal an error if `cp-codepage-decoder' returns nil for CODEPAGE."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    (get table-symbol 'charset)))
468
469 ;;;###autoload
(defun cp-language-for-codepage (codepage)
  "Return the name of the MULE language environment for CODEPAGE.
CODEPAGE is the name of a DOS codepage.  The value is the `language'
property of the symbol returned by `cp-codepage-decoder'.
Signal an error if `cp-codepage-decoder' returns nil for CODEPAGE."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    (get table-symbol 'language)))
477
478 ;;;###autoload
(defun cp-offset-for-codepage (codepage)
  "Return the offset used when setting up coding systems for CODEPAGE.
CODEPAGE is the name of a DOS codepage.  The value is the `offset'
property of the symbol returned by `cp-codepage-decoder'.
Signal an error if `cp-codepage-decoder' returns nil for CODEPAGE."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    (get table-symbol 'offset)))
486
487 ;;;###autoload
(defun cp-supported-codepages ()
  "Return an alist of supported codepages.

Each element has the form (NNN . CHARSET), where NNN is the codepage
number as a string of three digits, and CHARSET is the value of the
`charset' property of the symbol `cpNNN-decode-table'.

A codepage NNN is supported if the variable `cpNNN-decode-table' is
bound, its value is a vector, and the symbol has a non-nil `charset'
property."
  (save-match-data
    (let ((found nil))
      (mapatoms
       (function
        (lambda (symbol)
          (let ((symbol-str (symbol-name symbol))
                table-charset)
            ;; Each test runs only if the previous ones succeeded, so
            ;; the match data still belongs to SYMBOL-STR when read.
            (and (boundp symbol)
                 (string-match "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'"
                               symbol-str)
                 (vectorp (symbol-value symbol))
                 (setq table-charset (get symbol 'charset))
                 (setq found
                       (cons (cons (match-string 1 symbol-str) table-charset)
                             found)))))))
      found)))
510
511 ;;;###autoload
(defun codepage-setup (codepage)
  "Create two coding systems for CODEPAGE: cpCODEPAGE-dos and cpCODEPAGE-unix.
CODEPAGE is the codepage number, e.g. \"850\"; the string \"cp\" is put
in front of it to form the names.  Interactively, prompt for one of the
supported codepages, defaulting to 437.

These coding systems are meant for encoding and decoding 8-bit non-ASCII
characters used by the IBM codepages, typically in conjunction with files
read/written by MS-DOS software, or for display on MS-DOS terminal."
  (interactive
   (let ((completion-ignore-case t)
         (choices (cp-supported-codepages)))
     (list (completing-read "Setup DOS Codepage: (default 437) " choices
                            nil t nil nil "437"))))
  (let* ((cp-name (format "cp%s" codepage))
         (iso-charset (cp-charset-for-codepage cp-name))
         (first-offset (cp-offset-for-codepage cp-name)))
    (cp-make-coding-systems-for-codepage cp-name iso-charset first-offset)))
527
528 (provide 'codepage)
529
530 ;; codepage.el ends here