23915
|
1 ;;; codepage.el --- MS-DOS specific coding systems.
|
|
2
|
|
3 ;; Copyright (C) 1998 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: Eli Zaretskii
|
|
6 ;; Maintainer: FSF
|
|
7 ;; Keywords: i18n ms-dos codepage
|
|
8
|
|
9 ;; This file is part of GNU Emacs.
|
|
10
|
|
11 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
12 ;; it under the terms of the GNU General Public License as published by
|
|
13 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
14 ;; any later version.
|
|
15
|
|
16 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
19 ;; GNU General Public License for more details.
|
|
20
|
|
21 ;; You should have received a copy of the GNU General Public License
|
|
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
24 ;; Boston, MA 02111-1307, USA.
|
|
25
|
|
26 ;;; Commentary:
|
|
27
|
|
28 ;; Special coding systems for DOS codepage support.
|
|
29 ;;
|
|
30 ;; These coding systems perform conversion from the DOS codepage encoding
|
|
31 ;; to one of the ISO-8859 character sets. Each codepage has its corresponding
|
|
32 ;; ISO-8859 charset, chosen so as to be able to convert all (or most) of the
|
|
33 ;; characters. The idea is that Emacs internally works with the usual MULE
|
|
34 ;; charsets, and the conversion to and from the DOS codepage is performed
|
|
35 ;; on I/O only.
|
|
36 ;; See term/internal.el for the complementary setup of the DOS terminal
|
|
37 ;; display and input methods.
|
|
38 ;;
|
|
39 ;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL
|
|
40 ;; encoders/decoders, and for help in debugging this code.
|
|
41
|
|
42 ;;; Code:
|
|
43
|
|
(defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name
                                               decoder encoder)
  "Make coding system CODING for a DOS codepage using translation tables.
MNEMONIC is the character shown on the mode line for the coding system.
ISO-NAME is the name of the ISO-8859 charset corresponding to the codepage.
DECODER is a translation table which maps characters in the DOS codepage
encoding to Emacs multibyte characters.
ENCODER is a translation table which maps Emacs multibyte characters to
external DOS codepage codes.

The coding systems made by this function do not auto-detect the EOL
format; use the explicit -dos or -unix variants as appropriate (Mac EOL
style is not supported, as it doesn't make sense for these coding systems).

When the name of CODING ends with \"-dos\", the coding system converts
from and to the DOS EOL format; otherwise it assumes Unix-style EOL and
performs no EOL conversion at all."
  (save-match-data
    (let* ((name (symbol-name coding))
           (suffix-pos (string-match "-\\(dos\\|unix\\)\\'" name))
           (dos-p (and suffix-pos
                       (string= "-dos" (substring name suffix-pos))))
           (base-name (if suffix-pos (substring name 0 suffix-pos) name))
           (ascii-id (charset-id 'ascii))
           (jisx0208-id (charset-id 'japanese-jisx0208))
           ;; CCL statement shared by both decoders: bytes below 128 are
           ;; written as is, the others go through DECODER first.
           (decode-byte
            `(if (r1 >= 128)
                 ((r0 = ,ascii-id)
                  (translate-character ,decoder r0 r1)
                  (if (r0 == ,ascii-id)
                      (write r1)
                    (write-multibyte-character r0 r1)))
               (write r1)))
           ;; CCL statement shared by both encoders: non-ASCII characters
           ;; go through ENCODER; a result in japanese-jisx0208 writes
           ;; one `?' here, and the trailing `write-repeat' in the
           ;; encoder loop writes r1 (then `?') once more.
           (encode-char
            `(if (r0 != ,ascii-id)
                 ((translate-character ,encoder r0 r1)
                  (if (r0 == ,jisx0208-id)
                      ((r1 = ??)
                       (write r1))))))
           ;; The DOS variants skip CR when decoding and write a CR
           ;; before each LF when encoding.
           (decode-step (if dos-p
                            `(if (r1 != ?\r) ,decode-byte)
                          decode-byte))
           (encode-step (if dos-p
                            `(if (r1 == ?\n) (write ?\r) ,encode-char)
                          encode-char))
           (ccl-decoder
            (ccl-compile
             `(4 (loop (read r1)
                       ,decode-step
                       (repeat)))))
           (ccl-encoder
            (ccl-compile
             `(1 (loop (read-multibyte-character r0 r1)
                       ,encode-step
                       (write-repeat r1))))))
      ;; Drop any previous registration of CODING before redefining it.
      (and (memq coding coding-system-list)
           (setq coding-system-list (delq coding coding-system-list)))
      (make-coding-system
       coding 4 mnemonic
       (concat "8-bit encoding of " (symbol-name iso-name)
               " characters using IBM codepage " (substring base-name 2))
       (cons ccl-decoder ccl-encoder)
       (list (list 'safe-charsets 'ascii iso-name)))
      (put coding 'eol-type (if dos-p 1 0)))))
|
|
124
|
|
(defun cp-decoding-vector-for-codepage (table charset offset)
  "Return a 256-element vector for decoding IBM PC characters.
TABLE is a conversion table whose Nth element is either nil or the
codepage code of the glyph for the character (make-char CHARSET (+ N OFFSET)).
CHARSET is the ISO-8859 character set whose first non-ASCII character
is generated by (make-char CHARSET OFFSET).

In the result, every index below OFFSET maps to itself, each codepage
code listed in TABLE maps to its CHARSET character, and all remaining
slots hold a filler: 32 except on MS-DOS, where it is the low 8 bits of
`dos-unsupported-char-glyph', or 127 if that variable is nil."
  (let* ((count (length table))
         (filler (cond ((not (eq system-type 'ms-dos)) 32)
                       (dos-unsupported-char-glyph
                        (logand dos-unsupported-char-glyph 255))
                       (t 127)))
         (decode-vec (make-vector 256 filler))
         (code 0)
         (idx 0)
         glyph)
    ;; Codes below OFFSET are passed through unchanged.
    (while (< code offset)
      (aset decode-vec code code)
      (setq code (1+ code)))
    ;; Invert TABLE: index by codepage glyph, store the CHARSET character.
    (while (< idx count)
      (setq glyph (aref table idx))
      (if glyph
          (aset decode-vec glyph (make-char charset (+ idx offset))))
      (setq idx (1+ idx)))
    decode-vec))
|
|
147
|
|
148 ;;; You don't think I created all these tables below by hand, do you?
|
|
149 ;;; The following Awk script will create the table for cp850-to-Latin-1
|
|
150 ;;; conversion from the RFC 1345 file (the other tables are left as an
|
|
151 ;;; exercise):
|
|
152 ;;; BEGIN { n_pages = 11;
|
|
153 ;;; pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2;
|
|
154 ;;; pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5;
|
|
155 ;;; pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8;
|
|
156 ;;; pn["IBM864"] = 9; pn["IBM865"] = 10;
|
|
157 ;;; }
|
|
158 ;;; $1 == "&charset" { charset = $2; }
|
|
159 ;;; $1 == "&code" { code = $2; }
|
|
160 ;;; /^ [^&]/ {
|
|
161 ;;; if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/))
|
|
162 ;;; {
|
|
163 ;;; for (i = 1; i <= NF; i++)
|
|
164 ;;; chars[charset,code++] = $i;
|
|
165 ;;; }
|
|
166 ;;; }
|
|
167 ;;;
|
|
168 ;;; END {
|
|
169 ;;; for (i = 160; i < 256; i++)
|
|
170 ;;; {
|
|
171 ;;; c = chars["ISO_8859-1:1987",i];
|
|
172 ;;; if (c == "??") # skip unused positions
|
|
173 ;;; {
|
|
174 ;;; printf " nil";
|
|
175 ;;; if ((i - 159)%16 == 0)
|
|
176 ;;; printf "\n";
|
|
177 ;;; continue;
|
|
178 ;;; }
|
|
179 ;;; found = 0;
|
|
180 ;;; for (j in pn)
|
|
181 ;;; map[j] = "nil";
|
|
182 ;;; for (combined in chars)
|
|
183 ;;; {
|
|
184 ;;; candidate = chars[combined];
|
|
185 ;;; split (combined, separate, SUBSEP);
|
|
186 ;;; if (separate[1] == "IBM850" && candidate == c)
|
|
187 ;;; {
|
|
188 ;;; found = 1;
|
|
189 ;;; map[separate[1]] = separate[2];
|
|
190 ;;; }
|
|
191 ;;; }
|
|
192 ;;; printf " %s", map["IBM850"];
|
|
193 ;;; if ((i - 159)%16 == 0)
|
|
194 ;;; printf "\n";
|
|
195 ;;; }
|
|
196 ;;; }
|
|
197
|
|
198 ;;; WARNING WARNING WARNING!!!
|
|
199 ;;;
|
|
200 ;;; If you want to get fancy with these tables, remember that the inverse
|
|
201 ;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed
|
|
202 ;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' in
|
|
203 ;;; internal.el). Therefore, you should NOT put any codes below 128 in
|
|
204 ;;; these tables! Otherwise, various Emacs commands and functions will
|
|
205 ;;; mysteriously fail! For example, a typical screwup is to map the Latin-N
|
|
206 ;;; acute accent character to the apostrophe, and have all regexps which
|
|
207 ;;; end with "\\'" begin to fail (e.g., the automatic setting of the major
|
|
208 ;;; mode by file name extension will stop working).
|
|
209 ;;;
|
|
210 ;;; You HAVE BEEN warned!
|
|
211
|
|
;; US/English/PC-8/IBM-2.  This doesn't support Latin-1 characters very
;; well, but why not use what we can salvage?
(defvar cp437-decode-table
  ;; Nth element is the code of a cp437 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp437 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil
   248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 437 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp437-decode-table 'charset 'latin-iso8859-1)
(put 'cp437-decode-table 'language "Latin-1")
(put 'cp437-decode-table 'offset 160)
|
|
228
|
|
;; Multilingual (Latin-1)
(defvar cp850-decode-table
  ;; Nth element is the code of a cp850 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp850 glyph.
  [
   255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 nil
   248 241 253 252 239 nil 244 nil nil 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 231 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 232 152]
  "Table for converting ISO-8859-1 characters into codepage 850 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp850-decode-table 'charset 'latin-iso8859-1)
(put 'cp850-decode-table 'language "Latin-1")
(put 'cp850-decode-table 'offset 160)
|
|
244
|
|
;; Greek
(defvar cp851-decode-table
  ;; Nth element is the code of a cp851 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp851 glyph.
  [
   255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil
   248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190
   198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 851 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp851-decode-table 'charset 'greek-iso8859-7)
(put 'cp851-decode-table 'language "Greek")
(put 'cp851-decode-table 'offset 160)
|
|
257
|
|
;; Slavic/Eastern Europe (Latin-2)
(defvar cp852-decode-table
  ;; Nth element is the code of a cp852 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-2 (+ N 160)).
  ;; The element nil means there's no corresponding cp852 glyph.
  [
   255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189
   248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190
   232 181 182 198 142 145 143 128 172 144 168 211 183 214 nil 210
   209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225
   234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212
   208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250]
  "Table for converting ISO-8859-2 characters into codepage 852 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp852-decode-table 'charset 'latin-iso8859-2)
(put 'cp852-decode-table 'language "Latin-2")
(put 'cp852-decode-table 'offset 160)
|
|
270
|
|
;; Russian
(defvar cp855-decode-table
  ;; Nth element is the code of a cp855 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp855 glyph.
  [
   255 133 129 131 135 137 139 141 143 145 147 nil 151 240 153 155
   161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221
   226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224
   160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216
   225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222
   nil 132 128 130 134 136 138 140 142 144 146 148 150 nil 152 154]
  "Table for converting ISO-8859-5 characters into codepage 855 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp855-decode-table 'charset 'cyrillic-iso8859-5)
(put 'cp855-decode-table 'language "Cyrillic-ISO")
(put 'cp855-decode-table 'offset 160)
|
|
283
|
|
;; Turkish
(defvar cp857-decode-table
  ;; Nth element is the code of a cp857 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-3 (+ N 160)).
  ;; The element nil means there's no corresponding cp857 glyph.
  ;;
  ;; ISO-8859-3 leaves seven positions unassigned (0xA5, 0xAE, 0xBE,
  ;; 0xC3, 0xD0, 0xE3 and 0xF0).  Each of them must still occupy a slot
  ;; (as nil) so that the table has 96 entries and element N really
  ;; corresponds to code N + 160; leaving them out shifts every
  ;; following entry onto the wrong character.
  [
   255 nil nil 156 207 nil nil 245 249 152 158 166 nil 240 nil nil
   248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil nil
   183 181 182 nil 142 nil nil 128 212 144 210 211 222 214 215 216
   nil 165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225
   133 160 131 nil 132 nil nil 135 138 130 136 137 236 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250]
  "Table for converting ISO-8859-3 characters into codepage 857 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp857-decode-table 'charset 'latin-iso8859-3)
(put 'cp857-decode-table 'language "Latin-3")
(put 'cp857-decode-table 'offset 160)
|
|
296
|
|
;; Portuguese
(defvar cp860-decode-table
  ;; Nth element is the code of a cp860 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp860 glyph.
  [
   255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil
   nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil
   133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil
   nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 860 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp860-decode-table 'charset 'latin-iso8859-1)
(put 'cp860-decode-table 'language "Latin-1")
(put 'cp860-decode-table 'offset 160)
|
|
309
|
|
;; Icelandic
(defvar cp861-decode-table
  ;; Nth element is the code of a cp861 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp861 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168
   nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil
   139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil
   133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil
   140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil]
  "Table for converting ISO-8859-1 characters into codepage 861 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp861-decode-table 'charset 'latin-iso8859-1)
(put 'cp861-decode-table 'language "Latin-1")
(put 'cp861-decode-table 'offset 160)
|
|
322
|
|
;; Hebrew
(defvar cp862-decode-table
  ;; Nth element is the code of a cp862 glyph for the multibyte
  ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)).
  ;; The element nil means there's no corresponding cp862 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil
   248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil]
  "Table for converting ISO-8859-8 characters into codepage 862 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp862-decode-table 'charset 'hebrew-iso8859-8)
(put 'cp862-decode-table 'language "Hebrew")
(put 'cp862-decode-table 'offset 160)
|
|
338
|
|
;; French Canadian
(defvar cp863-decode-table
  ;; Nth element is the code of a cp863 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp863 glyph.
  [
   255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167
   nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil
   142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149
   nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil
   133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139
   nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 863 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp863-decode-table 'charset 'latin-iso8859-1)
(put 'cp863-decode-table 'language "Latin-1")
(put 'cp863-decode-table 'offset 160)
|
|
351
|
|
;; Arabic
;; FIXME: Emacs doesn't seem to support the "Arabic" language
;; environment yet.  So this is only partially usable, for now.
(defvar cp864-decode-table
  ;; Nth element is the code of a cp864 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp864 glyph.
  [
   255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil
   nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191
   nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207
   208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil
   224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil
   nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 864 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp864-decode-table 'charset 'arabic-iso8859-6)
;; No language environment is associated with this codepage (see FIXME).
(put 'cp864-decode-table 'language nil)
(put 'cp864-decode-table 'offset 160)
|
|
366
|
|
;; Nordic (Norwegian/Danish)
(defvar cp865-decode-table
  ;; Nth element is the code of a cp865 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp865 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 865 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp865-decode-table 'charset 'latin-iso8859-1)
(put 'cp865-decode-table 'language "Latin-1")
(put 'cp865-decode-table 'offset 160)
|
|
379
|
|
;; Greek (yes, another one!)
(defvar cp869-decode-table
  ;; Nth element is the code of a cp869 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp869 glyph.
  [
   255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142
   248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190
   198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 869 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp869-decode-table 'charset 'greek-iso8859-7)
(put 'cp869-decode-table 'language "Greek")
(put 'cp869-decode-table 'offset 160)
|
|
392
|
|
;; Conversion from codepage 775 to Latin-4 for Baltic countries.
(defvar cp775-decode-table
  ;; Nth element is the code of a cp775 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp775 glyph.
  [
   255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil
   248 208 nil 139 239 nil 235 nil nil 213 137 133 nil nil 216 nil
   160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161
   nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225
   131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140
   nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil]
  "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
;; Use `put' rather than `setplist': `setplist' would replace the whole
;; property list and so discard the `variable-documentation' property
;; which `defvar' has just recorded for the doc string above.
(put 'cp775-decode-table 'charset 'latin-iso8859-4)
(put 'cp775-decode-table 'language "Latin-4")
(put 'cp775-decode-table 'offset 160)
|
|
405
|
|
;;;###autoload
(defun cp-make-coding-systems-for-codepage (codepage iso-name offset)
  "Create 2 coding systems to convert IBM CODEPAGE into charset ISO-NAME.
OFFSET is the offset of the first character of ISO-NAME from the
beginning of the 8-bit ASCII table.

The two coding systems, named CODEPAGE-dos and CODEPAGE-unix, are
identical except for the EOL conversion: one of them decodes DOS-style
EOLs, the other assumes Unix style and doesn't perform any EOL conversions.

As side effects, this sets the variable CODEPAGE-nonascii-translation-table
and defines the translation tables CODEPAGE-decode-translation-table and
CODEPAGE-encode-translation-table."
  (let* ((decode-table (intern (format "%s-decode-table" codepage)))
         (nonascii-table
          (intern (format "%s-nonascii-translation-table" codepage)))
         (decode-translation
          (intern (format "%s-decode-translation-table" codepage)))
         (encode-translation
          (intern (format "%s-encode-translation-table" codepage)))
         (codepage-dos (intern (format "%s-dos" codepage)))
         (codepage-unix (intern (format "%s-unix" codepage)))
         ;; Codepage -> Emacs character table, built from the decode table.
         (forward (make-translation-table-from-vector
                   (cp-decoding-vector-for-codepage
                    (symbol-value decode-table) iso-name offset)))
         ;; The table kept in extra slot 0 of FORWARD serves as the encoder.
         (reverse (char-table-extra-slot forward 0)))
    (set nonascii-table forward)
    (define-translation-table encode-translation reverse)
    ;; For charsets other than ascii and ISO-NAME, set `?' for
    ;; one-column charsets, and some Japanese character for
    ;; wide-column charsets.  The CCL encoder converts that Japanese
    ;; character to "??".
    (let ((pending (delq 'ascii
                         (delq iso-name (copy-sequence charset-list))))
          (wide-marker (make-char 'japanese-jisx0208 32 32))
          cs)
      (while pending
        (setq cs (car pending)
              pending (cdr pending))
        (aset reverse (make-char cs)
              (if (= (charset-width cs) 1) ?? wide-marker))))
    (define-translation-table decode-translation forward)
    (cp-coding-system-for-codepage-1
     codepage-dos ?D iso-name decode-translation encode-translation)
    (cp-coding-system-for-codepage-1
     codepage-unix ?D iso-name decode-translation encode-translation)))
|
|
450
|
|
(defun cp-codepage-decoder (codepage)
  "If CODEPAGE names a supported codepage, return its decode-table symbol.
CODEPAGE may be a string such as \"cp850\", or a symbol with such a name.
The value is the symbol `CODEPAGE-decode-table', provided that symbol is
bound to a vector and has a `charset' property (the same test which
`cp-supported-codepages' applies); otherwise return nil."
  (let* ((cp (if (symbolp codepage) (symbol-name codepage) codepage))
         (table-symbol
          (and (stringp cp)
               (intern-soft (format "%s-decode-table" cp)))))
    ;; `intern-soft' succeeds for any interned symbol, even one that was
    ;; merely mentioned somewhere and never defined as a decode table.
    ;; Reject those here, so that callers report an unsupported codepage
    ;; instead of silently getting nil properties.
    (and table-symbol
         (boundp table-symbol)
         (vectorp (symbol-value table-symbol))
         (get table-symbol 'charset)
         table-symbol)))
|
|
459
|
|
;;;###autoload
(defun cp-charset-for-codepage (codepage)
  "Return the charset for which there is a translation table to DOS CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
The value is the `charset' property of the codepage's decode table symbol.
Signal an error if CODEPAGE has no decode table."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    (get table-symbol 'charset)))
|
|
468
|
|
;;;###autoload
(defun cp-language-for-codepage (codepage)
  "Return the name of the MULE language environment for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
The value is the `language' property of the codepage's decode table symbol.
Signal an error if CODEPAGE has no decode table."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    (get table-symbol 'language)))
|
|
477
|
|
;;;###autoload
(defun cp-offset-for-codepage (codepage)
  "Return the offset to be used in setting up coding systems for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
The value is the `offset' property of the codepage's decode table symbol.
Signal an error if CODEPAGE has no decode table."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    (get table-symbol 'offset)))
|
|
486
|
|
;;;###autoload
(defun cp-supported-codepages ()
  "Return an alist of supported codepages.

Each element of the alist has the form (NNN . CHARSET), where NNN is the
codepage number (a string of three digits), and CHARSET is the MULE
charset which is the closest match for the character set supported by
that codepage.

A codepage NNN is supported if a variable called `cpNNN-decode-table'
exists, is a vector, and has a charset property."
  (save-match-data
    (let ((pattern "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'")
          found)
      ;; Scan every interned symbol for qualifying decode tables.
      (mapatoms
       (function
        (lambda (sym)
          (let ((name (symbol-name sym))
                charset)
            (and (boundp sym)
                 (string-match pattern name)
                 (vectorp (symbol-value sym))
                 (setq charset (get sym 'charset))
                 (setq found
                       (cons (cons (match-string 1 name) charset)
                             found)))))))
      found)))
|
|
510
|
|
;;;###autoload
(defun codepage-setup (codepage)
  "Create 2 coding systems for codepage CODEPAGE: cpCODEPAGE-dos and
cpCODEPAGE-unix.

These coding systems are meant for encoding and decoding 8-bit non-ASCII
characters used by the IBM codepages, typically in conjunction with files
read/written by MS-DOS software, or for display on MS-DOS terminal.

Interactively, read CODEPAGE with completion over the supported
codepages; the default is 437."
  (interactive
   (let ((choices (cp-supported-codepages)))
     (let ((completion-ignore-case t))
       (list (completing-read "Setup DOS Codepage: (default 437) " choices
                              nil t nil nil "437")))))
  (let* ((cp (format "cp%s" codepage))
         (charset (cp-charset-for-codepage cp))
         (offset (cp-offset-for-codepage cp)))
    (cp-make-coding-systems-for-codepage cp charset offset)))
|
|
527
|
|
;; Register the `codepage' feature, so that (require 'codepage) succeeds.
528 (provide 'codepage)
|
|
529
|
|
530 ;; codepage.el ends here
|