Mercurial > emacs
annotate lisp/international/codepage.el @ 23920:efcf2fcda617
(cp-coding-system-for-codepage-1):
Create separate encoders and decoders, for DOS and Unix. Make the
usual family of 3 coding systems, so that automatic detection of
EOL type works.
(cp-make-coding-systems-for-codepage): Don't intern DOS- and
Unix-specific symbols here, and don't call
cp-coding-system-for-codepage-1 twice. (Suggested by Ken'ichi
Handa <handa@etl.go.jp>.)
author | Eli Zaretskii <eliz@gnu.org> |
---|---|
date | Sun, 20 Dec 1998 15:17:49 +0000 |
parents | 7ce49fb0dfbc |
children | 4ef8ec98dd43 |
rev | line source |
---|---|
23915 | 1 ;;; codepage.el --- MS-DOS specific coding systems. |
2 | |
3 ;; Copyright (C) 1998 Free Software Foundation, Inc. | |
4 | |
5 ;; Author: Eli Zaretskii | |
6 ;; Maintainer: FSF | |
7 ;; Keywords: i18n ms-dos codepage | |
8 | |
9 ;; This file is part of GNU Emacs. | |
10 | |
11 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
12 ;; it under the terms of the GNU General Public License as published by | |
13 ;; the Free Software Foundation; either version 2, or (at your option) | |
14 ;; any later version. | |
15 | |
16 ;; GNU Emacs is distributed in the hope that it will be useful, | |
17 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
18 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
19 ;; GNU General Public License for more details. | |
20 | |
21 ;; You should have received a copy of the GNU General Public License | |
22 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
23 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
24 ;; Boston, MA 02111-1307, USA. | |
25 | |
26 ;;; Commentary: | |
27 | |
28 ;; Special coding systems for DOS codepage support. | |
29 ;; | |
30 ;; These coding systems perform conversion from the DOS codepage encoding | |
31 ;; to one of the ISO-8859 character sets. Each codepage has its corresponding | |
32 ;; ISO-8859 charset, chosen so as to be able to convert all (or most) of the | |
33 ;; characters. The idea is that Emacs internally works with the usual MULE | |
34 ;; charsets, and the conversion to and from the DOS codepage is performed | |
35 ;; on I/O only. | |
36 ;; See term/internal.el for the complementary setup of the DOS terminal | |
37 ;; display and input methods. | |
38 ;; | |
39 ;; Thanks to Ken'ichi Handa <handa@etl.go.jp> for writing the CCL | |
40 ;; encoders/decoders, and for help in debugging this code. | |
41 | |
42 ;;; Code: | |
43 | |
(defun cp-coding-system-for-codepage-1 (coding mnemonic iso-name
                                               decoder encoder)
  "Define coding system CODING for a DOS codepage via translation tables.
MNEMONIC is the character shown on the mode line for the coding system.
ISO-NAME is the name of the ISO-8859 charset that corresponds to this
codepage.
DECODER is a translation table for converting characters in the DOS
codepage encoding to Emacs multibyte characters.
ENCODER is a translation table for encoding Emacs multibyte characters
into external DOS codepage codes.

CODING is created together with its CODING-unix, CODING-dos and
CODING-mac variants, so automatic detection of the EOL format works."
  (save-match-data
    (let* ((coding-name (symbol-name coding))
           (ascii-id (charset-id 'ascii))
           (jisx0208-id (charset-id 'japanese-jisx0208))
           ;; CCL template shared by both decoders: a byte below 128 is
           ;; written unchanged; any other byte is passed through DECODER
           ;; and written as a multibyte character unless the result is
           ;; still in the ASCII charset.
           (decode-byte
            `(if (r1 >= 128)
                 ((r0 = ,ascii-id)
                  (translate-character ,decoder r0 r1)
                  (if (r0 == ,ascii-id)
                      (write r1)
                    (write-multibyte-character r0 r1)))
               (write r1)))
           ;; CCL template shared by both encoders: non-ASCII characters
           ;; go through ENCODER.  A result in japanese-jisx0208 marks a
           ;; wide character with no codepage equivalent; one `?' is
           ;; written here and the trailing `write-repeat' in the caller
           ;; adds the second.
           (encode-char
            `(if (r0 != ,ascii-id)
                 ((translate-character ,encoder r0 r1)
                  (if (r0 == ,jisx0208-id)
                      ((r1 = ??)
                       (write r1))))))
           ;; The DOS decoder writes nothing for ?\r bytes.
           (dos-decoder
            (ccl-compile
             `(4 (loop (read r1)
                       (if (r1 != ?\r)
                           ,decode-byte)
                       (repeat)))))
           (unix-decoder
            (ccl-compile
             `(4 (loop (read r1)
                       ,decode-byte
                       (repeat)))))
           ;; The DOS encoder writes ?\r ahead of every ?\n.
           (dos-encoder
            (ccl-compile
             `(1 (loop (read-multibyte-character r0 r1)
                       (if (r1 == ?\n)
                           (write ?\r)
                         ,encode-char)
                       (write-repeat r1)))))
           (unix-encoder
            (ccl-compile
             `(1 (loop (read-multibyte-character r0 r1)
                       ,encode-char
                       (write-repeat r1)))))
           dos-spec)
      ;; Forget any earlier definition of CODING.
      (and (memq coding coding-system-list)
           (setq coding-system-list (delq coding coding-system-list)))

      ;; The base coding system uses the Unix (no EOL conversion) programs.
      (make-coding-system
       coding 4 mnemonic
       (concat "8-bit encoding of " (symbol-name iso-name)
               " characters using IBM codepage " coding-name)
       (cons unix-decoder unix-encoder)
       `((safe-charsets ascii ,iso-name)))

      ;; Create CODING-unix, CODING-dos and CODING-mac, and register them
      ;; as the EOL variants of CODING.
      (make-subsidiary-coding-system coding)
      (put coding 'eol-type
           (vector (intern (format "%s-unix" coding))
                   (intern (format "%s-dos" coding))
                   (intern (format "%s-mac" coding))))

      ;; CODING-dos gets its own copy of the spec whose slot 4 holds the
      ;; DOS-specific decoder/encoder pair.
      (setq dos-spec (copy-sequence (get coding 'coding-system)))
      (aset dos-spec 4
            (cons (check-ccl-program
                   dos-decoder
                   (intern (format "%s-dos-decoder" coding)))
                  (check-ccl-program
                   dos-encoder
                   (intern (format "%s-dos-encoder" coding)))))
      (put (intern (concat coding-name "-dos")) 'coding-system
           dos-spec))))
23915 | 128 |
(defun cp-decoding-vector-for-codepage (table charset offset)
  "Create a vector for decoding IBM PC characters using conversion table
TABLE into an ISO-8859 character set CHARSET whose first non-ASCII
character is generated by (make-char CHARSET OFFSET).

The result has 256 slots indexed by codepage code.  Codes below OFFSET
map to themselves; a code listed at index N of TABLE maps to
\(make-char CHARSET (+ N OFFSET)); every other slot holds a filler
character."
  (let ((count (length table))
        ;; Filler for codes TABLE does not mention: a space, except on
        ;; MS-DOS where it is the low byte of
        ;; `dos-unsupported-char-glyph' when that is set, else 127.
        (filler (cond ((not (eq system-type 'ms-dos)) 32)
                      (dos-unsupported-char-glyph
                       (logand dos-unsupported-char-glyph 255))
                      (t 127)))
        (idx 0)
        glyph
        result)
    (setq result (make-vector 256 filler))
    ;; Identity mapping for everything below OFFSET.
    (while (< idx offset)
      (aset result idx idx)
      (setq idx (1+ idx)))
    ;; Invert TABLE: glyph code -> character of CHARSET.
    (setq idx 0)
    (while (< idx count)
      (setq glyph (aref table idx))
      (if glyph
          (aset result glyph (make-char charset (+ idx offset))))
      (setq idx (1+ idx)))
    result))
151 | |
152 ;;; You don't think I created all these tables below by hand, do you? | |
153 ;;; The following Awk script will create the table for cp850-to-Latin-1 | |
154 ;;; conversion from the RFC 1345 file (the other tables are left as an | |
155 ;;; exercise): | |
156 ;;; BEGIN { n_pages = 11; | |
157 ;;; pn["IBM437"] = 0; pn["IBM850"] = 1; pn["IBM851"] = 2; | |
158 ;;; pn["IBM852"] = 3; pn["IBM855"] = 4; pn["IBM860"] = 5; | |
159 ;;; pn["IBM861"] = 6; pn["IBM862"] = 7; pn["IBM863"] = 8; | |
160 ;;; pn["IBM864"] = 9; pn["IBM865"] = 10; | |
161 ;;; } | |
162 ;;; $1 == "&charset" { charset = $2; } | |
163 ;;; $1 == "&code" { code = $2; } | |
164 ;;; /^ [^&]/ { | |
165 ;;; if ((charset ~ /^IBM(437|8(5[0125]|6[0-5]))$/) || (charset ~ /^ISO_8859-1/)) | |
166 ;;; { | |
167 ;;; for (i = 1; i <= NF; i++) | |
168 ;;; chars[charset,code++] = $i; | |
169 ;;; } | |
170 ;;; } | |
171 ;;; | |
172 ;;; END { | |
173 ;;; for (i = 160; i < 256; i++) | |
174 ;;; { | |
175 ;;; c = chars["ISO_8859-1:1987",i]; | |
176 ;;; if (c == "??") # skip unused positions | |
177 ;;; { | |
178 ;;; printf " nil"; | |
179 ;;; if ((i - 159)%16 == 0) | |
180 ;;; printf "\n"; | |
181 ;;; continue; | |
182 ;;; } | |
183 ;;; found = 0; | |
184 ;;; for (j in pn) | |
185 ;;; map[j] = "nil"; | |
186 ;;; for (combined in chars) | |
187 ;;; { | |
188 ;;; candidate = chars[combined]; | |
189 ;;; split (combined, separate, SUBSEP); | |
190 ;;; if (separate[1] == "IBM850" && candidate == c) | |
191 ;;; { | |
192 ;;; found = 1; | |
193 ;;; map[separate[1]] = separate[2]; | |
194 ;;; } | |
195 ;;; } | |
196 ;;; printf " %s", map["IBM850"]; | |
197 ;;; if ((i - 159)%16 == 0) | |
198 ;;; printf "\n"; | |
199 ;;; } | |
200 ;;; } | |
201 | |
202 ;;; WARNING WARNING WARNING!!! | |
203 ;;; | |
204 ;;; If you want to get fancy with these tables, remember that the inverse | |
205 ;;; tables, created by `cp-decoding-vector-for-codepage' above, are installed | |
206 ;;; on MS-DOS as nonascii-translation-table (see `dos-codepage-setup' in | |
207 ;;; internal.el). Therefore, you should NOT put any codes below 128 in | |
208 ;;; these tables! Otherwise, various Emacs commands and functions will | |
209 ;;; mysteriously fail! For example, a typical screwup is to map the Latin-N | |
210 ;;; acute accent character to the apostrophe, and have all regexps which | |
211 ;;; end with "\\'" begin to fail (e.g., the automatic setting of the major | |
212 ;;; mode by file name extension will stop working). | |
213 ;;; | |
214 ;;; You HAVE BEEN warned! | |
215 | |
216 ;; US/English/PC-8/IBM-2. This doesn't support Latin-1 characters very | |
217 ;; well, but why not use what we can salvage? | |
(defvar cp437-decode-table
  ;; Nth element is the code of a cp437 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp437 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil 166 174 170 196 nil nil
   248 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil nil nil nil nil 154 nil nil 225
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 437 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp437-decode-table 'charset 'latin-iso8859-1)
(put 'cp437-decode-table 'language "Latin-1")
(put 'cp437-decode-table 'offset 160)
232 | |
233 ;; Multilingual (Latin-1) | |
(defvar cp850-decode-table
  ;; Nth element is the code of a cp850 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp850 glyph.
  [
   255 173 189 156 207 190 221 245 249 184 166 174 170 240 169 nil
   248 241 253 252 239 nil 244 nil nil 251 167 175 172 171 243 168
   183 181 182 199 142 143 146 128 212 144 210 211 222 214 215 216
   209 165 227 224 226 229 153 158 157 235 233 234 154 237 231 225
   133 160 131 198 132 134 145 135 138 130 136 137 141 161 140 139
   208 164 149 162 147 228 148 246 155 151 163 150 129 236 232 152]
  "Table for converting ISO-8859-1 characters into codepage 850 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp850-decode-table 'charset 'latin-iso8859-1)
(put 'cp850-decode-table 'language "Latin-1")
(put 'cp850-decode-table 'offset 160)
248 | |
249 ;; Greek | |
(defvar cp851-decode-table
  ;; Nth element is the code of a cp851 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp851 glyph.
  [
   255 nil nil 156 nil nil nil 245 249 nil nil 174 nil 240 nil nil
   248 241 nil nil 239 nil 134 nil 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 184 183 189 190
   198 199 nil 207 208 209 210 211 212 213 nil nil 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 851 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp851-decode-table 'charset 'greek-iso8859-7)
(put 'cp851-decode-table 'language "Greek")
(put 'cp851-decode-table 'offset 160)
261 | |
262 ;; Slavic/Eastern Europe (Latin-2) | |
(defvar cp852-decode-table
  ;; Nth element is the code of a cp852 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-2 (+ N 160)).
  ;; The element nil means there's no corresponding cp852 glyph.
  [
   255 164 244 157 207 149 151 245 249 230 184 155 141 240 166 189
   248 165 247 136 239 150 152 243 242 231 173 156 171 241 167 190
   232 181 182 198 142 145 143 128 172 144 168 211 183 214 nil 210
   209 227 213 224 226 138 153 158 252 222 233 235 154 237 221 225
   234 160 131 199 132 146 134 135 159 130 169 137 216 161 140 212
   208 228 229 162 147 139 148 246 253 133 163 251 129 236 238 250]
  "Table for converting ISO-8859-2 characters into codepage 852 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp852-decode-table 'charset 'latin-iso8859-2)
(put 'cp852-decode-table 'language "Latin-2")
(put 'cp852-decode-table 'offset 160)
274 | |
275 ;; Russian | |
(defvar cp855-decode-table
  ;; Nth element is the code of a cp855 glyph for the multibyte
  ;; character created by (make-char 'cyrillic-iso8859-5 (+ N 160)).
  ;; The element nil means there's no corresponding cp855 glyph.
  [
   255 133 129 131 135 137 139 141 143 145 147 nil 151 240 153 155
   161 163 236 173 167 169 234 244 184 190 199 209 211 213 215 221
   226 228 230 232 171 182 165 252 246 250 159 242 238 248 157 224
   160 162 235 172 166 168 233 243 183 189 198 208 210 212 214 216
   225 227 229 231 170 181 164 251 245 249 158 241 237 247 156 222
   nil 132 128 130 134 136 138 140 142 144 146 148 150 nil 152 154]
  "Table for converting ISO-8859-5 characters into codepage 855 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp855-decode-table 'charset 'cyrillic-iso8859-5)
(put 'cp855-decode-table 'language "Cyrillic-ISO")
(put 'cp855-decode-table 'offset 160)
287 | |
288 ;; Turkish | |
(defvar cp857-decode-table
  ;; Nth element is the code of a cp857 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-3 (+ N 160)).
  ;; The element nil means there's no corresponding cp857 glyph.
  ;;
  ;; NOTE(review): this vector holds 89 elements (rows of 14 and 15),
  ;; fewer than the 96 positions (160..255) that offset 160 implies.
  ;; Because an element's index determines the character it stands
  ;; for, any missing entry would shift every later mapping -- verify
  ;; the data against the ISO-8859-3 code chart before relying on it.
  [
   255 nil nil 156 207 nil 245 249 152 158 166 nil 240 nil
   248 nil 253 252 239 nil nil nil nil 141 159 167 nil 171 nil
   183 181 182 142 nil nil 128 212 144 210 211 222 214 215 216
   165 227 224 226 nil 153 232 nil 235 233 234 154 nil nil 225
   133 160 131 132 nil nil 135 138 130 136 137 236 161 140 139
   164 149 162 147 nil 148 246 nil 151 163 150 129 nil nil 250]
  "Table for converting ISO-8859-3 characters into codepage 857 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp857-decode-table 'charset 'latin-iso8859-3)
(put 'cp857-decode-table 'language "Latin-3")
(put 'cp857-decode-table 'offset 160)
300 | |
301 ;; Portuguese | |
(defvar cp860-decode-table
  ;; Nth element is the code of a cp860 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp860 glyph.
  [
   255 173 155 156 nil nil 179 nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   145 134 143 142 nil nil nil 128 146 144 137 nil 152 nil 139 nil
   nil 165 159 169 140 153 nil nil nil 157 150 nil 154 nil nil nil
   133 160 131 132 nil nil nil 135 138 130 136 nil 141 161 nil nil
   nil 164 149 162 147 148 nil 246 nil 151 163 nil 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 860 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp860-decode-table 'charset 'latin-iso8859-1)
(put 'cp860-decode-table 'language "Latin-1")
(put 'cp860-decode-table 'offset 160)
313 | |
314 ;; Icelandic | |
(defvar cp861-decode-table
  ;; Nth element is the code of a cp861 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp861 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil nil 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil nil 175 172 171 nil 168
   nil 164 nil nil 142 143 146 128 nil 144 nil nil nil 165 nil nil
   139 nil 159 166 nil nil 153 nil 157 nil 167 nil 154 151 141 nil
   133 160 131 nil 132 134 145 135 138 130 136 137 nil 161 nil nil
   140 nil nil 162 147 nil 148 246 155 nil 163 150 129 152 149 nil]
  "Table for converting ISO-8859-1 characters into codepage 861 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp861-decode-table 'charset 'latin-iso8859-1)
(put 'cp861-decode-table 'language "Latin-1")
(put 'cp861-decode-table 'offset 160)
326 | |
327 ;; Hebrew | |
(defvar cp862-decode-table
  ;; Nth element is the code of a cp862 glyph for the multibyte
  ;; character created by (make-char 'hebrew-iso8859-8 (+ N 160)).
  ;; The element nil means there's no corresponding cp862 glyph.
  [
   255 173 155 156 nil 157 179 nil nil nil nil 174 170 196 nil nil
   248 241 253 nil nil 230 nil 249 nil nil 246 175 172 171 nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil
   nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil 205
   128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
   144 145 146 147 148 149 150 151 152 153 154 nil nil nil nil nil]
  "Table for converting ISO-8859-8 characters into codepage 862 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp862-decode-table 'charset 'hebrew-iso8859-8)
(put 'cp862-decode-table 'language "Hebrew")
(put 'cp862-decode-table 'offset 160)
342 | |
343 ;; French Canadian | |
(defvar cp863-decode-table
  ;; Nth element is the code of a cp863 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp863 glyph.
  [
   255 nil 155 156 152 nil 160 143 164 nil nil 174 170 nil nil 167
   nil 241 253 166 161 nil 134 249 165 nil nil 175 172 171 173 nil
   142 nil 132 nil nil nil nil 128 145 144 146 148 nil nil 168 149
   nil nil nil nil 153 nil nil nil nil 157 nil 158 154 nil nil nil
   133 nil 131 nil nil nil nil 135 138 130 136 137 141 nil 140 139
   nil nil nil 162 147 nil nil 246 nil 151 163 150 129 nil nil nil]
  "Table for converting ISO-8859-1 characters into codepage 863 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp863-decode-table 'charset 'latin-iso8859-1)
(put 'cp863-decode-table 'language "Latin-1")
(put 'cp863-decode-table 'offset 160)
355 | |
356 ;; Arabic | |
357 ;; FIXME: Emacs doesn't seem to support the "Arabic" language | |
358 ;; environment yet. So this is only partially usable, for now | |
(defvar cp864-decode-table
  ;; Nth element is the code of a cp864 glyph for the multibyte
  ;; character created by (make-char 'arabic-iso8859-6 (+ N 160)).
  ;; The element nil means there's no corresponding cp864 glyph.
  [
   255 nil nil nil 164 nil nil nil nil nil nil nil 172 161 nil nil
   nil nil nil nil nil nil nil nil nil nil nil 187 nil nil nil 191
   nil 193 194 195 196 nil 198 199 169 201 170 171 173 174 175 207
   208 209 210 188 189 190 235 215 216 223 238 nil nil nil nil nil
   224 247 248 252 251 239 242 243 232 233 253 nil nil nil nil nil
   nil 241 nil nil nil nil nil nil nil nil nil nil nil nil nil nil]
  "Table for converting ISO-8859-6 characters into codepage 864 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
;; The language property is nil: no language environment is named for
;; this codepage.
(put 'cp864-decode-table 'charset 'arabic-iso8859-6)
(put 'cp864-decode-table 'language nil)
(put 'cp864-decode-table 'offset 160)
370 | |
371 ;; Nordic (Norwegian/Danish) | |
(defvar cp865-decode-table
  ;; Nth element is the code of a cp865 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-1 (+ N 160)).
  ;; The element nil means there's no corresponding cp865 glyph.
  [
   255 173 nil 156 nil nil nil nil nil nil 166 174 170 nil nil nil
   nil 241 253 nil nil nil nil 249 nil nil 167 175 172 171 nil 168
   nil nil nil nil 142 143 146 128 nil 144 nil nil nil nil nil nil
   nil 165 nil nil nil nil 153 nil 157 nil nil nil 154 nil nil nil
   133 160 131 nil 132 134 145 135 138 130 136 137 141 161 140 139
   nil 164 149 162 147 nil 148 246 155 151 163 150 129 nil nil 152]
  "Table for converting ISO-8859-1 characters into codepage 865 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp865-decode-table 'charset 'latin-iso8859-1)
(put 'cp865-decode-table 'language "Latin-1")
(put 'cp865-decode-table 'offset 160)
383 | |
384 ;; Greek (yes, another one!) | |
(defvar cp869-decode-table
  ;; Nth element is the code of a cp869 glyph for the multibyte
  ;; character created by (make-char 'greek-iso8859-7 (+ N 160)).
  ;; The element nil means there's no corresponding cp869 glyph.
  [
   255 139 140 156 nil nil 138 245 249 151 nil 174 137 240 nil 142
   248 241 153 154 239 247 134 136 141 143 144 175 146 171 149 152
   161 164 165 166 167 168 169 170 172 173 181 182 183 184 189 190
   198 199 nil 207 208 209 210 211 212 213 145 150 155 157 158 159
   252 214 215 216 221 222 224 225 226 227 228 229 230 231 232 233
   234 235 237 236 238 242 243 244 246 250 160 251 162 163 253 nil]
  "Table for converting ISO-8859-7 characters into codepage 869 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp869-decode-table 'charset 'greek-iso8859-7)
(put 'cp869-decode-table 'language "Greek")
(put 'cp869-decode-table 'offset 160)
396 | |
397 ;; Conversion from codepage 775 to Latin-4 for Baltic countries. | |
(defvar cp775-decode-table
  ;; Nth element is the code of a cp775 glyph for the multibyte
  ;; character created by (make-char 'latin-iso8859-4 (+ N 160)).
  ;; The element nil means there's no corresponding cp775 glyph.
  [
   255 181 nil 138 150 nil 234 245 166 190 237 149 173 240 207 nil
   248 208 nil 139 239 nil 235 nil nil 213 137 133 nil nil 216 nil
   160 nil nil nil 142 143 146 189 182 144 183 nil 184 nil nil 161
   nil 238 226 232 nil 229 153 158 157 198 nil nil 154 nil 199 225
   131 nil nil nil 132 134 145 212 209 130 210 nil 211 nil nil 140
   nil 236 147 233 nil 228 148 nil 155 214 nil nil 129 nil 215 nil]
  "Table for converting ISO-8859-4 characters into codepage 775 glyphs.")
;; Use `put' rather than `setplist': replacing the whole property list
;; would discard the `variable-documentation' stored by the defvar above.
;; These properties are read by `cp-charset-for-codepage',
;; `cp-language-for-codepage' and `cp-offset-for-codepage'.
(put 'cp775-decode-table 'charset 'latin-iso8859-4)
(put 'cp775-decode-table 'language "Latin-4")
(put 'cp775-decode-table 'offset 160)
409 | |
410 ;;;###autoload | |
(defun cp-make-coding-systems-for-codepage (codepage iso-name offset)
  "Create coding systems to convert IBM CODEPAGE into charset ISO-NAME
whose first character is at offset OFFSET from the beginning of 8-bit
ASCII table.

CODEPAGE is a string naming the codepage; the variable
CODEPAGE-decode-table must hold its conversion table.  The coding
system named CODEPAGE is defined by `cp-coding-system-for-codepage-1',
which also creates the variants that differ only in EOL conversion."
  (let ((table-var (intern (format "%s-decode-table" codepage)))
        (nonascii-var
         (intern (format "%s-nonascii-translation-table" codepage)))
        (decode-name
         (intern (format "%s-decode-translation-table" codepage)))
        (encode-name
         (intern (format "%s-encode-translation-table" codepage)))
        forward reverse others)
    ;; FORWARD maps codepage codes to characters; its extra slot 0 is
    ;; used below as the reverse table (characters to codepage codes).
    (setq forward (make-translation-table-from-vector
                   (cp-decoding-vector-for-codepage
                    (symbol-value table-var) iso-name offset)))
    (set nonascii-var forward)
    (setq reverse (char-table-extra-slot forward 0))
    (define-translation-table encode-name reverse)
    ;; For charsets other than ascii and ISO-NAME, set `?' for
    ;; one-column charsets, and some Japanese character for
    ;; wide-column charsets.  The CCL encoder converts that Japanese
    ;; character to "??".
    (let ((wide-marker (make-char 'japanese-jisx0208 32 32)))
      (setq others (delq 'ascii
                         (delq iso-name (copy-sequence charset-list))))
      (while others
        (aset reverse (make-char (car others))
              (if (= (charset-width (car others)) 1)
                  ??
                wide-marker))
        (setq others (cdr others))))
    (define-translation-table decode-name forward)
    (cp-coding-system-for-codepage-1
     (intern codepage) ?D iso-name decode-name encode-name)))
23915 | 449 |
(defun cp-codepage-decoder (codepage)
  "If CODEPAGE is the name of a supported codepage, return its decode table;
otherwise return nil.

CODEPAGE may be a string or a symbol.  The value returned is the
already-interned symbol named CODEPAGE-decode-table, not the vector it
holds."
  (let ((name (if (symbolp codepage)
                  (symbol-name codepage)
                codepage)))
    ;; `intern-soft' yields nil when no such symbol has been interned.
    (and (stringp name)
         (intern-soft (format "%s-decode-table" name)))))
458 | |
459 ;;;###autoload | |
(defun cp-charset-for-codepage (codepage)
  "Return the charset for which there is a translation table to DOS CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if CODEPAGE has no decode table."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    ;; The charset is kept on the table symbol's property list.
    (get table-symbol 'charset)))
467 | |
468 ;;;###autoload | |
(defun cp-language-for-codepage (codepage)
  "Return the name of the MULE language environment for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if CODEPAGE has no decode table."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    ;; The language name is kept on the table symbol's property list.
    (get table-symbol 'language)))
476 | |
477 ;;;###autoload | |
(defun cp-offset-for-codepage (codepage)
  "Return the offset to be used in setting up coding systems for CODEPAGE.
CODEPAGE must be the name of a DOS codepage, a string.
Signal an error if CODEPAGE has no decode table."
  (let ((table-symbol (cp-codepage-decoder codepage)))
    (or table-symbol
        (error "Unsupported codepage %s" codepage))
    ;; The offset is kept on the table symbol's property list.
    (get table-symbol 'offset)))
485 | |
486 ;;;###autoload | |
(defun cp-supported-codepages ()
  "Return an alist of supported codepages.

Each association in the alist has the form (NNN . CHARSET), where NNN is the
codepage number, and CHARSET is the MULE charset which is the closest match
for the character set supported by that codepage.

A codepage NNN is supported if a variable called `cpNNN-decode-table' exists,
is a vector, and has a charset property."
  (save-match-data
    (let ((found nil))
      ;; Scan every interned symbol for bound cpNNN-decode-table variables.
      (mapatoms
       (lambda (sym)
         (let ((name (symbol-name sym))
               charset)
           (and (boundp sym)
                (string-match "\\`cp\\([1-9][0-9][0-9]\\)-decode-table\\'"
                              name)
                (vectorp (symbol-value sym))
                (setq charset (get sym 'charset))
                ;; NNN is collected as a string, taken from the name.
                (setq found
                      (cons (cons (match-string 1 name) charset)
                            found))))))
      found)))
509 | |
510 ;;;###autoload | |
(defun codepage-setup (codepage)
  "Create the coding systems for codepage CODEPAGE.
They are defined by `cp-make-coding-systems-for-codepage' under the
name cpCODEPAGE.

These coding systems are meant for encoding and decoding 8-bit non-ASCII
characters used by the IBM codepages, typically in conjunction with files
read/written by MS-DOS software, or for display on MS-DOS terminal.

Interactively, prompt for the codepage number with completion over the
supported codepages; the default is 437."
  (interactive
   (let ((completion-ignore-case t)
         (choices (cp-supported-codepages)))
     (list (completing-read "Setup DOS Codepage: (default 437) " choices
                            nil t nil nil "437"))))
  (let* ((cp-name (format "cp%s" codepage))
         (charset (cp-charset-for-codepage cp-name))
         (offset (cp-offset-for-codepage cp-name)))
    (cp-make-coding-systems-for-codepage cp-name charset offset)))
526 | |
527 (provide 'codepage) | |
528 | |
529 ;; codepage.el ends here |