Mercurial > emacs
comparison lisp/gnus/mm-util.el @ 31717:6b20b7e85e3c
*** empty log message ***
author | Gerd Moellmann <gerd@gnu.org> |
---|---|
date | Tue, 19 Sep 2000 13:40:08 +0000 |
parents | |
children | cbdba3c57536 |
comparison
equal
deleted
inserted
replaced
31716:9968f55ad26e | 31717:6b20b7e85e3c |
---|---|
1 ;;; mm-util.el --- Utility functions for MIME things | |
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | |
3 | |
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 ;; This file is part of GNU Emacs. | |
7 | |
8 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 ;; it under the terms of the GNU General Public License as published by | |
10 ;; the Free Software Foundation; either version 2, or (at your option) | |
11 ;; any later version. | |
12 | |
13 ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 ;; GNU General Public License for more details. | |
17 | |
18 ;; You should have received a copy of the GNU General Public License | |
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 ;; Boston, MA 02111-1307, USA. | |
22 | |
23 ;;; Commentary: | |
24 | |
25 ;;; Code: | |
26 | |
27 (require 'mail-prsvr) | |
28 | |
(defvar mm-mime-mule-charset-alist
  '((us-ascii ascii)
    (iso-8859-1 latin-iso8859-1)
    (iso-8859-2 latin-iso8859-2)
    (iso-8859-3 latin-iso8859-3)
    (iso-8859-4 latin-iso8859-4)
    (iso-8859-5 cyrillic-iso8859-5)
    ;; A non-MULE (X)Emacs takes the last MULE charset of an entry as
    ;; the charset of 8-bit characters.  `gnus-koi8-r' is a fake MULE
    ;; charset that tells Gnus the default charset is koi8-r rather
    ;; than iso-8859-5.
    (koi8-r cyrillic-iso8859-5 gnus-koi8-r)
    (iso-8859-6 arabic-iso8859-6)
    (iso-8859-7 greek-iso8859-7)
    (iso-8859-8 hebrew-iso8859-8)
    (iso-8859-9 latin-iso8859-9)
    (viscii vietnamese-viscii-lower)
    (iso-2022-jp latin-jisx0201 japanese-jisx0208 japanese-jisx0208-1978)
    (euc-kr korean-ksc5601)
    (cn-gb-2312 chinese-gb2312)
    (cn-big5 chinese-big5-1 chinese-big5-2)
    (tibetan tibetan)
    (thai-tis620 thai-tis620)
    (iso-2022-7bit ethiopic arabic-1-column arabic-2-column)
    (iso-2022-jp-2 latin-iso8859-1 greek-iso8859-7
                   latin-jisx0201 japanese-jisx0208-1978
                   chinese-gb2312 japanese-jisx0208
                   korean-ksc5601 japanese-jisx0212
                   katakana-jisx0201)
    (iso-2022-int-1 latin-iso8859-1 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2)
    ;; NOTE(review): second entry keyed `iso-2022-int-1'.  An `assq'
    ;; lookup by MIME charset only ever reaches the entry above; this
    ;; one is reachable only by scanning the MULE charsets.  Confirm
    ;; whether a different MIME charset name was intended here.
    (iso-2022-int-1 latin-iso8859-1 latin-iso8859-2
                    cyrillic-iso8859-5 greek-iso8859-7
                    latin-jisx0201 japanese-jisx0208-1978
                    chinese-gb2312 japanese-jisx0208
                    korean-ksc5601 japanese-jisx0212
                    chinese-cns11643-1 chinese-cns11643-2
                    chinese-cns11643-3 chinese-cns11643-4
                    chinese-cns11643-5 chinese-cns11643-6
                    chinese-cns11643-7)
    (utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e))
  "Alist associating MIME charsets with MULE charsets.
Each element has the form (MIME-CHARSET MULE-CHARSET...).")
73 | |
;; For every (NAME . FALLBACK) pair below, define `mm-NAME'.  It is an
;; alias for NAME when this Emacs provides that function, and for
;; FALLBACK otherwise, so the rest of the library can call the `mm-'
;; names unconditionally.
(eval-and-compile
  (mapcar
   (lambda (elem)
     (let ((nfunc (intern (format "mm-%s" (car elem)))))
       (if (fboundp (car elem))
           (defalias nfunc (car elem))
         (defalias nfunc (cdr elem)))))
   '((decode-coding-string . (lambda (s a) s))
     (encode-coding-string . (lambda (s a) s))
     (encode-coding-region . ignore)
     (coding-system-list . ignore)
     (decode-coding-region . ignore)
     (char-int . identity)
     (device-type . ignore)
     (coding-system-equal . equal)
     (annotationp . ignore)
     (set-buffer-file-coding-system . ignore)
     (make-char
      . (lambda (charset int)
          (int-to-char int)))
     (read-coding-system
      ;; Accept (and ignore) the optional default argument that the
      ;; real `read-coding-system' takes, so callers can pass it no
      ;; matter which definition ends up behind `mm-read-coding-system'.
      . (lambda (prompt &optional default-coding-system)
          "Prompt the user for a coding system."
          (completing-read
           prompt (mapcar (lambda (s) (list (symbol-name (car s))))
                          mm-mime-mule-charset-alist))))
     (read-charset
      . (lambda (prompt)
          "Return a charset."
          (intern
           (completing-read
            prompt
            (mapcar (lambda (e) (list (symbol-name (car e))))
                    mm-mime-mule-charset-alist)
            nil t)))))))
109 | |
(eval-and-compile
  ;; Alias `mm-char-or-char-int-p' to the first predicate that exists:
  ;; `char-or-char-int-p', then `char-valid-p', and `identity' as the
  ;; last resort.
  (defalias 'mm-char-or-char-int-p
    (if (fboundp 'char-or-char-int-p)
        'char-or-char-int-p
      (if (fboundp 'char-valid-p)
          'char-valid-p
        'identity))))
116 | |
(defvar mm-coding-system-list nil
  "Cache for the value returned by the function `mm-coding-system-list'.")
(defun mm-get-coding-system-list ()
  "Return the list of coding systems, computing it on first use.
A non-nil result of the function `mm-coding-system-list' is stored in
the variable of the same name and reused by later calls."
  (unless mm-coding-system-list
    (setq mm-coding-system-list (mm-coding-system-list)))
  mm-coding-system-list)
122 | |
(defvar mm-charset-synonym-alist
  '((big5 . cn-big5)
    (gb2312 . cn-gb-2312)
    (x-ctext . ctext))
  "Alist mapping invalid charset names to the real charset names.
Each element has the form (INVALID-NAME . REAL-NAME).")
128 | |
(defun mm-coding-system-p (sym)
  "Return non-nil if SYM is a coding system.
SYM qualifies when `coding-system-p' is defined and accepts it, or
when it is a member of the list from `mm-get-coding-system-list'."
  (let ((native (and (fboundp 'coding-system-p)
                     (coding-system-p sym))))
    (if native
        native
      (memq sym (mm-get-coding-system-list)))))
133 | |
(defvar mm-binary-coding-system
  (if (mm-coding-system-p 'binary)
      'binary
    (and (mm-coding-system-p 'no-conversion)
         'no-conversion))
  "100% binary coding system.
This is `binary' if that coding system exists, else `no-conversion'
if that one exists, else nil.")
140 | |
(defvar mm-text-coding-system
  ;; Prefer `raw-text-dos' on DOS/Windows systems and `raw-text'
  ;; elsewhere; use the binary coding system when the preferred one
  ;; does not exist.
  (let ((preferred (if (memq system-type '(windows-nt ms-dos ms-windows))
                       'raw-text-dos
                     'raw-text)))
    (if (mm-coding-system-p preferred)
        preferred
      mm-binary-coding-system))
  "Text-safe coding system (For removing ^M).")
147 | |
(defvar mm-text-coding-system-for-write nil
  "Text coding system for write.
When non-nil, `mm-append-to-file' and `mm-write-region' use it in
preference to `mm-text-coding-system' whenever the caller does not
pass an explicit coding system.")
150 | |
(defvar mm-auto-save-coding-system
  (let ((dos-like (memq system-type '(windows-nt ms-dos ms-windows))))
    (cond
     ;; Without `emacs-mule', try `escape-quoted', then plain binary.
     ((not (mm-coding-system-p 'emacs-mule))
      (if (mm-coding-system-p 'escape-quoted)
          'escape-quoted
        mm-binary-coding-system))
     ((not dos-like)
      'emacs-mule)
     ;; DOS-like systems want the `-dos' variant when it exists.
     ((mm-coding-system-p 'emacs-mule-dos)
      'emacs-mule-dos)
     (t
      mm-binary-coding-system)))
  "Coding system of auto save file.")
161 | |
162 ;;; Internal variables: | |
163 | |
164 ;;; Functions: | |
165 | |
(defun mm-mule-charset-to-mime-charset (charset)
  "Return the MIME charset corresponding to MULE CHARSET.
The value is the car of the first element of
`mm-mime-mule-charset-alist' whose cdr contains CHARSET, or nil if no
element does."
  (catch 'found
    (let ((entries mm-mime-mule-charset-alist)
          entry)
      (while entries
        (setq entry (car entries)
              entries (cdr entries))
        (if (memq charset (cdr entry))
            (throw 'found (car entry))))
      nil)))
176 | |
(defun mm-charset-to-coding-system (charset &optional lbt)
  "Return coding-system corresponding to CHARSET.
CHARSET is a symbol naming a MIME charset; a string is downcased and
interned first.  Names listed in `mm-charset-synonym-alist' are
replaced by their real names.
If optional argument LBT (`unix', `dos' or `mac') is specified, it is
used as the line break code type of the coding system.
Return nil if the resulting coding system is not known."
  (let* ((name (if (stringp charset)
                   (intern (downcase charset))
                 charset))
         (real (or (cdr (assq name mm-charset-synonym-alist))
                   name))
         (wanted (if lbt
                     (intern (format "%s-%s" real lbt))
                   real))
         (known (mm-get-coding-system-list)))
    (cond
     ;; No coding systems at all: a non-MULE environment.
     ((null known) wanted)
     ((eq wanted 'us-ascii) 'ascii)
     ;; Only hand back coding systems we can actually handle.
     ((memq wanted known) wanted))))
202 | |
(if (fboundp 'subst-char-in-string)
    (defsubst mm-replace-chars-in-string (string from to)
      "Return a copy of STRING with every character FROM replaced by TO."
      (subst-char-in-string from to string))
  (defun mm-replace-chars-in-string (string from to)
    "Replace characters in STRING from FROM to TO."
    (let ((result (substring string 0)) ; Work on a copy of STRING.
          (i (length string)))
      ;; Walk the copy from the end, overwriting every FROM with TO.
      (while (> i 0)
        (setq i (1- i))
        (if (= (aref result i) from)
            (aset result i to)))
      result)))
217 | |
(defsubst mm-enable-multibyte ()
  "Enable multibyte in the current buffer.
Nothing is done unless `set-buffer-multibyte' is defined and the
default value of `enable-multibyte-characters' is non-nil."
  (and (fboundp 'set-buffer-multibyte)
       (boundp 'enable-multibyte-characters)
       (default-value 'enable-multibyte-characters)
       (set-buffer-multibyte t)))
224 | |
(defsubst mm-disable-multibyte ()
  "Disable multibyte in the current buffer.
Nothing is done when `set-buffer-multibyte' is not defined."
  (if (fboundp 'set-buffer-multibyte)
      (set-buffer-multibyte nil)))
229 | |
(defun mm-preferred-coding-system (charset)
  "Return the preferred coding system recorded for MULE CHARSET.
The misspelled property `prefered-coding-system' is consulted first
because some Emacs versions contain that typo; the correctly spelled
`preferred-coding-system' is used when the former is nil."
  (let ((misspelled (get-charset-property charset 'prefered-coding-system)))
    (if misspelled
        misspelled
      (get-charset-property charset 'preferred-coding-system))))
234 | |
(defun mm-charset-after (&optional pos)
  "Return charset of a character in current buffer at position POS.
If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil.
If the charset is `composition', return the actual one."
  (let ((charset (cond
                  ((fboundp 'charset-after)
                   (charset-after pos))
                  ;; `char-after' returns nil when there is no
                  ;; character at POS.  Stop here so that the clauses
                  ;; below never pass nil to `char-charset' or
                  ;; `mm-char-int', and the documented nil is returned
                  ;; instead of an error being signaled.
                  ((null (char-after pos))
                   nil)
                  ((fboundp 'char-charset)
                   (char-charset (char-after pos)))
                  ((< (mm-char-int (char-after pos)) 128)
                   'ascii)
                  (mail-parse-mule-charset ;; cached mule-charset
                   mail-parse-mule-charset)
                  ((boundp 'current-language-environment)
                   ;; Take the last charset listed for the current
                   ;; language environment and cache it.
                   (let ((entry (assoc current-language-environment
                                       language-info-alist)))
                     (setq mail-parse-mule-charset
                           (or (car (last (assq 'charset entry)))
                               'latin-iso8859-1))))
                  (t ;; figure out the charset
                   (setq mail-parse-mule-charset
                         (or (car (last (assq mail-parse-charset
                                              mm-mime-mule-charset-alist)))
                             'latin-iso8859-1))))))
    (if (eq charset 'composition)
        ;; Look at the one-character region to find the base charset.
        (let ((p (or pos (point))))
          (cadr (find-charset-region p (1+ p))))
      charset)))
264 | |
(defun mm-mime-charset (charset)
  "Return the MIME charset corresponding to the MULE CHARSET.
When both `coding-system-get' and `get-charset-property' are defined
\(they are in Emacs 20), the candidates are tried in this order: the
`mime-charset' property of CHARSET's preferred coding system,
`us-ascii' for the charset `ascii', and the preferred coding system
itself.  If none applies, or the functions are missing (XEmacs), the
value comes from `mm-mule-charset-to-mime-charset'."
  (let* ((have-props (and (fboundp 'coding-system-get)
                          (fboundp 'get-charset-property)))
         (preferred (and have-props
                         (mm-preferred-coding-system charset))))
    (cond
     ((and preferred
           (coding-system-get preferred 'mime-charset)))
     ((and have-props (eq charset 'ascii))
      'us-ascii)
     (preferred)
     (t
      (mm-mule-charset-to-mime-charset charset)))))
279 | |
(defun mm-delete-duplicates (list)
  "Simple substitute for CL `delete-duplicates', testing with `equal'.
Return the distinct elements of LIST in order of first appearance.
LIST is modified destructively through `delete'."
  (let ((remaining list)
        (unique nil)
        item)
    (while remaining
      (setq item (car remaining))
      (setq unique (cons item unique))
      ;; Drop ITEM and every later element `equal' to it.
      (setq remaining (delete item remaining)))
    (nreverse unique)))
288 | |
(defun mm-find-mime-charset-region (b e)
  "Return the MIME charsets needed to encode the region between B and E.
The charset `ascii' is ignored, duplicates are removed, and
`iso-2022-jp' is dropped when `iso-2022-jp-2' is also present.  If
more than one charset remains and `find-coding-systems-region' reports
`utf-8' for the region, the value is (utf-8)."
  (let* ((mule-charsets (delq 'ascii (mm-find-charset-region b e)))
         (charsets (mapcar 'mm-mime-charset mule-charsets)))
    (if (memq 'iso-2022-jp-2 charsets)
        (setq charsets (delq 'iso-2022-jp charsets)))
    (setq charsets (mm-delete-duplicates charsets))
    (cond
     ((and (cdr charsets)               ; more than one charset left
           (fboundp 'find-coding-systems-region)
           (memq 'utf-8 (find-coding-systems-region b e)))
      '(utf-8))
     (t
      charsets))))
302 | |
(defsubst mm-multibyte-p ()
  "Say whether multibyte is enabled.
Always non-nil on XEmacs; otherwise the value of
`enable-multibyte-characters' when that variable is bound."
  (cond
   ((featurep 'xemacs))
   ((boundp 'enable-multibyte-characters)
    enable-multibyte-characters)))
308 | |
(defmacro mm-with-unibyte-buffer (&rest forms)
  "Create a temporary buffer, and evaluate FORMS there like `progn'.
While FORMS run, the default value of `enable-multibyte-characters' is
nil, and `buffer-file-coding-system', `coding-system-for-read' and
`coding-system-for-write' are bound to `mm-binary-coding-system'.
Afterwards the temporary buffer is killed and the default value is
restored, even if FORMS exit non-locally.
On XEmacs, or when `enable-multibyte-characters' is not bound, this is
simply `with-temp-buffer'.
See also `with-temp-file' and `with-output-to-string'."
  (let ((temp-buffer (make-symbol "temp-buffer"))
        (multibyte (make-symbol "multibyte")))
    ;; Test for XEmacs with `featurep', like the other `mm-with-unibyte'
    ;; macros, rather than matching `emacs-version' at run time, which
    ;; would clobber the caller's match data.
    `(if (or (featurep 'xemacs)
             (not (boundp 'enable-multibyte-characters)))
         (with-temp-buffer ,@forms)
       (let ((,multibyte (default-value 'enable-multibyte-characters))
             ,temp-buffer)
         (unwind-protect
             (progn
               ;; The default is cleared before the buffer is created,
               ;; presumably so that the new buffer starts out unibyte.
               (setq-default enable-multibyte-characters nil)
               (setq ,temp-buffer
                     (get-buffer-create (generate-new-buffer-name " *temp*")))
               (unwind-protect
                   (with-current-buffer ,temp-buffer
                     (let ((buffer-file-coding-system mm-binary-coding-system)
                           (coding-system-for-read mm-binary-coding-system)
                           (coding-system-for-write mm-binary-coding-system))
                       ,@forms))
                 (and (buffer-name ,temp-buffer)
                      (kill-buffer ,temp-buffer))))
           (setq-default enable-multibyte-characters ,multibyte))))))
(put 'mm-with-unibyte-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-buffer 'edebug-form-spec '(body))
335 | |
(defmacro mm-with-unibyte-current-buffer (&rest forms)
  "Evaluate FORMS like `progn' with the current buffer made unibyte.
While FORMS run, the default value of `enable-multibyte-characters' is
nil, and `buffer-file-coding-system', `coding-system-for-read' and
`coding-system-for-write' are bound to `mm-binary-coding-system'.
Afterwards, even if FORMS exit non-locally, the default value is
restored and the buffer that was current on entry gets back the
multibyte state it had on entry, provided that buffer is still alive.
On XEmacs, or when `set-buffer-multibyte' is not defined, FORMS are
simply evaluated."
  (let ((default (make-symbol "default"))
        (multibyte (make-symbol "multibyte"))
        (buffer (make-symbol "buffer")))
    `(if (or (featurep 'xemacs)
             (not (fboundp 'set-buffer-multibyte)))
         (progn
           ,@forms)
       ;; Remember the buffer and its own multibyte state separately
       ;; from the default value.  Restoring the buffer from the
       ;; default would make a buffer that was unibyte on entry
       ;; multibyte on exit, and FORMS may leave another buffer
       ;; current.
       (let ((,default (default-value 'enable-multibyte-characters))
             (,multibyte enable-multibyte-characters)
             (,buffer (current-buffer)))
         (unwind-protect
             (let ((buffer-file-coding-system mm-binary-coding-system)
                   (coding-system-for-read mm-binary-coding-system)
                   (coding-system-for-write mm-binary-coding-system))
               (set-buffer-multibyte nil)
               (setq-default enable-multibyte-characters nil)
               ,@forms)
           (setq-default enable-multibyte-characters ,default)
           ;; `buffer-name' is nil for a killed buffer.
           (when (buffer-name ,buffer)
             (with-current-buffer ,buffer
               (set-buffer-multibyte ,multibyte))))))))
(put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0)
(put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body))
355 | |
(defmacro mm-with-unibyte (&rest forms)
  "Evaluate FORMS with the default `enable-multibyte-characters' nil.
The previous default value is restored afterwards, even if FORMS exit
non-locally.  On XEmacs, or when `enable-multibyte-characters' is not
bound, FORMS are simply evaluated like `progn'."
  (let ((saved (make-symbol "saved-default")))
    `(if (and (not (featurep 'xemacs))
              (boundp 'enable-multibyte-characters))
         (let ((,saved (default-value 'enable-multibyte-characters)))
           (unwind-protect
               (progn
                 (setq-default enable-multibyte-characters nil)
                 ,@forms)
             (setq-default enable-multibyte-characters ,saved)))
       ;; Nothing to switch off here; just run the body.
       (progn ,@forms))))
(put 'mm-with-unibyte 'lisp-indent-function 0)
(put 'mm-with-unibyte 'edebug-form-spec '(body))
370 | |
(defun mm-find-charset-region (b e)
  "Return a list of charsets in the region between B and E.
With multibyte enabled and `find-charset-region' available, the value
is that function's result without `composition'.  Otherwise the value
is (ascii) when the region holds only characters in the range
\\0-\\177, else a list of `ascii' and one guessed charset.  The guess
is the last charset of the current language environment when
`current-language-environment' is bound, else the last MULE charset
listed for `mail-parse-charset'; `latin-iso8859-1' is used when
neither yields one."
  (if (and (mm-multibyte-p)
           (fboundp 'find-charset-region))
      ;; Remove composition since the base charsets have been included.
      (delq 'composition (find-charset-region b e))
    ;; Unibyte buffer, or no `find-charset-region': futz around a bit.
    (let ((have-lang-env (boundp 'current-language-environment)))
      (save-excursion
        (save-restriction
          (narrow-to-region b e)
          (goto-char (point-min))
          (let ((lang-entry (and have-lang-env
                                 (assoc current-language-environment
                                        language-info-alist))))
            (skip-chars-forward "\0-\177")
            (if (eobp)
                '(ascii)
              (delq nil
                    (list 'ascii
                          (or (car (last
                                    (if have-lang-env
                                        (assq 'charset lang-entry)
                                      (assq mail-parse-charset
                                            mm-mime-mule-charset-alist))))
                              'latin-iso8859-1))))))))))
404 | |
(if (fboundp 'shell-quote-argument)
    (defalias 'mm-quote-arg 'shell-quote-argument)
  (defun mm-quote-arg (arg)
    "Return a version of ARG that is safe to evaluate in a shell.
Each character of ARG matched by the regexp below is preceded by a
backslash.  ARG itself is returned when nothing needed quoting."
    (let ((start 0)
          (pieces nil)
          hit)
      ;; *** bug: we don't handle newline characters properly
      (while (setq hit (string-match "[]*[;!'`\"$\\& \t{} |()<>]" arg start))
        ;; Collect, in reverse order: the text before the special
        ;; character, a backslash, and the character itself.
        (setq pieces (cons (substring arg start hit) pieces))
        (setq pieces (cons "\\" pieces))
        (setq pieces (cons (char-to-string (aref arg hit)) pieces))
        (setq start (1+ hit)))
      (if (zerop start)
          arg
        (apply 'concat
               (nreverse (cons (substring arg start) pieces)))))))
419 | |
(defun mm-auto-mode-alist ()
  "Return an `auto-mode-alist' with only the .gz (etc) thingies.
These are the elements of `auto-mode-alist' whose cdr is a list, kept
in their original order."
  (let ((kept nil))
    (dolist (entry auto-mode-alist)
      (if (listp (cdr entry))
          (setq kept (cons entry kept))))
    (nreverse kept)))
429 | |
(defvar mm-inhibit-file-name-handlers
  '(jka-compr-handler)
  "A list of handlers doing (un)compression (etc) thingies.
`mm-insert-file-contents', `mm-append-to-file' and `mm-write-region'
add these to `inhibit-file-name-handlers' when their INHIBIT argument
is non-nil.")
433 | |
(defun mm-insert-file-contents (filename &optional visit beg end replace
                                         inhibit)
  "Like `insert-file-contents', q.v., but only reads in the file.
A buffer may be modified in several ways after reading into the buffer
due to advanced Emacs features, such as file-name-handlers, format
decoding, find-file-hooks, etc.  This function ensures that none of
these modifications will take place.
If INHIBIT is non-nil, the handlers in `mm-inhibit-file-name-handlers'
are inhibited as well and `auto-mode-alist' is bound to nil; otherwise
`auto-mode-alist' is reduced to the value of `mm-auto-mode-alist'."
  (let ((inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers))
        (inhibit-file-name-operation
         (if inhibit
             'insert-file-contents
           inhibit-file-name-operation))
        (auto-mode-alist (and (not inhibit) (mm-auto-mode-alist)))
        (default-major-mode 'fundamental-mode)
        ;; Switch off everything that could post-process the text.
        (format-alist nil)
        (after-insert-file-functions nil)
        (find-file-hooks nil)
        (enable-local-variables nil)
        (enable-local-eval nil))
    (insert-file-contents filename visit beg end replace)))
458 | |
(defun mm-append-to-file (start end filename &optional codesys inhibit)
  "Append the contents of the region to the end of file FILENAME.
When called from a function, expects three arguments,
START, END and FILENAME.  START and END are buffer positions
saying what text to write.
Optional fourth argument CODESYS specifies the coding system to use
when encoding the file; when it is nil,
`mm-text-coding-system-for-write' is used, or `mm-text-coding-system'
if that is nil as well.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         (cond (codesys)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers))
        (inhibit-file-name-operation
         (if inhibit
             'append-to-file
           inhibit-file-name-operation)))
    (append-to-file start end filename)))
479 | |
(defun mm-write-region (start end filename &optional append visit lockname
                              coding-system inhibit)
  "Like `write-region'.
START, END, FILENAME, APPEND, VISIT and LOCKNAME are passed on to
`write-region' unchanged.  The text is written using CODING-SYSTEM;
when that is nil, `mm-text-coding-system-for-write' is used, or
`mm-text-coding-system' if that is nil as well.
If INHIBIT is non-nil, inhibit `mm-inhibit-file-name-handlers'."
  (let ((coding-system-for-write
         (cond (coding-system)
               (mm-text-coding-system-for-write)
               (t mm-text-coding-system)))
        (inhibit-file-name-handlers
         (append (and inhibit mm-inhibit-file-name-handlers)
                 inhibit-file-name-handlers))
        (inhibit-file-name-operation
         (if inhibit
             'write-region
           inhibit-file-name-operation)))
    (write-region start end filename append visit lockname)))
497 | |
498 (provide 'mm-util) | |
499 | |
500 ;;; mm-util.el ends here |