Mercurial > emacs
annotate lisp/gnus/mm-bodies.el @ 88306:7b65aade5e20
new file, based on Alexander Pohoyda's code.
author | Alex Schroeder <alex@gnu.org> |
---|---|
date | Tue, 31 Jan 2006 20:53:28 +0000 |
parents | d7ddb3e565de |
children |
rev | line source |
---|---|
88155 | 1 ;;; mm-bodies.el --- Functions for decoding MIME things |
2 | |
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, | |
4 ;; 2005 Free Software Foundation, Inc. | |
31717 | 5 |
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
8 ;; This file is part of GNU Emacs. | |
9 | |
10 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
11 ;; it under the terms of the GNU General Public License as published by | |
12 ;; the Free Software Foundation; either version 2, or (at your option) | |
13 ;; any later version. | |
14 | |
15 ;; GNU Emacs is distributed in the hope that it will be useful, | |
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 ;; GNU General Public License for more details. | |
19 | |
20 ;; You should have received a copy of the GNU General Public License | |
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
88155 | 22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
23 ;; Boston, MA 02110-1301, USA. | |
31717 | 24 |
25 ;;; Commentary: | |
26 | |
27 ;;; Code: | |
28 | |
29 (eval-and-compile | |
30 (or (fboundp 'base64-decode-region) | |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
31 (require 'base64))) |
32409
9373d6d073ed
Don't require `mm-uu' at compile-time; it leads
Gerd Moellmann <gerd@gnu.org>
parents:
32209
diff
changeset
|
32 |
32628 | 33 (eval-when-compile |
34 (defvar mm-uu-decode-function) | |
35 (defvar mm-uu-binhex-decode-function)) | |
31717 | 36 |
37 (require 'mm-util) | |
38 (require 'rfc2047) | |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
39 (require 'mm-encode) |
31717 | 40 |
88155 | 41 ;; 8bit treatment gets any char except: 0x32 - 0x7f, LF, TAB, BEL, |
31717 | 42 ;; BS, vertical TAB, form feed, and ^_ |
88155 | 43 ;; |
44 ;; Note that CR is *not* included, as that would allow a non-paired CR | |
45 ;; in the body contrary to RFC 2822: | |
46 ;; | |
47 ;; - CR and LF MUST only occur together as CRLF; they MUST NOT | |
48 ;; appear independently in the body. | |
49 | |
50 (defvar mm-7bit-chars "\x20-\x7f\n\t\x7\x8\xb\xc\x1f") | |
31717 | 51 |
52 (defcustom mm-body-charset-encoding-alist | |
53 '((iso-2022-jp . 7bit) | |
88155 | 54 (iso-2022-jp-2 . 7bit) |
55 ;; We MUST encode UTF-16 because it can contain \0's which is | |
56 ;; known to break servers. | |
57 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781], | |
58 ;; so this can't happen :-/. | |
59 (utf-16 . base64) | |
60 (utf-16be . base64) | |
61 (utf-16le . base64)) | |
31717 | 62 "Alist of MIME charsets to encodings. |
63 Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." | |
64 :type '(repeat (cons (symbol :tag "charset") | |
65 (choice :tag "encoding" | |
66 (const 7bit) | |
67 (const 8bit) | |
68 (const quoted-printable) | |
69 (const base64)))) | |
70 :group 'mime) | |
71 | |
88155 | 72 (defun mm-encode-body (&optional charset) |
31717 | 73 "Encode a body. |
74 Should be called narrowed to the body that is to be encoded. | |
88155 | 75 If there is more than one non-ASCII MULE charset in the body, then the |
76 list of MULE charsets found is returned. | |
77 If CHARSET is non-nil, it is used as the MIME charset to encode the body. | |
31717 | 78 If successful, the MIME charset is returned. |
79 If no encoding was done, nil is returned." | |
33343
074ad4abf8e0
(mm-encode-body): Use mm-multibyte-p, don't just
Dave Love <fx@gnu.org>
parents:
32628
diff
changeset
|
80 (if (not (mm-multibyte-p)) |
31717 | 81 ;; In the non-Mule case, we search for non-ASCII chars and |
82 ;; return the value of `mail-parse-charset' if any are found. | |
88155 | 83 (or charset |
84 (save-excursion | |
85 (goto-char (point-min)) | |
86 (if (re-search-forward "[^\x0-\x7f]" nil t) | |
87 (or mail-parse-charset | |
88 (message-options-get 'mm-encody-body-charset) | |
89 (message-options-set | |
90 'mm-encody-body-charset | |
91 (mm-read-coding-system "Charset used in the article: "))) | |
92 ;; The logic in `mml-generate-mime-1' confirms that it's OK | |
93 ;; to return nil here. | |
94 nil))) | |
31717 | 95 (save-excursion |
88155 | 96 (if charset |
97 (progn | |
98 (mm-encode-coding-region (point-min) (point-max) | |
99 (mm-charset-to-coding-system charset)) | |
100 charset) | |
101 (goto-char (point-min)) | |
102 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max) | |
103 mm-hack-charsets))) | |
104 (cond | |
105 ;; No encoding. | |
106 ((null charsets) | |
107 nil) | |
108 ;; Too many charsets. | |
109 ((> (length charsets) 1) | |
110 charsets) | |
111 ;; We encode. | |
112 (t | |
113 (prog1 | |
114 (setq charset (car charsets)) | |
115 (mm-encode-coding-region (point-min) (point-max) | |
116 (mm-charset-to-coding-system charset)))) | |
117 )))))) | |
31717 | 118 |
88155 | 119 (defun mm-long-lines-p (length) |
120 "Say whether any of the lines in the buffer is longer than LENGTH." | |
121 (save-excursion | |
122 (goto-char (point-min)) | |
123 (end-of-line) | |
124 (while (and (not (eobp)) | |
125 (not (> (current-column) length))) | |
126 (forward-line 1) | |
127 (end-of-line)) | |
128 (and (> (current-column) length) | |
129 (current-column)))) | |
130 | |
131 (defvar message-posting-charset) | |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
132 |
31717 | 133 (defun mm-body-encoding (charset &optional encoding) |
134 "Do Content-Transfer-Encoding and return the encoding of the current buffer." | |
88155 | 135 (when (stringp encoding) |
136 (setq encoding (intern (downcase encoding)))) | |
137 (let ((bits (mm-body-7-or-8)) | |
138 (longp (mm-long-lines-p 1000))) | |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
139 (require 'message) |
31717 | 140 (cond |
88155 | 141 ((and (not longp) |
142 (not (and mm-use-ultra-safe-encoding | |
143 (or (save-excursion (re-search-forward " $" nil t)) | |
144 (save-excursion (re-search-forward "^From " nil t))))) | |
145 (eq bits '7bit)) | |
31717 | 146 bits) |
147 ((and (not mm-use-ultra-safe-encoding) | |
88155 | 148 (not longp) |
149 (not (cdr (assq charset mm-body-charset-encoding-alist))) | |
31717 | 150 (or (eq t (cdr message-posting-charset)) |
151 (memq charset (cdr message-posting-charset)) | |
152 (eq charset mail-parse-charset))) | |
153 bits) | |
154 (t | |
155 (let ((encoding (or encoding | |
156 (cdr (assq charset mm-body-charset-encoding-alist)) | |
157 (mm-qp-or-base64)))) | |
158 (when mm-use-ultra-safe-encoding | |
159 (setq encoding (mm-safer-encoding encoding))) | |
160 (mm-encode-content-transfer-encoding encoding "text/plain") | |
161 encoding))))) | |
162 | |
163 (defun mm-body-7-or-8 () | |
164 "Say whether the body is 7bit or 8bit." | |
47948
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
165 (if (save-excursion |
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
166 (goto-char (point-min)) |
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
167 (skip-chars-forward mm-7bit-chars) |
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
168 (eobp)) |
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
169 '7bit |
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
170 '8bit)) |
31717 | 171 |
172 ;;; | |
173 ;;; Functions for decoding | |
174 ;;; | |
175 | |
88155 | 176 (eval-when-compile (defvar mm-uu-yenc-decode-function)) |
177 | |
31717 | 178 (defun mm-decode-content-transfer-encoding (encoding &optional type) |
88155 | 179 "Decodes buffer encoded with ENCODING, returning success status. |
180 If TYPE is `text/plain' CRLF->LF translation may occur." | |
31717 | 181 (prog1 |
182 (condition-case error | |
183 (cond | |
184 ((eq encoding 'quoted-printable) | |
88155 | 185 (quoted-printable-decode-region (point-min) (point-max)) |
186 t) | |
31717 | 187 ((eq encoding 'base64) |
188 (base64-decode-region | |
189 (point-min) | |
190 ;; Some mailers insert whitespace | |
191 ;; junk at the end which | |
192 ;; base64-decode-region dislikes. | |
193 ;; Also remove possible junk which could | |
194 ;; have been added by mailing list software. | |
195 (save-excursion | |
196 (goto-char (point-min)) | |
197 (while (re-search-forward "^[\t ]*\r?\n" nil t) | |
198 (delete-region (match-beginning 0) (match-end 0))) | |
199 (goto-char (point-max)) | |
200 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) | |
88155 | 201 (forward-line)) |
202 (point)))) | |
31717 | 203 ((memq encoding '(7bit 8bit binary)) |
204 ;; Do nothing. | |
88155 | 205 t) |
31717 | 206 ((null encoding) |
207 ;; Do nothing. | |
88155 | 208 t) |
31717 | 209 ((memq encoding '(x-uuencode x-uue)) |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
210 (require 'mm-uu) |
88155 | 211 (funcall mm-uu-decode-function (point-min) (point-max)) |
212 t) | |
31717 | 213 ((eq encoding 'x-binhex) |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
214 (require 'mm-uu) |
88155 | 215 (funcall mm-uu-binhex-decode-function (point-min) (point-max)) |
216 t) | |
217 ((eq encoding 'x-yenc) | |
218 (require 'mm-uu) | |
219 (funcall mm-uu-yenc-decode-function (point-min) (point-max)) | |
220 ) | |
31717 | 221 ((functionp encoding) |
88155 | 222 (funcall encoding (point-min) (point-max)) |
223 t) | |
31717 | 224 (t |
225 (message "Unknown encoding %s; defaulting to 8bit" encoding))) | |
226 (error | |
227 (message "Error while decoding: %s" error) | |
228 nil)) | |
229 (when (and | |
88155 | 230 type |
231 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc)) | |
232 (string-match "\\`text/" type)) | |
31717 | 233 (goto-char (point-min)) |
234 (while (search-forward "\r\n" nil t) | |
235 (replace-match "\n" t t))))) | |
236 | |
237 (defun mm-decode-body (charset &optional encoding type) | |
88155 | 238 "Decode the current article that has been encoded with ENCODING to CHARSET. |
239 ENCODING is a MIME content transfer encoding. | |
240 CHARSET is the MIME charset with which to decode the data after transfer | |
241 decoding. If it is nil, default to `mail-parse-charset'." | |
242 (when (stringp charset) | |
243 (setq charset (intern (downcase charset)))) | |
244 (when (or (not charset) | |
245 (eq 'gnus-all mail-parse-ignored-charsets) | |
246 (memq 'gnus-all mail-parse-ignored-charsets) | |
247 (memq charset mail-parse-ignored-charsets)) | |
248 (setq charset mail-parse-charset)) | |
31717 | 249 (save-excursion |
250 (when encoding | |
251 (mm-decode-content-transfer-encoding encoding type)) | |
88155 | 252 (when (featurep 'mule) ; Fixme: Wrong test for unibyte session. |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
253 (let ((coding-system (mm-charset-to-coding-system charset))) |
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
254 (if (and (not coding-system) |
31717 | 255 (listp mail-parse-ignored-charsets) |
256 (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
47948
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
257 (setq coding-system |
31717 | 258 (mm-charset-to-coding-system mail-parse-charset))) |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
259 (when (and charset coding-system |
31717 | 260 ;; buffer-file-coding-system |
261 ;;Article buffer is nil coding system | |
262 ;;in XEmacs | |
263 (mm-multibyte-p) | |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
264 (or (not (eq coding-system 'ascii)) |
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
265 (setq coding-system mail-parse-charset)) |
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
266 (not (eq coding-system 'gnus-decoded))) |
88155 | 267 (mm-decode-coding-region (point-min) (point-max) |
268 coding-system)) | |
269 (setq buffer-file-coding-system | |
270 (if (boundp 'last-coding-system-used) | |
271 (symbol-value 'last-coding-system-used) | |
272 coding-system)))))) | |
31717 | 273 |
274 (defun mm-decode-string (string charset) | |
275 "Decode STRING with CHARSET." | |
276 (when (stringp charset) | |
277 (setq charset (intern (downcase charset)))) | |
47948
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
278 (when (or (not charset) |
31717 | 279 (eq 'gnus-all mail-parse-ignored-charsets) |
280 (memq 'gnus-all mail-parse-ignored-charsets) | |
281 (memq charset mail-parse-ignored-charsets)) | |
282 (setq charset mail-parse-charset)) | |
283 (or | |
284 (when (featurep 'mule) | |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
285 (let ((coding-system (mm-charset-to-coding-system charset))) |
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
286 (if (and (not coding-system) |
31717 | 287 (listp mail-parse-ignored-charsets) |
288 (memq 'gnus-unknown mail-parse-ignored-charsets)) | |
47948
bce2c13027f2
(mm-body-7-or-8): Don't special-case Mule.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
289 (setq coding-system |
31717 | 290 (mm-charset-to-coding-system mail-parse-charset))) |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
291 (when (and charset coding-system |
31717 | 292 (mm-multibyte-p) |
32209
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
293 (or (not (eq coding-system 'ascii)) |
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
294 (setq coding-system mail-parse-charset))) |
cbbde5b20af5
Require mm-uu, Don't require qp, uudecode.
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
295 (mm-decode-coding-string string coding-system)))) |
31717 | 296 string)) |
297 | |
298 (provide 'mm-bodies) | |
299 | |
88155 | 300 ;;; arch-tag: 41104bb6-4443-4ca9-8d5c-ff87ecf27d8d |
38413
a26d9b55abb6
Some fixes to follow coding conventions in files from Gnus.
Pavel Janík <Pavel@Janik.cz>
parents:
33343
diff
changeset
|
301 ;;; mm-bodies.el ends here |