Mercurial > emacs
comparison lisp/gnus/mm-bodies.el @ 82951:0fde48feb604
Import Gnus 5.10 from the v5_10 branch of the Gnus repository.
author | Andreas Schwab <schwab@suse.de> |
---|---|
date | Thu, 22 Jul 2004 16:45:51 +0000 |
parents | 695cf19ef79e |
children | c5e16264557d cce1c0ee76ee |
comparison
equal
deleted
inserted
replaced
56503:8bbd2323fbf2 | 82951:0fde48feb604 |
---|---|
1 ;;; mm-bodies.el --- functions for decoding MIME things | 1 ;;; mm-bodies.el --- Functions for decoding MIME things |
2 ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. | 2 |
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2003 | |
4 ;; Free Software Foundation, Inc. | |
3 | 5 |
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | 6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | 7 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> |
6 ;; This file is part of GNU Emacs. | 8 ;; This file is part of GNU Emacs. |
7 | 9 |
40 ;; BS, vertical TAB, form feed, and ^_ | 42 ;; BS, vertical TAB, form feed, and ^_ |
41 (defvar mm-7bit-chars "\x20-\x7f\r\n\t\x7\x8\xb\xc\x1f") | 43 (defvar mm-7bit-chars "\x20-\x7f\r\n\t\x7\x8\xb\xc\x1f") |
42 | 44 |
43 (defcustom mm-body-charset-encoding-alist | 45 (defcustom mm-body-charset-encoding-alist |
44 '((iso-2022-jp . 7bit) | 46 '((iso-2022-jp . 7bit) |
45 (iso-2022-jp-2 . 7bit)) | 47 (iso-2022-jp-2 . 7bit) |
48 ;; We MUST encode UTF-16 because it can contain \0's which is | |
49 ;; known to break servers. | |
50 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781], | |
51 ;; so this can't happen :-/. | |
52 (utf-16 . base64) | |
53 (utf-16be . base64) | |
54 (utf-16le . base64)) | |
46 "Alist of MIME charsets to encodings. | 55 "Alist of MIME charsets to encodings. |
47 Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." | 56 Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." |
48 :type '(repeat (cons (symbol :tag "charset") | 57 :type '(repeat (cons (symbol :tag "charset") |
49 (choice :tag "encoding" | 58 (choice :tag "encoding" |
50 (const 7bit) | 59 (const 7bit) |
51 (const 8bit) | 60 (const 8bit) |
52 (const quoted-printable) | 61 (const quoted-printable) |
53 (const base64)))) | 62 (const base64)))) |
54 :group 'mime) | 63 :group 'mime) |
55 | 64 |
56 (defun mm-encode-body () | 65 (defun mm-encode-body (&optional charset) |
57 "Encode a body. | 66 "Encode a body. |
58 Should be called narrowed to the body that is to be encoded. | 67 Should be called narrowed to the body that is to be encoded. |
59 If there is more than one non-ASCII Mule charset, then the list of found | 68 If there is more than one non-ASCII MULE charset in the body, then the |
60 Mule charsets is returned. | 69 list of MULE charsets found is returned. |
70 If CHARSET is non-nil, it is used as the MIME charset to encode the body. | |
61 If successful, the MIME charset is returned. | 71 If successful, the MIME charset is returned. |
62 If no encoding was done, nil is returned." | 72 If no encoding was done, nil is returned." |
63 (if (not (mm-multibyte-p)) | 73 (if (not (mm-multibyte-p)) |
64 ;; In the non-Mule case, we search for non-ASCII chars and | 74 ;; In the non-Mule case, we search for non-ASCII chars and |
65 ;; return the value of `mail-parse-charset' if any are found. | 75 ;; return the value of `mail-parse-charset' if any are found. |
66 (save-excursion | 76 (or charset |
77 (save-excursion | |
78 (goto-char (point-min)) | |
79 (if (re-search-forward "[^\x0-\x7f]" nil t) | |
80 (or mail-parse-charset | |
81 (message-options-get 'mm-encody-body-charset) | |
82 (message-options-set | |
83 'mm-encody-body-charset | |
84 (mm-read-coding-system "Charset used in the article: "))) | |
85 ;; The logic in `mml-generate-mime-1' confirms that it's OK | |
86 ;; to return nil here. | |
87 nil))) | |
88 (save-excursion | |
89 (if charset | |
90 (progn | |
91 (mm-encode-coding-region (point-min) (point-max) charset) | |
92 charset) | |
67 (goto-char (point-min)) | 93 (goto-char (point-min)) |
68 (if (re-search-forward "[^\x0-\x7f]" nil t) | 94 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max) |
69 (or mail-parse-charset | 95 mm-hack-charsets))) |
70 (mm-read-charset "Charset used in the article: ")) | 96 (cond |
71 ;; The logic in `mml-generate-mime-1' confirms that it's OK | 97 ;; No encoding. |
72 ;; to return nil here. | 98 ((null charsets) |
73 nil)) | 99 nil) |
74 (save-excursion | 100 ;; Too many charsets. |
75 (goto-char (point-min)) | 101 ((> (length charsets) 1) |
76 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)))) | 102 charsets) |
77 (cond | 103 ;; We encode. |
78 ;; No encoding. | 104 (t |
79 ((null charsets) | 105 (prog1 |
80 nil) | 106 (setq charset (car charsets)) |
81 ;; Too many charsets. | 107 (mm-encode-coding-region (point-min) (point-max) |
82 ((> (length charsets) 1) | 108 (mm-charset-to-coding-system charset)))) |
83 charsets) | 109 )))))) |
84 ;; We encode. | 110 |
85 (t | 111 (defun mm-long-lines-p (length) |
86 (mm-encode-coding-region (point-min) (point-max) | 112 "Say whether any of the lines in the buffer is longer than LENGTH." |
87 (mm-charset-to-coding-system | 113 (save-excursion |
88 (car charsets))) | 114 (goto-char (point-min)) |
89 (car charsets))))))) | 115 (end-of-line) |
90 | 116 (while (and (not (eobp)) |
91 (eval-when-compile (defvar message-posting-charset)) | 117 (not (> (current-column) length))) |
118 (forward-line 1) | |
119 (end-of-line)) | |
120 (and (> (current-column) length) | |
121 (current-column)))) | |
122 | |
123 (defvar message-posting-charset) | |
92 | 124 |
93 (defun mm-body-encoding (charset &optional encoding) | 125 (defun mm-body-encoding (charset &optional encoding) |
94 "Do Content-Transfer-Encoding and return the encoding of the current buffer." | 126 "Do Content-Transfer-Encoding and return the encoding of the current buffer." |
95 (let ((bits (mm-body-7-or-8))) | 127 (when (stringp encoding) |
128 (setq encoding (intern (downcase encoding)))) | |
129 (let ((bits (mm-body-7-or-8)) | |
130 (longp (mm-long-lines-p 1000))) | |
96 (require 'message) | 131 (require 'message) |
97 (cond | 132 (cond |
98 ((and (not mm-use-ultra-safe-encoding) (eq bits '7bit)) | 133 ((and (not longp) |
134 (not (and mm-use-ultra-safe-encoding | |
135 (save-excursion (re-search-forward "^From " nil t)))) | |
136 (eq bits '7bit)) | |
99 bits) | 137 bits) |
100 ((and (not mm-use-ultra-safe-encoding) | 138 ((and (not mm-use-ultra-safe-encoding) |
139 (not longp) | |
140 (not (cdr (assq charset mm-body-charset-encoding-alist))) | |
101 (or (eq t (cdr message-posting-charset)) | 141 (or (eq t (cdr message-posting-charset)) |
102 (memq charset (cdr message-posting-charset)) | 142 (memq charset (cdr message-posting-charset)) |
103 (eq charset mail-parse-charset))) | 143 (eq charset mail-parse-charset))) |
104 bits) | 144 bits) |
105 (t | 145 (t |
122 | 162 |
123 ;;; | 163 ;;; |
124 ;;; Functions for decoding | 164 ;;; Functions for decoding |
125 ;;; | 165 ;;; |
126 | 166 |
167 (eval-when-compile (defvar mm-uu-yenc-decode-function)) | |
168 | |
127 (defun mm-decode-content-transfer-encoding (encoding &optional type) | 169 (defun mm-decode-content-transfer-encoding (encoding &optional type) |
170 "Decodes buffer encoded with ENCODING, returning success status. | |
171 If TYPE is `text/plain' CRLF->LF translation may occur." | |
128 (prog1 | 172 (prog1 |
129 (condition-case error | 173 (condition-case error |
130 (cond | 174 (cond |
131 ((eq encoding 'quoted-printable) | 175 ((eq encoding 'quoted-printable) |
132 (quoted-printable-decode-region (point-min) (point-max))) | 176 (quoted-printable-decode-region (point-min) (point-max)) |
177 t) | |
133 ((eq encoding 'base64) | 178 ((eq encoding 'base64) |
134 (base64-decode-region | 179 (base64-decode-region |
135 (point-min) | 180 (point-min) |
136 ;; Some mailers insert whitespace | 181 ;; Some mailers insert whitespace |
137 ;; junk at the end which | 182 ;; junk at the end which |
142 (goto-char (point-min)) | 187 (goto-char (point-min)) |
143 (while (re-search-forward "^[\t ]*\r?\n" nil t) | 188 (while (re-search-forward "^[\t ]*\r?\n" nil t) |
144 (delete-region (match-beginning 0) (match-end 0))) | 189 (delete-region (match-beginning 0) (match-end 0))) |
145 (goto-char (point-max)) | 190 (goto-char (point-max)) |
146 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) | 191 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) |
147 (forward-line) | 192 (forward-line)) |
148 (delete-region (point) (point-max))) | 193 (point)))) |
149 (point-max)))) | |
150 ((memq encoding '(7bit 8bit binary)) | 194 ((memq encoding '(7bit 8bit binary)) |
151 ;; Do nothing. | 195 ;; Do nothing. |
152 ) | 196 t) |
153 ((null encoding) | 197 ((null encoding) |
154 ;; Do nothing. | 198 ;; Do nothing. |
155 ) | 199 t) |
156 ((memq encoding '(x-uuencode x-uue)) | 200 ((memq encoding '(x-uuencode x-uue)) |
157 (require 'mm-uu) | 201 (require 'mm-uu) |
158 (funcall mm-uu-decode-function (point-min) (point-max))) | 202 (funcall mm-uu-decode-function (point-min) (point-max)) |
203 t) | |
159 ((eq encoding 'x-binhex) | 204 ((eq encoding 'x-binhex) |
160 (require 'mm-uu) | 205 (require 'mm-uu) |
161 (funcall mm-uu-binhex-decode-function (point-min) (point-max))) | 206 (funcall mm-uu-binhex-decode-function (point-min) (point-max)) |
207 t) | |
208 ((eq encoding 'x-yenc) | |
209 (require 'mm-uu) | |
210 (funcall mm-uu-yenc-decode-function (point-min) (point-max)) | |
211 ) | |
162 ((functionp encoding) | 212 ((functionp encoding) |
163 (funcall encoding (point-min) (point-max))) | 213 (funcall encoding (point-min) (point-max)) |
214 t) | |
164 (t | 215 (t |
165 (message "Unknown encoding %s; defaulting to 8bit" encoding))) | 216 (message "Unknown encoding %s; defaulting to 8bit" encoding))) |
166 (error | 217 (error |
167 (message "Error while decoding: %s" error) | 218 (message "Error while decoding: %s" error) |
168 nil)) | 219 nil)) |
169 (when (and | 220 (when (and |
170 (memq encoding '(base64 x-uuencode x-uue x-binhex)) | 221 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc)) |
171 (equal type "text/plain")) | 222 (equal type "text/plain")) |
172 (goto-char (point-min)) | 223 (goto-char (point-min)) |
173 (while (search-forward "\r\n" nil t) | 224 (while (search-forward "\r\n" nil t) |
174 (replace-match "\n" t t))))) | 225 (replace-match "\n" t t))))) |
175 | 226 |
176 (defun mm-decode-body (charset &optional encoding type) | 227 (defun mm-decode-body (charset &optional encoding type) |
177 "Decode the current article that has been encoded with ENCODING. | 228 "Decode the current article that has been encoded with ENCODING to CHARSET. |
178 The characters in CHARSET should then be decoded." | 229 ENCODING is a MIME content transfer encoding. |
179 (if (stringp charset) | 230 CHARSET is the MIME charset with which to decode the data after transfer |
180 (setq charset (intern (downcase charset)))) | 231 decoding. If it is nil, default to `mail-parse-charset'." |
181 (if (or (not charset) | 232 (when (stringp charset) |
182 (eq 'gnus-all mail-parse-ignored-charsets) | 233 (setq charset (intern (downcase charset)))) |
183 (memq 'gnus-all mail-parse-ignored-charsets) | 234 (when (or (not charset) |
184 (memq charset mail-parse-ignored-charsets)) | 235 (eq 'gnus-all mail-parse-ignored-charsets) |
185 (setq charset mail-parse-charset)) | 236 (memq 'gnus-all mail-parse-ignored-charsets) |
237 (memq charset mail-parse-ignored-charsets)) | |
238 (setq charset mail-parse-charset)) | |
186 (save-excursion | 239 (save-excursion |
187 (when encoding | 240 (when encoding |
188 (mm-decode-content-transfer-encoding encoding type)) | 241 (mm-decode-content-transfer-encoding encoding type)) |
189 (when (featurep 'mule) | 242 (when (featurep 'mule) ; Fixme: Wrong test for unibyte session. |
190 (let ((coding-system (mm-charset-to-coding-system charset))) | 243 (let ((coding-system (mm-charset-to-coding-system charset))) |
191 (if (and (not coding-system) | 244 (if (and (not coding-system) |
192 (listp mail-parse-ignored-charsets) | 245 (listp mail-parse-ignored-charsets) |
193 (memq 'gnus-unknown mail-parse-ignored-charsets)) | 246 (memq 'gnus-unknown mail-parse-ignored-charsets)) |
194 (setq coding-system | 247 (setq coding-system |
199 ;;in XEmacs | 252 ;;in XEmacs |
200 (mm-multibyte-p) | 253 (mm-multibyte-p) |
201 (or (not (eq coding-system 'ascii)) | 254 (or (not (eq coding-system 'ascii)) |
202 (setq coding-system mail-parse-charset)) | 255 (setq coding-system mail-parse-charset)) |
203 (not (eq coding-system 'gnus-decoded))) | 256 (not (eq coding-system 'gnus-decoded))) |
204 (mm-decode-coding-region (point-min) (point-max) coding-system)))))) | 257 (mm-decode-coding-region (point-min) (point-max) |
258 coding-system)) | |
259 (setq buffer-file-coding-system | |
260 (if (boundp 'last-coding-system-used) | |
261 (symbol-value 'last-coding-system-used) | |
262 coding-system)))))) | |
205 | 263 |
206 (defun mm-decode-string (string charset) | 264 (defun mm-decode-string (string charset) |
207 "Decode STRING with CHARSET." | 265 "Decode STRING with CHARSET." |
208 (when (stringp charset) | 266 (when (stringp charset) |
209 (setq charset (intern (downcase charset)))) | 267 (setq charset (intern (downcase charset)))) |