comparison lisp/gnus/mm-bodies.el @ 82951:0fde48feb604

Import Gnus 5.10 from the v5_10 branch of the Gnus repository.
author Andreas Schwab <schwab@suse.de>
date Thu, 22 Jul 2004 16:45:51 +0000
parents 695cf19ef79e
children c5e16264557d cce1c0ee76ee
comparison
equal deleted inserted replaced
56503:8bbd2323fbf2 82951:0fde48feb604
1 ;;; mm-bodies.el --- functions for decoding MIME things 1 ;;; mm-bodies.el --- Functions for decoding MIME things
2 ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. 2
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2003
4 ;; Free Software Foundation, Inc.
3 5
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> 7 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs. 8 ;; This file is part of GNU Emacs.
7 9
40 ;; BS, vertical TAB, form feed, and ^_ 42 ;; BS, vertical TAB, form feed, and ^_
41 (defvar mm-7bit-chars "\x20-\x7f\r\n\t\x7\x8\xb\xc\x1f") 43 (defvar mm-7bit-chars "\x20-\x7f\r\n\t\x7\x8\xb\xc\x1f")
42 44
43 (defcustom mm-body-charset-encoding-alist 45 (defcustom mm-body-charset-encoding-alist
44 '((iso-2022-jp . 7bit) 46 '((iso-2022-jp . 7bit)
45 (iso-2022-jp-2 . 7bit)) 47 (iso-2022-jp-2 . 7bit)
48 ;; We MUST encode UTF-16 because it can contain \0's which is
49 ;; known to break servers.
50 ;; Note: UTF-16 variants are invalid for text parts [RFC 2781],
51 ;; so this can't happen :-/.
52 (utf-16 . base64)
53 (utf-16be . base64)
54 (utf-16le . base64))
46 "Alist of MIME charsets to encodings. 55 "Alist of MIME charsets to encodings.
47 Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'." 56 Valid encodings are `7bit', `8bit', `quoted-printable' and `base64'."
48 :type '(repeat (cons (symbol :tag "charset") 57 :type '(repeat (cons (symbol :tag "charset")
49 (choice :tag "encoding" 58 (choice :tag "encoding"
50 (const 7bit) 59 (const 7bit)
51 (const 8bit) 60 (const 8bit)
52 (const quoted-printable) 61 (const quoted-printable)
53 (const base64)))) 62 (const base64))))
54 :group 'mime) 63 :group 'mime)
55 64
56 (defun mm-encode-body () 65 (defun mm-encode-body (&optional charset)
57 "Encode a body. 66 "Encode a body.
58 Should be called narrowed to the body that is to be encoded. 67 Should be called narrowed to the body that is to be encoded.
59 If there is more than one non-ASCII Mule charset, then the list of found 68 If there is more than one non-ASCII MULE charset in the body, then the
60 Mule charsets is returned. 69 list of MULE charsets found is returned.
70 If CHARSET is non-nil, it is used as the MIME charset to encode the body.
61 If successful, the MIME charset is returned. 71 If successful, the MIME charset is returned.
62 If no encoding was done, nil is returned." 72 If no encoding was done, nil is returned."
63 (if (not (mm-multibyte-p)) 73 (if (not (mm-multibyte-p))
64 ;; In the non-Mule case, we search for non-ASCII chars and 74 ;; In the non-Mule case, we search for non-ASCII chars and
65 ;; return the value of `mail-parse-charset' if any are found. 75 ;; return the value of `mail-parse-charset' if any are found.
66 (save-excursion 76 (or charset
77 (save-excursion
78 (goto-char (point-min))
79 (if (re-search-forward "[^\x0-\x7f]" nil t)
80 (or mail-parse-charset
81 (message-options-get 'mm-encody-body-charset)
82 (message-options-set
83 'mm-encody-body-charset
84 (mm-read-coding-system "Charset used in the article: ")))
85 ;; The logic in `mml-generate-mime-1' confirms that it's OK
86 ;; to return nil here.
87 nil)))
88 (save-excursion
89 (if charset
90 (progn
91 (mm-encode-coding-region (point-min) (point-max) charset)
92 charset)
67 (goto-char (point-min)) 93 (goto-char (point-min))
68 (if (re-search-forward "[^\x0-\x7f]" nil t) 94 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)
69 (or mail-parse-charset 95 mm-hack-charsets)))
70 (mm-read-charset "Charset used in the article: ")) 96 (cond
71 ;; The logic in `mml-generate-mime-1' confirms that it's OK 97 ;; No encoding.
72 ;; to return nil here. 98 ((null charsets)
73 nil)) 99 nil)
74 (save-excursion 100 ;; Too many charsets.
75 (goto-char (point-min)) 101 ((> (length charsets) 1)
76 (let ((charsets (mm-find-mime-charset-region (point-min) (point-max)))) 102 charsets)
77 (cond 103 ;; We encode.
78 ;; No encoding. 104 (t
79 ((null charsets) 105 (prog1
80 nil) 106 (setq charset (car charsets))
81 ;; Too many charsets. 107 (mm-encode-coding-region (point-min) (point-max)
82 ((> (length charsets) 1) 108 (mm-charset-to-coding-system charset))))
83 charsets) 109 ))))))
84 ;; We encode. 110
85 (t 111 (defun mm-long-lines-p (length)
86 (mm-encode-coding-region (point-min) (point-max) 112 "Say whether any of the lines in the buffer is longer than LENGTH."
87 (mm-charset-to-coding-system 113 (save-excursion
88 (car charsets))) 114 (goto-char (point-min))
89 (car charsets))))))) 115 (end-of-line)
90 116 (while (and (not (eobp))
91 (eval-when-compile (defvar message-posting-charset)) 117 (not (> (current-column) length)))
118 (forward-line 1)
119 (end-of-line))
120 (and (> (current-column) length)
121 (current-column))))
122
123 (defvar message-posting-charset)
92 124
93 (defun mm-body-encoding (charset &optional encoding) 125 (defun mm-body-encoding (charset &optional encoding)
94 "Do Content-Transfer-Encoding and return the encoding of the current buffer." 126 "Do Content-Transfer-Encoding and return the encoding of the current buffer."
95 (let ((bits (mm-body-7-or-8))) 127 (when (stringp encoding)
128 (setq encoding (intern (downcase encoding))))
129 (let ((bits (mm-body-7-or-8))
130 (longp (mm-long-lines-p 1000)))
96 (require 'message) 131 (require 'message)
97 (cond 132 (cond
98 ((and (not mm-use-ultra-safe-encoding) (eq bits '7bit)) 133 ((and (not longp)
134 (not (and mm-use-ultra-safe-encoding
135 (save-excursion (re-search-forward "^From " nil t))))
136 (eq bits '7bit))
99 bits) 137 bits)
100 ((and (not mm-use-ultra-safe-encoding) 138 ((and (not mm-use-ultra-safe-encoding)
139 (not longp)
140 (not (cdr (assq charset mm-body-charset-encoding-alist)))
101 (or (eq t (cdr message-posting-charset)) 141 (or (eq t (cdr message-posting-charset))
102 (memq charset (cdr message-posting-charset)) 142 (memq charset (cdr message-posting-charset))
103 (eq charset mail-parse-charset))) 143 (eq charset mail-parse-charset)))
104 bits) 144 bits)
105 (t 145 (t
122 162
123 ;;; 163 ;;;
124 ;;; Functions for decoding 164 ;;; Functions for decoding
125 ;;; 165 ;;;
126 166
167 (eval-when-compile (defvar mm-uu-yenc-decode-function))
168
127 (defun mm-decode-content-transfer-encoding (encoding &optional type) 169 (defun mm-decode-content-transfer-encoding (encoding &optional type)
170 "Decodes buffer encoded with ENCODING, returning success status.
171 If TYPE is `text/plain' CRLF->LF translation may occur."
128 (prog1 172 (prog1
129 (condition-case error 173 (condition-case error
130 (cond 174 (cond
131 ((eq encoding 'quoted-printable) 175 ((eq encoding 'quoted-printable)
132 (quoted-printable-decode-region (point-min) (point-max))) 176 (quoted-printable-decode-region (point-min) (point-max))
177 t)
133 ((eq encoding 'base64) 178 ((eq encoding 'base64)
134 (base64-decode-region 179 (base64-decode-region
135 (point-min) 180 (point-min)
136 ;; Some mailers insert whitespace 181 ;; Some mailers insert whitespace
137 ;; junk at the end which 182 ;; junk at the end which
142 (goto-char (point-min)) 187 (goto-char (point-min))
143 (while (re-search-forward "^[\t ]*\r?\n" nil t) 188 (while (re-search-forward "^[\t ]*\r?\n" nil t)
144 (delete-region (match-beginning 0) (match-end 0))) 189 (delete-region (match-beginning 0) (match-end 0)))
145 (goto-char (point-max)) 190 (goto-char (point-max))
146 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t) 191 (when (re-search-backward "^[A-Za-z0-9+/]+=*[\t ]*$" nil t)
147 (forward-line) 192 (forward-line))
148 (delete-region (point) (point-max))) 193 (point))))
149 (point-max))))
150 ((memq encoding '(7bit 8bit binary)) 194 ((memq encoding '(7bit 8bit binary))
151 ;; Do nothing. 195 ;; Do nothing.
152 ) 196 t)
153 ((null encoding) 197 ((null encoding)
154 ;; Do nothing. 198 ;; Do nothing.
155 ) 199 t)
156 ((memq encoding '(x-uuencode x-uue)) 200 ((memq encoding '(x-uuencode x-uue))
157 (require 'mm-uu) 201 (require 'mm-uu)
158 (funcall mm-uu-decode-function (point-min) (point-max))) 202 (funcall mm-uu-decode-function (point-min) (point-max))
203 t)
159 ((eq encoding 'x-binhex) 204 ((eq encoding 'x-binhex)
160 (require 'mm-uu) 205 (require 'mm-uu)
161 (funcall mm-uu-binhex-decode-function (point-min) (point-max))) 206 (funcall mm-uu-binhex-decode-function (point-min) (point-max))
207 t)
208 ((eq encoding 'x-yenc)
209 (require 'mm-uu)
210 (funcall mm-uu-yenc-decode-function (point-min) (point-max))
211 )
162 ((functionp encoding) 212 ((functionp encoding)
163 (funcall encoding (point-min) (point-max))) 213 (funcall encoding (point-min) (point-max))
214 t)
164 (t 215 (t
165 (message "Unknown encoding %s; defaulting to 8bit" encoding))) 216 (message "Unknown encoding %s; defaulting to 8bit" encoding)))
166 (error 217 (error
167 (message "Error while decoding: %s" error) 218 (message "Error while decoding: %s" error)
168 nil)) 219 nil))
169 (when (and 220 (when (and
170 (memq encoding '(base64 x-uuencode x-uue x-binhex)) 221 (memq encoding '(base64 x-uuencode x-uue x-binhex x-yenc))
171 (equal type "text/plain")) 222 (equal type "text/plain"))
172 (goto-char (point-min)) 223 (goto-char (point-min))
173 (while (search-forward "\r\n" nil t) 224 (while (search-forward "\r\n" nil t)
174 (replace-match "\n" t t))))) 225 (replace-match "\n" t t)))))
175 226
176 (defun mm-decode-body (charset &optional encoding type) 227 (defun mm-decode-body (charset &optional encoding type)
177 "Decode the current article that has been encoded with ENCODING. 228 "Decode the current article that has been encoded with ENCODING to CHARSET.
178 The characters in CHARSET should then be decoded." 229 ENCODING is a MIME content transfer encoding.
179 (if (stringp charset) 230 CHARSET is the MIME charset with which to decode the data after transfer
180 (setq charset (intern (downcase charset)))) 231 decoding. If it is nil, default to `mail-parse-charset'."
181 (if (or (not charset) 232 (when (stringp charset)
182 (eq 'gnus-all mail-parse-ignored-charsets) 233 (setq charset (intern (downcase charset))))
183 (memq 'gnus-all mail-parse-ignored-charsets) 234 (when (or (not charset)
184 (memq charset mail-parse-ignored-charsets)) 235 (eq 'gnus-all mail-parse-ignored-charsets)
185 (setq charset mail-parse-charset)) 236 (memq 'gnus-all mail-parse-ignored-charsets)
237 (memq charset mail-parse-ignored-charsets))
238 (setq charset mail-parse-charset))
186 (save-excursion 239 (save-excursion
187 (when encoding 240 (when encoding
188 (mm-decode-content-transfer-encoding encoding type)) 241 (mm-decode-content-transfer-encoding encoding type))
189 (when (featurep 'mule) 242 (when (featurep 'mule) ; Fixme: Wrong test for unibyte session.
190 (let ((coding-system (mm-charset-to-coding-system charset))) 243 (let ((coding-system (mm-charset-to-coding-system charset)))
191 (if (and (not coding-system) 244 (if (and (not coding-system)
192 (listp mail-parse-ignored-charsets) 245 (listp mail-parse-ignored-charsets)
193 (memq 'gnus-unknown mail-parse-ignored-charsets)) 246 (memq 'gnus-unknown mail-parse-ignored-charsets))
194 (setq coding-system 247 (setq coding-system
199 ;;in XEmacs 252 ;;in XEmacs
200 (mm-multibyte-p) 253 (mm-multibyte-p)
201 (or (not (eq coding-system 'ascii)) 254 (or (not (eq coding-system 'ascii))
202 (setq coding-system mail-parse-charset)) 255 (setq coding-system mail-parse-charset))
203 (not (eq coding-system 'gnus-decoded))) 256 (not (eq coding-system 'gnus-decoded)))
204 (mm-decode-coding-region (point-min) (point-max) coding-system)))))) 257 (mm-decode-coding-region (point-min) (point-max)
258 coding-system))
259 (setq buffer-file-coding-system
260 (if (boundp 'last-coding-system-used)
261 (symbol-value 'last-coding-system-used)
262 coding-system))))))
205 263
206 (defun mm-decode-string (string charset) 264 (defun mm-decode-string (string charset)
207 "Decode STRING with CHARSET." 265 "Decode STRING with CHARSET."
208 (when (stringp charset) 266 (when (stringp charset)
209 (setq charset (intern (downcase charset)))) 267 (setq charset (intern (downcase charset))))