comparison lisp/gnus/rfc2047.el @ 47945:3915f2c7691e

(message-posting-charset): defvar when compiling. (ietf-drums, gnus-util): Don't require. (rfc2047-header-encoding-alist): Add `address-mime' part. Doc fixes. (rfc2047-charset-encoding-alist): Use B for iso-8859-7, iso-8859-8. (rfc2047-q-encoding-alist): Augment header list. (rfc2047-encoding-type): New. (rfc2047-dissect-region): Deleted. (rfc2047-encode-region, rfc2047-encode): Rewritten to take account of rfc2047 rules with respect to rfc2822 tokens and to do encoding in place rather than by passing strings. (rfc2047-encode-message-header): Don't include header name field in encoding. Add `address-mime' case and bind rfc2047-encoding-type for `mime' case. (rfc2047-encode-string): Doc fix. (rfc2047-encode): Use longer chunks for base64. (rfc2047-fold-region): Insert single characters, not strings. (rfc2047-encoded-word-regexp): Wrap in eval-and-compile. (rfc2047-decode-region): Avoid consing regexp in loop.
author Dave Love <fx@gnu.org>
date Fri, 18 Oct 2002 10:52:56 +0000
parents a26d9b55abb6
children 9cd6016af581
comparison
equal deleted inserted replaced
47944:03cfc305a0fa 47945:3915f2c7691e
1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages 1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. 2 ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc.
3 3
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> 5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
6 ;; This file is part of GNU Emacs. 6 ;; This file is part of GNU Emacs.
7 7
25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part 25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
26 ;; Three: Message Header Extensions for Non-ASCII Text". 26 ;; Three: Message Header Extensions for Non-ASCII Text".
27 27
28 ;;; Code: 28 ;;; Code:
29 29
30 (eval-when-compile (require 'cl)) 30 (eval-when-compile
31 (require 'cl)
32 (defvar message-posting-charset))
31 33
32 (require 'qp) 34 (require 'qp)
33 (require 'mm-util) 35 (require 'mm-util)
34 (require 'ietf-drums) 36 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
35 (require 'mail-prsvr) 37 (require 'mail-prsvr)
36 (require 'base64) 38 (require 'base64)
37 ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus.
38 (require 'gnus-util)
39 (autoload 'mm-body-7-or-8 "mm-bodies") 39 (autoload 'mm-body-7-or-8 "mm-bodies")
40 40
41 (defvar rfc2047-header-encoding-alist 41 (defvar rfc2047-header-encoding-alist
42 '(("Newsgroups" . nil) 42 '(("Newsgroups" . nil)
43 ("Message-ID" . nil) 43 ("Message-ID" . nil)
44 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" .
45 address-mime)
44 (t . mime)) 46 (t . mime))
45 "*Header/encoding method alist. 47 "*Header/encoding method alist.
46 The list is traversed sequentially. The keys can either be 48 The list is traversed sequentially. The keys can either be
47 header regexps or t. 49 header regexps or t.
48 50
49 The values can be: 51 The values can be:
50 52
51 1) nil, in which case no encoding is done; 53 1) nil, in which case no encoding is done;
52 2) `mime', in which case the header will be encoded according to RFC2047; 54 2) `mime', in which case the header will be encoded according to RFC2047;
53 3) a charset, in which case it will be encoded as that charset; 55 3) `address-mime', like `mime', but takes account of the rules for address
54 4) `default', in which case the field will be encoded as the rest 56 fields (where quoted strings and comments must be treated separately);
57 4) a charset, in which case it will be encoded as that charset;
58 5) `default', in which case the field will be encoded as the rest
55 of the article.") 59 of the article.")
56 60
57 (defvar rfc2047-charset-encoding-alist 61 (defvar rfc2047-charset-encoding-alist
58 '((us-ascii . nil) 62 '((us-ascii . nil)
59 (iso-8859-1 . Q) 63 (iso-8859-1 . Q)
60 (iso-8859-2 . Q) 64 (iso-8859-2 . Q)
61 (iso-8859-3 . Q) 65 (iso-8859-3 . Q)
62 (iso-8859-4 . Q) 66 (iso-8859-4 . Q)
63 (iso-8859-5 . B) 67 (iso-8859-5 . B)
64 (koi8-r . B) 68 (koi8-r . B)
65 (iso-8859-7 . Q) 69 (iso-8859-7 . B)
66 (iso-8859-8 . Q) 70 (iso-8859-8 . B)
67 (iso-8859-9 . Q) 71 (iso-8859-9 . Q)
68 (iso-8859-14 . Q) 72 (iso-8859-14 . Q)
69 (iso-8859-15 . Q) 73 (iso-8859-15 . Q)
70 (iso-2022-jp . B) 74 (iso-2022-jp . B)
71 (iso-2022-kr . B) 75 (iso-2022-kr . B)
76 (cn-gb-2312 . B) 80 (cn-gb-2312 . B)
77 (euc-kr . B) 81 (euc-kr . B)
78 (iso-2022-jp-2 . B) 82 (iso-2022-jp-2 . B)
79 (iso-2022-int-1 . B)) 83 (iso-2022-int-1 . B))
80 "Alist of MIME charsets to RFC2047 encodings. 84 "Alist of MIME charsets to RFC2047 encodings.
81 Valid encodings are nil, `Q' and `B'.") 85 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
86 quoted-printable and base64 respectively.")
82 87
83 (defvar rfc2047-encoding-function-alist 88 (defvar rfc2047-encoding-function-alist
84 '((Q . rfc2047-q-encode-region) 89 '((Q . rfc2047-q-encode-region)
85 (B . rfc2047-b-encode-region) 90 (B . rfc2047-b-encode-region)
86 (nil . ignore)) 91 (nil . ignore))
87 "Alist of RFC2047 encodings to encoding functions.") 92 "Alist of RFC2047 encodings to encoding functions.")
88 93
89 (defvar rfc2047-q-encoding-alist 94 (defvar rfc2047-q-encoding-alist
90 '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") 95 '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):"
96 . "-A-Za-z0-9!*+/" )
91 ;; = (\075), _ (\137), ? (\077) are used in the encoded word. 97 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
92 ;; Avoid using 8bit characters. 98 ;; Avoid using 8bit characters.
93 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" 99 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
94 ("." . "\010\012\014\040-\074\076\100-\136\140-\177")) 100 ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
95 "Alist of header regexps and valid Q characters.") 101 "Alist of header regexps and valid Q characters.")
109 (progn 115 (progn
110 (beginning-of-line) 116 (beginning-of-line)
111 (point)) 117 (point))
112 (point-max)))) 118 (point-max))))
113 (goto-char (point-min))) 119 (goto-char (point-min)))
120
121 (defvar rfc2047-encoding-type 'address-mime
122 "The type of encoding done by `rfc2047-encode-region'.
123 This should be dynamically bound around calls to
124 `rfc2047-encode-region' to either `mime' or `address-mime'. See
125 `rfc2047-header-encoding-alist', for definitions.")
114 126
115 (defun rfc2047-encode-message-header () 127 (defun rfc2047-encode-message-header ()
116 "Encode the message header according to `rfc2047-header-encoding-alist'. 128 "Encode the message header according to `rfc2047-header-encoding-alist'.
117 Should be called narrowed to the head of the message." 129 Should be called narrowed to the head of the message."
118 (interactive "*") 130 (interactive "*")
139 (when (or (and (stringp (car elem)) 151 (when (or (and (stringp (car elem))
140 (looking-at (car elem))) 152 (looking-at (car elem)))
141 (eq (car elem) t)) 153 (eq (car elem) t))
142 (setq alist nil 154 (setq alist nil
143 method (cdr elem)))) 155 method (cdr elem))))
156 (goto-char (point-min))
157 (re-search-forward "^[^:]+: *" nil t)
144 (cond 158 (cond
159 ((eq method 'address-mime)
160 (rfc2047-encode-region (point) (point-max)))
145 ((eq method 'mime) 161 ((eq method 'mime)
146 (rfc2047-encode-region (point-min) (point-max))) 162 (let (rfc2047-encoding-type)
163 (rfc2047-encode-region (point) (point-max))))
147 ((eq method 'default) 164 ((eq method 'default)
148 (if (and (featurep 'mule) 165 (if (and (featurep 'mule)
149 (if (boundp 'default-enable-multibyte-characters) 166 (if (boundp 'default-enable-multibyte-characters)
150 default-enable-multibyte-characters) 167 default-enable-multibyte-characters)
151 mail-parse-charset) 168 mail-parse-charset)
152 (mm-encode-coding-region (point-min) (point-max) 169 (mm-encode-coding-region (point) (point-max)
153 mail-parse-charset))) 170 mail-parse-charset)))
154 ((mm-coding-system-p method) 171 ((mm-coding-system-p method)
155 (if (and (featurep 'mule) 172 (if (and (featurep 'mule)
156 (if (boundp 'default-enable-multibyte-characters) 173 (if (boundp 'default-enable-multibyte-characters)
157 default-enable-multibyte-characters)) 174 default-enable-multibyte-characters))
158 (mm-encode-coding-region (point-min) (point-max) method))) 175 (mm-encode-coding-region (point) (point-max) method)))
159 ;; Hm. 176 ;; Hm.
160 (t))) 177 (t)))
161 (goto-char (point-max))))))) 178 (goto-char (point-max)))))))
162 179
163 ;; Fixme: This, and the require below may not be the Right Thing, but 180 ;; Fixme: This, and the require below may not be the Right Thing, but
167 (defun rfc2047-encodable-p () 184 (defun rfc2047-encodable-p ()
168 "Return non-nil if any characters in current buffer need encoding in headers. 185 "Return non-nil if any characters in current buffer need encoding in headers.
169 The buffer may be narrowed." 186 The buffer may be narrowed."
170 (require 'message) ; for message-posting-charset 187 (require 'message) ; for message-posting-charset
171 (let ((charsets 188 (let ((charsets
172 (mapcar 189 (mm-find-mime-charset-region (point-min) (point-max))))
173 'mm-mime-charset 190 (and charsets (not (equal charsets (list message-posting-charset))))))
174 (mm-find-charset-region (point-min) (point-max)))) 191
175 (cs (list 'us-ascii (car message-posting-charset))) 192 ;; Use this syntax table when parsing into regions that may need
176 found) 193 ;; encoding. Double quotes are string delimiters, backslash is
177 (while charsets 194 ;; character quoting, and all other RFC 2822 special characters are
178 (unless (memq (pop charsets) cs) 195 ;; treated as punctuation so we can use forward-sexp/forward-word to
179 (setq found t))) 196 ;; skip to the end of regions appropriately. Nb. ietf-drums does
180 found)) 197 ;; things differently.
181 198 (defconst rfc2047-syntax-table
182 (defun rfc2047-dissect-region (b e) 199 (let ((table (make-char-table 'syntax-table '(2))))
183 "Dissect the region between B and E into words." 200 (modify-syntax-entry ?\\ "\\" table)
184 (let ((word-chars "-A-Za-z0-9!*+/") 201 (modify-syntax-entry ?\" "\"" table)
185 ;; Not using ietf-drums-specials-token makes life simple. 202 (modify-syntax-entry ?\( "." table)
186 mail-parse-mule-charset 203 (modify-syntax-entry ?\) "." table)
187 words point current 204 (modify-syntax-entry ?\< "." table)
188 result word) 205 (modify-syntax-entry ?\> "." table)
189 (save-restriction 206 (modify-syntax-entry ?\[ "." table)
190 (narrow-to-region b e) 207 (modify-syntax-entry ?\] "." table)
191 (goto-char (point-min)) 208 (modify-syntax-entry ?: "." table)
192 (skip-chars-forward "\000-\177") 209 (modify-syntax-entry ?\; "." table)
193 (while (not (eobp)) 210 (modify-syntax-entry ?, "." table)
194 (setq point (point)) 211 (modify-syntax-entry ?@ "." table)
195 (skip-chars-backward word-chars b) 212 table))
196 (unless (eq b (point))
197 (push (cons (buffer-substring b (point)) nil) words))
198 (setq b (point))
199 (goto-char point)
200 (setq current (mm-charset-after))
201 (forward-char 1)
202 (skip-chars-forward word-chars)
203 (while (and (not (eobp))
204 (eq (mm-charset-after) current))
205 (forward-char 1)
206 (skip-chars-forward word-chars))
207 (unless (eq b (point))
208 (push (cons (buffer-substring b (point)) current) words))
209 (setq b (point))
210 (skip-chars-forward "\000-\177"))
211 (unless (eq b (point))
212 (push (cons (buffer-substring b (point)) nil) words)))
213 ;; merge adjacent words
214 (setq word (pop words))
215 (while word
216 (if (and (cdr word)
217 (caar words)
218 (not (cdar words))
219 (not (string-match "[^ \t]" (caar words))))
220 (if (eq (cdr (nth 1 words)) (cdr word))
221 (progn
222 (setq word (cons (concat
223 (car (nth 1 words)) (caar words)
224 (car word))
225 (cdr word)))
226 (pop words)
227 (pop words))
228 (push (cons (concat (caar words) (car word)) (cdr word))
229 result)
230 (pop words)
231 (setq word (pop words)))
232 (push word result)
233 (setq word (pop words))))
234 result))
235 213
236 (defun rfc2047-encode-region (b e) 214 (defun rfc2047-encode-region (b e)
237 "Encode all encodable words in region B to E." 215 "Encode words in region B to E that need encoding.
238 (let ((words (rfc2047-dissect-region b e)) word) 216 By default, the region is treated as containing RFC2822 addresses.
239 (save-restriction 217 Dynamically bind `rfc2047-encoding-type' to change that."
240 (narrow-to-region b e) 218 (save-restriction
241 (delete-region (point-min) (point-max)) 219 (narrow-to-region b e)
242 (while (setq word (pop words)) 220 (if (eq 'mime rfc2047-encoding-type)
243 (if (not (cdr word)) 221 ;; Simple case -- treat as single word.
244 (insert (car word)) 222 (progn
245 (rfc2047-fold-region (gnus-point-at-bol) (point)) 223 (goto-char (point-min))
246 (goto-char (point-max)) 224 ;; Does it need encoding?
247 (if (> (- (point) (save-restriction 225 (skip-chars-forward "\000-\177" e)
248 (widen) 226 (unless (eobp)
249 (gnus-point-at-bol))) 76) 227 (rfc2047-encode b e)))
250 (insert "\n ")) 228 ;; `address-mime' case -- take care of quoted words, comments.
251 ;; Insert blank between encoded words 229 (with-syntax-table rfc2047-syntax-table
252 (if (eq (char-before) ?=) (insert " ")) 230 (let ((start (point)) ; start of current token
253 (rfc2047-encode (point) 231 end ; end of current token
254 (progn (insert (car word)) (point)) 232 ;; Whether there's an encoded word before the current
255 (cdr word)))) 233 ;; tpken, either immediately or separated by space.
256 (rfc2047-fold-region (point-min) (point-max))))) 234 last-encoded)
235 (goto-char (point-min))
236 (condition-case nil ; in case of unbalanced quotes
237 ;; Look for rfc2822-style: sequences of atoms, quoted
238 ;; strings, specials, whitespace. (Specials mustn't be
239 ;; encoded.)
240 (while (not (eobp))
241 (setq start (point))
242 ;; Skip whitespace.
243 (unless (= 0 (skip-chars-forward " \t"))
244 (setq start (point)))
245 (cond
246 ((not (char-after))) ; eob
247 ;; else token start
248 ((eq ?\" (char-syntax (char-after)))
249 ;; Quoted word.
250 (forward-sexp)
251 (setq end (point))
252 ;; Does it need encoding?
253 (goto-char start)
254 (skip-chars-forward "\000-\177" end)
255 (if (= end (point))
256 (setq last-encoded nil)
257 ;; It needs encoding. Strip the quotes first,
258 ;; since encoded words can't occur in quotes.
259 (goto-char end)
260 (delete-backward-char 1)
261 (goto-char start)
262 (delete-char 1)
263 (when last-encoded
264 ;; There was a preceding quoted word. We need
265 ;; to include any separating whitespace in this
266 ;; word to avoid it getting lost.
267 (skip-chars-backward " \t")
268 ;; A space is needed between the encoded words.
269 (insert ? )
270 (setq start (point)
271 end (1+ end)))
272 ;; Adjust the end position for the deleted quotes.
273 (rfc2047-encode start (- end 2))
274 (setq last-encoded t))) ; record that it was encoded
275 ((eq ?. (char-syntax (char-after)))
276 ;; Skip other delimiters, but record that they've
277 ;; potentially separated quoted words.
278 (forward-char)
279 (setq last-encoded nil))
280 (t ; normal token/whitespace sequence
281 ;; Find the end.
282 (forward-word 1)
283 (skip-chars-backward " \t")
284 (setq end (point))
285 ;; Deal with encoding and leading space as for
286 ;; quoted words.
287 (goto-char start)
288 (skip-chars-forward "\000-\177" end)
289 (if (= end (point))
290 (setq last-encoded nil)
291 (when last-encoded
292 (goto-char start)
293 (skip-chars-backward " \t")
294 (insert ? )
295 (setq start (point)
296 end (1+ end)))
297 (rfc2047-encode start end)
298 (setq last-encoded t)))))
299 (error (error "Invalid data for rfc2047 encoding: %s"
300 (buffer-substring b e)))))))
301 (rfc2047-fold-region b (point))))
257 302
258 (defun rfc2047-encode-string (string) 303 (defun rfc2047-encode-string (string)
259 "Encode words in STRING." 304 "Encode words in STRING.
305 By default, the string is treated as containing addresses (see
306 `rfc2047-special-chars')."
260 (with-temp-buffer 307 (with-temp-buffer
261 (insert string) 308 (insert string)
262 (rfc2047-encode-region (point-min) (point-max)) 309 (rfc2047-encode-region (point-min) (point-max))
263 (buffer-string))) 310 (buffer-string)))
264 311
265 (defun rfc2047-encode (b e charset) 312 (defun rfc2047-encode (b e)
266 "Encode the word in the region B to E with CHARSET." 313 "Encode the word(s) in the region B to E.
267 (let* ((mime-charset (mm-mime-charset charset)) 314 By default, the region is treated as containing addresses (see
268 (cs (mm-charset-to-coding-system mime-charset)) 315 `rfc2047-special-chars')."
269 (encoding (or (cdr (assq mime-charset 316 (let* ((mime-charset (mm-find-mime-charset-region b e))
317 (cs (if (> (length mime-charset) 1)
318 ;; Fixme: Instead of this, try to break region into
319 ;; parts that can be encoded separately.
320 (error "Can't rfc2047-encode `%s'"
321 (buffer-substring b e))
322 (setq mime-charset (car mime-charset))
323 (mm-charset-to-coding-system mime-charset)))
324 ;; Fixme: Better, calculate the number of non-ASCII
325 ;; characters, at least for 8-bit charsets.
326 (encoding (if (assq mime-charset
327 rfc2047-charset-encoding-alist)
328 (cdr (assq mime-charset
270 rfc2047-charset-encoding-alist)) 329 rfc2047-charset-encoding-alist))
271 'B)) 330 'B))
272 (start (concat 331 (start (concat
273 "=?" (downcase (symbol-name mime-charset)) "?" 332 "=?" (downcase (symbol-name mime-charset)) "?"
274 (downcase (symbol-name encoding)) "?")) 333 (downcase (symbol-name encoding)) "?"))
275 (first t)) 334 (first t))
276 (save-restriction 335 (if mime-charset
277 (narrow-to-region b e) 336 (save-restriction
278 (when (eq encoding 'B) 337 (narrow-to-region b e)
279 ;; break into lines before encoding 338 (when (eq encoding 'B)
280 (goto-char (point-min)) 339 ;; break into lines before encoding
281 (while (not (eobp)) 340 (goto-char (point-min))
282 (goto-char (min (point-max) (+ 15 (point)))) 341 (while (not (eobp))
283 (unless (eobp) 342 (goto-char (min (point-max) (+ 15 (point))))
284 (insert "\n")))) 343 (unless (eobp)
285 (if (and (mm-multibyte-p) 344 (insert ?\n))))
286 (mm-coding-system-p cs)) 345 (if (and (mm-multibyte-p)
287 (mm-encode-coding-region (point-min) (point-max) cs)) 346 (mm-coding-system-p cs))
288 (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) 347 (mm-encode-coding-region (point-min) (point-max) cs))
289 (point-min) (point-max)) 348 (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
290 (goto-char (point-min)) 349 (point-min) (point-max))
291 (while (not (eobp)) 350 (goto-char (point-min))
292 (unless first 351 (while (not (eobp))
293 (insert " ")) 352 (unless first
294 (setq first nil) 353 (insert ? ))
295 (insert start) 354 (setq first nil)
296 (end-of-line) 355 (insert start)
297 (insert "?=") 356 (end-of-line)
298 (forward-line 1))))) 357 (insert "?=")
358 (forward-line 1))))))
299 359
300 (defun rfc2047-fold-region (b e) 360 (defun rfc2047-fold-region (b e)
301 "Fold long lines in region B to E." 361 "Fold long lines in region B to E."
302 (save-restriction 362 (save-restriction
303 (narrow-to-region b e) 363 (narrow-to-region b e)
304 (goto-char (point-min)) 364 (goto-char (point-min))
305 (let ((break nil) 365 (let ((break nil)
306 (qword-break nil) 366 (qword-break nil)
307 (bol (save-restriction 367 (bol (save-restriction
308 (widen) 368 (widen)
309 (gnus-point-at-bol)))) 369 (mm-point-at-bol))))
310 (while (not (eobp)) 370 (while (not (eobp))
311 (when (and (or break qword-break) (> (- (point) bol) 76)) 371 (when (and (or break qword-break) (> (- (point) bol) 76))
312 (goto-char (or break qword-break)) 372 (goto-char (or break qword-break))
313 (setq break nil 373 (setq break nil
314 qword-break nil) 374 qword-break nil)
315 (if (looking-at " \t") 375 (if (looking-at " \t")
316 (insert "\n") 376 (insert ?\n)
317 (insert "\n ")) 377 (insert "\n "))
318 (setq bol (1- (point))) 378 (setq bol (1- (point)))
319 ;; Don't break before the first non-LWSP characters. 379 ;; Don't break before the first non-LWSP characters.
320 (skip-chars-forward " \t") 380 (skip-chars-forward " \t")
321 (unless (eobp) (forward-char 1))) 381 (unless (eobp) (forward-char 1)))
345 (when (and (or break qword-break) (> (- (point) bol) 76)) 405 (when (and (or break qword-break) (> (- (point) bol) 76))
346 (goto-char (or break qword-break)) 406 (goto-char (or break qword-break))
347 (setq break nil 407 (setq break nil
348 qword-break nil) 408 qword-break nil)
349 (if (looking-at " \t") 409 (if (looking-at " \t")
350 (insert "\n") 410 (insert ?\n)
351 (insert "\n ")) 411 (insert "\n "))
352 (setq bol (1- (point))) 412 (setq bol (1- (point)))
353 ;; Don't break before the first non-LWSP characters. 413 ;; Don't break before the first non-LWSP characters.
354 (skip-chars-forward " \t") 414 (skip-chars-forward " \t")
355 (unless (eobp) (forward-char 1)))))) 415 (unless (eobp) (forward-char 1))))))
359 (save-restriction 419 (save-restriction
360 (narrow-to-region b e) 420 (narrow-to-region b e)
361 (goto-char (point-min)) 421 (goto-char (point-min))
362 (let ((bol (save-restriction 422 (let ((bol (save-restriction
363 (widen) 423 (widen)
364 (gnus-point-at-bol))) 424 (mm-point-at-bol)))
365 (eol (gnus-point-at-eol)) 425 (eol (mm-point-at-eol))
366 leading) 426 leading)
367 (forward-line 1) 427 (forward-line 1)
368 (while (not (eobp)) 428 (while (not (eobp))
369 (looking-at "[ \t]*") 429 (looking-at "[ \t]*")
370 (setq leading (- (match-end 0) (match-beginning 0))) 430 (setq leading (- (match-end 0) (match-beginning 0)))
371 (if (< (- (gnus-point-at-eol) bol leading) 76) 431 (if (< (- (mm-point-at-eol) bol leading) 76)
372 (progn 432 (progn
373 (goto-char eol) 433 (goto-char eol)
374 (delete-region eol (progn 434 (delete-region eol (progn
375 (skip-chars-forward "[ \t\n\r]+") 435 (skip-chars-forward "[ \t\n\r]+")
376 (1- (point))))) 436 (1- (point)))))
377 (setq bol (gnus-point-at-bol))) 437 (setq bol (mm-point-at-bol)))
378 (setq eol (gnus-point-at-eol)) 438 (setq eol (mm-point-at-eol))
379 (forward-line 1))))) 439 (forward-line 1)))))
380 440
381 (defun rfc2047-b-encode-region (b e) 441 (defun rfc2047-b-encode-region (b e)
382 "Base64-encode the header contained in region B to E." 442 "Base64-encode the header contained in region B to E."
383 (save-restriction 443 (save-restriction
394 (save-restriction 454 (save-restriction
395 (narrow-to-region (goto-char b) e) 455 (narrow-to-region (goto-char b) e)
396 (let ((alist rfc2047-q-encoding-alist) 456 (let ((alist rfc2047-q-encoding-alist)
397 (bol (save-restriction 457 (bol (save-restriction
398 (widen) 458 (widen)
399 (gnus-point-at-bol)))) 459 (mm-point-at-bol))))
400 (while alist 460 (while alist
401 (when (looking-at (caar alist)) 461 (when (looking-at (caar alist))
402 (quoted-printable-encode-region b e nil (cdar alist)) 462 (quoted-printable-encode-region b e nil (cdar alist))
403 (subst-char-in-region (point-min) (point-max) ? ?_) 463 (subst-char-in-region (point-min) (point-max) ? ?_)
404 (setq alist nil)) 464 (setq alist nil))
411 (goto-char (1+ (point-min))) 471 (goto-char (1+ (point-min)))
412 (while (and (not (bobp)) (not (eobp))) 472 (while (and (not (bobp)) (not (eobp)))
413 (goto-char (min (point-max) (+ 56 bol))) 473 (goto-char (min (point-max) (+ 56 bol)))
414 (search-backward "=" (- (point) 2) t) 474 (search-backward "=" (- (point) 2) t)
415 (unless (or (bobp) (eobp)) 475 (unless (or (bobp) (eobp))
416 (insert "\n") 476 (insert ?\n)
417 (setq bol (point))))))))) 477 (setq bol (point)))))))))
418 478
419 ;;; 479 ;;;
420 ;;; Functions for decoding RFC2047 messages 480 ;;; Functions for decoding RFC2047 messages
421 ;;; 481 ;;;
422 482
423 (defvar rfc2047-encoded-word-regexp 483 (eval-and-compile
424 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=") 484 (defvar rfc2047-encoded-word-regexp
485 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\
486 \\([!->@-~ +]+\\)\\?="))
425 487
426 (defun rfc2047-decode-region (start end) 488 (defun rfc2047-decode-region (start end)
427 "Decode MIME-encoded words in region between START and END." 489 "Decode MIME-encoded words in region between START and END."
428 (interactive "r") 490 (interactive "r")
429 (let ((case-fold-search t) 491 (let ((case-fold-search t)
492 (undoing (not (eq t buffer-undo-list)))
430 b e) 493 b e)
431 (save-excursion 494 (unwind-protect
432 (save-restriction 495 (save-excursion
433 (narrow-to-region start end) 496 (save-restriction
434 (goto-char (point-min)) 497 (buffer-enable-undo)
435 ;; Remove whitespace between encoded words. 498 (narrow-to-region start end)
436 (while (re-search-forward 499 (goto-char (point-min))
437 (concat "\\(" rfc2047-encoded-word-regexp "\\)" 500 ;; Remove whitespace between encoded words.
438 "\\(\n?[ \t]\\)+" 501 (while (re-search-forward
439 "\\(" rfc2047-encoded-word-regexp "\\)") 502 (eval-when-compile
440 nil t) 503 (concat "\\(" rfc2047-encoded-word-regexp "\\)"
441 (delete-region (goto-char (match-end 1)) (match-beginning 6))) 504 "\\(\n?[ \t]\\)+"
442 ;; Decode the encoded words. 505 "\\(" rfc2047-encoded-word-regexp "\\)"))
443 (setq b (goto-char (point-min))) 506 nil t)
444 (while (re-search-forward rfc2047-encoded-word-regexp nil t) 507 (delete-region (goto-char (match-end 1)) (match-beginning 6)))
445 (setq e (match-beginning 0)) 508 ;; Decode the encoded words.
446 (insert (rfc2047-parse-and-decode 509 (setq b (goto-char (point-min)))
447 (prog1 510 (while (re-search-forward rfc2047-encoded-word-regexp nil t)
448 (match-string 0) 511 (setq e (match-beginning 0))
449 (delete-region (match-beginning 0) (match-end 0))))) 512 (rfc2047-parse-and-decode (match-beginning 0) (match-end 0)))
450 (when (and (mm-multibyte-p) 513 (when (and (mm-multibyte-p)
451 mail-parse-charset 514 mail-parse-charset
452 (not (eq mail-parse-charset 'gnus-decoded))) 515 (not (eq mail-parse-charset 'us-ascii))
453 (mm-decode-coding-region b e mail-parse-charset)) 516 (not (eq mail-parse-charset 'gnus-decoded)))
454 (setq b (point))) 517 (mm-decode-coding-region b (point-max) mail-parse-charset))
455 (when (and (mm-multibyte-p) 518 (rfc2047-unfold-region (point-min) (point-max))))
456 mail-parse-charset 519 (unless undoing
457 (not (eq mail-parse-charset 'us-ascii)) 520 (buffer-disable-undo)))))
458 (not (eq mail-parse-charset 'gnus-decoded)))
459 (mm-decode-coding-region b (point-max) mail-parse-charset))
460 (rfc2047-unfold-region (point-min) (point-max))))))
461 521
462 (defun rfc2047-decode-string (string) 522 (defun rfc2047-decode-string (string)
463 "Decode the quoted-printable-encoded STRING and return the results." 523 "Decode the quoted-printable-encoded STRING and return the results."
464 (let ((m (mm-multibyte-p))) 524 (let ((m (mm-multibyte-p)))
465 (with-temp-buffer 525 (with-temp-buffer
468 (insert string) 528 (insert string)
469 (inline 529 (inline
470 (rfc2047-decode-region (point-min) (point-max))) 530 (rfc2047-decode-region (point-min) (point-max)))
471 (buffer-string)))) 531 (buffer-string))))
472 532
473 (defun rfc2047-parse-and-decode (word) 533 (defun rfc2047-parse-and-decode (b e)
474 "Decode WORD and return it if it is an encoded word. 534 "Decode WORD and return it if it is an encoded word.
475 Return WORD if not." 535 Return WORD if not."
476 (if (not (string-match rfc2047-encoded-word-regexp word)) 536 (save-restriction
477 word 537 (narrow-to-region b e)
478 (or 538 (goto-char b)
479 (condition-case nil 539 (when (looking-at (eval-when-compile
480 (rfc2047-decode 540 (concat "\\`" rfc2047-encoded-word-regexp "\\'")))
481 (match-string 1 word) 541 (condition-case nil
482 (upcase (match-string 2 word)) 542 (let ((charset (match-string 1))
483 (match-string 3 word)) 543 (encoding (upcase (match-string 2))))
484 (error word)) 544 (undo-boundary)
485 word))) 545 (delete-region (match-beginning 0) (1+ (match-end 2)))
486 546 (delete-region (- (point-max) 2) (point-max))
487 (defun rfc2047-decode (charset encoding string) 547 (rfc2047-decode charset encoding (point-min) (point-max)))
488 "Decode STRING from the given MIME CHARSET in the given ENCODING. 548 ;; If we get an error, undo the change
549 (error (undo))))))
550
551 (defun rfc2047-decode (charset encoding b e)
552 "Decode from the given MIME CHARSET in the given ENCODING in region B to E.
489 Valid ENCODINGs are \"B\" and \"Q\". 553 Valid ENCODINGs are \"B\" and \"Q\".
490 If your Emacs implementation can't decode CHARSET, return nil." 554 If your Emacs implementation can't decode CHARSET, return nil."
491 (if (stringp charset) 555 (if (stringp charset)
492 (setq charset (intern (downcase charset)))) 556 (setq charset (intern (downcase charset))))
493 (if (or (not charset) 557 (if (or (not charset)
502 (setq cs (mm-charset-to-coding-system mail-parse-charset))) 566 (setq cs (mm-charset-to-coding-system mail-parse-charset)))
503 (when cs 567 (when cs
504 (when (and (eq cs 'ascii) 568 (when (and (eq cs 'ascii)
505 mail-parse-charset) 569 mail-parse-charset)
506 (setq cs mail-parse-charset)) 570 (setq cs mail-parse-charset))
507 ;; Ensure unibyte result in Emacs 20. 571 (save-restriction
508 (let (default-enable-multibyte-characters) 572 (narrow-to-region b e)
509 (with-temp-buffer 573 (cond
510 (mm-decode-coding-string 574 ((equal "B" encoding)
511 (cond 575 (base64-decode-region b e))
512 ((equal "B" encoding) 576 ((equal "Q" encoding)
513 (base64-decode-string string)) 577 (subst-char-in-region b e ?_ ? t)
514 ((equal "Q" encoding) 578 (quoted-printable-decode-region b e))
515 (quoted-printable-decode-string 579 (t (error "Invalid encoding: %s" encoding)))
516 (mm-replace-chars-in-string string ?_ ? ))) 580 (mm-decode-coding-region (point-min) (point-max) cs)))))
517 (t (error "Invalid encoding: %s" encoding)))
518 cs))))))
519 581
520 (provide 'rfc2047) 582 (provide 'rfc2047)
521 583
522 ;;; rfc2047.el ends here 584 ;;; rfc2047.el ends here