Mercurial > emacs
comparison lisp/gnus/rfc2047.el @ 82951:0fde48feb604
Import Gnus 5.10 from the v5_10 branch of the Gnus repository.
author | Andreas Schwab <schwab@suse.de> |
---|---|
date | Thu, 22 Jul 2004 16:45:51 +0000 |
parents | d0e0914b0f6d |
children | c5e16264557d cce1c0ee76ee |
comparison
equal
deleted
inserted
replaced
56503:8bbd2323fbf2 | 82951:0fde48feb604 |
---|---|
1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages | 1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
2 ;; Copyright (C) 1998,1999,2000,02,03,2004 Free Software Foundation, Inc. | 2 ;; Copyright (C) 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc. |
3 | 3 |
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | 4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | 5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> |
6 ;; This file is part of GNU Emacs. | 6 ;; This file is part of GNU Emacs. |
7 | 7 |
27 | 27 |
28 ;;; Code: | 28 ;;; Code: |
29 | 29 |
30 (eval-when-compile | 30 (eval-when-compile |
31 (require 'cl) | 31 (require 'cl) |
32 (defvar message-posting-charset)) | 32 (defvar message-posting-charset) |
33 (unless (fboundp 'with-syntax-table) ; not in Emacs 20 | |
34 (defmacro with-syntax-table (table &rest body) | |
35 "Evaluate BODY with syntax table of current buffer set to TABLE. | |
36 The syntax table of the current buffer is saved, BODY is evaluated, and the | |
37 saved table is restored, even in case of an abnormal exit. | |
38 Value is what BODY returns." | |
39 (let ((old-table (make-symbol "table")) | |
40 (old-buffer (make-symbol "buffer"))) | |
41 `(let ((,old-table (syntax-table)) | |
42 (,old-buffer (current-buffer))) | |
43 (unwind-protect | |
44 (progn | |
45 (set-syntax-table ,table) | |
46 ,@body) | |
47 (save-current-buffer | |
48 (set-buffer ,old-buffer) | |
49 (set-syntax-table ,old-table)))))))) | |
33 | 50 |
34 (require 'qp) | 51 (require 'qp) |
35 (require 'mm-util) | 52 (require 'mm-util) |
36 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. | 53 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
37 (require 'mail-prsvr) | 54 (require 'mail-prsvr) |
38 (require 'base64) | 55 (require 'base64) |
39 (autoload 'mm-body-7-or-8 "mm-bodies") | 56 (autoload 'mm-body-7-or-8 "mm-bodies") |
40 | 57 |
58 (eval-and-compile | |
59 ;; Avoid gnus-util for mm- code. | |
60 (defalias 'rfc2047-point-at-bol | |
61 (if (fboundp 'point-at-bol) | |
62 'point-at-bol | |
63 'line-beginning-position)) | |
64 | |
65 (defalias 'rfc2047-point-at-eol | |
66 (if (fboundp 'point-at-eol) | |
67 'point-at-eol | |
68 'line-end-position))) | |
69 | |
41 (defvar rfc2047-header-encoding-alist | 70 (defvar rfc2047-header-encoding-alist |
42 '(("Newsgroups\\|Followup-To" . nil) | 71 '(("Newsgroups" . nil) |
72 ("Followup-To" . nil) | |
43 ("Message-ID" . nil) | 73 ("Message-ID" . nil) |
44 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . | 74 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\ |
45 address-mime) | 75 \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime) |
46 (t . mime)) | 76 (t . mime)) |
47 "*Header/encoding method alist. | 77 "*Header/encoding method alist. |
48 The list is traversed sequentially. The keys can either be | 78 The list is traversed sequentially. The keys can either be |
49 header regexps or t. | 79 header regexps or t. |
50 | 80 |
78 (cn-big5 . B) | 108 (cn-big5 . B) |
79 (cn-gb . B) | 109 (cn-gb . B) |
80 (cn-gb-2312 . B) | 110 (cn-gb-2312 . B) |
81 (euc-kr . B) | 111 (euc-kr . B) |
82 (iso-2022-jp-2 . B) | 112 (iso-2022-jp-2 . B) |
83 (iso-2022-int-1 . B)) | 113 (iso-2022-int-1 . B) |
114 (viscii . Q)) | |
84 "Alist of MIME charsets to RFC2047 encodings. | 115 "Alist of MIME charsets to RFC2047 encodings. |
85 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, | 116 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
86 quoted-printable and base64 respectively.") | 117 quoted-printable and base64 respectively.") |
87 | 118 |
88 (defvar rfc2047-encoding-function-alist | 119 (defvar rfc2047-encoding-function-alist |
89 '((Q . rfc2047-q-encode-region) | 120 '((Q . rfc2047-q-encode-region) |
90 (B . rfc2047-b-encode-region) | 121 (B . rfc2047-b-encode-region) |
91 (nil . ignore)) | 122 (nil . ignore)) |
92 "Alist of RFC2047 encodings to encoding functions.") | 123 "Alist of RFC2047 encodings to encoding functions.") |
93 | |
94 (defvar rfc2047-q-encoding-alist | |
95 '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" | |
96 . "-A-Za-z0-9!*+/" ) | |
97 ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | |
98 ;; Avoid using 8bit characters. | |
99 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | |
100 ("." . "\010\012\014\040-\074\076\100-\136\140-\177")) | |
101 "Alist of header regexps and valid Q characters.") | |
102 | 124 |
103 ;;; | 125 ;;; |
104 ;;; Functions for encoding RFC2047 messages | 126 ;;; Functions for encoding RFC2047 messages |
105 ;;; | 127 ;;; |
106 | 128 |
110 (narrow-to-region | 132 (narrow-to-region |
111 (point) | 133 (point) |
112 (progn | 134 (progn |
113 (forward-line 1) | 135 (forward-line 1) |
114 (if (re-search-forward "^[^ \n\t]" nil t) | 136 (if (re-search-forward "^[^ \n\t]" nil t) |
115 (progn | 137 (rfc2047-point-at-bol) |
116 (beginning-of-line) | |
117 (point)) | |
118 (point-max)))) | 138 (point-max)))) |
119 (goto-char (point-min))) | 139 (goto-char (point-min))) |
140 | |
141 (defun rfc2047-field-value () | |
142 "Return the value of the field at point." | |
143 (save-excursion | |
144 (save-restriction | |
145 (rfc2047-narrow-to-field) | |
146 (re-search-forward ":[ \t\n]*" nil t) | |
147 (buffer-substring (point) (point-max))))) | |
120 | 148 |
121 (defvar rfc2047-encoding-type 'address-mime | 149 (defvar rfc2047-encoding-type 'address-mime |
122 "The type of encoding done by `rfc2047-encode-region'. | 150 "The type of encoding done by `rfc2047-encode-region'. |
123 This should be dynamically bound around calls to | 151 This should be dynamically bound around calls to |
124 `rfc2047-encode-region' to either `mime' or `address-mime'. See | 152 `rfc2047-encode-region' to either `mime' or `address-mime'. See |
167 (re-search-forward "^[^:]+: *" nil t) | 195 (re-search-forward "^[^:]+: *" nil t) |
168 (cond | 196 (cond |
169 ((eq method 'address-mime) | 197 ((eq method 'address-mime) |
170 (rfc2047-encode-region (point) (point-max))) | 198 (rfc2047-encode-region (point) (point-max))) |
171 ((eq method 'mime) | 199 ((eq method 'mime) |
172 (let ((rfc2047-encoding-type method)) | 200 (let ((rfc2047-encoding-type 'mime)) |
173 (rfc2047-encode-region (point) (point-max)))) | 201 (rfc2047-encode-region (point) (point-max)))) |
174 ((eq method 'default) | 202 ((eq method 'default) |
175 (if (and (featurep 'mule) | 203 (if (and (featurep 'mule) |
176 (if (boundp 'default-enable-multibyte-characters) | 204 (if (boundp 'default-enable-multibyte-characters) |
177 default-enable-multibyte-characters) | 205 default-enable-multibyte-characters) |
178 mail-parse-charset) | 206 mail-parse-charset) |
179 (mm-encode-coding-region (point) (point-max) | 207 (mm-encode-coding-region (point) (point-max) |
180 mail-parse-charset))) | 208 mail-parse-charset))) |
209 ;; We get this when CC'ing messsages to newsgroups with | |
210 ;; 8-bit names. The group name mail copy just got | |
211 ;; unconditionally encoded. Previously, it would ask | |
212 ;; whether to encode, which was quite confusing for the | |
213 ;; user. If the new behaviour is wrong, tell me. I have | |
214 ;; left the old code commented out below. | |
215 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07. | |
216 ;; Modified by Dave Love, with the commented-out code changed | |
217 ;; in accordance with changes elsewhere. | |
218 ((null method) | |
219 (rfc2047-encode-region (point) (point-max))) | |
220 ;;; ((null method) | |
221 ;;; (if (or (message-options-get | |
222 ;;; 'rfc2047-encode-message-header-encode-any) | |
223 ;;; (message-options-set | |
224 ;;; 'rfc2047-encode-message-header-encode-any | |
225 ;;; (y-or-n-p | |
226 ;;; "Some texts are not encoded. Encode anyway?"))) | |
227 ;;; (rfc2047-encode-region (point-min) (point-max)) | |
228 ;;; (error "Cannot send unencoded text"))) | |
181 ((mm-coding-system-p method) | 229 ((mm-coding-system-p method) |
182 (if (and (featurep 'mule) | 230 (if (and (featurep 'mule) |
183 (if (boundp 'default-enable-multibyte-characters) | 231 (if (boundp 'default-enable-multibyte-characters) |
184 default-enable-multibyte-characters)) | 232 default-enable-multibyte-characters)) |
185 (mm-encode-coding-region (point) (point-max) method))) | 233 (mm-encode-coding-region (point) (point-max) method))) |
195 "Return non-nil if any characters in current buffer need encoding in headers. | 243 "Return non-nil if any characters in current buffer need encoding in headers. |
196 The buffer may be narrowed." | 244 The buffer may be narrowed." |
197 (require 'message) ; for message-posting-charset | 245 (require 'message) ; for message-posting-charset |
198 (let ((charsets | 246 (let ((charsets |
199 (mm-find-mime-charset-region (point-min) (point-max)))) | 247 (mm-find-mime-charset-region (point-min) (point-max)))) |
200 (and charsets (not (equal charsets (list message-posting-charset)))))) | 248 (and charsets |
249 (not (equal charsets (list (car message-posting-charset))))))) | |
201 | 250 |
202 ;; Use this syntax table when parsing into regions that may need | 251 ;; Use this syntax table when parsing into regions that may need |
203 ;; encoding. Double quotes are string delimiters, backslash is | 252 ;; encoding. Double quotes are string delimiters, backslash is |
204 ;; character quoting, and all other RFC 2822 special characters are | 253 ;; character quoting, and all other RFC 2822 special characters are |
205 ;; treated as punctuation so we can use forward-sexp/forward-word to | 254 ;; treated as punctuation so we can use forward-sexp/forward-word to |
206 ;; skip to the end of regions appropriately. Nb. ietf-drums does | 255 ;; skip to the end of regions appropriately. Nb. ietf-drums does |
207 ;; things differently. | 256 ;; things differently. |
208 (defconst rfc2047-syntax-table | 257 (defconst rfc2047-syntax-table |
209 (let ((table (make-char-table 'syntax-table '(2)))) | 258 ;; (make-char-table 'syntax-table '(2)) only works in Emacs. |
259 (let ((table (make-syntax-table))) | |
260 ;; The following is done to work for setting all elements of the table | |
261 ;; in Emacs 21 and 22 and XEmacs; it appears to be the cleanest way. | |
262 ;; Play safe and don't assume the form of the word syntax entry -- | |
263 ;; copy it from ?a. | |
264 (if (fboundp 'set-char-table-range) ; Emacs | |
265 (funcall (intern "set-char-table-range") | |
266 table t (aref (standard-syntax-table) ?a)) | |
267 (if (fboundp 'put-char-table) | |
268 (if (fboundp 'get-char-table) ; warning avoidance | |
269 (put-char-table t (get-char-table ?a (standard-syntax-table)) | |
270 table)))) | |
210 (modify-syntax-entry ?\\ "\\" table) | 271 (modify-syntax-entry ?\\ "\\" table) |
211 (modify-syntax-entry ?\" "\"" table) | 272 (modify-syntax-entry ?\" "\"" table) |
212 (modify-syntax-entry ?\( "." table) | 273 (modify-syntax-entry ?\( "." table) |
213 (modify-syntax-entry ?\) "." table) | 274 (modify-syntax-entry ?\) "." table) |
214 (modify-syntax-entry ?\< "." table) | 275 (modify-syntax-entry ?\< "." table) |
226 By default, the region is treated as containing RFC2822 addresses. | 287 By default, the region is treated as containing RFC2822 addresses. |
227 Dynamically bind `rfc2047-encoding-type' to change that." | 288 Dynamically bind `rfc2047-encoding-type' to change that." |
228 (save-restriction | 289 (save-restriction |
229 (narrow-to-region b e) | 290 (narrow-to-region b e) |
230 (if (eq 'mime rfc2047-encoding-type) | 291 (if (eq 'mime rfc2047-encoding-type) |
231 ;; Simple case -- treat as single word. | 292 ;; Simple case. Treat as single word after any initial ASCII |
293 ;; part and before any tailing ASCII part. The leading ASCII | |
294 ;; is relevant for instance in Subject headers with `Re:' for | |
295 ;; interoperability with non-MIME clients, and we might as | |
296 ;; well avoid the tail too. | |
232 (progn | 297 (progn |
233 (goto-char (point-min)) | 298 (goto-char (point-min)) |
234 ;; Does it need encoding? | 299 ;; Does it need encoding? |
235 (skip-chars-forward "\000-\177" e) | 300 (skip-chars-forward "\000-\177") |
236 (unless (eobp) | 301 (unless (eobp) |
237 (rfc2047-encode b e))) | 302 (skip-chars-backward "^ \n") ; beginning of space-delimited word |
303 (rfc2047-encode (point) (progn | |
304 (goto-char e) | |
305 (skip-chars-backward "\000-\177") | |
306 (skip-chars-forward "^ \n") | |
307 ;; end of space-delimited word | |
308 (point))))) | |
238 ;; `address-mime' case -- take care of quoted words, comments. | 309 ;; `address-mime' case -- take care of quoted words, comments. |
239 (with-syntax-table rfc2047-syntax-table | 310 (with-syntax-table rfc2047-syntax-table |
240 (let ((start (point)) ; start of current token | 311 (let ((start) ; start of current token |
241 end ; end of current token | 312 end ; end of current token |
242 ;; Whether there's an encoded word before the current | 313 ;; Whether there's an encoded word before the current |
243 ;; token, either immediately or separated by space. | 314 ;; token, either immediately or separated by space. |
244 last-encoded) | 315 last-encoded) |
245 (goto-char (point-min)) | 316 (goto-char (point-min)) |
246 (condition-case nil ; in case of unbalanced quotes | 317 (condition-case nil ; in case of unbalanced quotes |
247 ;; Look for rfc2822-style: sequences of atoms, quoted | 318 ;; Look for rfc2822-style: sequences of atoms, quoted |
248 ;; strings, specials, whitespace. (Specials mustn't be | 319 ;; strings, specials, whitespace. (Specials mustn't be |
249 ;; encoded.) | 320 ;; encoded.) |
250 (while (not (eobp)) | 321 (while (not (eobp)) |
251 (setq start (point)) | 322 (setq start (point)) |
304 (insert ? ) | 375 (insert ? ) |
305 (setq start (point) | 376 (setq start (point) |
306 end (1+ end))) | 377 end (1+ end))) |
307 (rfc2047-encode start end) | 378 (rfc2047-encode start end) |
308 (setq last-encoded t))))) | 379 (setq last-encoded t))))) |
309 (error (error "Invalid data for rfc2047 encoding: %s" | 380 (error |
310 (buffer-substring b e))))))) | 381 (error "Invalid data for rfc2047 encoding: %s" |
382 (buffer-substring b e))))))) | |
311 (rfc2047-fold-region b (point)))) | 383 (rfc2047-fold-region b (point)))) |
312 | 384 |
313 (defun rfc2047-encode-string (string) | 385 (defun rfc2047-encode-string (string) |
314 "Encode words in STRING. | 386 "Encode words in STRING. |
315 By default, the string is treated as containing addresses (see | 387 By default, the string is treated as containing addresses (see |
316 `rfc2047-special-chars')." | 388 `rfc2047-encoding-type')." |
317 (with-temp-buffer | 389 (with-temp-buffer |
318 (insert string) | 390 (insert string) |
319 (rfc2047-encode-region (point-min) (point-max)) | 391 (rfc2047-encode-region (point-min) (point-max)) |
320 (buffer-string))) | 392 (buffer-string))) |
321 | 393 |
322 (defun rfc2047-encode (b e) | 394 (defun rfc2047-encode (b e) |
323 "Encode the word(s) in the region B to E. | 395 "Encode the word(s) in the region B to E. |
324 By default, the region is treated as containing addresses (see | 396 By default, the region is treated as containing addresses (see |
325 `rfc2047-special-chars')." | 397 `rfc2047-encoding-type')." |
326 (let* ((mime-charset (mm-find-mime-charset-region b e)) | 398 (let* ((mime-charset (mm-find-mime-charset-region b e)) |
327 (cs (if (> (length mime-charset) 1) | 399 (cs (if (> (length mime-charset) 1) |
328 ;; Fixme: Instead of this, try to break region into | 400 ;; Fixme: Instead of this, try to break region into |
329 ;; parts that can be encoded separately. | 401 ;; parts that can be encoded separately. |
330 (error "Can't rfc2047-encode `%s'" | 402 (error "Can't rfc2047-encode `%s'" |
331 (buffer-substring b e)) | 403 (buffer-substring b e)) |
332 (setq mime-charset (car mime-charset)) | 404 (setq mime-charset (car mime-charset)) |
333 (mm-charset-to-coding-system mime-charset))) | 405 (mm-charset-to-coding-system mime-charset))) |
334 ;; Fixme: Better, calculate the number of non-ASCII | 406 ;; Fixme: Better, calculate the number of non-ASCII |
335 ;; characters, at least for 8-bit charsets. | 407 ;; characters, at least for 8-bit charsets. |
336 (encoding (if (assq mime-charset | 408 (encoding (or (cdr (assq mime-charset |
337 rfc2047-charset-encoding-alist) | |
338 (cdr (assq mime-charset | |
339 rfc2047-charset-encoding-alist)) | 409 rfc2047-charset-encoding-alist)) |
340 'B)) | 410 ;; For the charsets that don't have a preferred |
411 ;; encoding, choose the one that's shorter. | |
412 (save-restriction | |
413 (narrow-to-region b e) | |
414 (if (eq (mm-qp-or-base64) 'base64) | |
415 'B | |
416 'Q)))) | |
341 (start (concat | 417 (start (concat |
342 "=?" (downcase (symbol-name mime-charset)) "?" | 418 "=?" (downcase (symbol-name mime-charset)) "?" |
343 (downcase (symbol-name encoding)) "?")) | 419 (downcase (symbol-name encoding)) "?")) |
420 (factor (case mime-charset | |
421 ((iso-8859-5 iso-8859-7 iso-8859-8 koi8-r) 1) | |
422 ((big5 gb2312 euc-kr) 2) | |
423 (utf-8 4) | |
424 (t 8))) | |
425 (pre (- b (save-restriction | |
426 (widen) | |
427 (rfc2047-point-at-bol)))) | |
428 ;; encoded-words must not be longer than 75 characters, | |
429 ;; including charset, encoding etc. This leaves us with | |
430 ;; 75 - (length start) - 2 - 2 characters. The last 2 is for | |
431 ;; possible base64 padding. In the worst case (iso-2022-*) | |
432 ;; each character expands to 8 bytes which is expanded by a | |
433 ;; factor of 4/3 by base64 encoding. | |
434 (length (floor (- 75 (length start) 4) (* factor (/ 4.0 3.0)))) | |
435 ;; Limit line length to 76 characters. | |
436 (length1 (max 1 (floor (- 76 (length start) 4 pre) | |
437 (* factor (/ 4.0 3.0))))) | |
344 (first t)) | 438 (first t)) |
345 (if mime-charset | 439 (if mime-charset |
346 (save-restriction | 440 (save-restriction |
347 (narrow-to-region b e) | 441 (narrow-to-region b e) |
348 (when (eq encoding 'B) | 442 (when (eq encoding 'B) |
349 ;; break into lines before encoding | 443 ;; break into lines before encoding |
350 (goto-char (point-min)) | 444 (goto-char (point-min)) |
351 (while (not (eobp)) | 445 (while (not (eobp)) |
352 (goto-char (min (point-max) (+ 15 (point)))) | 446 (if first |
447 (progn | |
448 (goto-char (min (point-max) (+ length1 (point)))) | |
449 (setq first nil)) | |
450 (goto-char (min (point-max) (+ length (point))))) | |
353 (unless (eobp) | 451 (unless (eobp) |
354 (insert ?\n)))) | 452 (insert ?\n))) |
453 (setq first t)) | |
355 (if (and (mm-multibyte-p) | 454 (if (and (mm-multibyte-p) |
356 (mm-coding-system-p cs)) | 455 (mm-coding-system-p cs)) |
357 (mm-encode-coding-region (point-min) (point-max) cs)) | 456 (mm-encode-coding-region (point-min) (point-max) cs)) |
358 (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) | 457 (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) |
359 (point-min) (point-max)) | 458 (point-min) (point-max)) |
365 (insert start) | 464 (insert start) |
366 (end-of-line) | 465 (end-of-line) |
367 (insert "?=") | 466 (insert "?=") |
368 (forward-line 1)))))) | 467 (forward-line 1)))))) |
369 | 468 |
469 (defun rfc2047-fold-field () | |
470 "Fold the current header field." | |
471 (save-excursion | |
472 (save-restriction | |
473 (rfc2047-narrow-to-field) | |
474 (rfc2047-fold-region (point-min) (point-max))))) | |
475 | |
370 (defun rfc2047-fold-region (b e) | 476 (defun rfc2047-fold-region (b e) |
371 "Fold long lines in region B to E." | 477 "Fold long lines in region B to E." |
372 (save-restriction | 478 (save-restriction |
373 (narrow-to-region b e) | 479 (narrow-to-region b e) |
374 (goto-char (point-min)) | 480 (goto-char (point-min)) |
375 (let ((break nil) | 481 (let ((break nil) |
376 (qword-break nil) | 482 (qword-break nil) |
377 (first t) | 483 (first t) |
378 (bol (save-restriction | 484 (bol (save-restriction |
379 (widen) | 485 (widen) |
380 (mm-point-at-bol)))) | 486 (rfc2047-point-at-bol)))) |
381 (while (not (eobp)) | 487 (while (not (eobp)) |
382 (when (and (or break qword-break) (> (- (point) bol) 76)) | 488 (when (and (or break qword-break) |
489 (> (- (point) bol) 76)) | |
383 (goto-char (or break qword-break)) | 490 (goto-char (or break qword-break)) |
384 (setq break nil | 491 (setq break nil |
385 qword-break nil) | 492 qword-break nil) |
386 (if (looking-at "[ \t]") | 493 (if (looking-at "[ \t]") |
387 (insert ?\n) | 494 (insert ?\n) |
388 (insert "\n ")) | 495 (insert "\n ")) |
389 (setq bol (1- (point))) | 496 (setq bol (1- (point))) |
390 ;; Don't break before the first non-LWSP characters. | 497 ;; Don't break before the first non-LWSP characters. |
391 (skip-chars-forward " \t") | 498 (skip-chars-forward " \t") |
392 (unless (eobp) (forward-char 1))) | 499 (unless (eobp) |
500 (forward-char 1))) | |
393 (cond | 501 (cond |
394 ((eq (char-after) ?\n) | 502 ((eq (char-after) ?\n) |
395 (forward-char 1) | 503 (forward-char 1) |
396 (setq bol (point) | 504 (setq bol (point) |
397 break nil | 505 break nil |
410 ((not break) | 518 ((not break) |
411 (if (not (looking-at "=\\?[^=]")) | 519 (if (not (looking-at "=\\?[^=]")) |
412 (if (eq (char-after) ?=) | 520 (if (eq (char-after) ?=) |
413 (forward-char 1) | 521 (forward-char 1) |
414 (skip-chars-forward "^ \t\n\r=")) | 522 (skip-chars-forward "^ \t\n\r=")) |
415 (setq qword-break (point)) | 523 ;; Don't break at the start of the field. |
524 (unless (= (point) b) | |
525 (setq qword-break (point))) | |
416 (skip-chars-forward "^ \t\n\r"))) | 526 (skip-chars-forward "^ \t\n\r"))) |
417 (t | 527 (t |
418 (skip-chars-forward "^ \t\n\r")))) | 528 (skip-chars-forward "^ \t\n\r")))) |
419 (when (and (or break qword-break) (> (- (point) bol) 76)) | 529 (when (and (or break qword-break) |
530 (> (- (point) bol) 76)) | |
420 (goto-char (or break qword-break)) | 531 (goto-char (or break qword-break)) |
421 (setq break nil | 532 (setq break nil |
422 qword-break nil) | 533 qword-break nil) |
423 (if (looking-at "[ \t]") | 534 (if (looking-at "[ \t]") |
424 (insert ?\n) | 535 (insert ?\n) |
425 (insert "\n ")) | 536 (insert "\n ")) |
426 (setq bol (1- (point))) | 537 (setq bol (1- (point))) |
427 ;; Don't break before the first non-LWSP characters. | 538 ;; Don't break before the first non-LWSP characters. |
428 (skip-chars-forward " \t") | 539 (skip-chars-forward " \t") |
429 (unless (eobp) (forward-char 1)))))) | 540 (unless (eobp) |
541 (forward-char 1)))))) | |
542 | |
543 (defun rfc2047-unfold-field () | |
544 "Fold the current line." | |
545 (save-excursion | |
546 (save-restriction | |
547 (rfc2047-narrow-to-field) | |
548 (rfc2047-unfold-region (point-min) (point-max))))) | |
430 | 549 |
431 (defun rfc2047-unfold-region (b e) | 550 (defun rfc2047-unfold-region (b e) |
432 "Unfold lines in region B to E." | 551 "Unfold lines in region B to E." |
433 (save-restriction | 552 (save-restriction |
434 (narrow-to-region b e) | 553 (narrow-to-region b e) |
435 (goto-char (point-min)) | 554 (goto-char (point-min)) |
436 (let ((bol (save-restriction | 555 (let ((bol (save-restriction |
437 (widen) | 556 (widen) |
438 (mm-point-at-bol))) | 557 (rfc2047-point-at-bol))) |
439 (eol (mm-point-at-eol)) | 558 (eol (rfc2047-point-at-eol))) |
440 leading) | |
441 (forward-line 1) | 559 (forward-line 1) |
442 (while (not (eobp)) | 560 (while (not (eobp)) |
443 (if (and (looking-at "[ \t]") | 561 (if (and (looking-at "[ \t]") |
444 (< (- (mm-point-at-eol) bol) 76)) | 562 (< (- (rfc2047-point-at-eol) bol) 76)) |
445 (delete-region eol (progn | 563 (delete-region eol (progn |
446 (goto-char eol) | 564 (goto-char eol) |
447 (skip-chars-forward "\r\n") | 565 (skip-chars-forward "\r\n") |
448 (point))) | 566 (point))) |
449 (setq bol (mm-point-at-bol))) | 567 (setq bol (rfc2047-point-at-bol))) |
450 (setq eol (mm-point-at-eol)) | 568 (setq eol (rfc2047-point-at-eol)) |
451 (forward-line 1))))) | 569 (forward-line 1))))) |
452 | 570 |
453 (defun rfc2047-b-encode-region (b e) | 571 (defun rfc2047-b-encode-region (b e) |
454 "Base64-encode the header contained in region B to E." | 572 "Base64-encode the header contained in region B to E." |
455 (save-restriction | 573 (save-restriction |
463 (defun rfc2047-q-encode-region (b e) | 581 (defun rfc2047-q-encode-region (b e) |
464 "Quoted-printable-encode the header in region B to E." | 582 "Quoted-printable-encode the header in region B to E." |
465 (save-excursion | 583 (save-excursion |
466 (save-restriction | 584 (save-restriction |
467 (narrow-to-region (goto-char b) e) | 585 (narrow-to-region (goto-char b) e) |
468 (let ((alist rfc2047-q-encoding-alist) | 586 (let ((bol (save-restriction |
469 (bol (save-restriction | |
470 (widen) | 587 (widen) |
471 (mm-point-at-bol)))) | 588 (rfc2047-point-at-bol)))) |
472 (while alist | 589 (quoted-printable-encode-region |
473 (when (looking-at (caar alist)) | 590 b e nil |
474 (quoted-printable-encode-region b e nil (cdar alist)) | 591 ;; = (\075), _ (\137), ? (\077) are used in the encoded word. |
475 (subst-char-in-region (point-min) (point-max) ? ?_) | 592 ;; Avoid using 8bit characters. |
476 (setq alist nil)) | 593 ;; This list excludes `especials' (see the RFC2047 syntax), |
477 (pop alist)) | 594 ;; meaning that some characters in non-structured fields will |
595 ;; get encoded when they con't need to be. The following is | |
596 ;; what it used to be. | |
597 ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | |
598 ;;; "\010\012\014\040-\074\076\100-\136\140-\177") | |
599 "-\b\n\f !#-'*+0-9A-Z\\^`-~\d") | |
600 (subst-char-in-region (point-min) (point-max) ? ?_) | |
478 ;; The size of QP encapsulation is about 20, so set limit to | 601 ;; The size of QP encapsulation is about 20, so set limit to |
479 ;; 56=76-20. | 602 ;; 56=76-20. |
480 (unless (< (- (point-max) (point-min)) 56) | 603 (unless (< (- (point-max) (point-min)) 56) |
481 ;; Don't break if it could fit in one line. | 604 ;; Don't break if it could fit in one line. |
482 ;; Let rfc2047-encode-region break it later. | 605 ;; Let rfc2047-encode-region break it later. |
483 (goto-char (1+ (point-min))) | 606 (goto-char (1+ (point-min))) |
484 (while (and (not (bobp)) (not (eobp))) | 607 (while (and (not (bobp)) (not (eobp))) |
485 (goto-char (min (point-max) (+ 56 bol))) | 608 (goto-char (min (point-max) (+ 56 bol))) |
486 (search-backward "=" (- (point) 2) t) | 609 (search-backward "=" (- (point) 2) t) |
487 (unless (or (bobp) (eobp)) | 610 (unless (or (bobp) (eobp)) |
488 (insert "\n") | 611 (insert ?\n) |
489 (setq bol (point))))))))) | 612 (setq bol (point))))))))) |
490 | 613 |
491 ;;; | 614 ;;; |
492 ;;; Functions for decoding RFC2047 messages | 615 ;;; Functions for decoding RFC2047 messages |
493 ;;; | 616 ;;; |
494 | 617 |
495 (defvar rfc2047-encoded-word-regexp | 618 (eval-and-compile |
496 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=") | 619 (defconst rfc2047-encoded-word-regexp |
620 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\ | |
621 \\?\\([!->@-~ +]*\\)\\?=")) | |
622 | |
623 ;; Fixme: This should decode in place, not cons intermediate strings. | |
624 ;; Also check whether it needs to worry about delimiting fields like | |
625 ;; encoding. | |
626 | |
627 ;; In fact it's reported that (invalid) encoding of mailboxes in | |
628 ;; addr-specs is in use, so delimiting fields might help. Probably | |
629 ;; not decoding a word which isn't properly delimited is good enough | |
630 ;; and worthwhile (is it more correct or not?), e.g. something like | |
631 ;; `=?iso-8859-1?q?foo?=@'. | |
497 | 632 |
498 (defun rfc2047-decode-region (start end) | 633 (defun rfc2047-decode-region (start end) |
499 "Decode MIME-encoded words in region between START and END." | 634 "Decode MIME-encoded words in region between START and END." |
500 (interactive "r") | 635 (interactive "r") |
501 (let ((case-fold-search t) | 636 (let ((case-fold-search t) |
504 (save-restriction | 639 (save-restriction |
505 (narrow-to-region start end) | 640 (narrow-to-region start end) |
506 (goto-char (point-min)) | 641 (goto-char (point-min)) |
507 ;; Remove whitespace between encoded words. | 642 ;; Remove whitespace between encoded words. |
508 (while (re-search-forward | 643 (while (re-search-forward |
509 (concat "\\(" rfc2047-encoded-word-regexp "\\)" | 644 (eval-when-compile |
510 "\\(\n?[ \t]\\)+" | 645 (concat "\\(" rfc2047-encoded-word-regexp "\\)" |
511 "\\(" rfc2047-encoded-word-regexp "\\)") | 646 "\\(\n?[ \t]\\)+" |
647 "\\(" rfc2047-encoded-word-regexp "\\)")) | |
512 nil t) | 648 nil t) |
513 (delete-region (goto-char (match-end 1)) (match-beginning 6))) | 649 (delete-region (goto-char (match-end 1)) (match-beginning 6))) |
514 ;; Decode the encoded words. | 650 ;; Decode the encoded words. |
515 (setq b (goto-char (point-min))) | 651 (setq b (goto-char (point-min))) |
516 (while (re-search-forward rfc2047-encoded-word-regexp nil t) | 652 (while (re-search-forward rfc2047-encoded-word-regexp nil t) |
517 (setq e (match-beginning 0)) | 653 (setq e (match-beginning 0)) |
518 (insert (rfc2047-parse-and-decode | 654 (insert (rfc2047-parse-and-decode |
519 (prog1 | 655 (prog1 |
520 (match-string 0) | 656 (match-string 0) |
521 (delete-region (match-beginning 0) (match-end 0))))) | 657 (delete-region (match-beginning 0) (match-end 0))))) |
658 ;; Remove newlines between decoded words, though such things | |
659 ;; essentially must not be there. | |
660 (save-restriction | |
661 (narrow-to-region e (point)) | |
662 (goto-char e) | |
663 (while (re-search-forward "[\n\r]+" nil t) | |
664 (replace-match " ")) | |
665 (goto-char (point-max))) | |
522 (when (and (mm-multibyte-p) | 666 (when (and (mm-multibyte-p) |
523 mail-parse-charset | 667 mail-parse-charset |
668 (not (eq mail-parse-charset 'us-ascii)) | |
524 (not (eq mail-parse-charset 'gnus-decoded))) | 669 (not (eq mail-parse-charset 'gnus-decoded))) |
525 (mm-decode-coding-region b e mail-parse-charset)) | 670 (mm-decode-coding-region b e mail-parse-charset)) |
526 (setq b (point))) | 671 (setq b (point))) |
527 (when (and (mm-multibyte-p) | 672 (when (and (mm-multibyte-p) |
528 mail-parse-charset | 673 mail-parse-charset |
529 (not (eq mail-parse-charset 'us-ascii)) | 674 (not (eq mail-parse-charset 'us-ascii)) |
530 (not (eq mail-parse-charset 'gnus-decoded))) | 675 (not (eq mail-parse-charset 'gnus-decoded))) |
531 (mm-decode-coding-region b (point-max) mail-parse-charset)) | 676 (mm-decode-coding-region b (point-max) mail-parse-charset)))))) |
532 (rfc2047-unfold-region (point-min) (point-max)))))) | |
533 | 677 |
534 (defun rfc2047-decode-string (string) | 678 (defun rfc2047-decode-string (string) |
535 "Decode the quoted-printable-encoded STRING and return the results." | 679 "Decode the quoted-printable-encoded STRING and return the results." |
536 (let ((m (mm-multibyte-p))) | 680 (let ((m (mm-multibyte-p))) |
537 (with-temp-buffer | 681 (if (string-match "=\\?" string) |
538 (when m | 682 (with-temp-buffer |
539 (mm-enable-multibyte)) | 683 ;; Fixme: This logic is wrong, but seems to be required by |
540 (insert string) | 684 ;; Gnus summary buffer generation. The value of `m' depends |
541 (inline | 685 ;; on the current buffer, not global multibyteness or that |
542 (rfc2047-decode-region (point-min) (point-max))) | 686 ;; of the string. Also the string returned should always be |
543 (buffer-string)))) | 687 ;; multibyte in a multibyte session, i.e. the buffer should |
688 ;; be multibyte before `buffer-string' is called. | |
689 (when m | |
690 (mm-enable-multibyte)) | |
691 (insert string) | |
692 (inline | |
693 (rfc2047-decode-region (point-min) (point-max))) | |
694 (buffer-string)) | |
695 ;; Fixme: As above, `m' here is inappropriate. | |
696 (if (and m | |
697 mail-parse-charset | |
698 (not (eq mail-parse-charset 'us-ascii)) | |
699 (not (eq mail-parse-charset 'gnus-decoded))) | |
700 (mm-decode-coding-string string mail-parse-charset) | |
701 (mm-string-as-multibyte string))))) | |
544 | 702 |
545 (defun rfc2047-parse-and-decode (word) | 703 (defun rfc2047-parse-and-decode (word) |
546 "Decode WORD and return it if it is an encoded word. | 704 "Decode WORD and return it if it is an encoded word. |
547 Return WORD if not." | 705 Return WORD if it is not not an encoded word or if the charset isn't |
706 decodable." | |
548 (if (not (string-match rfc2047-encoded-word-regexp word)) | 707 (if (not (string-match rfc2047-encoded-word-regexp word)) |
549 word | 708 word |
550 (or | 709 (or |
551 (condition-case nil | 710 (condition-case nil |
552 (rfc2047-decode | 711 (rfc2047-decode |
553 (match-string 1 word) | 712 (match-string 1 word) |
554 (upcase (match-string 2 word)) | 713 (upcase (match-string 2 word)) |
555 (match-string 3 word)) | 714 (match-string 3 word)) |
556 (error word)) | 715 (error word)) |
557 word))) | 716 word))) ; un-decodable |
717 | |
718 (defun rfc2047-pad-base64 (string) | |
719 "Pad STRING to quartets." | |
720 ;; Be more liberal to accept buggy base64 strings. If | |
721 ;; base64-decode-string accepts buggy strings, this function could | |
722 ;; be aliased to identity. | |
723 (case (mod (length string) 4) | |
724 (0 string) | |
725 (1 string) ;; Error, don't pad it. | |
726 (2 (concat string "==")) | |
727 (3 (concat string "=")))) | |
558 | 728 |
559 (defun rfc2047-decode (charset encoding string) | 729 (defun rfc2047-decode (charset encoding string) |
560 "Decode STRING from the given MIME CHARSET in the given ENCODING. | 730 "Decode STRING from the given MIME CHARSET in the given ENCODING. |
561 Valid ENCODINGs are \"B\" and \"Q\". | 731 Valid ENCODINGs are \"B\" and \"Q\". |
562 If your Emacs implementation can't decode CHARSET, return nil." | 732 If your Emacs implementation can't decode CHARSET, return nil." |
574 (setq cs (mm-charset-to-coding-system mail-parse-charset))) | 744 (setq cs (mm-charset-to-coding-system mail-parse-charset))) |
575 (when cs | 745 (when cs |
576 (when (and (eq cs 'ascii) | 746 (when (and (eq cs 'ascii) |
577 mail-parse-charset) | 747 mail-parse-charset) |
578 (setq cs mail-parse-charset)) | 748 (setq cs mail-parse-charset)) |
579 ;; Ensure unibyte result in Emacs 20. | 749 (mm-decode-coding-string |
580 (let (default-enable-multibyte-characters) | 750 (cond |
581 (with-temp-buffer | 751 ((equal "B" encoding) |
582 (mm-decode-coding-string | 752 (base64-decode-string |
583 (cond | 753 (rfc2047-pad-base64 string))) |
584 ((equal "B" encoding) | 754 ((equal "Q" encoding) |
585 (base64-decode-string string)) | 755 (quoted-printable-decode-string |
586 ((equal "Q" encoding) | 756 (mm-replace-chars-in-string string ?_ ? ))) |
587 (quoted-printable-decode-string | 757 (t (error "Invalid encoding: %s" encoding))) |
588 (mm-replace-chars-in-string string ?_ ? ))) | 758 cs)))) |
589 (t (error "Invalid encoding: %s" encoding))) | |
590 cs)))))) | |
591 | 759 |
592 (provide 'rfc2047) | 760 (provide 'rfc2047) |
593 | 761 |
594 ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 | 762 ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6 |
595 ;;; rfc2047.el ends here | 763 ;;; rfc2047.el ends here |