Mercurial > emacs
annotate lisp/gnus/rfc2047.el @ 89000:56933b338052
(rfc2047-encode): Fix last change.
author | Dave Love <fx@gnu.org> |
---|---|
date | Sun, 18 Aug 2002 16:56:23 +0000 |
parents | 4cda993e8f6e |
children | b39c11cf3b5d |
rev | line source |
---|---|
38413
a26d9b55abb6
Some fixes to follow coding conventions in files from Gnus.
Pavel Janík <Pavel@Janik.cz>
parents:
35985
diff
changeset
|
1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
31717 | 2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. |
3 | |
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 ;; This file is part of GNU Emacs. | |
7 | |
8 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 ;; it under the terms of the GNU General Public License as published by | |
10 ;; the Free Software Foundation; either version 2, or (at your option) | |
11 ;; any later version. | |
12 | |
13 ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 ;; GNU General Public License for more details. | |
17 | |
18 ;; You should have received a copy of the GNU General Public License | |
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 ;; Boston, MA 02111-1307, USA. | |
22 | |
23 ;;; Commentary: | |
24 | |
34674 | 25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
26 ;; Three: Message Header Extensions for Non-ASCII Text". | |
27 | |
31717 | 28 ;;; Code: |
29 | |
33304 | 30 (eval-when-compile (require 'cl)) |
31717 | 31 |
32 (require 'qp) | |
33 (require 'mm-util) | |
34 (require 'ietf-drums) | |
35 (require 'mail-prsvr) | |
33304 | 36 (require 'base64) |
37 ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus. | |
38 (require 'gnus-util) | |
39 (autoload 'mm-body-7-or-8 "mm-bodies") | |
33127
eca95f9d7f05
(base64): Require unconditionally.
Dave Love <fx@gnu.org>
parents:
31764
diff
changeset
|
40 |
(defvar rfc2047-header-encoding-alist
  '(("Newsgroups")
    ("Message-ID")
    (t . mime))
  "*Alist mapping headers to the method used to encode them.
Entries are tried in order.  Each key is either a regexp that is
matched at the start of the header, or t, which matches any header.

Each value is one of:

nil       -- leave the header unencoded;
`mime'    -- encode the header according to RFC2047;
a charset -- encode the header in that charset;
`default' -- encode the header the same way as the rest of the
             article.")
56 | |
(defvar rfc2047-charset-encoding-alist
  '((us-ascii)
    ;; Latin and Greek charsets: quoted-printable.
    (iso-8859-1 . Q)
    (iso-8859-2 . Q)
    (iso-8859-3 . Q)
    (iso-8859-4 . Q)
    ;; Cyrillic: base64.
    (iso-8859-5 . B)
    (koi8-r . B)
    (iso-8859-7 . Q)
    ;; Hebrew: base64.
    (iso-8859-8 . B)
    (iso-8859-9 . Q)
    (iso-8859-14 . Q)
    (iso-8859-15 . Q)
    ;; CJK charsets: base64.
    (iso-2022-jp . B)
    (iso-2022-kr . B)
    (gb2312 . B)
    (big5 . B)
    (cn-big5 . B)
    (cn-gb . B)
    (cn-gb-2312 . B)
    (euc-kr . B)
    (iso-2022-jp-2 . B)
    (iso-2022-int-1 . B))
  "Alist mapping MIME charsets to the RFC2047 encoding used for them.
Each value is nil, `Q' or `B', meaning binary (no) encoding,
quoted-printable and base64 respectively.")
31717 | 83 |
(defvar rfc2047-encoding-function-alist
  '((Q . rfc2047-q-encode-region)
    (B . rfc2047-b-encode-region)
    ;; No encoding requested: call `ignore', which does nothing.
    (nil . ignore))
  "Alist mapping each RFC2047 encoding to the function that performs it.
The functions are called with the start and end of the region to encode.")
89 | |
(defvar rfc2047-q-encoding-alist
  ;; Every alternative must be separated by `\\|'.  A lone `|' is a
  ;; literal character in an Emacs regexp, so a mistyped separator
  ;; silently removes the alternatives on either side of it.
  '(("\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\):" . "-A-Za-z0-9!*+/")
    ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
    ;; Avoid using 8bit characters.
    ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
    ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
  "Alist of header regexps and valid Q characters.
Each element is (REGEXP . CHARS).  `rfc2047-q-encode-region' uses
the CHARS of the first element whose REGEXP matches at the start
of the region as the class argument to
`quoted-printable-encode-region'.")
97 | |
98 ;;; | |
99 ;;; Functions for encoding RFC2047 messages | |
100 ;;; | |
101 | |
(defun rfc2047-narrow-to-field ()
  "Narrow the buffer to the header field that starts on the current line.
The field runs up to the next line that does not begin with a
space, a tab or a newline, or to the end of the accessible
portion if there is no such line.  Point is left at the start of
the field."
  (beginning-of-line)
  (let ((field-start (point)))
    (forward-line 1)
    (narrow-to-region
     field-start
     (cond
      ;; The next line that does not start with whitespace ends the field.
      ((re-search-forward "^[^ \n\t]" nil t)
       (beginning-of-line)
       (point))
      (t
       (point-max)))))
  (goto-char (point-min)))
115 | |
(defun rfc2047-encode-message-header ()
  "Encode each header field as directed by `rfc2047-header-encoding-alist'.
The buffer should be narrowed to the head of the message before
this is called."
  (interactive "*")
  (save-excursion
    (goto-char (point-min))
    (let (alist elem method)
      ;; Handle one header field per iteration.
      (while (not (eobp))
	(save-restriction
	  (rfc2047-narrow-to-field)
	  (cond
	   ((rfc2047-encodable-p)
	    ;; We found something that may perhaps be encoded.
	    ;; The first matching alist entry decides the method.
	    (setq method nil
		  alist rfc2047-header-encoding-alist)
	    (while (setq elem (pop alist))
	      (when (or (eq (car elem) t)
			(and (stringp (car elem))
			     (looking-at (car elem))))
		(setq alist nil
		      method (cdr elem))))
	    (cond
	     ((eq method 'mime)
	      (rfc2047-encode-region (point-min) (point-max)))
	     ((eq method 'default)
	      (when (and (featurep 'mule)
			 (and (boundp 'default-enable-multibyte-characters)
			      default-enable-multibyte-characters)
			 mail-parse-charset)
		(mm-encode-coding-region (point-min) (point-max)
					 mail-parse-charset)))
	     ((mm-coding-system-p method)
	      (when (and (featurep 'mule)
			 (and (boundp 'default-enable-multibyte-characters)
			      default-enable-multibyte-characters))
		(mm-encode-coding-region (point-min) (point-max) method)))
	     ;; Hm.
	     (t)))
	   ;; 8 bit must be decoded.
	   ;; Is message-posting-charset a coding system?
	   ((and (eq (mm-body-7-or-8) '8bit)
		 (mm-multibyte-p)
		 (mm-coding-system-p
		  (car message-posting-charset)))
	    (mm-encode-coding-region
	     (point-min) (point-max)
	     (car message-posting-charset))))
	  ;; Move past this field so the next iteration sees the next one.
	  (goto-char (point-max)))))))
163 | |
35985
b9c371244b90
(rfc2047-fold-region): Don't forward-char at EOB.
Dave Love <fx@gnu.org>
parents:
35838
diff
changeset
|
164 ;; Fixme: This, and the require below may not be the Right Thing, but |
b9c371244b90
(rfc2047-fold-region): Don't forward-char at EOB.
Dave Love <fx@gnu.org>
parents:
35838
diff
changeset
|
165 ;; should be safe just before release. -- fx 2001-02-08 |
b9c371244b90
(rfc2047-fold-region): Don't forward-char at EOB.
Dave Love <fx@gnu.org>
parents:
35838
diff
changeset
|
166 (eval-when-compile (defvar message-posting-charset)) |
b9c371244b90
(rfc2047-fold-region): Don't forward-char at EOB.
Dave Love <fx@gnu.org>
parents:
35838
diff
changeset
|
167 |
(defun rfc2047-encodable-p ()
  "Return non-nil if the current buffer has text that needs header encoding.
Only the accessible portion of the buffer is examined, so the
buffer may be narrowed."
  (require 'message)			; for message-posting-charset
  (let ((found (mm-find-mime-charset-region (point-min) (point-max))))
    ;; Nothing to do when no charsets are reported, or when the only
    ;; one reported is exactly `message-posting-charset'.
    (if found
	(not (equal found (list message-posting-charset))))))
31717 | 175 |
(defun rfc2047-dissect-region (b e)
  "Split the text between B and E into a list of words.
Each element of the returned list is a cons (TEXT . ENCODE).
ENCODE is nil for stretches of ASCII text and non-nil for
stretches that start at a word containing a non-ASCII character."
  (let ((word-chars "-A-Za-z0-9!*+/")
	;; Not using ietf-drums-specials-token makes life simple.
	mail-parse-mule-charset
	pieces here merged piece)
    (save-restriction
      (narrow-to-region b e)
      (goto-char (point-min))
      (skip-chars-forward "\000-\177")
      ;; Fixme: This loop used to check charsets when it found
      ;; non-ASCII characters.  That's removed, since it doesn't make
      ;; much sense in Emacs 22 and doesn't seem necessary in Emacs
      ;; 21, even.  I'm not sure exactly what it should be doing, and
      ;; it needs another look, especially for efficiency's sake.  -- fx
      (while (not (eobp))
	;; Point is on a non-ASCII character.  Everything from B up to
	;; the start of the word around it is plain ASCII text.
	(setq here (point))
	(skip-chars-backward word-chars b)
	(unless (eq b (point))
	  (push (cons (buffer-substring b (point)) nil) pieces))
	(setq b (point))
	(goto-char here)
	(forward-char 1)
	(skip-chars-forward word-chars)
	;; With the charset check gone, this runs to the end of the region.
	(while (not (eobp))
	  (forward-char 1)
	  (skip-chars-forward word-chars))
	(unless (eq b (point))
	  (push (cons (buffer-substring b (point)) t) pieces))
	(setq b (point))
	(skip-chars-forward "\000-\177"))
      (unless (eq b (point))
	(push (cons (buffer-substring b (point)) nil) pieces)))
    ;; merge adjacent words
    ;; PIECES is in reverse buffer order; popping from it and pushing
    ;; onto MERGED restores the original order.
    (setq piece (pop pieces))
    (while piece
      (cond
       ;; PIECE cannot be merged unless it needs encoding and the piece
       ;; before it is unencoded text made only of spaces and tabs.
       ((not (and (cdr piece)
		  (caar pieces)
		  (not (cdar pieces))
		  (not (string-match "[^ \t]" (caar pieces)))))
	(push piece merged)
	(setq piece (pop pieces)))
       ;; The piece before the whitespace has the same flag: join all
       ;; three and reconsider the joined piece.
       ((eq (cdr (nth 1 pieces)) (cdr piece))
	(setq piece (cons (concat (car (nth 1 pieces))
				  (caar pieces)
				  (car piece))
			  (cdr piece)))
	(pop pieces)
	(pop pieces))
       ;; Otherwise just absorb the whitespace into PIECE.
       (t
	(push (cons (concat (caar pieces) (car piece)) (cdr piece))
	      merged)
	(pop pieces)
	(setq piece (pop pieces)))))
    merged))
31717 | 234 |
(defun rfc2047-encode-region (b e)
  "Encode every encodable word in the region from B to E.
The region is split with `rfc2047-dissect-region', emptied, and
rebuilt word by word; words flagged for encoding go through
`rfc2047-encode'.  The result is folded with `rfc2047-fold-region'."
  (let ((pending (rfc2047-dissect-region b e))
	item)
    (save-restriction
      (narrow-to-region b e)
      (delete-region (point-min) (point-max))
      (while (setq item (pop pending))
	(cond
	 ;; Plain text is reinserted unchanged.
	 ((null (cdr item))
	  (insert (car item)))
	 (t
	  (rfc2047-fold-region (gnus-point-at-bol) (point))
	  (goto-char (point-max))
	  ;; Start a continuation line if the current one is already long.
	  (when (> (- (point) (save-restriction
				(widen)
				(gnus-point-at-bol)))
		   76)
	    (insert "\n "))
	  ;; Insert blank between encoded words
	  (when (eq (char-before) ?=)
	    (insert " "))
	  (let ((word-start (point)))
	    (insert (car item))
	    (rfc2047-encode word-start (point))))))
      (rfc2047-fold-region (point-min) (point-max)))))
31717 | 255 |
(defun rfc2047-encode-string (string)
  "Return a copy of STRING with its encodable words encoded.
The work is done by `rfc2047-encode-region' in a temporary buffer."
  (with-temp-buffer
    (insert string)
    (let ((from (point-min))
	  (to (point-max)))
      (rfc2047-encode-region from to))
    (buffer-string)))
262 | |
88792
690e82320649
(rfc2047-encodable-p): Avoid mm-find-charset-region.
Dave Love <fx@gnu.org>
parents:
38413
diff
changeset
|
(defun rfc2047-encode (b e)
  "Encode the word in the region from B to E as RFC2047 encoded words.
Signal an error if the word needs more than one MIME charset.
The encoding is looked up in `rfc2047-charset-encoding-alist',
falling back to `B'."
  (let* ((source (current-buffer))
	 (mime-charset (with-temp-buffer
			 (insert-buffer-substring source b e)
			 (mm-find-mime-charset-region 1 (point-max))))
	 (cs (progn
	       (when (> (length mime-charset) 1)
		 (error "Can't encode word: %s" (buffer-substring b e)))
	       ;; From here on MIME-CHARSET is a single charset symbol.
	       (setq mime-charset (car mime-charset))
	       (mm-charset-to-coding-system mime-charset)))
	 (encoding (or (cdr (assq mime-charset
				  rfc2047-charset-encoding-alist))
		       'B))
	 ;; Prefix of every encoded word: "=?charset?encoding?".
	 (start (concat
		 "=?" (downcase (symbol-name mime-charset)) "?"
		 (downcase (symbol-name encoding)) "?"))
	 (first t))
    (save-restriction
      (narrow-to-region b e)
      (when (eq encoding 'B)
	;; break into lines before encoding
	(goto-char (point-min))
	(while (not (eobp))
	  (goto-char (min (point-max) (+ 15 (point))))
	  (unless (eobp)
	    (insert "\n"))))
      (when (and (mm-multibyte-p)
		 (mm-coding-system-p cs))
	(mm-encode-coding-region (point-min) (point-max) cs))
      (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
	       (point-min) (point-max))
      ;; Wrap each line in "=?...?" and "?=", putting a space before
      ;; every encoded word except the first.
      (goto-char (point-min))
      (while (not (eobp))
	(if first
	    (setq first nil)
	  (insert " "))
	(insert start)
	(end-of-line)
	(insert "?=")
	(forward-line 1)))))
303 | |
(defun rfc2047-fold-region (b e)
  "Fold long lines in region B to E.
A line is broken once it grows past 76 characters, either at the
last whitespace seen (BREAK) or, failing that, at the start of
the last encoded word seen (QWORD-BREAK).  When the break point
is already on a space or tab, that character becomes the leading
whitespace of the continuation line; otherwise a space is
inserted after the newline."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let ((break nil)
	  (qword-break nil)
	  ;; The line start is computed in the widened buffer because
	  ;; the region may begin in the middle of a line.
	  (bol (save-restriction
		 (widen)
		 (gnus-point-at-bol))))
      (while (not (eobp))
	(when (and (or break qword-break) (> (- (point) bol) 76))
	  (goto-char (or break qword-break))
	  (setq break nil
		qword-break nil)
	  ;; Test for "space or tab".  The string " \t" would only
	  ;; match a space followed by a tab, which makes every
	  ;; whitespace break gain a redundant extra space.
	  (if (looking-at "[ \t]")
	      (insert "\n")
	    (insert "\n "))
	  (setq bol (1- (point)))
	  ;; Don't break before the first non-LWSP characters.
	  (skip-chars-forward " \t")
	  (unless (eobp) (forward-char 1)))
	(cond
	 ;; An existing newline starts a fresh line.
	 ((eq (char-after) ?\n)
	  (forward-char 1)
	  (setq bol (point)
		break nil
		qword-break nil)
	  (skip-chars-forward " \t")
	  (unless (or (eobp) (eq (char-after) ?\n))
	    (forward-char 1)))
	 ((eq (char-after) ?\r)
	  (forward-char 1))
	 ;; Whitespace: remember its last character as a break point.
	 ((memq (char-after) '(?\  ?\t))
	  (skip-chars-forward " \t")
	  (setq break (1- (point))))
	 ;; No whitespace break yet: note where an encoded word starts.
	 ((not break)
	  (if (not (looking-at "=\\?[^=]"))
	      (if (eq (char-after) ?=)
		  (forward-char 1)
		(skip-chars-forward "^ \t\n\r="))
	    (setq qword-break (point))
	    (skip-chars-forward "^ \t\n\r")))
	 (t
	  (skip-chars-forward "^ \t\n\r"))))
      ;; Same check once more for the text that ends the region.
      (when (and (or break qword-break) (> (- (point) bol) 76))
	(goto-char (or break qword-break))
	(setq break nil
	      qword-break nil)
	(if (looking-at "[ \t]")
	    (insert "\n")
	  (insert "\n "))
	(setq bol (1- (point)))
	;; Don't break before the first non-LWSP characters.
	(skip-chars-forward " \t")
	(unless (eobp) (forward-char 1))))))
33304 | 360 |
(defun rfc2047-unfold-region (b e)
  "Unfold lines in region B to E.
A line is joined to the one before it when the joined text,
not counting the line's leading spaces and tabs, ends before
column 76.  The last whitespace character at the junction is
kept, so the joined words stay separated."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let ((bol (save-restriction
		 (widen)
		 (gnus-point-at-bol)))
	  (eol (gnus-point-at-eol))
	  leading)
      (forward-line 1)
      (while (not (eobp))
	;; LEADING is the width of this line's leading whitespace.
	(looking-at "[ \t]*")
	(setq leading (- (match-end 0) (match-beginning 0)))
	(if (< (- (gnus-point-at-eol) bol leading) 76)
	    (progn
	      (goto-char eol)
	      (delete-region eol (progn
				   ;; `skip-chars-forward' takes a plain
				   ;; character set, not a regexp: writing
				   ;; "[ \t\n\r]+" would also skip `[', `]'
				   ;; and `+' at the start of the next line.
				   (skip-chars-forward " \t\n\r")
				   (1- (point)))))
	  (setq bol (gnus-point-at-bol)))
	(setq eol (gnus-point-at-eol))
	(forward-line 1)))))
31717 | 384 |
(defun rfc2047-b-encode-region (b e)
  "Base64-encode, line by line, the header text in the region from B to E.
Each line is encoded separately with `base64-encode-region',
without line breaks being added to its output."
  (save-restriction
    (goto-char b)
    (narrow-to-region b e)
    (while (not (eobp))
      (let ((line-start (point)))
	(end-of-line)
	(base64-encode-region line-start (point) t))
      ;; Drop the newline in front of a line that is empty.
      (when (and (bolp) (eolp))
	(delete-backward-char 1))
      (forward-line 1))))
394 | |
(defun rfc2047-q-encode-region (b e)
  "Quoted-printable-encode the header text in the region from B to E.
The set of characters left unencoded comes from the first entry
of `rfc2047-q-encoding-alist' whose regexp matches at B.  Spaces
are replaced by underscores, and long results are split into
lines."
  (save-excursion
    (save-restriction
      (narrow-to-region (goto-char b) e)
      (let ((candidates rfc2047-q-encoding-alist)
	    (bol (save-restriction
		   (widen)
		   (gnus-point-at-bol))))
	;; Use the first entry that matches and stop looking.
	(while candidates
	  (if (looking-at (car (car candidates)))
	      (progn
		(quoted-printable-encode-region
		 b e nil (cdr (car candidates)))
		(subst-char-in-region (point-min) (point-max) ?\  ?_)
		(setq candidates nil))
	    (setq candidates (cdr candidates))))
	;; The size of QP encapsulation is about 20, so set limit to
	;; 56=76-20.
	(when (>= (- (point-max) (point-min)) 56)
	  ;; Don't break if it could fit in one line.
	  ;; Let rfc2047-encode-region break it later.
	  (goto-char (1+ (point-min)))
	  (while (not (or (bobp) (eobp)))
	    (goto-char (min (point-max) (+ 56 bol)))
	    ;; Back up over a nearby "=" so that a "=XX" escape is not split.
	    (search-backward "=" (- (point) 2) t)
	    (unless (or (bobp) (eobp))
	      (insert "\n")
	      (setq bol (point)))))))))
31717 | 422 |
423 ;;; | |
424 ;;; Functions for decoding RFC2047 messages | |
425 ;;; | |
426 | |
(defvar rfc2047-encoded-word-regexp
  "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?="
  "Regexp matching an encoded word of the form \"=?CHARSET?ENCODING?TEXT?=\".
Subexpression 1 is the charset, 2 is the encoding letter (B or Q)
and 3 is the encoded text.")
429 | |
(defun rfc2047-decode-region (start end)
  "Decode MIME-encoded words in region between START and END.
Whitespace between adjacent encoded words is removed, each
encoded word is replaced by its decoded text, and the text
between encoded words is decoded with `mail-parse-charset' when
the buffer is multibyte and that charset is set to something
other than `us-ascii' or `gnus-decoded'.  Finally the region is
unfolded with `rfc2047-unfold-region'."
  (interactive "r")
  (let ((case-fold-search t)
	b e)
    (save-excursion
      (save-restriction
	(narrow-to-region start end)
	(goto-char (point-min))
	;; Remove whitespace between encoded words.
	;; Groups 1 and 6 are the two whole encoded words; point is
	;; left after the first so the second can pair with a third.
	(while (re-search-forward
		(concat "\\(" rfc2047-encoded-word-regexp "\\)"
			"\\(\n?[ \t]\\)+"
			"\\(" rfc2047-encoded-word-regexp "\\)")
		nil t)
	  (delete-region (goto-char (match-end 1)) (match-beginning 6)))
	;; Decode the encoded words.
	(setq b (goto-char (point-min)))
	(while (re-search-forward rfc2047-encoded-word-regexp nil t)
	  (setq e (match-beginning 0))
	  (insert (rfc2047-parse-and-decode
		   (prog1
		       (match-string 0)
		     (delete-region (match-beginning 0) (match-end 0)))))
	  ;; Decode the raw text in front of this word.  The guard is
	  ;; the same one used for the trailing text below, so
	  ;; `us-ascii' is skipped here as well.
	  (when (and (mm-multibyte-p)
		     mail-parse-charset
		     (not (eq mail-parse-charset 'us-ascii))
		     (not (eq mail-parse-charset 'gnus-decoded)))
	    (mm-decode-coding-region b e mail-parse-charset))
	  (setq b (point)))
	;; Decode the raw text after the last encoded word.
	(when (and (mm-multibyte-p)
		   mail-parse-charset
		   (not (eq mail-parse-charset 'us-ascii))
		   (not (eq mail-parse-charset 'gnus-decoded)))
	  (mm-decode-coding-region b (point-max) mail-parse-charset))
	(rfc2047-unfold-region (point-min) (point-max))))))
31717 | 465 |
(defun rfc2047-decode-string (string)
  "Decode the MIME-encoded words in STRING and return the result.
The decoding is done by `rfc2047-decode-region' in a temporary
buffer, which is made multibyte if `mm-multibyte-p' returns
non-nil when this function is called."
  (let ((multibyte (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte
	  (mm-enable-multibyte))
      (insert string)
      (inline
	(rfc2047-decode-region (point-min) (point-max)))
      (buffer-string))))
476 | |
(defun rfc2047-parse-and-decode (word)
  "Return the decoded form of WORD if it is an encoded word.
Return WORD itself if it is not an encoded word, if decoding
signals an error, or if `rfc2047-decode' returns nil."
  (or (and (string-match rfc2047-encoded-word-regexp word)
	   (condition-case nil
	       (rfc2047-decode
		(match-string 1 word)
		(upcase (match-string 2 word))
		(match-string 3 word))
	     ;; A decoding error falls through to returning WORD.
	     (error nil)))
      word))
490 | |
(defun rfc2047-decode (charset encoding string)
  "Decode STRING, which is in MIME CHARSET and encoded with ENCODING.
ENCODING must be \"B\" or \"Q\"; any other value signals an error
once a coding system has been found.  CHARSET may be a string or
a symbol.  Return nil if no coding system can be found for
CHARSET."
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  ;; Fall back to `mail-parse-charset' when the charset is missing or ignored.
  (when (or (not charset)
	    (eq 'gnus-all mail-parse-ignored-charsets)
	    (memq 'gnus-all mail-parse-ignored-charsets)
	    (memq charset mail-parse-ignored-charsets))
    (setq charset mail-parse-charset))
  (let ((coding (mm-charset-to-coding-system charset)))
    ;; Unknown charsets may be mapped to `mail-parse-charset' on request.
    (when (and (not coding) charset
	       (listp mail-parse-ignored-charsets)
	       (memq 'gnus-unknown mail-parse-ignored-charsets))
      (setq coding (mm-charset-to-coding-system mail-parse-charset)))
    (when coding
      (when (and (eq coding 'ascii)
		 mail-parse-charset)
	(setq coding mail-parse-charset))
      ;; Ensure unibyte result in Emacs 20.
      (let (default-enable-multibyte-characters)
	(with-temp-buffer
	  (mm-decode-coding-string
	   (cond
	    ((equal "B" encoding)
	     (base64-decode-string string))
	    ((equal "Q" encoding)
	     ;; Underscores stand for spaces in Q-encoded text.
	     (quoted-printable-decode-string
	      (mm-replace-chars-in-string string ?_ ?\ )))
	    (t (error "Invalid encoding: %s" encoding)))
	   coding))))))
31717 | 523 |
524 (provide 'rfc2047) | |
525 | |
526 ;;; rfc2047.el ends here |