Mercurial > emacs
comparison lisp/gnus/rfc2047.el @ 47945:3915f2c7691e
(message-posting-charset): defvar when compiling.
(ietf-drums, gnus-util): Don't require.
(rfc2047-header-encoding-alist): Add `address-mime' part. Doc
fixes.
(rfc2047-charset-encoding-alist): Use B for iso-8859-7,
iso-8859-8.
(rfc2047-q-encoding-alist): Augment header list.
(rfc2047-encoding-type): New.
(rfc2047-dissect-region): Deleted.
(rfc2047-encode-region, rfc2047-encode): Rewritten to take
account of rfc2047 rules with respect to rfc2822 tokens and to do
encoding in place rather than by passing strings.
(rfc2047-encode-message-header): Don't include header name field
in encoding. Add `address-mime' case and bind
rfc2047-encoding-type for `mime' case.
(rfc2047-encode-string): Doc fix.
(rfc2047-encode): Use longer chunks for base64.
(rfc2047-fold-region): Insert single characters, not strings.
(rfc2047-encoded-word-regexp): Wrap in eval-and-compile.
(rfc2047-decode-region): Avoid consing regexp in loop.
author | Dave Love <fx@gnu.org> |
---|---|
date | Fri, 18 Oct 2002 10:52:56 +0000 |
parents | a26d9b55abb6 |
children | 9cd6016af581 |
comparison
equal
deleted
inserted
replaced
47944:03cfc305a0fa | 47945:3915f2c7691e |
---|---|
1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages | 1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages |
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | 2 ;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. |
3 | 3 |
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | 4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | 5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> |
6 ;; This file is part of GNU Emacs. | 6 ;; This file is part of GNU Emacs. |
7 | 7 |
25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part | 25 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part |
26 ;; Three: Message Header Extensions for Non-ASCII Text". | 26 ;; Three: Message Header Extensions for Non-ASCII Text". |
27 | 27 |
28 ;;; Code: | 28 ;;; Code: |
29 | 29 |
30 (eval-when-compile (require 'cl)) | 30 (eval-when-compile |
31 (require 'cl) | |
32 (defvar message-posting-charset)) | |
31 | 33 |
32 (require 'qp) | 34 (require 'qp) |
33 (require 'mm-util) | 35 (require 'mm-util) |
34 (require 'ietf-drums) | 36 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus. |
35 (require 'mail-prsvr) | 37 (require 'mail-prsvr) |
36 (require 'base64) | 38 (require 'base64) |
37 ;; Fixme: Avoid this (for gnus-point-at-...) mm dependence on gnus. | |
38 (require 'gnus-util) | |
39 (autoload 'mm-body-7-or-8 "mm-bodies") | 39 (autoload 'mm-body-7-or-8 "mm-bodies") |
40 | 40 |
41 (defvar rfc2047-header-encoding-alist | 41 (defvar rfc2047-header-encoding-alist |
42 '(("Newsgroups" . nil) | 42 '(("Newsgroups" . nil) |
43 ("Message-ID" . nil) | 43 ("Message-ID" . nil) |
44 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . | |
45 address-mime) | |
44 (t . mime)) | 46 (t . mime)) |
45 "*Header/encoding method alist. | 47 "*Header/encoding method alist. |
46 The list is traversed sequentially. The keys can either be | 48 The list is traversed sequentially. The keys can either be |
47 header regexps or t. | 49 header regexps or t. |
48 | 50 |
49 The values can be: | 51 The values can be: |
50 | 52 |
51 1) nil, in which case no encoding is done; | 53 1) nil, in which case no encoding is done; |
52 2) `mime', in which case the header will be encoded according to RFC2047; | 54 2) `mime', in which case the header will be encoded according to RFC2047; |
53 3) a charset, in which case it will be encoded as that charset; | 55 3) `address-mime', like `mime', but takes account of the rules for address |
54 4) `default', in which case the field will be encoded as the rest | 56 fields (where quoted strings and comments must be treated separately); |
57 4) a charset, in which case it will be encoded as that charset; | |
58 5) `default', in which case the field will be encoded as the rest | |
55 of the article.") | 59 of the article.") |
56 | 60 |
57 (defvar rfc2047-charset-encoding-alist | 61 (defvar rfc2047-charset-encoding-alist |
58 '((us-ascii . nil) | 62 '((us-ascii . nil) |
59 (iso-8859-1 . Q) | 63 (iso-8859-1 . Q) |
60 (iso-8859-2 . Q) | 64 (iso-8859-2 . Q) |
61 (iso-8859-3 . Q) | 65 (iso-8859-3 . Q) |
62 (iso-8859-4 . Q) | 66 (iso-8859-4 . Q) |
63 (iso-8859-5 . B) | 67 (iso-8859-5 . B) |
64 (koi8-r . B) | 68 (koi8-r . B) |
65 (iso-8859-7 . Q) | 69 (iso-8859-7 . B) |
66 (iso-8859-8 . Q) | 70 (iso-8859-8 . B) |
67 (iso-8859-9 . Q) | 71 (iso-8859-9 . Q) |
68 (iso-8859-14 . Q) | 72 (iso-8859-14 . Q) |
69 (iso-8859-15 . Q) | 73 (iso-8859-15 . Q) |
70 (iso-2022-jp . B) | 74 (iso-2022-jp . B) |
71 (iso-2022-kr . B) | 75 (iso-2022-kr . B) |
76 (cn-gb-2312 . B) | 80 (cn-gb-2312 . B) |
77 (euc-kr . B) | 81 (euc-kr . B) |
78 (iso-2022-jp-2 . B) | 82 (iso-2022-jp-2 . B) |
79 (iso-2022-int-1 . B)) | 83 (iso-2022-int-1 . B)) |
80 "Alist of MIME charsets to RFC2047 encodings. | 84 "Alist of MIME charsets to RFC2047 encodings. |
81 Valid encodings are nil, `Q' and `B'.") | 85 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding, |
86 quoted-printable and base64 respectively.") | |
82 | 87 |
83 (defvar rfc2047-encoding-function-alist | 88 (defvar rfc2047-encoding-function-alist |
84 '((Q . rfc2047-q-encode-region) | 89 '((Q . rfc2047-q-encode-region) |
85 (B . rfc2047-b-encode-region) | 90 (B . rfc2047-b-encode-region) |
86 (nil . ignore)) | 91 (nil . ignore)) |
87 "Alist of RFC2047 encodings to encoding functions.") | 92 "Alist of RFC2047 encodings to encoding functions.") |
88 | 93 |
89 (defvar rfc2047-q-encoding-alist | 94 (defvar rfc2047-q-encoding-alist |
90 '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") | 95 '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" |
96 . "-A-Za-z0-9!*+/" ) | |
91 ;; = (\075), _ (\137), ? (\077) are used in the encoded word. | 97 ;; = (\075), _ (\137), ? (\077) are used in the encoded word. |
92 ;; Avoid using 8bit characters. | 98 ;; Avoid using 8bit characters. |
93 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" | 99 ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" |
94 ("." . "\010\012\014\040-\074\076\100-\136\140-\177")) | 100 ("." . "\010\012\014\040-\074\076\100-\136\140-\177")) |
95 "Alist of header regexps and valid Q characters.") | 101 "Alist of header regexps and valid Q characters.") |
109 (progn | 115 (progn |
110 (beginning-of-line) | 116 (beginning-of-line) |
111 (point)) | 117 (point)) |
112 (point-max)))) | 118 (point-max)))) |
113 (goto-char (point-min))) | 119 (goto-char (point-min))) |
120 | |
121 (defvar rfc2047-encoding-type 'address-mime | |
122 "The type of encoding done by `rfc2047-encode-region'. | |
123 This should be dynamically bound around calls to | |
124 `rfc2047-encode-region' to either `mime' or `address-mime'. See | |
125 `rfc2047-header-encoding-alist', for definitions.") | |
114 | 126 |
115 (defun rfc2047-encode-message-header () | 127 (defun rfc2047-encode-message-header () |
116 "Encode the message header according to `rfc2047-header-encoding-alist'. | 128 "Encode the message header according to `rfc2047-header-encoding-alist'. |
117 Should be called narrowed to the head of the message." | 129 Should be called narrowed to the head of the message." |
118 (interactive "*") | 130 (interactive "*") |
139 (when (or (and (stringp (car elem)) | 151 (when (or (and (stringp (car elem)) |
140 (looking-at (car elem))) | 152 (looking-at (car elem))) |
141 (eq (car elem) t)) | 153 (eq (car elem) t)) |
142 (setq alist nil | 154 (setq alist nil |
143 method (cdr elem)))) | 155 method (cdr elem)))) |
156 (goto-char (point-min)) | |
157 (re-search-forward "^[^:]+: *" nil t) | |
144 (cond | 158 (cond |
159 ((eq method 'address-mime) | |
160 (rfc2047-encode-region (point) (point-max))) | |
145 ((eq method 'mime) | 161 ((eq method 'mime) |
146 (rfc2047-encode-region (point-min) (point-max))) | 162 (let (rfc2047-encoding-type) |
163 (rfc2047-encode-region (point) (point-max)))) | |
147 ((eq method 'default) | 164 ((eq method 'default) |
148 (if (and (featurep 'mule) | 165 (if (and (featurep 'mule) |
149 (if (boundp 'default-enable-multibyte-characters) | 166 (if (boundp 'default-enable-multibyte-characters) |
150 default-enable-multibyte-characters) | 167 default-enable-multibyte-characters) |
151 mail-parse-charset) | 168 mail-parse-charset) |
152 (mm-encode-coding-region (point-min) (point-max) | 169 (mm-encode-coding-region (point) (point-max) |
153 mail-parse-charset))) | 170 mail-parse-charset))) |
154 ((mm-coding-system-p method) | 171 ((mm-coding-system-p method) |
155 (if (and (featurep 'mule) | 172 (if (and (featurep 'mule) |
156 (if (boundp 'default-enable-multibyte-characters) | 173 (if (boundp 'default-enable-multibyte-characters) |
157 default-enable-multibyte-characters)) | 174 default-enable-multibyte-characters)) |
158 (mm-encode-coding-region (point-min) (point-max) method))) | 175 (mm-encode-coding-region (point) (point-max) method))) |
159 ;; Hm. | 176 ;; Hm. |
160 (t))) | 177 (t))) |
161 (goto-char (point-max))))))) | 178 (goto-char (point-max))))))) |
162 | 179 |
163 ;; Fixme: This, and the require below may not be the Right Thing, but | 180 ;; Fixme: This, and the require below may not be the Right Thing, but |
167 (defun rfc2047-encodable-p () | 184 (defun rfc2047-encodable-p () |
168 "Return non-nil if any characters in current buffer need encoding in headers. | 185 "Return non-nil if any characters in current buffer need encoding in headers. |
169 The buffer may be narrowed." | 186 The buffer may be narrowed." |
170 (require 'message) ; for message-posting-charset | 187 (require 'message) ; for message-posting-charset |
171 (let ((charsets | 188 (let ((charsets |
172 (mapcar | 189 (mm-find-mime-charset-region (point-min) (point-max)))) |
173 'mm-mime-charset | 190 (and charsets (not (equal charsets (list message-posting-charset)))))) |
174 (mm-find-charset-region (point-min) (point-max)))) | 191 |
175 (cs (list 'us-ascii (car message-posting-charset))) | 192 ;; Use this syntax table when parsing into regions that may need |
176 found) | 193 ;; encoding. Double quotes are string delimiters, backslash is |
177 (while charsets | 194 ;; character quoting, and all other RFC 2822 special characters are |
178 (unless (memq (pop charsets) cs) | 195 ;; treated as punctuation so we can use forward-sexp/forward-word to |
179 (setq found t))) | 196 ;; skip to the end of regions appropriately. Nb. ietf-drums does |
180 found)) | 197 ;; things differently. |
181 | 198 (defconst rfc2047-syntax-table |
182 (defun rfc2047-dissect-region (b e) | 199 (let ((table (make-char-table 'syntax-table '(2)))) |
183 "Dissect the region between B and E into words." | 200 (modify-syntax-entry ?\\ "\\" table) |
184 (let ((word-chars "-A-Za-z0-9!*+/") | 201 (modify-syntax-entry ?\" "\"" table) |
185 ;; Not using ietf-drums-specials-token makes life simple. | 202 (modify-syntax-entry ?\( "." table) |
186 mail-parse-mule-charset | 203 (modify-syntax-entry ?\) "." table) |
187 words point current | 204 (modify-syntax-entry ?\< "." table) |
188 result word) | 205 (modify-syntax-entry ?\> "." table) |
189 (save-restriction | 206 (modify-syntax-entry ?\[ "." table) |
190 (narrow-to-region b e) | 207 (modify-syntax-entry ?\] "." table) |
191 (goto-char (point-min)) | 208 (modify-syntax-entry ?: "." table) |
192 (skip-chars-forward "\000-\177") | 209 (modify-syntax-entry ?\; "." table) |
193 (while (not (eobp)) | 210 (modify-syntax-entry ?, "." table) |
194 (setq point (point)) | 211 (modify-syntax-entry ?@ "." table) |
195 (skip-chars-backward word-chars b) | 212 table)) |
196 (unless (eq b (point)) | |
197 (push (cons (buffer-substring b (point)) nil) words)) | |
198 (setq b (point)) | |
199 (goto-char point) | |
200 (setq current (mm-charset-after)) | |
201 (forward-char 1) | |
202 (skip-chars-forward word-chars) | |
203 (while (and (not (eobp)) | |
204 (eq (mm-charset-after) current)) | |
205 (forward-char 1) | |
206 (skip-chars-forward word-chars)) | |
207 (unless (eq b (point)) | |
208 (push (cons (buffer-substring b (point)) current) words)) | |
209 (setq b (point)) | |
210 (skip-chars-forward "\000-\177")) | |
211 (unless (eq b (point)) | |
212 (push (cons (buffer-substring b (point)) nil) words))) | |
213 ;; merge adjacent words | |
214 (setq word (pop words)) | |
215 (while word | |
216 (if (and (cdr word) | |
217 (caar words) | |
218 (not (cdar words)) | |
219 (not (string-match "[^ \t]" (caar words)))) | |
220 (if (eq (cdr (nth 1 words)) (cdr word)) | |
221 (progn | |
222 (setq word (cons (concat | |
223 (car (nth 1 words)) (caar words) | |
224 (car word)) | |
225 (cdr word))) | |
226 (pop words) | |
227 (pop words)) | |
228 (push (cons (concat (caar words) (car word)) (cdr word)) | |
229 result) | |
230 (pop words) | |
231 (setq word (pop words))) | |
232 (push word result) | |
233 (setq word (pop words)))) | |
234 result)) | |
235 | 213 |
236 (defun rfc2047-encode-region (b e) | 214 (defun rfc2047-encode-region (b e) |
237 "Encode all encodable words in region B to E." | 215 "Encode words in region B to E that need encoding. |
238 (let ((words (rfc2047-dissect-region b e)) word) | 216 By default, the region is treated as containing RFC2822 addresses. |
239 (save-restriction | 217 Dynamically bind `rfc2047-encoding-type' to change that." |
240 (narrow-to-region b e) | 218 (save-restriction |
241 (delete-region (point-min) (point-max)) | 219 (narrow-to-region b e) |
242 (while (setq word (pop words)) | 220 (if (eq 'mime rfc2047-encoding-type) |
243 (if (not (cdr word)) | 221 ;; Simple case -- treat as single word. |
244 (insert (car word)) | 222 (progn |
245 (rfc2047-fold-region (gnus-point-at-bol) (point)) | 223 (goto-char (point-min)) |
246 (goto-char (point-max)) | 224 ;; Does it need encoding? |
247 (if (> (- (point) (save-restriction | 225 (skip-chars-forward "\000-\177" e) |
248 (widen) | 226 (unless (eobp) |
249 (gnus-point-at-bol))) 76) | 227 (rfc2047-encode b e))) |
250 (insert "\n ")) | 228 ;; `address-mime' case -- take care of quoted words, comments. |
251 ;; Insert blank between encoded words | 229 (with-syntax-table rfc2047-syntax-table |
252 (if (eq (char-before) ?=) (insert " ")) | 230 (let ((start (point)) ; start of current token |
253 (rfc2047-encode (point) | 231 end ; end of current token |
254 (progn (insert (car word)) (point)) | 232 ;; Whether there's an encoded word before the current |
255 (cdr word)))) | 233 ;; tpken, either immediately or separated by space. |
256 (rfc2047-fold-region (point-min) (point-max))))) | 234 last-encoded) |
235 (goto-char (point-min)) | |
236 (condition-case nil ; in case of unbalanced quotes | |
237 ;; Look for rfc2822-style: sequences of atoms, quoted | |
238 ;; strings, specials, whitespace. (Specials mustn't be | |
239 ;; encoded.) | |
240 (while (not (eobp)) | |
241 (setq start (point)) | |
242 ;; Skip whitespace. | |
243 (unless (= 0 (skip-chars-forward " \t")) | |
244 (setq start (point))) | |
245 (cond | |
246 ((not (char-after))) ; eob | |
247 ;; else token start | |
248 ((eq ?\" (char-syntax (char-after))) | |
249 ;; Quoted word. | |
250 (forward-sexp) | |
251 (setq end (point)) | |
252 ;; Does it need encoding? | |
253 (goto-char start) | |
254 (skip-chars-forward "\000-\177" end) | |
255 (if (= end (point)) | |
256 (setq last-encoded nil) | |
257 ;; It needs encoding. Strip the quotes first, | |
258 ;; since encoded words can't occur in quotes. | |
259 (goto-char end) | |
260 (delete-backward-char 1) | |
261 (goto-char start) | |
262 (delete-char 1) | |
263 (when last-encoded | |
264 ;; There was a preceding quoted word. We need | |
265 ;; to include any separating whitespace in this | |
266 ;; word to avoid it getting lost. | |
267 (skip-chars-backward " \t") | |
268 ;; A space is needed between the encoded words. | |
269 (insert ? ) | |
270 (setq start (point) | |
271 end (1+ end))) | |
272 ;; Adjust the end position for the deleted quotes. | |
273 (rfc2047-encode start (- end 2)) | |
274 (setq last-encoded t))) ; record that it was encoded | |
275 ((eq ?. (char-syntax (char-after))) | |
276 ;; Skip other delimiters, but record that they've | |
277 ;; potentially separated quoted words. | |
278 (forward-char) | |
279 (setq last-encoded nil)) | |
280 (t ; normal token/whitespace sequence | |
281 ;; Find the end. | |
282 (forward-word 1) | |
283 (skip-chars-backward " \t") | |
284 (setq end (point)) | |
285 ;; Deal with encoding and leading space as for | |
286 ;; quoted words. | |
287 (goto-char start) | |
288 (skip-chars-forward "\000-\177" end) | |
289 (if (= end (point)) | |
290 (setq last-encoded nil) | |
291 (when last-encoded | |
292 (goto-char start) | |
293 (skip-chars-backward " \t") | |
294 (insert ? ) | |
295 (setq start (point) | |
296 end (1+ end))) | |
297 (rfc2047-encode start end) | |
298 (setq last-encoded t))))) | |
299 (error (error "Invalid data for rfc2047 encoding: %s" | |
300 (buffer-substring b e))))))) | |
301 (rfc2047-fold-region b (point)))) | |
257 | 302 |
258 (defun rfc2047-encode-string (string) | 303 (defun rfc2047-encode-string (string) |
259 "Encode words in STRING." | 304 "Encode words in STRING. |
305 By default, the string is treated as containing addresses (see | |
306 `rfc2047-special-chars')." | |
260 (with-temp-buffer | 307 (with-temp-buffer |
261 (insert string) | 308 (insert string) |
262 (rfc2047-encode-region (point-min) (point-max)) | 309 (rfc2047-encode-region (point-min) (point-max)) |
263 (buffer-string))) | 310 (buffer-string))) |
264 | 311 |
265 (defun rfc2047-encode (b e charset) | 312 (defun rfc2047-encode (b e) |
266 "Encode the word in the region B to E with CHARSET." | 313 "Encode the word(s) in the region B to E. |
267 (let* ((mime-charset (mm-mime-charset charset)) | 314 By default, the region is treated as containing addresses (see |
268 (cs (mm-charset-to-coding-system mime-charset)) | 315 `rfc2047-special-chars')." |
269 (encoding (or (cdr (assq mime-charset | 316 (let* ((mime-charset (mm-find-mime-charset-region b e)) |
317 (cs (if (> (length mime-charset) 1) | |
318 ;; Fixme: Instead of this, try to break region into | |
319 ;; parts that can be encoded separately. | |
320 (error "Can't rfc2047-encode `%s'" | |
321 (buffer-substring b e)) | |
322 (setq mime-charset (car mime-charset)) | |
323 (mm-charset-to-coding-system mime-charset))) | |
324 ;; Fixme: Better, calculate the number of non-ASCII | |
325 ;; characters, at least for 8-bit charsets. | |
326 (encoding (if (assq mime-charset | |
327 rfc2047-charset-encoding-alist) | |
328 (cdr (assq mime-charset | |
270 rfc2047-charset-encoding-alist)) | 329 rfc2047-charset-encoding-alist)) |
271 'B)) | 330 'B)) |
272 (start (concat | 331 (start (concat |
273 "=?" (downcase (symbol-name mime-charset)) "?" | 332 "=?" (downcase (symbol-name mime-charset)) "?" |
274 (downcase (symbol-name encoding)) "?")) | 333 (downcase (symbol-name encoding)) "?")) |
275 (first t)) | 334 (first t)) |
276 (save-restriction | 335 (if mime-charset |
277 (narrow-to-region b e) | 336 (save-restriction |
278 (when (eq encoding 'B) | 337 (narrow-to-region b e) |
279 ;; break into lines before encoding | 338 (when (eq encoding 'B) |
280 (goto-char (point-min)) | 339 ;; break into lines before encoding |
281 (while (not (eobp)) | 340 (goto-char (point-min)) |
282 (goto-char (min (point-max) (+ 15 (point)))) | 341 (while (not (eobp)) |
283 (unless (eobp) | 342 (goto-char (min (point-max) (+ 15 (point)))) |
284 (insert "\n")))) | 343 (unless (eobp) |
285 (if (and (mm-multibyte-p) | 344 (insert ?\n)))) |
286 (mm-coding-system-p cs)) | 345 (if (and (mm-multibyte-p) |
287 (mm-encode-coding-region (point-min) (point-max) cs)) | 346 (mm-coding-system-p cs)) |
288 (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) | 347 (mm-encode-coding-region (point-min) (point-max) cs)) |
289 (point-min) (point-max)) | 348 (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) |
290 (goto-char (point-min)) | 349 (point-min) (point-max)) |
291 (while (not (eobp)) | 350 (goto-char (point-min)) |
292 (unless first | 351 (while (not (eobp)) |
293 (insert " ")) | 352 (unless first |
294 (setq first nil) | 353 (insert ? )) |
295 (insert start) | 354 (setq first nil) |
296 (end-of-line) | 355 (insert start) |
297 (insert "?=") | 356 (end-of-line) |
298 (forward-line 1))))) | 357 (insert "?=") |
358 (forward-line 1)))))) | |
299 | 359 |
300 (defun rfc2047-fold-region (b e) | 360 (defun rfc2047-fold-region (b e) |
301 "Fold long lines in region B to E." | 361 "Fold long lines in region B to E." |
302 (save-restriction | 362 (save-restriction |
303 (narrow-to-region b e) | 363 (narrow-to-region b e) |
304 (goto-char (point-min)) | 364 (goto-char (point-min)) |
305 (let ((break nil) | 365 (let ((break nil) |
306 (qword-break nil) | 366 (qword-break nil) |
307 (bol (save-restriction | 367 (bol (save-restriction |
308 (widen) | 368 (widen) |
309 (gnus-point-at-bol)))) | 369 (mm-point-at-bol)))) |
310 (while (not (eobp)) | 370 (while (not (eobp)) |
311 (when (and (or break qword-break) (> (- (point) bol) 76)) | 371 (when (and (or break qword-break) (> (- (point) bol) 76)) |
312 (goto-char (or break qword-break)) | 372 (goto-char (or break qword-break)) |
313 (setq break nil | 373 (setq break nil |
314 qword-break nil) | 374 qword-break nil) |
315 (if (looking-at " \t") | 375 (if (looking-at " \t") |
316 (insert "\n") | 376 (insert ?\n) |
317 (insert "\n ")) | 377 (insert "\n ")) |
318 (setq bol (1- (point))) | 378 (setq bol (1- (point))) |
319 ;; Don't break before the first non-LWSP characters. | 379 ;; Don't break before the first non-LWSP characters. |
320 (skip-chars-forward " \t") | 380 (skip-chars-forward " \t") |
321 (unless (eobp) (forward-char 1))) | 381 (unless (eobp) (forward-char 1))) |
345 (when (and (or break qword-break) (> (- (point) bol) 76)) | 405 (when (and (or break qword-break) (> (- (point) bol) 76)) |
346 (goto-char (or break qword-break)) | 406 (goto-char (or break qword-break)) |
347 (setq break nil | 407 (setq break nil |
348 qword-break nil) | 408 qword-break nil) |
349 (if (looking-at " \t") | 409 (if (looking-at " \t") |
350 (insert "\n") | 410 (insert ?\n) |
351 (insert "\n ")) | 411 (insert "\n ")) |
352 (setq bol (1- (point))) | 412 (setq bol (1- (point))) |
353 ;; Don't break before the first non-LWSP characters. | 413 ;; Don't break before the first non-LWSP characters. |
354 (skip-chars-forward " \t") | 414 (skip-chars-forward " \t") |
355 (unless (eobp) (forward-char 1)))))) | 415 (unless (eobp) (forward-char 1)))))) |
359 (save-restriction | 419 (save-restriction |
360 (narrow-to-region b e) | 420 (narrow-to-region b e) |
361 (goto-char (point-min)) | 421 (goto-char (point-min)) |
362 (let ((bol (save-restriction | 422 (let ((bol (save-restriction |
363 (widen) | 423 (widen) |
364 (gnus-point-at-bol))) | 424 (mm-point-at-bol))) |
365 (eol (gnus-point-at-eol)) | 425 (eol (mm-point-at-eol)) |
366 leading) | 426 leading) |
367 (forward-line 1) | 427 (forward-line 1) |
368 (while (not (eobp)) | 428 (while (not (eobp)) |
369 (looking-at "[ \t]*") | 429 (looking-at "[ \t]*") |
370 (setq leading (- (match-end 0) (match-beginning 0))) | 430 (setq leading (- (match-end 0) (match-beginning 0))) |
371 (if (< (- (gnus-point-at-eol) bol leading) 76) | 431 (if (< (- (mm-point-at-eol) bol leading) 76) |
372 (progn | 432 (progn |
373 (goto-char eol) | 433 (goto-char eol) |
374 (delete-region eol (progn | 434 (delete-region eol (progn |
375 (skip-chars-forward "[ \t\n\r]+") | 435 (skip-chars-forward "[ \t\n\r]+") |
376 (1- (point))))) | 436 (1- (point))))) |
377 (setq bol (gnus-point-at-bol))) | 437 (setq bol (mm-point-at-bol))) |
378 (setq eol (gnus-point-at-eol)) | 438 (setq eol (mm-point-at-eol)) |
379 (forward-line 1))))) | 439 (forward-line 1))))) |
380 | 440 |
381 (defun rfc2047-b-encode-region (b e) | 441 (defun rfc2047-b-encode-region (b e) |
382 "Base64-encode the header contained in region B to E." | 442 "Base64-encode the header contained in region B to E." |
383 (save-restriction | 443 (save-restriction |
394 (save-restriction | 454 (save-restriction |
395 (narrow-to-region (goto-char b) e) | 455 (narrow-to-region (goto-char b) e) |
396 (let ((alist rfc2047-q-encoding-alist) | 456 (let ((alist rfc2047-q-encoding-alist) |
397 (bol (save-restriction | 457 (bol (save-restriction |
398 (widen) | 458 (widen) |
399 (gnus-point-at-bol)))) | 459 (mm-point-at-bol)))) |
400 (while alist | 460 (while alist |
401 (when (looking-at (caar alist)) | 461 (when (looking-at (caar alist)) |
402 (quoted-printable-encode-region b e nil (cdar alist)) | 462 (quoted-printable-encode-region b e nil (cdar alist)) |
403 (subst-char-in-region (point-min) (point-max) ? ?_) | 463 (subst-char-in-region (point-min) (point-max) ? ?_) |
404 (setq alist nil)) | 464 (setq alist nil)) |
411 (goto-char (1+ (point-min))) | 471 (goto-char (1+ (point-min))) |
412 (while (and (not (bobp)) (not (eobp))) | 472 (while (and (not (bobp)) (not (eobp))) |
413 (goto-char (min (point-max) (+ 56 bol))) | 473 (goto-char (min (point-max) (+ 56 bol))) |
414 (search-backward "=" (- (point) 2) t) | 474 (search-backward "=" (- (point) 2) t) |
415 (unless (or (bobp) (eobp)) | 475 (unless (or (bobp) (eobp)) |
416 (insert "\n") | 476 (insert ?\n) |
417 (setq bol (point))))))))) | 477 (setq bol (point))))))))) |
418 | 478 |
419 ;;; | 479 ;;; |
420 ;;; Functions for decoding RFC2047 messages | 480 ;;; Functions for decoding RFC2047 messages |
421 ;;; | 481 ;;; |
422 | 482 |
423 (defvar rfc2047-encoded-word-regexp | 483 (eval-and-compile |
424 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?=") | 484 (defvar rfc2047-encoded-word-regexp |
485 "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\ | |
486 \\([!->@-~ +]+\\)\\?=")) | |
425 | 487 |
426 (defun rfc2047-decode-region (start end) | 488 (defun rfc2047-decode-region (start end) |
427 "Decode MIME-encoded words in region between START and END." | 489 "Decode MIME-encoded words in region between START and END." |
428 (interactive "r") | 490 (interactive "r") |
429 (let ((case-fold-search t) | 491 (let ((case-fold-search t) |
492 (undoing (not (eq t buffer-undo-list))) | |
430 b e) | 493 b e) |
431 (save-excursion | 494 (unwind-protect |
432 (save-restriction | 495 (save-excursion |
433 (narrow-to-region start end) | 496 (save-restriction |
434 (goto-char (point-min)) | 497 (buffer-enable-undo) |
435 ;; Remove whitespace between encoded words. | 498 (narrow-to-region start end) |
436 (while (re-search-forward | 499 (goto-char (point-min)) |
437 (concat "\\(" rfc2047-encoded-word-regexp "\\)" | 500 ;; Remove whitespace between encoded words. |
438 "\\(\n?[ \t]\\)+" | 501 (while (re-search-forward |
439 "\\(" rfc2047-encoded-word-regexp "\\)") | 502 (eval-when-compile |
440 nil t) | 503 (concat "\\(" rfc2047-encoded-word-regexp "\\)" |
441 (delete-region (goto-char (match-end 1)) (match-beginning 6))) | 504 "\\(\n?[ \t]\\)+" |
442 ;; Decode the encoded words. | 505 "\\(" rfc2047-encoded-word-regexp "\\)")) |
443 (setq b (goto-char (point-min))) | 506 nil t) |
444 (while (re-search-forward rfc2047-encoded-word-regexp nil t) | 507 (delete-region (goto-char (match-end 1)) (match-beginning 6))) |
445 (setq e (match-beginning 0)) | 508 ;; Decode the encoded words. |
446 (insert (rfc2047-parse-and-decode | 509 (setq b (goto-char (point-min))) |
447 (prog1 | 510 (while (re-search-forward rfc2047-encoded-word-regexp nil t) |
448 (match-string 0) | 511 (setq e (match-beginning 0)) |
449 (delete-region (match-beginning 0) (match-end 0))))) | 512 (rfc2047-parse-and-decode (match-beginning 0) (match-end 0))) |
450 (when (and (mm-multibyte-p) | 513 (when (and (mm-multibyte-p) |
451 mail-parse-charset | 514 mail-parse-charset |
452 (not (eq mail-parse-charset 'gnus-decoded))) | 515 (not (eq mail-parse-charset 'us-ascii)) |
453 (mm-decode-coding-region b e mail-parse-charset)) | 516 (not (eq mail-parse-charset 'gnus-decoded))) |
454 (setq b (point))) | 517 (mm-decode-coding-region b (point-max) mail-parse-charset)) |
455 (when (and (mm-multibyte-p) | 518 (rfc2047-unfold-region (point-min) (point-max)))) |
456 mail-parse-charset | 519 (unless undoing |
457 (not (eq mail-parse-charset 'us-ascii)) | 520 (buffer-disable-undo))))) |
458 (not (eq mail-parse-charset 'gnus-decoded))) | |
459 (mm-decode-coding-region b (point-max) mail-parse-charset)) | |
460 (rfc2047-unfold-region (point-min) (point-max)))))) | |
461 | 521 |
462 (defun rfc2047-decode-string (string) | 522 (defun rfc2047-decode-string (string) |
463 "Decode the quoted-printable-encoded STRING and return the results." | 523 "Decode the quoted-printable-encoded STRING and return the results." |
464 (let ((m (mm-multibyte-p))) | 524 (let ((m (mm-multibyte-p))) |
465 (with-temp-buffer | 525 (with-temp-buffer |
468 (insert string) | 528 (insert string) |
469 (inline | 529 (inline |
470 (rfc2047-decode-region (point-min) (point-max))) | 530 (rfc2047-decode-region (point-min) (point-max))) |
471 (buffer-string)))) | 531 (buffer-string)))) |
472 | 532 |
473 (defun rfc2047-parse-and-decode (word) | 533 (defun rfc2047-parse-and-decode (b e) |
474 "Decode WORD and return it if it is an encoded word. | 534 "Decode WORD and return it if it is an encoded word. |
475 Return WORD if not." | 535 Return WORD if not." |
476 (if (not (string-match rfc2047-encoded-word-regexp word)) | 536 (save-restriction |
477 word | 537 (narrow-to-region b e) |
478 (or | 538 (goto-char b) |
479 (condition-case nil | 539 (when (looking-at (eval-when-compile |
480 (rfc2047-decode | 540 (concat "\\`" rfc2047-encoded-word-regexp "\\'"))) |
481 (match-string 1 word) | 541 (condition-case nil |
482 (upcase (match-string 2 word)) | 542 (let ((charset (match-string 1)) |
483 (match-string 3 word)) | 543 (encoding (upcase (match-string 2)))) |
484 (error word)) | 544 (undo-boundary) |
485 word))) | 545 (delete-region (match-beginning 0) (1+ (match-end 2))) |
486 | 546 (delete-region (- (point-max) 2) (point-max)) |
487 (defun rfc2047-decode (charset encoding string) | 547 (rfc2047-decode charset encoding (point-min) (point-max))) |
488 "Decode STRING from the given MIME CHARSET in the given ENCODING. | 548 ;; If we get an error, undo the change |
549 (error (undo)))))) | |
550 | |
551 (defun rfc2047-decode (charset encoding b e) | |
552 "Decode from the given MIME CHARSET in the given ENCODING in region B to E. | |
489 Valid ENCODINGs are \"B\" and \"Q\". | 553 Valid ENCODINGs are \"B\" and \"Q\". |
490 If your Emacs implementation can't decode CHARSET, return nil." | 554 If your Emacs implementation can't decode CHARSET, return nil." |
491 (if (stringp charset) | 555 (if (stringp charset) |
492 (setq charset (intern (downcase charset)))) | 556 (setq charset (intern (downcase charset)))) |
493 (if (or (not charset) | 557 (if (or (not charset) |
502 (setq cs (mm-charset-to-coding-system mail-parse-charset))) | 566 (setq cs (mm-charset-to-coding-system mail-parse-charset))) |
503 (when cs | 567 (when cs |
504 (when (and (eq cs 'ascii) | 568 (when (and (eq cs 'ascii) |
505 mail-parse-charset) | 569 mail-parse-charset) |
506 (setq cs mail-parse-charset)) | 570 (setq cs mail-parse-charset)) |
507 ;; Ensure unibyte result in Emacs 20. | 571 (save-restriction |
508 (let (default-enable-multibyte-characters) | 572 (narrow-to-region b e) |
509 (with-temp-buffer | 573 (cond |
510 (mm-decode-coding-string | 574 ((equal "B" encoding) |
511 (cond | 575 (base64-decode-region b e)) |
512 ((equal "B" encoding) | 576 ((equal "Q" encoding) |
513 (base64-decode-string string)) | 577 (subst-char-in-region b e ?_ ? t) |
514 ((equal "Q" encoding) | 578 (quoted-printable-decode-region b e)) |
515 (quoted-printable-decode-string | 579 (t (error "Invalid encoding: %s" encoding))) |
516 (mm-replace-chars-in-string string ?_ ? ))) | 580 (mm-decode-coding-region (point-min) (point-max) cs))))) |
517 (t (error "Invalid encoding: %s" encoding))) | |
518 cs)))))) | |
519 | 581 |
520 (provide 'rfc2047) | 582 (provide 'rfc2047) |
521 | 583 |
522 ;;; rfc2047.el ends here | 584 ;;; rfc2047.el ends here |