Mercurial > emacs
annotate lisp/gnus/rfc2047.el @ 33298:8a75bc99cf0b
Avoid compiler warnings.
Use (featurep 'xemacs). Require cl when compiling.
(uudecode-char-int): New alias, replacing char-int.
(uudecode-decode-region): Don't call buffer-disable-undo.
author | Dave Love <fx@gnu.org> |
---|---|
date | Wed, 08 Nov 2000 15:27:30 +0000 |
parents | eca95f9d7f05 |
children | d401dfab680a |
rev | line source |
---|---|
31717 | 1 ;;; rfc2047.el --- Functions for encoding and decoding rfc2047 messages |
2 ;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. | |
3 | |
4 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
5 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> | |
6 ;; This file is part of GNU Emacs. | |
7 | |
8 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 ;; it under the terms of the GNU General Public License as published by | |
10 ;; the Free Software Foundation; either version 2, or (at your option) | |
11 ;; any later version. | |
12 | |
13 ;; GNU Emacs is distributed in the hope that it will be useful, | |
14 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 ;; GNU General Public License for more details. | |
17 | |
18 ;; You should have received a copy of the GNU General Public License | |
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 ;; Boston, MA 02111-1307, USA. | |
22 | |
23 ;;; Commentary: | |
24 | |
25 ;;; Code: | |
26 | |
33127
eca95f9d7f05
(base64): Require unconditionally.
Dave Love <fx@gnu.org>
parents:
31764
diff
changeset
|
27 (require 'base64) |
31717 | 28 |
29 (require 'qp) | |
30 (require 'mm-util) | |
31 (require 'ietf-drums) | |
32 (require 'mail-prsvr) | |
33 | |
33127
eca95f9d7f05
(base64): Require unconditionally.
Dave Love <fx@gnu.org>
parents:
31764
diff
changeset
|
34 (eval-when-compile (defvar message-posting-charset)) |
eca95f9d7f05
(base64): Require unconditionally.
Dave Love <fx@gnu.org>
parents:
31764
diff
changeset
|
35 |
(defvar rfc2047-header-encoding-alist
  ;; Order matters: `rfc2047-encode-message-header' stops at the first
  ;; entry whose key matches, so the catch-all `t' entry stays last.
  '(("Newsgroups" . nil)
    ("Message-ID" . nil)
    (t . mime))
  "*Alist that selects how each message header field is encoded.
Every element has the form (KEY . METHOD).  The elements are tried in
order and the first one that applies is used.  KEY is either a regexp
that is matched at the beginning of the header field, or `t', which
applies to every field.

METHOD is one of:

nil        leave the field alone;
`mime'     encode the field according to RFC2047;
a charset  encode the field as that charset;
`default'  encode the field the same way as the rest of the article.")
51 | |
(defvar rfc2047-charset-encoding-alist
  '((us-ascii . nil)
    ;; Latin and similar 8-bit charsets.
    (iso-8859-1 . Q)
    (iso-8859-2 . Q)
    (iso-8859-3 . Q)
    (iso-8859-4 . Q)
    (iso-8859-5 . B)
    (koi8-r . B)
    (iso-8859-7 . Q)
    (iso-8859-8 . Q)
    (iso-8859-9 . Q)
    ;; ISO-2022 based and East Asian charsets.
    (iso-2022-jp . B)
    (iso-2022-kr . B)
    (gb2312 . B)
    (cn-gb . B)
    (cn-gb-2312 . B)
    (euc-kr . B)
    (iso-2022-jp-2 . B)
    (iso-2022-int-1 . B))
  "Alist mapping MIME charset symbols to RFC2047 encoding types.
Each element has the form (CHARSET . ENCODING), where ENCODING is
`Q', `B' or nil.  The ENCODING symbols are the keys of
`rfc2047-encoding-function-alist'.")
73 | |
(defvar rfc2047-encoding-function-alist
  '((Q . rfc2047-q-encode-region)
    (B . rfc2047-b-encode-region)
    (nil . ignore))
  "Alist mapping RFC2047 encoding types to the functions that apply them.
Each element has the form (ENCODING . FUNCTION).  `rfc2047-encode'
calls FUNCTION with two arguments, the start and the end of the
region to encode.")
79 | |
(defvar rfc2047-q-encoding-alist
  ;; Every alternative must be separated by `\\|'.  A lone `\|' inside a
  ;; Lisp string reads as a plain `|', which silently turns the
  ;; alternation into literal text.
  '(("\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\):" . "-A-Za-z0-9!*+/")
    ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
    ;; Avoid using 8bit characters.  Some versions of Emacs have a bug!
    ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
    ("." . "\010\012\014\040-\074\076\100-\136\140-\177"))
  "Alist of header regexps and valid Q characters.
Each element has the form (REGEXP . CHARS).  `rfc2047-q-encode-region'
walks the list in order, picks the first element whose REGEXP matches
at the start of the region, and passes CHARS to
`quoted-printable-encode-region' as the fourth argument.  The first
element covers address headers; the second, whose regexp is \".\",
covers everything else.")
87 | |
88 ;;; | |
89 ;;; Functions for encoding RFC2047 messages | |
90 ;;; | |
91 | |
(defun rfc2047-narrow-to-field ()
  "Narrow the buffer to the header field that starts on the current line.
The field runs from the beginning of the current line up to the next
line that does not start with a space, a tab or a newline, or to the
end of the buffer when there is no such line.  Point is left at the
beginning of the narrowed region."
  (beginning-of-line)
  (let ((field-start (point)))
    (forward-line 1)
    (narrow-to-region
     field-start
     ;; The regexp is anchored with `^', so the match starts exactly at
     ;; the beginning of the line that opens the next field.
     (if (re-search-forward "^[^ \n\t]" nil t)
         (match-beginning 0)
       (point-max))))
  (goto-char (point-min)))
105 | |
(defun rfc2047-encode-message-header ()
  "Encode the message header according to `rfc2047-header-encoding-alist'.
Should be called narrowed to the head of the message.

The header is processed one field at a time.  A field for which
`rfc2047-encodable-p' returns nil is only run through
`mm-encode-coding-region' with the car of `message-posting-charset',
and only when it is 8bit, the buffer is multibyte and that charset is
a coding system.  Any other field is handled with the method of the
first matching entry in `rfc2047-header-encoding-alist'."
  (interactive "*")
  ;; Loaded at run time; `message-posting-charset' is used below.
  (require 'message)
  (save-excursion
    (goto-char (point-min))
    (let (rules rule method)
      (while (not (eobp))
        (save-restriction
          (rfc2047-narrow-to-field)
          (if (rfc2047-encodable-p)
              (progn
                ;; We found something that may perhaps be encoded.
                ;; Pick the method of the first matching rule.
                (setq method nil
                      rules rfc2047-header-encoding-alist)
                (while (setq rule (pop rules))
                  (when (or (eq (car rule) t)
                            (and (stringp (car rule))
                                 (looking-at (car rule))))
                    ;; Emptying RULES ends the search.
                    (setq rules nil
                          method (cdr rule))))
                (cond
                 ((eq method 'mime)
                  (rfc2047-encode-region (point-min) (point-max))
                  (rfc2047-fold-region (point-min) (point-max)))
                 ((eq method 'default)
                  (when (and (featurep 'mule)
                             mail-parse-charset)
                    (mm-encode-coding-region (point-min) (point-max)
                                             mail-parse-charset)))
                 ((mm-coding-system-p method)
                  (when (featurep 'mule)
                    (mm-encode-coding-region (point-min) (point-max)
                                             method)))
                 ;; Hm.  Any other method leaves the field untouched.
                 (t)))
            ;; Nothing needs RFC2047 encoding, but 8 bit text must
            ;; still be encoded.
            ;; Is message-posting-charset a coding system?
            (when (and (eq (mm-body-7-or-8) '8bit)
                       (mm-multibyte-p)
                       (mm-coding-system-p
                        (car message-posting-charset)))
              (mm-encode-coding-region
               (point-min) (point-max)
               (car message-posting-charset))))
          ;; Move to the end of this field so that, once the restriction
          ;; is undone, the next iteration starts at the next field.
          (goto-char (point-max)))))))
151 | |
(defun rfc2047-encodable-p (&optional header)
  "Return non-nil if the narrowed buffer needs encoding in headers.
The MIME charsets found in the accessible portion of the buffer are
compared against `us-ascii' and the car of `message-posting-charset';
the result is t when any other charset is present, and nil otherwise.
HEADER is accepted but not used."
  (require 'message)
  (let ((charsets
         (mapcar
          'mm-mime-charset
          (mm-find-charset-region (point-min) (point-max))))
        (harmless (list 'us-ascii (car message-posting-charset)))
        (needs-encoding nil))
    (while charsets
      (or (memq (car charsets) harmless)
          (setq needs-encoding t))
      (setq charsets (cdr charsets)))
    needs-encoding))
165 | |
(defun rfc2047-dissect-region (b e)
  "Dissect the region between B and E into words.
Return a list of elements of the form (START END CHARSET), where START
and END are buffer positions and CHARSET is the first charset other
than `ascii' reported by `mm-charset-after' inside the word, or nil
when there was none.  Elements are added with `push' while scanning
forward, so the word closest to E comes first in the result.

Characters in `ietf-drums-tspecials' end a word and are skipped;
space, tab, newline and carriage return separate words."
  (let ((skip-set (concat ietf-drums-tspecials " \t\n\r"))
        (specials (mapcar 'identity ietf-drums-tspecials))
        ;; ?\040 is the space character.
        (blanks '(?\040 ?\t ?\n ?\r))
        ;; MODE is nil (between words), `word' or `blank'.
        ;; `mail-parse-mule-charset' is bound to nil for the whole scan.
        pieces run-charset char-charset mode mail-parse-mule-charset)
    (save-restriction
      (narrow-to-region b e)
      (goto-char (point-min))
      (skip-chars-forward skip-set)
      (setq b (point))
      (while (not (eobp))
        (cond
         ;; Not inside anything yet: a new word starts here.
         ((null mode)
          (setq mode 'word)
          (or (eq (setq char-charset (mm-charset-after)) 'ascii)
              (setq run-charset char-charset))
          (setq b (point)))
         ;; Inside whitespace that followed a word.
         ((eq mode 'blank)
          (cond
           ((memq (char-after) specials)
            (setq mode nil))
           ((memq (char-after) blanks)
            ;; More whitespace; nothing to record.
            nil)
           (t
            (setq mode 'word)
            ;; B is nil when the previous word was dropped.
            (or b (setq b (point)))
            (or (eq (setq char-charset (mm-charset-after)) 'ascii)
                (setq run-charset char-charset)))))
         ;; Inside a word.
         ((eq mode 'word)
          (cond
           ((memq (char-after) specials)
            (setq mode nil)
            (push (list b (point) run-charset) pieces)
            (setq run-charset nil))
           ((memq (char-after) blanks)
            (setq mode 'blank)
            (if run-charset
                (progn
                  (push (list b (point) run-charset) pieces)
                  (setq b (point)
                        run-charset nil))
              ;; A word without any non-ascii charset is dropped here.
              (setq b nil)))
           ;; Same charset as before, or plain ascii: stay in the word.
           ;; The first non-ascii charset seen is remembered as a side
           ;; effect of this test.
           ((or (eq (setq char-charset (mm-charset-after)) 'ascii)
                (if run-charset
                    (eq run-charset char-charset)
                  (setq run-charset char-charset))))
           ;; The charset changed in the middle of the word.
           (t
            (push (list b (point) run-charset) pieces)
            (setq run-charset char-charset
                  b (point))))))
        (if mode
            (forward-char)
          (skip-chars-forward skip-set)))
      ;; Record the word that reaches the end of the region.
      (when (eq mode 'word)
        (push (list b (point) run-charset) pieces)))
    pieces))
222 | |
(defun rfc2047-encode-region (b e)
  "Encode all encodable words in the region between B and E.
The words come from `rfc2047-dissect-region' and are handled in the
order that function returns them.  Consecutive words with `equal'
charsets are merged into one run, and every run whose charset is
non-nil is passed to `rfc2047-encode'."
  (let ((pending (rfc2047-dissect-region b e))
        run-start run-end run-charset piece)
    (while (setq piece (pop pending))
      (let ((piece-start (nth 0 piece))
            (piece-end (nth 1 piece))
            (piece-charset (nth 2 piece)))
        (if (equal piece-charset run-charset)
            ;; Same charset: extend the current run backwards.
            (setq run-start piece-start)
          (cond
           ((null run-charset)
            ;; Nothing to flush.
            nil)
           ((and (eq run-start piece-end) piece-charset)
            ;; There might be a bug in Emacs Mule.
            ;; A space must be inserted before encoding.
            (goto-char run-start)
            (insert " ")
            (rfc2047-encode (1+ run-start) (1+ run-end) run-charset))
           (t
            (rfc2047-encode run-start run-end run-charset)))
          (setq run-charset piece-charset
                run-start piece-start
                run-end piece-end))))
    ;; Flush the run that is still open.
    (when run-charset
      (rfc2047-encode run-start run-end run-charset))))
244 | |
(defun rfc2047-encode-string (string)
  "Return STRING with its encodable words encoded.
STRING is inserted into a temporary buffer, the whole buffer is run
through `rfc2047-encode-region', and the resulting text is returned."
  (with-temp-buffer
    (insert string)
    (rfc2047-encode-region (point-min) (point-max))
    (buffer-substring (point-min) (point-max))))
251 | |
(defun rfc2047-encode (b e charset)
  "Encode the word in the region between B and E with CHARSET.
The encoding type is looked up in `rfc2047-charset-encoding-alist'
under the MIME name of CHARSET; `B' is used when the lookup yields
nil.  Every line of the encoded text is wrapped in
\"=?CHARSET?ENCODING?\" and \"?=\", and every line but the first is
preceded by a space."
  (let* ((mime-charset (mm-mime-charset charset))
         (encoding (or (cdr (assq mime-charset
                                  rfc2047-charset-encoding-alist))
                       'B))
         (opener (format "=?%s?%s?"
                         (downcase (symbol-name mime-charset))
                         (downcase (symbol-name encoding))))
         (need-space nil))
    (save-restriction
      (narrow-to-region b e)
      (when (eq encoding 'B)
        ;; break into lines before encoding
        (goto-char (point-min))
        (while (not (eobp))
          (goto-char (min (point-max) (+ 15 (point))))
          (or (eobp)
              (insert "\n"))))
      (when (and (mm-multibyte-p)
                 (mm-coding-system-p mime-charset))
        (mm-encode-coding-region (point-min) (point-max) mime-charset))
      (funcall (cdr (assq encoding rfc2047-encoding-function-alist))
               (point-min) (point-max))
      ;; Wrap each line as an encoded word of its own.
      (goto-char (point-min))
      (while (not (eobp))
        (when need-space
          (insert " "))
        (setq need-space t)
        (insert opener)
        (end-of-line)
        (insert "?=")
        (forward-line 1)))))
285 | |
(defun rfc2047-fold-region (b e)
  "Fold the long lines in the region between B and E.
A fold is the text \"\\n \" (newline plus space).  It is inserted when
the closing \"?=\" of an encoded word lies more than 76 characters
after the beginning of its line.  The fold goes at the most recently
remembered candidate position: a space or tab, or the opening \"=?\"
of an encoded word when no candidate was remembered at that point."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    (let (fold-point)
      (while (not (eobp))
        (cond
         ;; ?\040 is the space character.
         ((memq (char-after) '(?\040 ?\t))
          (setq fold-point (point)))
         ((and (null fold-point)
               (looking-at "=\\?"))
          (setq fold-point (point)))
         ((and fold-point
               (looking-at "\\?=")
               (> (- (point)
                     (save-excursion (beginning-of-line) (point)))
                  76))
          (goto-char fold-point)
          ;; Forget the candidate so the same place is not used twice.
          (setq fold-point nil)
          (insert "\n ")))
        (or (eobp)
            (forward-char 1))))))
307 | |
(defun rfc2047-b-encode-region (b e)
  "Encode the text between B and E with the B encoding.
Each line of the region is base64-encoded separately, without line
breaks being added to the encoded text.  When point ends up on an
empty line after encoding, the character before point is deleted."
  (save-restriction
    (narrow-to-region (goto-char b) e)
    (while (not (eobp))
      (let ((line-start (point)))
        (end-of-line)
        (base64-encode-region line-start (point) t))
      (when (and (bolp) (eolp))
        (delete-char -1))
      (forward-line))))
317 | |
(defun rfc2047-q-encode-region (b e)
  "Encode the text between B and E with the Q encoding.
The first element of `rfc2047-q-encoding-alist' whose regexp matches
at B supplies the character class handed to
`quoted-printable-encode-region'.  Spaces are then replaced with
underscores, and a newline is inserted roughly every 64 characters."
  (save-excursion
    (save-restriction
      (narrow-to-region (goto-char b) e)
      (let ((rules rfc2047-q-encoding-alist))
        (while rules
          (if (looking-at (car (car rules)))
              (progn
                (quoted-printable-encode-region b e nil (cdr (car rules)))
                ;; ?\040 is the space character.
                (subst-char-in-region (point-min) (point-max) ?\040 ?_)
                ;; Only the first matching rule is applied.
                (setq rules nil))
            (setq rules (cdr rules))))
        (goto-char (point-min))
        (while (not (eobp))
          (goto-char (min (point-max) (+ 64 (point))))
          ;; Step back to an \"=\" within the two preceding characters,
          ;; if there is one, before breaking the line.
          (search-backward "=" (- (point) 2) t)
          (or (eobp)
              (insert "\n")))))))
336 | |
337 ;;; | |
338 ;;; Functions for decoding RFC2047 messages | |
339 ;;; | |
340 | |
(defvar rfc2047-encoded-word-regexp
  "=\\?\\([^][\000-\040()<>@,\;:\\\"/?.=]+\\)\\?\\(B\\|Q\\)\\?\\([!->@-~ +]+\\)\\?="
  "Regexp that matches one RFC2047 encoded word.
Subexpression 1 is the charset, subexpression 2 is the encoding
letter (B or Q) and subexpression 3 is the encoded text.")
343 | |
(defun rfc2047-decode-region (start end)
  "Decode MIME-encoded words in region between START and END.
Whitespace between two adjacent encoded words is removed first.  Each
encoded word is then replaced with the result of
`rfc2047-parse-and-decode'.

The text between the encoded words is decoded with
`mail-parse-charset' when the buffer is multibyte and that variable is
set to something other than `us-ascii' or `gnus-decoded'."
  (interactive "r")
  (let* ((case-fold-search t)
         ;; Non-nil when the text outside the encoded words needs to be
         ;; decoded.  Computed once so that the text in front of an
         ;; encoded word and the text after the last one are treated
         ;; alike; in particular neither is decoded for `us-ascii'.
         (decode-plain-p
          (and (mm-multibyte-p)
               mail-parse-charset
               (not (eq mail-parse-charset 'us-ascii))
               (not (eq mail-parse-charset 'gnus-decoded))))
         b e)
    (save-excursion
      (save-restriction
        (narrow-to-region start end)
        (goto-char (point-min))
        ;; Remove whitespace between encoded words.
        ;; `rfc2047-encoded-word-regexp' has three groups of its own, so
        ;; group 1 is the first word, group 5 the whitespace and group 6
        ;; the second word.
        (while (re-search-forward
                (concat "\\(" rfc2047-encoded-word-regexp "\\)"
                        "\\(\n?[ \t]\\)+"
                        "\\(" rfc2047-encoded-word-regexp "\\)")
                nil t)
          ;; Leave point at the end of the first word so that the second
          ;; word can in turn act as the first word of the next match.
          (delete-region (goto-char (match-end 1)) (match-beginning 6)))
        ;; Decode the encoded words.
        (setq b (goto-char (point-min)))
        (while (re-search-forward rfc2047-encoded-word-regexp nil t)
          (setq e (match-beginning 0))
          (insert (rfc2047-parse-and-decode
                   (prog1
                       (match-string 0)
                     (delete-region (match-beginning 0) (match-end 0)))))
          ;; B to E is the plain text in front of this encoded word.
          (when decode-plain-p
            (mm-decode-coding-region b e mail-parse-charset))
          (setq b (point)))
        ;; Plain text after the last encoded word.
        (when decode-plain-p
          (mm-decode-coding-region b (point-max) mail-parse-charset))))))
378 | |
(defun rfc2047-decode-string (string)
  "Decode the MIME-encoded words in STRING and return the result.
The work is done by `rfc2047-decode-region' in a temporary buffer.
That buffer is made multibyte when `mm-multibyte-p' returns non-nil
at the time of the call."
  (let ((multibyte (mm-multibyte-p)))
    (with-temp-buffer
      (if multibyte
          (mm-enable-multibyte))
      (insert string)
      (inline
        (rfc2047-decode-region (point-min) (point-max)))
      (buffer-string))))
389 | |
(defun rfc2047-parse-and-decode (word)
  "Decode WORD and return it if it is an encoded word.
Return WORD unchanged if it does not match
`rfc2047-encoded-word-regexp', if `rfc2047-decode' signals an error,
or if `rfc2047-decode' returns nil."
  (if (string-match rfc2047-encoded-word-regexp word)
      (or (condition-case nil
              (rfc2047-decode
               (match-string 1 word)           ; charset
               (upcase (match-string 2 word))  ; encoding, \"B\" or \"Q\"
               (match-string 3 word))          ; encoded text
            (error word))
          word)
    word))
403 | |
(defun rfc2047-decode (charset encoding string)
  "Decode STRING that uses CHARSET with ENCODING.
Valid ENCODINGs are \"B\" and \"Q\"; any other value signals an error.
CHARSET may be a string or a symbol.  `mail-parse-charset' is used
in its place when CHARSET is nil or is ruled out by
`mail-parse-ignored-charsets'.
If your Emacs implementation can't decode CHARSET, it returns nil."
  (when (stringp charset)
    (setq charset (intern (downcase charset))))
  (when (or (null charset)
            (eq 'gnus-all mail-parse-ignored-charsets)
            (memq 'gnus-all mail-parse-ignored-charsets)
            (memq charset mail-parse-ignored-charsets))
    (setq charset mail-parse-charset))
  (let ((coding (mm-charset-to-coding-system charset)))
    ;; An unknown charset falls back to `mail-parse-charset' only when
    ;; `gnus-unknown' is listed in `mail-parse-ignored-charsets'.
    (when (and (null coding)
               charset
               (listp mail-parse-ignored-charsets)
               (memq 'gnus-unknown mail-parse-ignored-charsets))
      (setq coding (mm-charset-to-coding-system mail-parse-charset)))
    (when coding
      (if (and (eq coding 'ascii)
               mail-parse-charset)
          (setq coding mail-parse-charset))
      (let ((raw
             (cond
              ((equal "B" encoding)
               (base64-decode-string string))
              ((equal "Q" encoding)
               ;; Turn underscores back into spaces before decoding;
               ;; ?\040 is the space character.
               (quoted-printable-decode-string
                (mm-replace-chars-in-string string ?_ ?\040)))
              (t (error "Invalid encoding: %s" encoding)))))
        (mm-decode-coding-string raw coding)))))
433 | |
434 (provide 'rfc2047) | |
435 | |
436 ;;; rfc2047.el ends here |