38413
|
1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
|
60161
|
2
|
74548
|
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
75347
|
4 ;; 2005, 2006, 2007 Free Software Foundation, Inc.
|
31717
|
5
|
|
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
|
|
7 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
|
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
|
12 ;; the Free Software Foundation; either version 2, or (at your option)
|
|
13 ;; any later version.
|
|
14
|
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
64085
|
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
23 ;; Boston, MA 02110-1301, USA.
|
31717
|
24
|
|
25 ;;; Commentary:
|
|
26
|
34674
|
27 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
|
|
28 ;; Three: Message Header Extensions for Non-ASCII Text".
|
|
29
|
31717
|
30 ;;; Code:
|
|
31
|
47945
|
32 (eval-when-compile
|
|
33 (require 'cl)
|
56927
|
34 (defvar message-posting-charset)
|
|
35 (unless (fboundp 'with-syntax-table) ; not in Emacs 20
|
|
36 (defmacro with-syntax-table (table &rest body)
|
|
37 "Evaluate BODY with syntax table of current buffer set to TABLE.
|
|
38 The syntax table of the current buffer is saved, BODY is evaluated, and the
|
|
39 saved table is restored, even in case of an abnormal exit.
|
|
40 Value is what BODY returns."
|
|
41 (let ((old-table (make-symbol "table"))
|
|
42 (old-buffer (make-symbol "buffer")))
|
|
43 `(let ((,old-table (syntax-table))
|
|
44 (,old-buffer (current-buffer)))
|
|
45 (unwind-protect
|
|
46 (progn
|
|
47 (set-syntax-table ,table)
|
|
48 ,@body)
|
|
49 (save-current-buffer
|
|
50 (set-buffer ,old-buffer)
|
|
51 (set-syntax-table ,old-table))))))))
|
31717
|
52
|
|
53 (require 'qp)
|
|
54 (require 'mm-util)
|
60161
|
55 (require 'ietf-drums)
|
47945
|
56 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
|
31717
|
57 (require 'mail-prsvr)
|
33304
|
58 (require 'base64)
|
|
59 (autoload 'mm-body-7-or-8 "mm-bodies")
|
33127
|
60
|
56927
|
61 (eval-and-compile
|
|
62 ;; Avoid gnus-util for mm- code.
|
|
63 (defalias 'rfc2047-point-at-bol
|
|
64 (if (fboundp 'point-at-bol)
|
|
65 'point-at-bol
|
|
66 'line-beginning-position))
|
|
67
|
|
68 (defalias 'rfc2047-point-at-eol
|
|
69 (if (fboundp 'point-at-eol)
|
|
70 'point-at-eol
|
|
71 'line-end-position)))
|
|
72
|
31717
|
73 (defvar rfc2047-header-encoding-alist
|
56927
|
74 '(("Newsgroups" . nil)
|
|
75 ("Followup-To" . nil)
|
31717
|
76 ("Message-ID" . nil)
|
58835
|
77 ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\
|
56927
|
78 \\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime)
|
31717
|
79 (t . mime))
|
|
80 "*Header/encoding method alist.
|
|
81 The list is traversed sequentially. The keys can either be
|
33304
|
82 header regexps or t.
|
31717
|
83
|
|
84 The values can be:
|
|
85
|
|
86 1) nil, in which case no encoding is done;
|
|
87 2) `mime', in which case the header will be encoded according to RFC2047;
|
47945
|
88 3) `address-mime', like `mime', but takes account of the rules for address
|
|
89 fields (where quoted strings and comments must be treated separately);
|
|
90 4) a charset, in which case it will be encoded as that charset;
|
|
91 5) `default', in which case the field will be encoded as the rest
|
31717
|
92 of the article.")
|
|
93
|
|
94 (defvar rfc2047-charset-encoding-alist
|
|
95 '((us-ascii . nil)
|
|
96 (iso-8859-1 . Q)
|
|
97 (iso-8859-2 . Q)
|
|
98 (iso-8859-3 . Q)
|
|
99 (iso-8859-4 . Q)
|
|
100 (iso-8859-5 . B)
|
|
101 (koi8-r . B)
|
47945
|
102 (iso-8859-7 . B)
|
|
103 (iso-8859-8 . B)
|
31717
|
104 (iso-8859-9 . Q)
|
33304
|
105 (iso-8859-14 . Q)
|
|
106 (iso-8859-15 . Q)
|
31717
|
107 (iso-2022-jp . B)
|
|
108 (iso-2022-kr . B)
|
|
109 (gb2312 . B)
|
90684
|
110 (gbk . B)
|
|
111 (gb18030 . B)
|
35838
|
112 (big5 . B)
|
|
113 (cn-big5 . B)
|
31717
|
114 (cn-gb . B)
|
|
115 (cn-gb-2312 . B)
|
|
116 (euc-kr . B)
|
|
117 (iso-2022-jp-2 . B)
|
56927
|
118 (iso-2022-int-1 . B)
|
|
119 (viscii . Q))
|
31717
|
120 "Alist of MIME charsets to RFC2047 encodings.
|
47945
|
121 Valid encodings are nil, `Q' and `B'. These indicate binary (no) encoding,
|
|
122 quoted-printable and base64 respectively.")
|
31717
|
123
|
61126
|
124 (defvar rfc2047-encode-function-alist
|
|
125 '((Q . rfc2047-q-encode-string)
|
|
126 (B . rfc2047-b-encode-string)
|
|
127 (nil . identity))
|
31717
|
128 "Alist of RFC2047 encodings to encoding functions.")
|
|
129
|
61126
|
130 (defvar rfc2047-encode-encoded-words t
|
|
131 "Whether encoded words should be encoded again.")
|
|
132
|
31717
|
133 ;;;
|
|
134 ;;; Functions for encoding RFC2047 messages
|
|
135 ;;;
|
|
136
|
57243
|
137 (defun rfc2047-qp-or-base64 ()
|
|
138 "Return the type with which to encode the buffer.
|
|
139 This is either `base64' or `quoted-printable'."
|
|
140 (save-excursion
|
|
141 (let ((limit (min (point-max) (+ 2000 (point-min))))
|
|
142 (n8bit 0))
|
|
143 (goto-char (point-min))
|
|
144 (skip-chars-forward "\x20-\x7f\r\n\t" limit)
|
|
145 (while (< (point) limit)
|
|
146 (incf n8bit)
|
|
147 (forward-char 1)
|
|
148 (skip-chars-forward "\x20-\x7f\r\n\t" limit))
|
|
149 (if (or (< (* 6 n8bit) (- limit (point-min)))
|
|
150 ;; Don't base64, say, a short line with a single
|
|
151 ;; non-ASCII char when splitting parts by charset.
|
|
152 (= n8bit 1))
|
|
153 'quoted-printable
|
|
154 'base64))))
|
|
155
|
31717
|
156 (defun rfc2047-narrow-to-field ()
|
|
157 "Narrow the buffer to the header on the current line."
|
|
158 (beginning-of-line)
|
|
159 (narrow-to-region
|
|
160 (point)
|
|
161 (progn
|
|
162 (forward-line 1)
|
|
163 (if (re-search-forward "^[^ \n\t]" nil t)
|
56927
|
164 (rfc2047-point-at-bol)
|
31717
|
165 (point-max))))
|
|
166 (goto-char (point-min)))
|
|
167
|
56927
|
168 (defun rfc2047-field-value ()
|
|
169 "Return the value of the field at point."
|
|
170 (save-excursion
|
|
171 (save-restriction
|
|
172 (rfc2047-narrow-to-field)
|
|
173 (re-search-forward ":[ \t\n]*" nil t)
|
61126
|
174 (buffer-substring-no-properties (point) (point-max)))))
|
56927
|
175
|
72605
|
176 (defun rfc2047-quote-special-characters-in-quoted-strings (&optional
|
|
177 encodable-regexp)
|
|
178 "Quote special characters with `\\'s in quoted strings.
|
|
179 Quoting will not be done in a quoted string if it contains characters
|
|
180 matching ENCODABLE-REGEXP."
|
|
181 (goto-char (point-min))
|
|
182 (let ((tspecials (concat "[" ietf-drums-tspecials "]"))
|
72810
|
183 beg end)
|
72605
|
184 (with-syntax-table (standard-syntax-table)
|
|
185 (while (search-forward "\"" nil t)
|
72810
|
186 (setq beg (match-beginning 0))
|
|
187 (unless (eq (char-before beg) ?\\)
|
|
188 (goto-char beg)
|
|
189 (setq beg (1+ beg))
|
72605
|
190 (condition-case nil
|
|
191 (progn
|
|
192 (forward-sexp)
|
72810
|
193 (setq end (1- (point)))
|
|
194 (goto-char beg)
|
|
195 (if (and encodable-regexp
|
|
196 (re-search-forward encodable-regexp end t))
|
|
197 (goto-char (1+ end))
|
|
198 (save-restriction
|
|
199 (narrow-to-region beg end)
|
72605
|
200 (while (re-search-forward tspecials nil 'move)
|
72810
|
201 (if (eq (char-before) ?\\)
|
|
202 (if (looking-at tspecials) ;; Already quoted.
|
|
203 (forward-char)
|
|
204 (insert "\\"))
|
72605
|
205 (goto-char (match-beginning 0))
|
72810
|
206 (insert "\\")
|
|
207 (forward-char))))
|
|
208 (forward-char)))
|
72605
|
209 (error
|
|
210 (goto-char beg))))))))
|
|
211
|
47945
|
212 (defvar rfc2047-encoding-type 'address-mime
|
|
213 "The type of encoding done by `rfc2047-encode-region'.
|
|
214 This should be dynamically bound around calls to
|
|
215 `rfc2047-encode-region' to either `mime' or `address-mime'. See
|
|
216 `rfc2047-header-encoding-alist', for definitions.")
|
|
217
|
31717
|
218 (defun rfc2047-encode-message-header ()
|
|
219 "Encode the message header according to `rfc2047-header-encoding-alist'.
|
|
220 Should be called narrowed to the head of the message."
|
|
221 (interactive "*")
|
|
222 (save-excursion
|
|
223 (goto-char (point-min))
|
|
224 (let (alist elem method)
|
|
225 (while (not (eobp))
|
|
226 (save-restriction
|
|
227 (rfc2047-narrow-to-field)
|
72605
|
228 (setq method nil
|
|
229 alist rfc2047-header-encoding-alist)
|
|
230 (while (setq elem (pop alist))
|
|
231 (when (or (and (stringp (car elem))
|
|
232 (looking-at (car elem)))
|
|
233 (eq (car elem) t))
|
|
234 (setq alist nil
|
|
235 method (cdr elem))))
|
31717
|
236 (if (not (rfc2047-encodable-p))
|
72605
|
237 (prog2
|
|
238 (when (eq method 'address-mime)
|
|
239 (rfc2047-quote-special-characters-in-quoted-strings))
|
61126
|
240 (if (and (eq (mm-body-7-or-8) '8bit)
|
|
241 (mm-multibyte-p)
|
|
242 (mm-coding-system-p
|
|
243 (car message-posting-charset)))
|
|
244 ;; 8 bit must be decoded.
|
|
245 (mm-encode-coding-region
|
|
246 (point-min) (point-max)
|
|
247 (mm-charset-to-coding-system
|
|
248 (car message-posting-charset))))
|
50881
|
249 ;; No encoding necessary, but folding is nice
|
61126
|
250 (when nil
|
|
251 (rfc2047-fold-region
|
|
252 (save-excursion
|
|
253 (goto-char (point-min))
|
|
254 (skip-chars-forward "^:")
|
|
255 (when (looking-at ": ")
|
|
256 (forward-char 2))
|
|
257 (point))
|
|
258 (point-max))))
|
31717
|
259 ;; We found something that may perhaps be encoded.
|
47945
|
260 (re-search-forward "^[^:]+: *" nil t)
|
31717
|
261 (cond
|
47945
|
262 ((eq method 'address-mime)
|
|
263 (rfc2047-encode-region (point) (point-max)))
|
31717
|
264 ((eq method 'mime)
|
56927
|
265 (let ((rfc2047-encoding-type 'mime))
|
47945
|
266 (rfc2047-encode-region (point) (point-max))))
|
31717
|
267 ((eq method 'default)
|
|
268 (if (and (featurep 'mule)
|
33815
|
269 (if (boundp 'default-enable-multibyte-characters)
|
|
270 default-enable-multibyte-characters)
|
31717
|
271 mail-parse-charset)
|
47945
|
272 (mm-encode-coding-region (point) (point-max)
|
31717
|
273 mail-parse-charset)))
|
56927
|
274 ;; We get this when CC'ing messsages to newsgroups with
|
|
275 ;; 8-bit names. The group name mail copy just got
|
|
276 ;; unconditionally encoded. Previously, it would ask
|
|
277 ;; whether to encode, which was quite confusing for the
|
|
278 ;; user. If the new behaviour is wrong, tell me. I have
|
|
279 ;; left the old code commented out below.
|
|
280 ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
|
|
281 ;; Modified by Dave Love, with the commented-out code changed
|
|
282 ;; in accordance with changes elsewhere.
|
|
283 ((null method)
|
|
284 (rfc2047-encode-region (point) (point-max)))
|
|
285 ;;; ((null method)
|
|
286 ;;; (if (or (message-options-get
|
|
287 ;;; 'rfc2047-encode-message-header-encode-any)
|
|
288 ;;; (message-options-set
|
|
289 ;;; 'rfc2047-encode-message-header-encode-any
|
|
290 ;;; (y-or-n-p
|
|
291 ;;; "Some texts are not encoded. Encode anyway?")))
|
|
292 ;;; (rfc2047-encode-region (point-min) (point-max))
|
|
293 ;;; (error "Cannot send unencoded text")))
|
31717
|
294 ((mm-coding-system-p method)
|
33815
|
295 (if (and (featurep 'mule)
|
|
296 (if (boundp 'default-enable-multibyte-characters)
|
|
297 default-enable-multibyte-characters))
|
47945
|
298 (mm-encode-coding-region (point) (point-max) method)))
|
31717
|
299 ;; Hm.
|
|
300 (t)))
|
|
301 (goto-char (point-max)))))))
|
|
302
|
35985
|
303 ;; Fixme: This, and the require below may not be the Right Thing, but
|
|
304 ;; should be safe just before release. -- fx 2001-02-08
|
|
305 (eval-when-compile (defvar message-posting-charset))
|
|
306
|
33304
|
307 (defun rfc2047-encodable-p ()
|
|
308 "Return non-nil if any characters in current buffer need encoding in headers.
|
|
309 The buffer may be narrowed."
|
35985
|
310 (require 'message) ; for message-posting-charset
|
31717
|
311 (let ((charsets
|
47945
|
312 (mm-find-mime-charset-region (point-min) (point-max))))
|
61126
|
313 (goto-char (point-min))
|
|
314 (or (and rfc2047-encode-encoded-words
|
|
315 (prog1
|
|
316 (search-forward "=?" nil t)
|
|
317 (goto-char (point-min))))
|
|
318 (and charsets
|
|
319 (not (equal charsets (list (car message-posting-charset))))))))
|
31717
|
320
|
47945
|
321 ;; Use this syntax table when parsing into regions that may need
|
|
322 ;; encoding. Double quotes are string delimiters, backslash is
|
|
323 ;; character quoting, and all other RFC 2822 special characters are
|
|
324 ;; treated as punctuation so we can use forward-sexp/forward-word to
|
|
325 ;; skip to the end of regions appropriately. Nb. ietf-drums does
|
|
326 ;; things differently.
|
|
327 (defconst rfc2047-syntax-table
|
56927
|
328 ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
|
|
329 (let ((table (make-syntax-table)))
|
|
330 ;; The following is done to work for setting all elements of the table
|
90104
|
331 ;; in Emacs 21-23 and XEmacs; it appears to be the cleanest way.
|
56927
|
332 ;; Play safe and don't assume the form of the word syntax entry --
|
|
333 ;; copy it from ?a.
|
|
334 (if (fboundp 'set-char-table-range) ; Emacs
|
|
335 (funcall (intern "set-char-table-range")
|
|
336 table t (aref (standard-syntax-table) ?a))
|
|
337 (if (fboundp 'put-char-table)
|
|
338 (if (fboundp 'get-char-table) ; warning avoidance
|
|
339 (put-char-table t (get-char-table ?a (standard-syntax-table))
|
|
340 table))))
|
47945
|
341 (modify-syntax-entry ?\\ "\\" table)
|
|
342 (modify-syntax-entry ?\" "\"" table)
|
61126
|
343 (modify-syntax-entry ?\( "(" table)
|
|
344 (modify-syntax-entry ?\) ")" table)
|
47945
|
345 (modify-syntax-entry ?\< "." table)
|
|
346 (modify-syntax-entry ?\> "." table)
|
|
347 (modify-syntax-entry ?\[ "." table)
|
|
348 (modify-syntax-entry ?\] "." table)
|
|
349 (modify-syntax-entry ?: "." table)
|
|
350 (modify-syntax-entry ?\; "." table)
|
|
351 (modify-syntax-entry ?, "." table)
|
|
352 (modify-syntax-entry ?@ "." table)
|
|
353 table))
|
31717
|
354
|
|
355 (defun rfc2047-encode-region (b e)
|
47945
|
356 "Encode words in region B to E that need encoding.
|
|
357 By default, the region is treated as containing RFC2822 addresses.
|
|
358 Dynamically bind `rfc2047-encoding-type' to change that."
|
|
359 (save-restriction
|
|
360 (narrow-to-region b e)
|
61126
|
361 (let ((encodable-regexp (if rfc2047-encode-encoded-words
|
|
362 "[^\000-\177]+\\|=\\?"
|
|
363 "[^\000-\177]+"))
|
|
364 start ; start of current token
|
|
365 end begin csyntax
|
|
366 ;; Whether there's an encoded word before the current token,
|
|
367 ;; either immediately or separated by space.
|
|
368 last-encoded
|
|
369 (orig-text (buffer-substring-no-properties b e)))
|
|
370 (if (eq 'mime rfc2047-encoding-type)
|
|
371 ;; Simple case. Continuous words in which all those contain
|
|
372 ;; non-ASCII characters are encoded collectively. Encoding
|
|
373 ;; ASCII words, including `Re:' used in Subject headers, is
|
|
374 ;; avoided for interoperability with non-MIME clients and
|
|
375 ;; for making it easy to find keywords.
|
|
376 (progn
|
|
377 (goto-char (point-min))
|
|
378 (while (progn (skip-chars-forward " \t\n")
|
|
379 (not (eobp)))
|
|
380 (setq start (point))
|
|
381 (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)")
|
|
382 (progn
|
|
383 (setq end (match-end 0))
|
|
384 (re-search-forward encodable-regexp end t)))
|
|
385 (goto-char end))
|
|
386 (if (> (point) start)
|
|
387 (rfc2047-encode start (point))
|
|
388 (goto-char end))))
|
|
389 ;; `address-mime' case -- take care of quoted words, comments.
|
72605
|
390 (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp)
|
61126
|
391 (with-syntax-table rfc2047-syntax-table
|
47945
|
392 (goto-char (point-min))
|
61126
|
393 (condition-case err ; in case of unbalanced quotes
|
47945
|
394 ;; Look for rfc2822-style: sequences of atoms, quoted
|
|
395 ;; strings, specials, whitespace. (Specials mustn't be
|
|
396 ;; encoded.)
|
|
397 (while (not (eobp))
|
61126
|
398 ;; Skip whitespace.
|
|
399 (skip-chars-forward " \t\n")
|
47945
|
400 (setq start (point))
|
|
401 (cond
|
|
402 ((not (char-after))) ; eob
|
|
403 ;; else token start
|
61126
|
404 ((eq ?\" (setq csyntax (char-syntax (char-after))))
|
47945
|
405 ;; Quoted word.
|
|
406 (forward-sexp)
|
|
407 (setq end (point))
|
|
408 ;; Does it need encoding?
|
|
409 (goto-char start)
|
61126
|
410 (if (re-search-forward encodable-regexp end 'move)
|
|
411 ;; It needs encoding. Strip the quotes first,
|
|
412 ;; since encoded words can't occur in quotes.
|
|
413 (progn
|
|
414 (goto-char end)
|
|
415 (delete-backward-char 1)
|
|
416 (goto-char start)
|
|
417 (delete-char 1)
|
|
418 (when last-encoded
|
|
419 ;; There was a preceding quoted word. We need
|
|
420 ;; to include any separating whitespace in this
|
|
421 ;; word to avoid it getting lost.
|
|
422 (skip-chars-backward " \t")
|
|
423 ;; A space is needed between the encoded words.
|
|
424 (insert ? )
|
|
425 (setq start (point)
|
|
426 end (1+ end)))
|
|
427 ;; Adjust the end position for the deleted quotes.
|
|
428 (rfc2047-encode start (- end 2))
|
|
429 (setq last-encoded t)) ; record that it was encoded
|
|
430 (setq last-encoded nil)))
|
|
431 ((eq ?. csyntax)
|
47945
|
432 ;; Skip other delimiters, but record that they've
|
|
433 ;; potentially separated quoted words.
|
|
434 (forward-char)
|
|
435 (setq last-encoded nil))
|
61126
|
436 ((eq ?\) csyntax)
|
|
437 (error "Unbalanced parentheses"))
|
|
438 ((eq ?\( csyntax)
|
|
439 ;; Look for the end of parentheses.
|
|
440 (forward-list)
|
|
441 ;; Encode text as an unstructured field.
|
|
442 (let ((rfc2047-encoding-type 'mime))
|
|
443 (rfc2047-encode-region (1+ start) (1- (point))))
|
|
444 (skip-chars-forward ")"))
|
47945
|
445 (t ; normal token/whitespace sequence
|
|
446 ;; Find the end.
|
61126
|
447 ;; Skip one ASCII word, or encode continuous words
|
|
448 ;; in which all those contain non-ASCII characters.
|
|
449 (setq end nil)
|
|
450 (while (not (or end (eobp)))
|
|
451 (when (looking-at "[\000-\177]+")
|
|
452 (setq begin (point)
|
|
453 end (match-end 0))
|
|
454 (when (progn
|
|
455 (while (and (or (re-search-forward
|
|
456 "[ \t\n]\\|\\Sw" end 'move)
|
|
457 (setq end nil))
|
|
458 (eq ?\\ (char-syntax (char-before))))
|
|
459 ;; Skip backslash-quoted characters.
|
|
460 (forward-char))
|
|
461 end)
|
|
462 (setq end (match-beginning 0))
|
|
463 (if rfc2047-encode-encoded-words
|
|
464 (progn
|
|
465 (goto-char begin)
|
|
466 (when (search-forward "=?" end 'move)
|
|
467 (goto-char (match-beginning 0))
|
|
468 (setq end nil)))
|
|
469 (goto-char end))))
|
|
470 ;; Where the value nil of `end' means there may be
|
|
471 ;; text to have to be encoded following the point.
|
|
472 ;; Otherwise, the point reached to the end of ASCII
|
|
473 ;; words separated by whitespace or a special char.
|
|
474 (unless end
|
|
475 (when (looking-at encodable-regexp)
|
|
476 (goto-char (setq begin (match-end 0)))
|
|
477 (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)")
|
|
478 (setq end (match-end 0))
|
|
479 (progn
|
|
480 (while (re-search-forward
|
|
481 encodable-regexp end t))
|
|
482 (< begin (point)))
|
|
483 (goto-char begin)
|
|
484 (or (not (re-search-forward "\\Sw" end t))
|
|
485 (progn
|
|
486 (goto-char (match-beginning 0))
|
|
487 nil)))
|
|
488 (goto-char end))
|
|
489 (when (looking-at "[^ \t\n]+")
|
|
490 (setq end (match-end 0))
|
|
491 (if (re-search-forward "\\Sw+" end t)
|
|
492 ;; There are special characters better
|
|
493 ;; to be encoded so that MTAs may parse
|
|
494 ;; them safely.
|
|
495 (cond ((= end (point)))
|
|
496 ((looking-at (concat "\\sw*\\("
|
|
497 encodable-regexp
|
|
498 "\\)"))
|
|
499 (setq end nil))
|
|
500 (t
|
|
501 (goto-char (1- (match-end 0)))
|
|
502 (unless (= (point) (match-beginning 0))
|
|
503 ;; Separate encodable text and
|
|
504 ;; delimiter.
|
|
505 (insert " "))))
|
|
506 (goto-char end)
|
|
507 (skip-chars-forward " \t\n")
|
|
508 (if (and (looking-at "[^ \t\n]+")
|
|
509 (string-match encodable-regexp
|
|
510 (match-string 0)))
|
|
511 (setq end nil)
|
|
512 (goto-char end)))))))
|
|
513 (skip-chars-backward " \t\n")
|
47945
|
514 (setq end (point))
|
|
515 (goto-char start)
|
61126
|
516 (if (re-search-forward encodable-regexp end 'move)
|
|
517 (progn
|
|
518 (unless (memq (char-before start) '(nil ?\t ? ))
|
|
519 (if (progn
|
|
520 (goto-char start)
|
|
521 (skip-chars-backward "^ \t\n")
|
|
522 (and (looking-at "\\Sw+")
|
|
523 (= (match-end 0) start)))
|
|
524 ;; Also encode bogus delimiters.
|
|
525 (setq start (point))
|
|
526 ;; Separate encodable text and delimiter.
|
|
527 (goto-char start)
|
|
528 (insert " ")
|
|
529 (setq start (1+ start)
|
|
530 end (1+ end))))
|
|
531 (rfc2047-encode start end)
|
|
532 (setq last-encoded t))
|
|
533 (setq last-encoded nil)))))
|
56927
|
534 (error
|
61126
|
535 (if (or debug-on-quit debug-on-error)
|
|
536 (signal (car err) (cdr err))
|
|
537 (error "Invalid data for rfc2047 encoding: %s"
|
|
538 (mm-replace-in-string orig-text "[ \t\n]+" " "))))))))
|
|
539 (rfc2047-fold-region b (point))
|
|
540 (goto-char (point-max))))
|
31717
|
541
|
|
542 (defun rfc2047-encode-string (string)
|
47945
|
543 "Encode words in STRING.
|
|
544 By default, the string is treated as containing addresses (see
|
56927
|
545 `rfc2047-encoding-type')."
|
61126
|
546 (mm-with-multibyte-buffer
|
31717
|
547 (insert string)
|
|
548 (rfc2047-encode-region (point-min) (point-max))
|
|
549 (buffer-string)))
|
|
550
|
61126
|
551 (defvar rfc2047-encode-max-chars 76
|
|
552 "Maximum characters of each header line that contain encoded-words.
|
|
553 If it is nil, encoded-words will not be folded. Too small value may
|
|
554 cause an error. Don't change this for no particular reason.")
|
|
555
|
|
556 (defun rfc2047-encode-1 (column string cs encoder start crest tail
|
|
557 &optional eword)
|
|
558 "Subroutine used by `rfc2047-encode'."
|
|
559 (cond ((string-equal string "")
|
|
560 (or eword ""))
|
|
561 ((not rfc2047-encode-max-chars)
|
|
562 (concat start
|
|
563 (funcall encoder (if cs
|
|
564 (mm-encode-coding-string string cs)
|
|
565 string))
|
|
566 "?="))
|
|
567 ((>= column rfc2047-encode-max-chars)
|
|
568 (when eword
|
|
569 (cond ((string-match "\n[ \t]+\\'" eword)
|
|
570 ;; Reomove a superfluous empty line.
|
|
571 (setq eword (substring eword 0 (match-beginning 0))))
|
|
572 ((string-match "(+\\'" eword)
|
|
573 ;; Break the line before the open parenthesis.
|
|
574 (setq crest (concat crest (match-string 0 eword))
|
|
575 eword (substring eword 0 (match-beginning 0))))))
|
|
576 (rfc2047-encode-1 (length crest) string cs encoder start " " tail
|
|
577 (concat eword "\n" crest)))
|
|
578 (t
|
|
579 (let ((index 0)
|
|
580 (limit (1- (length string)))
|
|
581 (prev "")
|
|
582 next len)
|
|
583 (while (and prev
|
|
584 (<= index limit))
|
|
585 (setq next (concat start
|
|
586 (funcall encoder
|
|
587 (if cs
|
|
588 (mm-encode-coding-string
|
|
589 (substring string 0 (1+ index))
|
|
590 cs)
|
|
591 (substring string 0 (1+ index))))
|
|
592 "?=")
|
|
593 len (+ column (length next)))
|
|
594 (if (> len rfc2047-encode-max-chars)
|
|
595 (setq next prev
|
|
596 prev nil)
|
|
597 (if (or (< index limit)
|
|
598 (<= (+ len (or (string-match "\n" tail)
|
|
599 (length tail)))
|
|
600 rfc2047-encode-max-chars))
|
|
601 (setq prev next
|
|
602 index (1+ index))
|
|
603 (if (string-match "\\`)+" tail)
|
|
604 ;; Break the line after the close parenthesis.
|
|
605 (setq tail (concat (substring tail 0 (match-end 0))
|
|
606 "\n "
|
|
607 (substring tail (match-end 0)))
|
|
608 prev next
|
|
609 index (1+ index))
|
|
610 (setq next prev
|
|
611 prev nil)))))
|
|
612 (if (> index limit)
|
|
613 (concat eword next tail)
|
|
614 (if (= 0 index)
|
|
615 (if (and eword
|
|
616 (string-match "(+\\'" eword))
|
|
617 (setq crest (concat crest (match-string 0 eword))
|
|
618 eword (substring eword 0 (match-beginning 0)))
|
|
619 (setq eword (concat eword next)))
|
|
620 (setq crest " "
|
|
621 eword (concat eword next)))
|
|
622 (when (string-match "\n[ \t]+\\'" eword)
|
|
623 ;; Reomove a superfluous empty line.
|
|
624 (setq eword (substring eword 0 (match-beginning 0))))
|
|
625 (rfc2047-encode-1 (length crest) (substring string index)
|
|
626 cs encoder start " " tail
|
|
627 (concat eword "\n" crest)))))))
|
|
628
|
47945
|
629 (defun rfc2047-encode (b e)
|
|
630 "Encode the word(s) in the region B to E.
|
61126
|
631 Point moves to the end of the region."
|
|
632 (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii)))
|
|
633 cs encoding tail crest eword)
|
|
634 (cond ((> (length mime-charset) 1)
|
|
635 (error "Can't rfc2047-encode `%s'"
|
|
636 (buffer-substring-no-properties b e)))
|
|
637 ((= (length mime-charset) 1)
|
|
638 (setq mime-charset (car mime-charset)
|
|
639 cs (mm-charset-to-coding-system mime-charset))
|
|
640 (unless (and (mm-multibyte-p)
|
|
641 (mm-coding-system-p cs))
|
|
642 (setq cs nil))
|
|
643 (save-restriction
|
|
644 (narrow-to-region b e)
|
|
645 (setq encoding
|
|
646 (or (cdr (assq mime-charset
|
31717
|
647 rfc2047-charset-encoding-alist))
|
56927
|
648 ;; For the charsets that don't have a preferred
|
|
649 ;; encoding, choose the one that's shorter.
|
61126
|
650 (if (eq (rfc2047-qp-or-base64) 'base64)
|
|
651 'B
|
|
652 'Q)))
|
|
653 (widen)
|
|
654 (goto-char e)
|
|
655 (skip-chars-forward "^ \t\n")
|
|
656 ;; `tail' may contain a close parenthesis.
|
|
657 (setq tail (buffer-substring-no-properties e (point)))
|
|
658 (goto-char b)
|
|
659 (setq b (point-marker)
|
|
660 e (set-marker (make-marker) e))
|
|
661 (rfc2047-fold-region (rfc2047-point-at-bol) b)
|
|
662 (goto-char b)
|
|
663 (skip-chars-backward "^ \t\n")
|
|
664 (unless (= 0 (skip-chars-backward " \t"))
|
|
665 ;; `crest' may contain whitespace and an open parenthesis.
|
|
666 (setq crest (buffer-substring-no-properties (point) b)))
|
|
667 (setq eword (rfc2047-encode-1
|
|
668 (- b (rfc2047-point-at-bol))
|
|
669 (mm-replace-in-string
|
|
670 (buffer-substring-no-properties b e)
|
|
671 "\n\\([ \t]?\\)" "\\1")
|
|
672 cs
|
|
673 (or (cdr (assq encoding
|
|
674 rfc2047-encode-function-alist))
|
|
675 'identity)
|
|
676 (concat "=?" (downcase (symbol-name mime-charset))
|
|
677 "?" (upcase (symbol-name encoding)) "?")
|
|
678 (or crest " ")
|
|
679 tail))
|
|
680 (delete-region (if (eq (aref eword 0) ?\n)
|
|
681 (if (bolp)
|
|
682 ;; The line was folded before encoding.
|
|
683 (1- (point))
|
|
684 (point))
|
|
685 (goto-char b))
|
|
686 (+ e (length tail)))
|
|
687 ;; `eword' contains `crest' and `tail'.
|
|
688 (insert eword)
|
|
689 (set-marker b nil)
|
|
690 (set-marker e nil)
|
|
691 (unless (or (/= 0 (length tail))
|
|
692 (eobp)
|
|
693 (looking-at "[ \t\n)]"))
|
|
694 (insert " "))))
|
|
695 (t
|
|
696 (goto-char e)))))
|
31717
|
697
|
56927
|
698 (defun rfc2047-fold-field ()
|
|
699 "Fold the current header field."
|
|
700 (save-excursion
|
|
701 (save-restriction
|
|
702 (rfc2047-narrow-to-field)
|
|
703 (rfc2047-fold-region (point-min) (point-max)))))
|
|
704
|
31717
|
705 (defun rfc2047-fold-region (b e)
|
35985
|
706 "Fold long lines in region B to E."
|
31717
|
707 (save-restriction
|
|
708 (narrow-to-region b e)
|
|
709 (goto-char (point-min))
|
33304
|
710 (let ((break nil)
|
|
711 (qword-break nil)
|
50881
|
712 (first t)
|
33304
|
713 (bol (save-restriction
|
|
714 (widen)
|
56927
|
715 (rfc2047-point-at-bol))))
|
31717
|
716 (while (not (eobp))
|
56927
|
717 (when (and (or break qword-break)
|
|
718 (> (- (point) bol) 76))
|
33304
|
719 (goto-char (or break qword-break))
|
|
720 (setq break nil
|
|
721 qword-break nil)
|
61126
|
722 (skip-chars-backward " \t")
|
50881
|
723 (if (looking-at "[ \t]")
|
47945
|
724 (insert ?\n)
|
35453
|
725 (insert "\n "))
|
33304
|
726 (setq bol (1- (point)))
|
|
727 ;; Don't break before the first non-LWSP characters.
|
|
728 (skip-chars-forward " \t")
|
56927
|
729 (unless (eobp)
|
|
730 (forward-char 1)))
|
31717
|
731 (cond
|
33304
|
732 ((eq (char-after) ?\n)
|
|
733 (forward-char 1)
|
|
734 (setq bol (point)
|
|
735 break nil
|
|
736 qword-break nil)
|
|
737 (skip-chars-forward " \t")
|
|
738 (unless (or (eobp) (eq (char-after) ?\n))
|
|
739 (forward-char 1)))
|
|
740 ((eq (char-after) ?\r)
|
|
741 (forward-char 1))
|
31717
|
742 ((memq (char-after) '(? ?\t))
|
33304
|
743 (skip-chars-forward " \t")
|
61126
|
744 (unless first ;; Don't break just after the header name.
|
|
745 (setq break (point))))
|
33304
|
746 ((not break)
|
|
747 (if (not (looking-at "=\\?[^=]"))
|
|
748 (if (eq (char-after) ?=)
|
|
749 (forward-char 1)
|
|
750 (skip-chars-forward "^ \t\n\r="))
|
56927
|
751 ;; Don't break at the start of the field.
|
|
752 (unless (= (point) b)
|
|
753 (setq qword-break (point)))
|
33304
|
754 (skip-chars-forward "^ \t\n\r")))
|
|
755 (t
|
61126
|
756 (skip-chars-forward "^ \t\n\r")))
|
|
757 (setq first nil))
|
56927
|
758 (when (and (or break qword-break)
|
|
759 (> (- (point) bol) 76))
|
33304
|
760 (goto-char (or break qword-break))
|
|
761 (setq break nil
|
|
762 qword-break nil)
|
61126
|
763 (if (or (> 0 (skip-chars-backward " \t"))
|
|
764 (looking-at "[ \t]"))
|
|
765 (insert ?\n)
|
|
766 (insert "\n "))
|
33304
|
767 (setq bol (1- (point)))
|
|
768 ;; Don't break before the first non-LWSP characters.
|
|
769 (skip-chars-forward " \t")
|
56927
|
770 (unless (eobp)
|
|
771 (forward-char 1))))))
|
|
772
|
|
773 (defun rfc2047-unfold-field ()
|
|
774 "Fold the current line."
|
|
775 (save-excursion
|
|
776 (save-restriction
|
|
777 (rfc2047-narrow-to-field)
|
|
778 (rfc2047-unfold-region (point-min) (point-max)))))
|
33304
|
779
|
|
780 (defun rfc2047-unfold-region (b e)
|
35985
|
781 "Unfold lines in region B to E."
|
33304
|
782 (save-restriction
|
|
783 (narrow-to-region b e)
|
|
784 (goto-char (point-min))
|
|
785 (let ((bol (save-restriction
|
|
786 (widen)
|
56927
|
787 (rfc2047-point-at-bol)))
|
|
788 (eol (rfc2047-point-at-eol)))
|
33304
|
789 (forward-line 1)
|
|
790 (while (not (eobp))
|
50881
|
791 (if (and (looking-at "[ \t]")
|
56927
|
792 (< (- (rfc2047-point-at-eol) bol) 76))
|
50881
|
793 (delete-region eol (progn
|
|
794 (goto-char eol)
|
|
795 (skip-chars-forward "\r\n")
|
|
796 (point)))
|
56927
|
797 (setq bol (rfc2047-point-at-bol)))
|
|
798 (setq eol (rfc2047-point-at-eol))
|
33304
|
799 (forward-line 1)))))
|
31717
|
800
|
61126
|
801 (defun rfc2047-b-encode-string (string)
|
|
802 "Base64-encode the header contained in STRING."
|
|
803 (base64-encode-string string t))
|
31717
|
804
|
61126
|
805 (defun rfc2047-q-encode-string (string)
|
|
806 "Quoted-printable-encode the header in STRING."
|
|
807 (mm-with-unibyte-buffer
|
|
808 (insert string)
|
|
809 (quoted-printable-encode-region
|
|
810 (point-min) (point-max) nil
|
|
811 ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
|
|
812 ;; Avoid using 8bit characters.
|
|
813 ;; This list excludes `especials' (see the RFC2047 syntax),
|
|
814 ;; meaning that some characters in non-structured fields will
|
|
815 ;; get encoded when they con't need to be. The following is
|
|
816 ;; what it used to be.
|
|
817 ;;; ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
|
|
818 ;;; "\010\012\014\040-\074\076\100-\136\140-\177")
|
|
819 "-\b\n\f !#-'*+0-9A-Z\\^`-~\d")
|
|
820 (subst-char-in-region (point-min) (point-max) ? ?_)
|
|
821 (buffer-string)))
|
|
822
|
|
823 (defun rfc2047-encode-parameter (param value)
|
|
824 "Return and PARAM=VALUE string encoded in the RFC2047-like style.
|
|
825 This is a replacement for the `rfc2231-encode-string' function.
|
|
826
|
|
827 When attaching files as MIME parts, we should use the RFC2231 encoding
|
|
828 to specify the file names containing non-ASCII characters. However,
|
|
829 many mail softwares don't support it in practice and recipients won't
|
|
830 be able to extract files with correct names. Instead, the RFC2047-like
|
|
831 encoding is acceptable generally. This function provides the very
|
|
832 RFC2047-like encoding, resigning to such a regrettable trend. To use
|
|
833 it, put the following line in your ~/.gnus.el file:
|
|
834
|
|
835 \(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter)
|
|
836 "
|
|
837 (let* ((rfc2047-encoding-type 'mime)
|
|
838 (rfc2047-encode-max-chars nil)
|
|
839 (string (rfc2047-encode-string value)))
|
|
840 (if (string-match (concat "[" ietf-drums-tspecials "]") string)
|
|
841 (format "%s=%S" param string)
|
|
842 (concat param "=" string))))
|
31717
|
843
|
|
844 ;;;
|
|
845 ;;; Functions for decoding RFC2047 messages
|
|
846 ;;;
|
|
847
|
56927
|
848 (eval-and-compile
|
|
849 (defconst rfc2047-encoded-word-regexp
|
61126
|
850 "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(\\*[^?]+\\)?\
|
|
851 \\?\\(B\\|Q\\)\\?\\([!->@-~ ]*\\)\\?="))
|
56927
|
852
|
60161
|
853 (defvar rfc2047-quote-decoded-words-containing-tspecials nil
|
|
854 "If non-nil, quote decoded words containing special characters.")
|
|
855
|
66299
|
856 (defvar rfc2047-allow-incomplete-encoded-text t
|
|
857 "*Non-nil means allow incomplete encoded-text in successive encoded-words.
|
|
858 Dividing of encoded-text in the place other than character boundaries
|
|
859 violates RFC2047 section 5, while we have a capability to decode it.
|
|
860 If it is non-nil, the decoder will decode B- or Q-encoding in each
|
|
861 encoded-word, concatenate them, and decode it by charset. Otherwise,
|
|
862 the decoder will fully decode each encoded-word before concatenating
|
|
863 them.")
|
|
864
|
72605
|
865 (defun rfc2047-strip-backslashes-in-quoted-strings ()
|
72632
|
866 "Strip backslashes in quoted strings. `\\\"' remains."
|
72605
|
867 (goto-char (point-min))
|
|
868 (let (beg)
|
|
869 (with-syntax-table (standard-syntax-table)
|
|
870 (while (search-forward "\"" nil t)
|
|
871 (unless (eq (char-before) ?\\)
|
|
872 (setq beg (match-end 0))
|
|
873 (goto-char (match-beginning 0))
|
|
874 (condition-case nil
|
|
875 (progn
|
|
876 (forward-sexp)
|
|
877 (save-restriction
|
|
878 (narrow-to-region beg (1- (point)))
|
|
879 (goto-char beg)
|
|
880 (while (search-forward "\\" nil 'move)
|
72632
|
881 (unless (memq (char-after) '(?\"))
|
72605
|
882 (delete-backward-char 1))
|
|
883 (forward-char)))
|
|
884 (forward-char))
|
|
885 (error
|
|
886 (goto-char beg))))))))
|
|
887
|
66299
|
888 (defun rfc2047-charset-to-coding-system (charset)
|
|
889 "Return coding-system corresponding to MIME CHARSET.
|
|
890 If your Emacs implementation can't decode CHARSET, return nil."
|
|
891 (when (stringp charset)
|
|
892 (setq charset (intern (downcase charset))))
|
|
893 (when (or (not charset)
|
|
894 (eq 'gnus-all mail-parse-ignored-charsets)
|
|
895 (memq 'gnus-all mail-parse-ignored-charsets)
|
|
896 (memq charset mail-parse-ignored-charsets))
|
|
897 (setq charset mail-parse-charset))
|
67643
|
898 (let ((cs (mm-charset-to-coding-system charset)))
|
66299
|
899 (cond ((eq cs 'ascii)
|
|
900 (setq cs (or (mm-charset-to-coding-system mail-parse-charset)
|
|
901 'raw-text)))
|
69046
|
902 ((mm-coding-system-p cs))
|
66299
|
903 ((and charset
|
|
904 (listp mail-parse-ignored-charsets)
|
|
905 (memq 'gnus-unknown mail-parse-ignored-charsets))
|
|
906 (setq cs (mm-charset-to-coding-system mail-parse-charset))))
|
|
907 (if (eq cs 'ascii)
|
|
908 'raw-text
|
|
909 cs)))
|
|
910
|
|
911 (defun rfc2047-decode-encoded-words (words)
|
|
912 "Decode successive encoded-words in WORDS and return a decoded string.
|
|
913 Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT
|
|
914 ENCODED-WORD)."
|
|
915 (let (word charset cs encoding text rest)
|
|
916 (while words
|
|
917 (setq word (pop words))
|
69944
|
918 (if (and (setq cs (rfc2047-charset-to-coding-system
|
|
919 (setq charset (car word))))
|
66299
|
920 (condition-case code
|
|
921 (cond ((char-equal ?B (nth 1 word))
|
|
922 (setq text (base64-decode-string
|
|
923 (rfc2047-pad-base64 (nth 2 word)))))
|
|
924 ((char-equal ?Q (nth 1 word))
|
|
925 (setq text (quoted-printable-decode-string
|
|
926 (mm-subst-char-in-string
|
|
927 ?_ ? (nth 2 word) t)))))
|
|
928 (error
|
|
929 (message "%s" (error-message-string code))
|
|
930 nil)))
|
|
931 (if (and rfc2047-allow-incomplete-encoded-text
|
|
932 (eq cs (caar rest)))
|
|
933 ;; Concatenate text of which the charset is the same.
|
|
934 (setcdr (car rest) (concat (cdar rest) text))
|
|
935 (push (cons cs text) rest))
|
|
936 ;; Don't decode encoded-word.
|
|
937 (push (cons nil (nth 3 word)) rest)))
|
|
938 (while rest
|
|
939 (setq words (concat
|
|
940 (or (and (setq cs (caar rest))
|
|
941 (condition-case code
|
|
942 (mm-decode-coding-string (cdar rest) cs)
|
|
943 (error
|
|
944 (message "%s" (error-message-string code))
|
|
945 nil)))
|
|
946 (concat (when (cdr rest) " ")
|
|
947 (cdar rest)
|
|
948 (when (and words
|
|
949 (not (eq (string-to-char words) ? )))
|
|
950 " ")))
|
|
951 words)
|
|
952 rest (cdr rest)))
|
|
953 words))
|
|
954
|
56927
|
955 ;; Fixme: This should decode in place, not cons intermediate strings.
|
|
956 ;; Also check whether it needs to worry about delimiting fields like
|
|
957 ;; encoding.
|
|
958
|
|
959 ;; In fact it's reported that (invalid) encoding of mailboxes in
|
|
960 ;; addr-specs is in use, so delimiting fields might help. Probably
|
|
961 ;; not decoding a word which isn't properly delimited is good enough
|
|
962 ;; and worthwhile (is it more correct or not?), e.g. something like
|
|
963 ;; `=?iso-8859-1?q?foo?=@'.
|
31717
|
964
|
72605
|
965 (defun rfc2047-decode-region (start end &optional address-mime)
|
|
966 "Decode MIME-encoded words in region between START and END.
|
|
967 If ADDRESS-MIME is non-nil, strip backslashes which precede characters
|
|
968 other than `\"' and `\\' in quoted strings."
|
31717
|
969 (interactive "r")
|
|
970 (let ((case-fold-search t)
|
66299
|
971 (eword-regexp (eval-when-compile
|
|
972 ;; Ignore whitespace between encoded-words.
|
|
973 (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp
|
|
974 "\\)")))
|
|
975 b e match words)
|
47951
|
976 (save-excursion
|
|
977 (save-restriction
|
|
978 (narrow-to-region start end)
|
72605
|
979 (when address-mime
|
|
980 (rfc2047-strip-backslashes-in-quoted-strings))
|
66299
|
981 (goto-char (setq b start))
|
|
982 ;; Look for the encoded-words.
|
|
983 (while (setq match (re-search-forward eword-regexp nil t))
|
|
984 (setq e (match-beginning 1)
|
|
985 end (match-end 0)
|
|
986 words nil)
|
|
987 (while match
|
|
988 (push (list (match-string 2) ;; charset
|
|
989 (char-after (match-beginning 4)) ;; encoding
|
|
990 (match-string 5) ;; encoded-text
|
|
991 (match-string 1)) ;; encoded-word
|
|
992 words)
|
|
993 ;; Look for the subsequent encoded-words.
|
|
994 (when (setq match (looking-at eword-regexp))
|
|
995 (goto-char (setq end (match-end 0)))))
|
|
996 ;; Replace the encoded-words with the decoded one.
|
|
997 (delete-region e end)
|
|
998 (insert (rfc2047-decode-encoded-words (nreverse words)))
|
56927
|
999 (save-restriction
|
|
1000 (narrow-to-region e (point))
|
|
1001 (goto-char e)
|
60161
|
1002 ;; Remove newlines between decoded words, though such
|
|
1003 ;; things essentially must not be there.
|
56927
|
1004 (while (re-search-forward "[\n\r]+" nil t)
|
|
1005 (replace-match " "))
|
60161
|
1006 ;; Quote decoded words if there are special characters
|
|
1007 ;; which might violate RFC2822.
|
|
1008 (when (and rfc2047-quote-decoded-words-containing-tspecials
|
|
1009 (let ((regexp (car (rassq
|
|
1010 'address-mime
|
|
1011 rfc2047-header-encoding-alist))))
|
|
1012 (when regexp
|
|
1013 (save-restriction
|
|
1014 (widen)
|
|
1015 (beginning-of-line)
|
|
1016 (while (and (memq (char-after) '(? ?\t))
|
|
1017 (zerop (forward-line -1))))
|
|
1018 (looking-at regexp)))))
|
|
1019 (let (quoted)
|
|
1020 (goto-char e)
|
|
1021 (skip-chars-forward " \t")
|
|
1022 (setq start (point))
|
|
1023 (setq quoted (eq (char-after) ?\"))
|
|
1024 (goto-char (point-max))
|
|
1025 (skip-chars-backward " \t")
|
|
1026 (if (setq quoted (and quoted
|
|
1027 (> (point) (1+ start))
|
|
1028 (eq (char-before) ?\")))
|
|
1029 (progn
|
|
1030 (backward-char)
|
|
1031 (setq start (1+ start)
|
|
1032 end (point-marker)))
|
|
1033 (setq end (point-marker)))
|
|
1034 (goto-char start)
|
|
1035 (while (search-forward "\"" end t)
|
|
1036 (when (prog2
|
|
1037 (backward-char)
|
|
1038 (zerop (% (skip-chars-backward "\\\\") 2))
|
|
1039 (goto-char (match-beginning 0)))
|
|
1040 (insert "\\"))
|
|
1041 (forward-char))
|
|
1042 (when (and (not quoted)
|
|
1043 (progn
|
|
1044 (goto-char start)
|
|
1045 (re-search-forward
|
|
1046 (concat "[" ietf-drums-tspecials "]")
|
|
1047 end t)))
|
|
1048 (goto-char start)
|
|
1049 (insert "\"")
|
|
1050 (goto-char end)
|
|
1051 (insert "\""))
|
|
1052 (set-marker end nil)))
|
56927
|
1053 (goto-char (point-max)))
|
47951
|
1054 (when (and (mm-multibyte-p)
|
|
1055 mail-parse-charset
|
56927
|
1056 (not (eq mail-parse-charset 'us-ascii))
|
47951
|
1057 (not (eq mail-parse-charset 'gnus-decoded)))
|
|
1058 (mm-decode-coding-region b e mail-parse-charset))
|
|
1059 (setq b (point)))
|
|
1060 (when (and (mm-multibyte-p)
|
|
1061 mail-parse-charset
|
|
1062 (not (eq mail-parse-charset 'us-ascii))
|
|
1063 (not (eq mail-parse-charset 'gnus-decoded)))
|
56927
|
1064 (mm-decode-coding-region b (point-max) mail-parse-charset))))))
|
31717
|
1065
|
72605
|
1066 (defun rfc2047-decode-address-region (start end)
|
|
1067 "Decode MIME-encoded words in region between START and END.
|
|
1068 Backslashes which precede characters other than `\"' and `\\' in quoted
|
|
1069 strings are stripped."
|
|
1070 (rfc2047-decode-region start end t))
|
|
1071
|
|
1072 (defun rfc2047-decode-string (string &optional address-mime)
|
|
1073 "Decode MIME-encoded STRING and return the result.
|
|
1074 If ADDRESS-MIME is non-nil, strip backslashes which precede characters
|
|
1075 other than `\"' and `\\' in quoted strings."
|
31717
|
1076 (let ((m (mm-multibyte-p)))
|
56927
|
1077 (if (string-match "=\\?" string)
|
|
1078 (with-temp-buffer
|
|
1079 ;; Fixme: This logic is wrong, but seems to be required by
|
|
1080 ;; Gnus summary buffer generation. The value of `m' depends
|
|
1081 ;; on the current buffer, not global multibyteness or that
|
|
1082 ;; of the string. Also the string returned should always be
|
|
1083 ;; multibyte in a multibyte session, i.e. the buffer should
|
|
1084 ;; be multibyte before `buffer-string' is called.
|
|
1085 (when m
|
|
1086 (mm-enable-multibyte))
|
|
1087 (insert string)
|
|
1088 (inline
|
72605
|
1089 (rfc2047-decode-region (point-min) (point-max) address-mime))
|
56927
|
1090 (buffer-string))
|
72605
|
1091 (when address-mime
|
|
1092 (setq string
|
|
1093 (with-temp-buffer
|
|
1094 (when (mm-multibyte-string-p string)
|
|
1095 (mm-enable-multibyte))
|
|
1096 (insert string)
|
|
1097 (rfc2047-strip-backslashes-in-quoted-strings)
|
|
1098 (buffer-string))))
|
56927
|
1099 ;; Fixme: As above, `m' here is inappropriate.
|
|
1100 (if (and m
|
|
1101 mail-parse-charset
|
|
1102 (not (eq mail-parse-charset 'us-ascii))
|
|
1103 (not (eq mail-parse-charset 'gnus-decoded)))
|
61126
|
1104 ;; `decode-coding-string' in Emacs offers a third optional
|
|
1105 ;; arg NOCOPY to avoid consing a new string if the decoding
|
|
1106 ;; is "trivial". Unfortunately it currently doesn't
|
|
1107 ;; consider anything else than a `nil' coding system
|
|
1108 ;; trivial.
|
|
1109 ;; `rfc2047-decode-string' is called multiple times for each
|
|
1110 ;; article during summary buffer generation, and we really
|
|
1111 ;; want to avoid unnecessary consing. So we bypass
|
|
1112 ;; `decode-coding-string' if the string is purely ASCII.
|
|
1113 (if (and (fboundp 'detect-coding-string)
|
|
1114 ;; string is purely ASCII
|
|
1115 (eq (detect-coding-string string t) 'undecided))
|
|
1116 string
|
|
1117 (mm-decode-coding-string string mail-parse-charset))
|
56927
|
1118 (mm-string-as-multibyte string)))))
|
31717
|
1119
|
72605
|
1120 (defun rfc2047-decode-address-string (string)
|
|
1121 "Decode MIME-encoded STRING and return the result.
|
|
1122 Backslashes which precede characters other than `\"' and `\\' in quoted
|
|
1123 strings are stripped."
|
|
1124 (rfc2047-decode-string string t))
|
|
1125
|
56927
|
1126 (defun rfc2047-pad-base64 (string)
|
|
1127 "Pad STRING to quartets."
|
|
1128 ;; Be more liberal to accept buggy base64 strings. If
|
|
1129 ;; base64-decode-string accepts buggy strings, this function could
|
|
1130 ;; be aliased to identity.
|
57243
|
1131 (if (= 0 (mod (length string) 4))
|
|
1132 string
|
|
1133 (when (string-match "=+$" string)
|
|
1134 (setq string (substring string 0 (match-beginning 0))))
|
|
1135 (case (mod (length string) 4)
|
|
1136 (0 string)
|
|
1137 (1 string) ;; Error, don't pad it.
|
|
1138 (2 (concat string "=="))
|
|
1139 (3 (concat string "=")))))
|
31717
|
1140
|
|
1141 (provide 'rfc2047)
|
|
1142
|
52401
|
1143 ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6
|
31717
|
1144 ;;; rfc2047.el ends here
|