38413
|
1 ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages
|
60161
|
2
|
74548
|
3 ;; Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004,
|
79708
|
4 ;; 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
31717
|
5
|
|
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
|
|
7 ;; MORIOKA Tomohiko <morioka@jaist.ac.jp>
|
|
8 ;; This file is part of GNU Emacs.
|
|
9
|
|
10 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
11 ;; it under the terms of the GNU General Public License as published by
|
78224
|
12 ;; the Free Software Foundation; either version 3, or (at your option)
|
31717
|
13 ;; any later version.
|
|
14
|
|
15 ;; GNU Emacs is distributed in the hope that it will be useful,
|
|
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
18 ;; GNU General Public License for more details.
|
|
19
|
|
20 ;; You should have received a copy of the GNU General Public License
|
|
21 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
64085
|
22 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
23 ;; Boston, MA 02110-1301, USA.
|
31717
|
24
|
|
25 ;;; Commentary:
|
|
26
|
34674
|
27 ;; RFC 2047 is "MIME (Multipurpose Internet Mail Extensions) Part
|
|
28 ;; Three: Message Header Extensions for Non-ASCII Text".
|
|
29
|
31717
|
30 ;;; Code:
|
|
31
|
47945
|
32 (eval-when-compile
|
86154
|
33 (require 'cl))
|
|
34 (defvar message-posting-charset)
|
31717
|
35
|
|
36 (require 'qp)
|
|
37 (require 'mm-util)
|
60161
|
38 (require 'ietf-drums)
|
47945
|
39 ;; Fixme: Avoid this (used for mail-parse-charset) mm dependence on gnus.
|
31717
|
40 (require 'mail-prsvr)
|
78125
|
41 (require 'rfc2045) ;; rfc2045-encode-string
|
33304
|
42 (autoload 'mm-body-7-or-8 "mm-bodies")
|
33127
|
43
|
31717
|
;; Consulted by `rfc2047-encode-message-header'.  For each header field
;; the entries are tried in order; string keys are matched with
;; `looking-at' at the start of the field, and the first entry that
;; matches (or whose key is t) supplies the encoding method.
(defvar rfc2047-header-encoding-alist
  '(("Newsgroups" . nil)
    ("Followup-To" . nil)
    ("Message-ID" . nil)
    ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|\\(In-\\)?Reply-To\\|Sender\
\\|Mail-Followup-To\\|Mail-Copies-To\\|Approved\\)" . address-mime)
    (t . mime))
  "*Header/encoding method alist.
The list is traversed sequentially.  The keys can either be
header regexps or t.

The values can be:

1) nil, in which case no encoding is done;
2) `mime', in which case the header will be encoded according to RFC2047;
3) `address-mime', like `mime', but takes account of the rules for address
   fields (where quoted strings and comments must be treated separately);
4) a charset, in which case it will be encoded as that charset;
5) `default', in which case the field will be encoded as the rest
   of the article.")
|
|
64
|
|
;; Looked up with `assq' by `rfc2047-encode'.  A charset that is absent
;; from this list, or whose value is nil, gets its encoding chosen by
;; `rfc2047-qp-or-base64' instead.
(defvar rfc2047-charset-encoding-alist
  '((us-ascii . nil)
    (iso-8859-1 . Q)
    (iso-8859-2 . Q)
    (iso-8859-3 . Q)
    (iso-8859-4 . Q)
    (iso-8859-5 . B)
    (koi8-r . B)
    (iso-8859-7 . B)
    (iso-8859-8 . B)
    (iso-8859-9 . Q)
    (iso-8859-14 . Q)
    (iso-8859-15 . Q)
    (iso-2022-jp . B)
    (iso-2022-kr . B)
    (gb2312 . B)
    (big5 . B)
    (cn-big5 . B)
    (cn-gb . B)
    (cn-gb-2312 . B)
    (euc-kr . B)
    (iso-2022-jp-2 . B)
    (iso-2022-int-1 . B)
    (viscii . Q))
  "Alist of MIME charsets to RFC2047 encodings.
Valid encodings are nil, `Q' and `B'.  These indicate binary (no) encoding,
quoted-printable and base64 respectively.")
|
31717
|
92
|
61126
|
;; `rfc2047-encode' looks the chosen encoding symbol up here and falls
;; back to `identity' when there is no entry.
(defvar rfc2047-encode-function-alist
  '((Q . rfc2047-q-encode-string)
    (B . rfc2047-b-encode-string)
    (nil . identity))
  "Alist of RFC2047 encodings to encoding functions.")
|
|
98
|
61126
|
;; When non-nil, text that already contains "=?" is treated as needing
;; encoding by `rfc2047-encodable-p' and `rfc2047-encode-region'.
(defvar rfc2047-encode-encoded-words t
  "Whether encoded words should be encoded again.")
|
|
101
|
87097
|
;; NOTE(review): not referenced in the encoding half of this file;
;; presumably it selects `rfc2047-encoded-word-regexp-loose' in the
;; decoding code -- confirm against the decoder.
(defvar rfc2047-allow-irregular-q-encoded-words t
  "*Whether to decode irregular Q-encoded words.")
|
|
104
|
|
;; Both regexps share the same shape: "=?", the charset (group 1), an
;; optional "*language" suffix, "?", then group 2 holding the encoding
;; letter, "?" and the encoded text, and finally the closing "?=".
(eval-and-compile ;; Necessary to hard code them in `rfc2047-decode-region'.
  (defconst rfc2047-encoded-word-regexp
    "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
\\(B\\?[+/0-9A-Za-z]*=*\
\\|Q\\?[ ->@-~]*\
\\)\\?="
    "Regexp that matches encoded word."
    ;; The patterns for the B encoding and the Q encoding, i.e. the ones
    ;; beginning with "B" and "Q" respectively, are restricted into only
    ;; the characters that those encodings may generally use.
    )
  (defconst rfc2047-encoded-word-regexp-loose
    "=\\?\\([^][\000-\040()<>@,\;:*\\\"/?.=]+\\)\\(?:\\*[^?]+\\)?\\?\
\\(B\\?[+/0-9A-Za-z]*=*\
\\|Q\\?\\(?:\\?+[ -<>@-~]\\)?\\(?:[ ->@-~]+\\?+[ -<>@-~]\\)*[ ->@-~]*\\?*\
\\)\\?="
    "Regexp that matches encoded word allowing loose Q encoding."
    ;; The pattern for the Q encoding, i.e. the one beginning with "Q",
    ;; is similar to:
    ;;   "Q\\?\\(\\?+[^\n=?]\\)?\\([^\n?]+\\?+[^\n=?]\\)*[^\n?]*\\?*"
    ;;        <--------1-------><----------2,3----------><--4--><-5->
    ;; They mean:
    ;; 1. After "Q?", allow "?"s that follow a character other than "=".
    ;; 2. Allow "=" after "Q?"; it isn't regarded as the terminator.
    ;; 3. In the middle of an encoded word, allow "?"s that follow a
    ;;    character other than "=".
    ;; 4. Allow any characters other than "?" in the middle of an
    ;;    encoded word.
    ;; 5. At the end, allow "?"s.
    ))
|
|
135
|
31717
|
136 ;;;
|
|
137 ;;; Functions for encoding RFC2047 messages
|
|
138 ;;;
|
|
139
|
57243
|
(defun rfc2047-qp-or-base64 ()
  "Return the type with which to encode the buffer.
This is either `base64' or `quoted-printable'.

Only the first 2000 characters of the accessible portion are
examined.  Characters outside the range \\x20-\\x7f other than CR,
LF and TAB are counted; `quoted-printable' is returned when there
is exactly one of them, or when six times their number is still
smaller than the number of characters examined."
  (save-excursion
    (let* ((scan-end (min (point-max) (+ 2000 (point-min))))
           (scanned (- scan-end (point-min)))
           (non-ascii 0))
      (goto-char (point-min))
      ;; Hop from one non-ASCII character to the next, counting them.
      (while (progn
               (skip-chars-forward "\x20-\x7f\r\n\t" scan-end)
               (< (point) scan-end))
        (setq non-ascii (1+ non-ascii))
        (forward-char 1))
      (cond
       ;; Don't base64, say, a short line with a single non-ASCII
       ;; char when splitting parts by charset.
       ((= non-ascii 1) 'quoted-printable)
       ((< (* 6 non-ascii) scanned) 'quoted-printable)
       (t 'base64)))))
|
|
158
|
31717
|
(defun rfc2047-narrow-to-field ()
  "Narrow the buffer to the header on the current line.
The field runs from the beginning of the current line up to the
next line that starts with something other than a space, a tab or
a newline, or to the end of the accessible portion when there is
no such line.  Point is left at the start of the field."
  (beginning-of-line)
  (let ((field-start (point))
        field-end)
    (forward-line 1)
    (setq field-end (if (re-search-forward "^[^ \n\t]" nil t)
                        (point-at-bol)
                      (point-max)))
    (narrow-to-region field-start field-end)
    (goto-char (point-min))))
|
|
170
|
56927
|
(defun rfc2047-field-value ()
  "Return the value of the field at point.
The value is everything in the field after the first colon and
any whitespace following it, without text properties.  Point and
narrowing are left untouched."
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (let ((value-end (point-max)))
        ;; Step over the header name, its colon and leading whitespace.
        (re-search-forward ":[ \t\n]*" value-end t)
        (buffer-substring-no-properties (point) value-end)))))
|
56927
|
178
|
72605
|
(defun rfc2047-quote-special-characters-in-quoted-strings (&optional
                                                           encodable-regexp)
  "Quote special characters with `\\'s in quoted strings.
Quoting will not be done in a quoted string if it contains characters
matching ENCODABLE-REGEXP or it is within parentheses.

Works on the accessible portion of the current buffer, starting from
its beginning, and modifies the buffer in place.  The set of special
characters is taken from `ietf-drums-tspecials'."
  (goto-char (point-min))
  (let ((tspecials (concat "[" ietf-drums-tspecials "]"))
        (start (point))
        beg end)
    (with-syntax-table (standard-syntax-table)
      (while (not (eobp))
        ;; Find the next parenthesized group and stop just before it;
        ;; if there is none (or the parens are unbalanced), go to the
        ;; end of the buffer.
        (if (ignore-errors
              (forward-list 1)
              (eq (char-before) ?\)))
            (forward-list -1)
          (goto-char (point-max)))
        ;; Only the text between START and that group is examined.
        (save-restriction
          (narrow-to-region start (point))
          (goto-char start)
          (while (search-forward "\"" nil t)
            (setq beg (match-beginning 0))
            ;; Ignore a double quote that is itself backslash-quoted.
            (unless (eq (char-before beg) ?\\)
              (goto-char beg)
              ;; From here on BEG is the first character inside the
              ;; quoted string and END the position of its closing quote.
              (setq beg (1+ beg))
              (condition-case nil
                  (progn
                    (forward-sexp)
                    (setq end (1- (point)))
                    (goto-char beg)
                    (if (and encodable-regexp
                             (re-search-forward encodable-regexp end t))
                        ;; The string contains encodable text: leave it
                        ;; alone and continue after the closing quote.
                        (goto-char (1+ end))
                      (save-restriction
                        (narrow-to-region beg end)
                        (while (re-search-forward tspecials nil 'move)
                          (if (eq (char-before) ?\\)
                              ;; The match is a backslash: either it
                              ;; already quotes the following special,
                              ;; or it gets a backslash of its own.
                              (if (looking-at tspecials) ;; Already quoted.
                                  (forward-char)
                                (insert "\\"))
                            ;; Any other special: put a backslash in
                            ;; front of it.
                            (goto-char (match-beginning 0))
                            (insert "\\")
                            (forward-char))))
                      ;; Step over the closing quote.
                      (forward-char)))
                ;; E.g. an unterminated string: resume just after the
                ;; opening quote.
                (error
                 (goto-char beg)))))
          (goto-char (point-max)))
        ;; Skip the parenthesized group itself and carry on after it.
        (forward-list 1)
        (setq start (point))))))
|
72605
|
227
|
47945
|
;; `rfc2047-encode-region' takes its simple whole-word path when this
;; is `mime' and its address-aware path for any other value.
(defvar rfc2047-encoding-type 'address-mime
  "The type of encoding done by `rfc2047-encode-region'.
This should be dynamically bound around calls to
`rfc2047-encode-region' to either `mime' or `address-mime'.  See
`rfc2047-header-encoding-alist', for definitions.")
|
|
233
|
31717
|
(defun rfc2047-encode-message-header ()
  "Encode the message header according to `rfc2047-header-encoding-alist'.
Should be called narrowed to the head of the message.

Each header field is handled in turn.  The first entry of
`rfc2047-header-encoding-alist' whose key is t, or is a regexp
matching at the start of the field, determines the encoding
method for that field.  Point is preserved."
  (interactive "*")
  (save-excursion
    (goto-char (point-min))
    (let (alist elem method)
      (while (not (eobp))
        (save-restriction
          (rfc2047-narrow-to-field)
          ;; Pick the method of the first matching alist entry.
          (setq method nil
                alist rfc2047-header-encoding-alist)
          (while (setq elem (pop alist))
            (when (or (and (stringp (car elem))
                           (looking-at (car elem)))
                      (eq (car elem) t))
              (setq alist nil
                    method (cdr elem))))
          (if (not (rfc2047-encodable-p))
              ;; Nothing needs RFC 2047 encoding.  The field is
              ;; deliberately not folded here; the folding code that
              ;; used to sit in this branch was disabled and could
              ;; never run.
              (progn
                (when (eq method 'address-mime)
                  (rfc2047-quote-special-characters-in-quoted-strings))
                ;; The field holds raw 8-bit text and the posting
                ;; charset names a usable coding system: encode the
                ;; field in that coding system.
                (when (and (eq (mm-body-7-or-8) '8bit)
                           (mm-multibyte-p)
                           (mm-coding-system-p
                            (car message-posting-charset)))
                  (mm-encode-coding-region
                   (point-min) (point-max)
                   (mm-charset-to-coding-system
                    (car message-posting-charset)))))
            ;; We found something that may perhaps be encoded.
            ;; Move past the header name so only the value is touched.
            (re-search-forward "^[^:]+: *" nil t)
            (cond
             ((eq method 'address-mime)
              (rfc2047-encode-region (point) (point-max)))
             ((eq method 'mime)
              (let ((rfc2047-encoding-type 'mime))
                (rfc2047-encode-region (point) (point-max))))
             ((eq method 'default)
              (if (and (featurep 'mule)
                       (if (boundp 'default-enable-multibyte-characters)
                           default-enable-multibyte-characters)
                       mail-parse-charset)
                  (mm-encode-coding-region (point) (point-max)
                                           mail-parse-charset)))
             ;; We get this when CC'ing messages to newsgroups with
             ;; 8-bit names.  The group name mail copy just got
             ;; unconditionally encoded.  Previously, it would ask
             ;; whether to encode, which was quite confusing for the
             ;; user.  If the new behaviour is wrong, tell me.  I have
             ;; left the old code commented out below.
             ;; -- Per Abrahamsen <abraham@dina.kvl.dk> Date: 2001-10-07.
             ;; Modified by Dave Love, with the commented-out code changed
             ;; in accordance with changes elsewhere.
             ((null method)
              (rfc2047-encode-region (point) (point-max)))
;;;          ((null method)
;;;           (if (or (message-options-get
;;;                    'rfc2047-encode-message-header-encode-any)
;;;                   (message-options-set
;;;                    'rfc2047-encode-message-header-encode-any
;;;                    (y-or-n-p
;;;                     "Some texts are not encoded. Encode anyway?")))
;;;               (rfc2047-encode-region (point-min) (point-max))
;;;             (error "Cannot send unencoded text")))
             ;; METHOD names a coding system: encode the value in it.
             ((mm-coding-system-p method)
              (if (or (and (featurep 'mule)
                           (if (boundp 'default-enable-multibyte-characters)
                               default-enable-multibyte-characters))
                      (featurep 'file-coding))
                  (mm-encode-coding-region (point) (point-max) method)))
             ;; Hm.
             (t)))
          ;; Move to the end of this field so that, once the
          ;; restriction is undone, the loop continues with the next.
          (goto-char (point-max)))))))
|
|
319
|
35985
|
320 ;; Fixme: This, and the require below may not be the Right Thing, but
|
|
321 ;; should be safe just before release. -- fx 2001-02-08
|
|
322
|
33304
|
(defun rfc2047-encodable-p ()
  "Return non-nil if any characters in current buffer need encoding in headers.
The buffer may be narrowed.

That is the case when `rfc2047-encode-encoded-words' is non-nil and
the text already contains an encoded word, or when the MIME charsets
found in the text are anything other than exactly the first element
of `message-posting-charset'.  Point is moved to the beginning of
the accessible portion."
  (require 'message)                    ; for message-posting-charset
  (let ((found-charsets
         (mm-find-mime-charset-region (point-min) (point-max))))
    (goto-char (point-min))
    (cond
     ;; An existing encoded word: return where its match ends, but
     ;; keep point at the beginning.
     ((and rfc2047-encode-encoded-words
           (save-excursion
             (re-search-forward rfc2047-encoded-word-regexp nil t))))
     (found-charsets
      (not (equal found-charsets
                  (list (car message-posting-charset))))))))
|
31717
|
336
|
47945
|
337 ;; Use this syntax table when parsing into regions that may need
|
|
338 ;; encoding. Double quotes are string delimiters, backslash is
|
|
339 ;; character quoting, and all other RFC 2822 special characters are
|
|
340 ;; treated as punctuation so we can use forward-sexp/forward-word to
|
|
341 ;; skip to the end of regions appropriately. Nb. ietf-drums does
|
|
342 ;; things differently.
|
|
(defconst rfc2047-syntax-table
  ;; (make-char-table 'syntax-table '(2)) only works in Emacs.
  (let ((table (make-syntax-table)))
    ;; First give every character word syntax.  This way of setting
    ;; all elements works in Emacs 21 and 22 and XEmacs; it appears to
    ;; be the cleanest way.  Play safe and don't assume the form of
    ;; the word syntax entry -- copy it from ?a.
    (if (fboundp 'set-char-table-range) ; Emacs
        (funcall (intern "set-char-table-range")
                 table t (aref (standard-syntax-table) ?a))
      (if (fboundp 'put-char-table)
          (if (fboundp 'get-char-table) ; warning avoidance
              (put-char-table t (get-char-table ?a (standard-syntax-table))
                              table))))
    ;; Backslash quotes, double quotes delimit strings, and
    ;; parentheses pair up.
    (dolist (entry '((?\\ . "\\")
                     (?\" . "\"")
                     (?\( . "(")
                     (?\) . ")")))
      (modify-syntax-entry (car entry) (cdr entry) table))
    ;; The remaining special characters are plain punctuation.
    (dolist (special '(?\< ?\> ?\[ ?\] ?: ?\; ?, ?@))
      (modify-syntax-entry special "." table))
    table))
|
31717
|
370
|
|
(defun rfc2047-encode-region (b e)
  "Encode words in region B to E that need encoding.
By default, the region is treated as containing RFC2822 addresses.
Dynamically bind `rfc2047-encoding-type' to change that.

The region is modified in place and then folded with
`rfc2047-fold-region'.  Point ends up at the end of the region.  In
the address case, malformed input (for example unbalanced quotes or
parentheses) signals an error quoting the original text, unless
`debug-on-error' or `debug-on-quit' is set, in which case the
original error is re-signaled."
  (save-restriction
    (narrow-to-region b e)
    (let ((encodable-regexp (if rfc2047-encode-encoded-words
                                "[^\000-\177]+\\|=\\?"
                              "[^\000-\177]+"))
          start                         ; start of current token
          end begin csyntax
          ;; Whether there's an encoded word before the current token,
          ;; either immediately or separated by space.
          last-encoded
          (orig-text (buffer-substring-no-properties b e)))
      (if (eq 'mime rfc2047-encoding-type)
          ;; Simple case.  Continuous words in which all those contain
          ;; non-ASCII characters are encoded collectively.  Encoding
          ;; ASCII words, including `Re:' used in Subject headers, is
          ;; avoided for interoperability with non-MIME clients and
          ;; for making it easy to find keywords.
          (progn
            (goto-char (point-min))
            (while (progn (skip-chars-forward " \t\n")
                          (not (eobp)))
              (setq start (point))
              ;; Extend over every following word that contains
              ;; something encodable.
              (while (and (looking-at "[ \t\n]*\\([^ \t\n]+\\)")
                          (progn
                            (setq end (match-end 0))
                            (re-search-forward encodable-regexp end t)))
                (goto-char end))
              (if (> (point) start)
                  (rfc2047-encode start (point))
                (goto-char end))))
        ;; `address-mime' case -- take care of quoted words, comments.
        (rfc2047-quote-special-characters-in-quoted-strings encodable-regexp)
        (with-syntax-table rfc2047-syntax-table
          (goto-char (point-min))
          (condition-case err           ; in case of unbalanced quotes
              ;; Look for rfc2822-style: sequences of atoms, quoted
              ;; strings, specials, whitespace.  (Specials mustn't be
              ;; encoded.)
              (while (not (eobp))
                ;; Skip whitespace.
                (skip-chars-forward " \t\n")
                (setq start (point))
                (cond
                 ((not (char-after)))   ; eob
                 ;; else token start
                 ((eq ?\" (setq csyntax (char-syntax (char-after))))
                  ;; Quoted word.
                  (forward-sexp)
                  (setq end (point))
                  ;; Does it need encoding?
                  (goto-char start)
                  (if (re-search-forward encodable-regexp end 'move)
                      ;; It needs encoding.  Strip the quotes first,
                      ;; since encoded words can't occur in quotes.
                      (progn
                        (goto-char end)
                        ;; `delete-backward-char' is an interactive
                        ;; command that deletes the active region
                        ;; instead of one character when
                        ;; `delete-active-region' applies; use the
                        ;; primitive.
                        (delete-char -1)
                        (goto-char start)
                        (delete-char 1)
                        (when last-encoded
                          ;; There was a preceding quoted word.  We need
                          ;; to include any separating whitespace in this
                          ;; word to avoid it getting lost.
                          (skip-chars-backward " \t")
                          ;; A space is needed between the encoded words.
                          (insert ? )
                          (setq start (point)
                                end (1+ end)))
                        ;; Adjust the end position for the deleted quotes.
                        (rfc2047-encode start (- end 2))
                        (setq last-encoded t)) ; record that it was encoded
                    (setq last-encoded nil)))
                 ((eq ?. csyntax)
                  ;; Skip other delimiters, but record that they've
                  ;; potentially separated quoted words.
                  (forward-char)
                  (setq last-encoded nil))
                 ((eq ?\) csyntax)
                  (error "Unbalanced parentheses"))
                 ((eq ?\( csyntax)
                  ;; Look for the end of parentheses.
                  (forward-list)
                  ;; Encode text as an unstructured field.
                  (let ((rfc2047-encoding-type 'mime))
                    (rfc2047-encode-region (1+ start) (1- (point))))
                  (skip-chars-forward ")"))
                 (t                 ; normal token/whitespace sequence
                  ;; Find the end.
                  ;; Skip one ASCII word, or encode continuous words
                  ;; in which all those contain non-ASCII characters.
                  (setq end nil)
                  (while (not (or end (eobp)))
                    (when (looking-at "[\000-\177]+")
                      (setq begin (point)
                            end (match-end 0))
                      (when (progn
                              (while (and (or (re-search-forward
                                               "[ \t\n]\\|\\Sw" end 'move)
                                              (setq end nil))
                                          (eq ?\\ (char-syntax (char-before))))
                                ;; Skip backslash-quoted characters.
                                (forward-char))
                              end)
                        (setq end (match-beginning 0))
                        (if rfc2047-encode-encoded-words
                            (progn
                              (goto-char begin)
                              (when (search-forward "=?" end 'move)
                                (goto-char (match-beginning 0))
                                (setq end nil)))
                          (goto-char end))))
                    ;; Where the value nil of `end' means there may be
                    ;; text to have to be encoded following the point.
                    ;; Otherwise, the point reached to the end of ASCII
                    ;; words separated by whitespace or a special char.
                    (unless end
                      (when (looking-at encodable-regexp)
                        (goto-char (setq begin (match-end 0)))
                        (while (and (looking-at "[ \t\n]+\\([^ \t\n]+\\)")
                                    (setq end (match-end 0))
                                    (progn
                                      (while (re-search-forward
                                              encodable-regexp end t))
                                      (< begin (point)))
                                    (goto-char begin)
                                    (or (not (re-search-forward "\\Sw" end t))
                                        (progn
                                          (goto-char (match-beginning 0))
                                          nil)))
                          (goto-char end))
                        (when (looking-at "[^ \t\n]+")
                          (setq end (match-end 0))
                          (if (re-search-forward "\\Sw+" end t)
                              ;; There are special characters better
                              ;; to be encoded so that MTAs may parse
                              ;; them safely.
                              (cond ((= end (point)))
                                    ((looking-at (concat "\\sw*\\("
                                                         encodable-regexp
                                                         "\\)"))
                                     (setq end nil))
                                    (t
                                     (goto-char (1- (match-end 0)))
                                     (unless (= (point) (match-beginning 0))
                                       ;; Separate encodable text and
                                       ;; delimiter.
                                       (insert " "))))
                            (goto-char end)
                            (skip-chars-forward " \t\n")
                            (if (and (looking-at "[^ \t\n]+")
                                     (string-match encodable-regexp
                                                   (match-string 0)))
                                (setq end nil)
                              (goto-char end)))))))
                  (skip-chars-backward " \t\n")
                  (setq end (point))
                  (goto-char start)
                  (if (re-search-forward encodable-regexp end 'move)
                      (progn
                        (unless (memq (char-before start) '(nil ?\t ? ))
                          (if (progn
                                (goto-char start)
                                (skip-chars-backward "^ \t\n")
                                (and (looking-at "\\Sw+")
                                     (= (match-end 0) start)))
                              ;; Also encode bogus delimiters.
                              (setq start (point))
                            ;; Separate encodable text and delimiter.
                            (goto-char start)
                            (insert " ")
                            (setq start (1+ start)
                                  end (1+ end))))
                        (rfc2047-encode start end)
                        (setq last-encoded t))
                    (setq last-encoded nil)))))
            (error
             (if (or debug-on-quit debug-on-error)
                 (signal (car err) (cdr err))
               (error "Invalid data for rfc2047 encoding: %s"
                      (mm-replace-in-string orig-text "[ \t\n]+" " "))))))))
    (rfc2047-fold-region b (point))
    (goto-char (point-max))))
|
31717
|
557
|
|
(defun rfc2047-encode-string (string)
  "Encode words in STRING.
By default, the string is treated as containing addresses (see
`rfc2047-encoding-type').

STRING is copied into a temporary multibyte buffer, encoded there
with `rfc2047-encode-region', and the resulting buffer contents are
returned."
  (mm-with-multibyte-buffer
    (insert string)
    (let ((from (point-min))
          (to (point-max)))
      (rfc2047-encode-region from to))
    (buffer-string)))
|
|
566
|
87097
|
567 ;; From RFC 2047:
|
|
568 ;; 2. Syntax of encoded-words
|
|
569 ;; [...]
|
|
570 ;; While there is no limit to the length of a multiple-line header
|
|
571 ;; field, each line of a header field that contains one or more
|
|
572 ;; 'encoded-word's is limited to 76 characters.
|
|
573 ;;
|
|
574 ;; In `rfc2047-encode-parameter' it is bound to nil, so don't defconst it.
|
61126
|
;; Read by `rfc2047-encode-1', which splits encoded words so that no
;; line grows beyond this many characters.
(defvar rfc2047-encode-max-chars 76
  "Maximum characters of each header line that contain encoded-words.
According to RFC 2047, it is 76.  If it is nil, encoded-words
will not be folded.  Too small value may cause an error.  You
should not change this value.")
|
61126
|
580
|
|
(defun rfc2047-encode-1 (column string cs encoder start crest tail
                                &optional eword)
  "Subroutine used by `rfc2047-encode'.
Return the text that is to replace STRING: one or more encoded words,
split over several lines where needed, followed by TAIL.

COLUMN is the column at which the next encoded word would begin.
STRING is the text still to be encoded.
CS, if non-nil, is the coding system STRING is encoded with before
ENCODER is applied.
ENCODER is a function turning a string into its encoded form.
START is the prefix put in front of every encoded word; every encoded
word is closed with \"?=\".
CREST is the text that begins the next line when a line break is
inserted.
TAIL is the text that follows the last encoded word.
EWORD is the result accumulated so far; it is only passed by the
recursive calls.

If `rfc2047-encode-max-chars' is nil, STRING becomes one single
encoded word and no line breaks are inserted."
  (cond ((string-equal string "")
         (or eword ""))
        ((not rfc2047-encode-max-chars)
         ;; No folding.  TAIL has to be part of the result just as in
         ;; the folding case: `rfc2047-encode' deletes the tail text
         ;; from the buffer before inserting what we return, so leaving
         ;; it out would lose that text.
         (concat start
                 (funcall encoder (if cs
                                      (mm-encode-coding-string string cs)
                                    string))
                 "?="
                 tail))
        ((>= column rfc2047-encode-max-chars)
         ;; The line is already full: continue on a new line.
         (when eword
           (cond ((string-match "\n[ \t]+\\'" eword)
                  ;; Remove a superfluous empty line.
                  (setq eword (substring eword 0 (match-beginning 0))))
                 ((string-match "(+\\'" eword)
                  ;; Break the line before the open parenthesis.
                  (setq crest (concat crest (match-string 0 eword))
                        eword (substring eword 0 (match-beginning 0))))))
         (rfc2047-encode-1 (length crest) string cs encoder start " " tail
                           (concat eword "\n" crest)))
        (t
         (let ((index 0)
               (limit (1- (length string)))
               (prev "")
               next len)
           ;; Grow the prefix of STRING one character at a time for as
           ;; long as its encoded form still fits on the current line.
           ;; PREV holds the last encoded word that fitted and becomes
           ;; nil once the limit has been hit.
           (while (and prev
                       (<= index limit))
             (setq next (concat start
                                (funcall encoder
                                         (if cs
                                             (mm-encode-coding-string
                                              (substring string 0 (1+ index))
                                              cs)
                                           (substring string 0 (1+ index))))
                                "?=")
                   len (+ column (length next)))
             (if (> len rfc2047-encode-max-chars)
                 (setq next prev
                       prev nil)
               ;; For the last character, the first line of TAIL has
               ;; to fit as well.
               (if (or (< index limit)
                       (<= (+ len (or (string-match "\n" tail)
                                      (length tail)))
                           rfc2047-encode-max-chars))
                   (setq prev next
                         index (1+ index))
                 (if (string-match "\\`)+" tail)
                     ;; Break the line after the close parenthesis.
                     (setq tail (concat (substring tail 0 (match-end 0))
                                        "\n "
                                        (substring tail (match-end 0)))
                           prev next
                           index (1+ index))
                   (setq next prev
                         prev nil)))))
           (if (> index limit)
               ;; Everything fitted: we are done.
               (concat eword next tail)
             ;; Otherwise emit what fitted and encode the rest of
             ;; STRING on a new line.
             (if (= 0 index)
                 (if (and eword
                          (string-match "(+\\'" eword))
                     (setq crest (concat crest (match-string 0 eword))
                           eword (substring eword 0 (match-beginning 0)))
                   (setq eword (concat eword next)))
               (setq crest " "
                     eword (concat eword next)))
             (when (string-match "\n[ \t]+\\'" eword)
               ;; Remove a superfluous empty line.
               (setq eword (substring eword 0 (match-beginning 0))))
             (rfc2047-encode-1 (length crest) (substring string index)
                               cs encoder start " " tail
                               (concat eword "\n" crest)))))))
|
|
653
|
47945
|
(defun rfc2047-encode (b e)
  "Encode the word(s) in the region B to E.
Point moves to the end of the region.

The text must belong to a single MIME charset, otherwise an error is
signaled.  The encoding (`B' or `Q') comes from
`rfc2047-charset-encoding-alist' or, failing that, from
`rfc2047-qp-or-base64'.  The region, together with any non-whitespace
text directly following it, is replaced by the string that
`rfc2047-encode-1' returns."
  (let ((mime-charset (or (mm-find-mime-charset-region b e) (list 'us-ascii)))
        cs encoding tail crest eword)
    (cond ((> (length mime-charset) 1)
           (error "Can't rfc2047-encode `%s'"
                  (buffer-substring-no-properties b e)))
          ((= (length mime-charset) 1)
           (setq mime-charset (car mime-charset)
                 cs (mm-charset-to-coding-system mime-charset))
           ;; Without multibyte support or a usable coding system the
           ;; text is handed to the encoder as it is.
           (unless (and (mm-multibyte-p)
                        (mm-coding-system-p cs))
             (setq cs nil))
           (save-restriction
             (narrow-to-region b e)
             (setq encoding
                   (or (cdr (assq mime-charset
                                  rfc2047-charset-encoding-alist))
                       ;; For the charsets that don't have a preferred
                       ;; encoding, choose the one that's shorter.
                       (if (eq (rfc2047-qp-or-base64) 'base64)
                           'B
                         'Q)))
             ;; From here on the text around the region is needed too.
             (widen)
             (goto-char e)
             (skip-chars-forward "^ \t\n")
             ;; `tail' may contain a close parenthesis.
             (setq tail (buffer-substring-no-properties e (point)))
             (goto-char b)
             ;; Switch to markers: folding the preceding text below may
             ;; insert characters in front of the region.
             (setq b (point-marker)
                   e (set-marker (make-marker) e))
             (rfc2047-fold-region (point-at-bol) b)
             (goto-char b)
             (skip-chars-backward "^ \t\n")
             (unless (= 0 (skip-chars-backward " \t"))
               ;; `crest' may contain whitespace and an open parenthesis.
               (setq crest (buffer-substring-no-properties (point) b)))
             (setq eword (rfc2047-encode-1
                          (- b (point-at-bol))
                          ;; Unfold the text to be encoded: drop each
                          ;; newline but keep one following space or tab.
                          (mm-replace-in-string
                           (buffer-substring-no-properties b e)
                           "\n\\([ \t]?\\)" "\\1")
                          cs
                          (or (cdr (assq encoding
                                         rfc2047-encode-function-alist))
                              'identity)
                          (concat "=?" (downcase (symbol-name mime-charset))
                                  "?" (upcase (symbol-name encoding)) "?")
                          (or crest " ")
                          tail))
             ;; If the result starts with a line break, the deletion
             ;; starts at point (which is where CREST begins) rather
             ;; than at B.
             (delete-region (if (eq (aref eword 0) ?\n)
                                (if (bolp)
                                    ;; The line was folded before encoding.
                                    (1- (point))
                                  (point))
                              (goto-char b))
                            (+ e (length tail)))
             ;; `eword' contains `crest' and `tail'.
             (insert eword)
             ;; Detach the markers so they no longer slow down editing.
             (set-marker b nil)
             (set-marker e nil)
             ;; Keep the encoded word separated from what follows it.
             (unless (or (/= 0 (length tail))
                         (eobp)
                         (looking-at "[ \t\n)]"))
               (insert " "))))
          (t
           (goto-char e)))))
|
31717
|
722
|
56927
|
(defun rfc2047-fold-field ()
  "Fold the current header field.
The field is found with `rfc2047-narrow-to-field' and folded with
`rfc2047-fold-region'.  Point and narrowing are preserved."
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (let ((field-start (point-min))
            (field-end (point-max)))
        (rfc2047-fold-region field-start field-end)))))
|
|
729
|
31717
|
(defun rfc2047-fold-region (b e)
  "Fold long lines in region B to E.
A line break is inserted once the text scanned on a line extends more
than 76 characters past the beginning of that line, provided a break
position has been recorded.  Break positions are runs of spaces and
tabs and, as a fallback, the start of an encoded word (\"=?\").
Continuation lines always begin with whitespace."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    ;; BREAK is the position after the most recent whitespace run,
    ;; QWORD-BREAK the start of the most recent encoded word; either
    ;; is nil when there is none on the current line.  FIRST is
    ;; non-nil only during the first iteration.  BOL is the beginning
    ;; of the current line, computed with the restriction lifted so
    ;; that text before B on the same line counts towards the length.
    (let ((break nil)
          (qword-break nil)
          (first t)
          (bol (save-restriction
                 (widen)
                 (point-at-bol))))
      (while (not (eobp))
        (when (and (or break qword-break)
                   (> (- (point) bol) 76))
          ;; The line is too long: break it at the recorded position.
          (goto-char (or break qword-break))
          (setq break nil
                qword-break nil)
          (skip-chars-backward " \t")
          ;; Reuse existing whitespace to start the continuation line,
          ;; otherwise add a space.
          (if (looking-at "[ \t]")
              (insert ?\n)
            (insert "\n "))
          (setq bol (1- (point)))
          ;; Don't break before the first non-LWSP characters.
          (skip-chars-forward " \t")
          (unless (eobp)
            (forward-char 1)))
        (cond
         ;; An existing line break: start measuring a new line.
         ((eq (char-after) ?\n)
          (forward-char 1)
          (setq bol (point)
                break nil
                qword-break nil)
          (skip-chars-forward " \t")
          (unless (or (eobp) (eq (char-after) ?\n))
            (forward-char 1)))
         ((eq (char-after) ?\r)
          (forward-char 1))
         ;; Whitespace: a candidate break position.
         ((memq (char-after) '(?  ?\t))
          (skip-chars-forward " \t")
          (unless first ;; Don't break just after the header name.
            (setq break (point))))
         ;; No whitespace break known yet: watch for encoded words.
         ((not break)
          (if (not (looking-at "=\\?[^=]"))
              (if (eq (char-after) ?=)
                  (forward-char 1)
                (skip-chars-forward "^ \t\n\r="))
            ;; Don't break at the start of the field.
            (unless (= (point) b)
              (setq qword-break (point)))
            (skip-chars-forward "^ \t\n\r")))
         (t
          (skip-chars-forward "^ \t\n\r")))
        (setq first nil))
      ;; The last line may be too long as well.
      (when (and (or break qword-break)
                 (> (- (point) bol) 76))
        (goto-char (or break qword-break))
        (setq break nil
              qword-break nil)
        (if (or (> 0 (skip-chars-backward " \t"))
                (looking-at "[ \t]"))
            (insert ?\n)
          (insert "\n "))
        (setq bol (1- (point)))
        ;; Don't break before the first non-LWSP characters.
        (skip-chars-forward " \t")
        (unless (eobp)
          (forward-char 1))))))
|
|
797
|
|
(defun rfc2047-unfold-field ()
  "Unfold the current header field.
The field is found with `rfc2047-narrow-to-field' and unfolded with
`rfc2047-unfold-region'.  Point and narrowing are preserved."
  (save-excursion
    (save-restriction
      (rfc2047-narrow-to-field)
      (rfc2047-unfold-region (point-min) (point-max)))))
|
33304
|
804
|
|
(defun rfc2047-unfold-region (b e)
  "Unfold lines in region B to E.
A continuation line, i.e. one starting with a space or a tab, is
joined to the line before it as long as the end of the continuation
line lies less than 76 characters from the beginning of the joined
line.  Joining deletes the line break characters only; the leading
whitespace of the continuation line is kept."
  (save-restriction
    (narrow-to-region b e)
    (goto-char (point-min))
    ;; LINE-START is the beginning of the (possibly already joined)
    ;; line, looked up with the restriction lifted; PREV-EOL is the
    ;; end of the line before point.
    (let ((line-start (save-restriction
                        (widen)
                        (point-at-bol)))
          (prev-eol (point-at-eol)))
      (forward-line 1)
      (while (not (eobp))
        (cond
         ((and (looking-at "[ \t]")
               (< (- (point-at-eol) line-start) 76))
          ;; Join: remove the line break after the previous line.
          (goto-char prev-eol)
          (skip-chars-forward "\r\n")
          (delete-region prev-eol (point)))
         (t
          ;; Not joined: this line starts a new measurement.
          (setq line-start (point-at-bol))))
        (setq prev-eol (point-at-eol))
        (forward-line 1)))))
|
31717
|
825
|
61126
|
(defun rfc2047-b-encode-string (string)
  "Return the header text STRING encoded as base64.
The result is produced without any line breaks."
  (let ((no-line-break t))
    (base64-encode-string string no-line-break)))
|
31717
|
829
|
61126
|
(defun rfc2047-q-encode-string (string)
  "Return the header text STRING in quoted-printable (Q) encoding.
STRING is encoded in a temporary unibyte buffer and every space that
is left in the result is then replaced by an underscore."
  ;; = (\075), _ (\137), ? (\077) are used in the encoded word.
  ;; Avoid using 8bit characters.
  ;; The class below excludes `especials' (see the RFC2047 syntax),
  ;; meaning that some characters in non-structured fields will get
  ;; encoded when they don't need to be.  The following is what it
  ;; used to be.
;;;  ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?"
;;;  "\010\012\014\040-\074\076\100-\136\140-\177")
  (let ((rfc2047-q-class "-\b\n\f !#-'*+0-9A-Z\\^`-~\d"))
    (mm-with-unibyte-buffer
      (insert string)
      (quoted-printable-encode-region (point-min) (point-max)
                                      nil rfc2047-q-class)
      (subst-char-in-region (point-min) (point-max) ?  ?_)
      (buffer-string))))
|
|
847
|
|
(defun rfc2047-encode-parameter (param value)
  "Return a PARAM=VALUE string encoded in the RFC2047-like style.
This is a replacement for the `rfc2231-encode-string' function.

File names containing non-ASCII characters in MIME parts should be
specified with the RFC2231 encoding.  In practice, however, much mail
software does not support it, and recipients are then unable to
extract files under their correct names, whereas the RFC2047-like
encoding is generally accepted.  This function provides that
RFC2047-like encoding as a concession to this regrettable situation.
To use it, put the following line in your ~/.gnus.el file:

\(defalias 'mail-header-encode-parameter 'rfc2047-encode-parameter)
"
  ;; VALUE is encoded with the `mime' encoding type and without a limit
  ;; on the number of characters; both bindings also stay in effect
  ;; while PARAM and the encoded value are combined.
  (let* ((rfc2047-encoding-type 'mime)
         (rfc2047-encode-max-chars nil)
         (encoded-value (rfc2047-encode-string value)))
    (rfc2045-encode-string param encoded-value)))
|
31717
|
865
|
|
866 ;;;
|
|
867 ;;; Functions for decoding RFC2047 messages
|
|
868 ;;;
|
|
869
|
60161
|
(defvar rfc2047-quote-decoded-words-containing-tspecials nil
  "If non-nil, quote decoded words containing special characters.
`rfc2047-decode-region' consults this variable: when it is non-nil and
the enclosing header matches the `address-mime' entry of
`rfc2047-header-encoding-alist', decoded text that contains a
character from `ietf-drums-tspecials' and is not already quoted is
wrapped in double quotes.")
|
|
872
|
66299
|
(defvar rfc2047-allow-incomplete-encoded-text t
  "*Non-nil means allow incomplete encoded-text in successive encoded-words.
Splitting encoded-text somewhere other than at a character boundary
violates RFC2047 section 5, but such text can still be decoded.
If this is non-nil, the decoder first undoes the B- or Q-encoding of
each encoded-word, concatenates the results of successive
encoded-words that resolve to the same coding system, and only then
decodes the concatenation by charset.  Otherwise, the decoder fully
decodes each encoded-word before concatenating them.")
|
|
881
|
72605
|
(defun rfc2047-strip-backslashes-in-quoted-strings ()
  "Strip backslashes in the quoted strings of the accessible buffer text.
A backslash that is directly followed by a double quote is kept, so
`\\\"' remains; every other backslash inside a quoted string is
removed and the character it preceded is left in place.  Text outside
quoted strings is not modified.  Point is moved."
  (goto-char (point-min))
  (let (beg)
    ;; The standard syntax table is used so that `forward-sexp' treats
    ;; `\"' as a string delimiter, whatever the current buffer's mode.
    (with-syntax-table (standard-syntax-table)
      (while (search-forward "\"" nil t)
        ;; NOTE(review): point is just after the quote found above, so
        ;; `char-before' is that quote and this test can never be true.
        ;; It is kept as it was to preserve the existing behaviour.
        (unless (eq (char-before) ?\\)
          (setq beg (match-end 0))
          (goto-char (match-beginning 0))
          (condition-case nil
              (progn
                ;; Jump over the whole quoted string.
                (forward-sexp)
                (save-restriction
                  ;; Work on the text between the two quotes only.
                  (narrow-to-region beg (1- (point)))
                  (goto-char beg)
                  (while (search-forward "\\" nil 'move)
                    (unless (memq (char-after) '(?\"))
                      ;; `delete-char' rather than the interactive
                      ;; command `delete-backward-char', which inserts
                      ;; padding spaces when Overwrite mode is on.
                      (delete-char -1))
                    ;; Skip the character the backslash preceded.
                    (forward-char)))
                ;; Step over the closing quote.
                (forward-char))
            (error
             ;; E.g. an unterminated string; resume the search just
             ;; after the opening quote.
             (goto-char beg))))))))
|
|
904
|
66299
|
(defun rfc2047-charset-to-coding-system (charset)
  "Return coding-system corresponding to MIME CHARSET.
CHARSET is a symbol or a string; a string is downcased and interned.
`mail-parse-charset' is used in place of CHARSET when CHARSET is nil,
when it is a member of `mail-parse-ignored-charsets', or when that
variable is, or contains, the symbol `gnus-all'.
If your Emacs implementation can't decode CHARSET, return nil."
  (let* ((name (if (stringp charset)
                   (intern (downcase charset))
                 charset))
         (effective (if (or (null name)
                            (eq 'gnus-all mail-parse-ignored-charsets)
                            (memq 'gnus-all mail-parse-ignored-charsets)
                            (memq name mail-parse-ignored-charsets))
                        mail-parse-charset
                      name))
         (coding (mm-charset-to-coding-system effective)))
    (cond
     ;; Plain ASCII: prefer the coding system of `mail-parse-charset'.
     ((eq coding 'ascii)
      (setq coding (or (mm-charset-to-coding-system mail-parse-charset)
                       'raw-text)))
     ;; A usable coding system; keep it as it is.
     ((mm-coding-system-p coding))
     ;; Not usable, but the user asked for unknown charsets to be
     ;; treated as `mail-parse-charset'.
     ((and effective
           (listp mail-parse-ignored-charsets)
           (memq 'gnus-unknown mail-parse-ignored-charsets))
      (setq coding (mm-charset-to-coding-system mail-parse-charset))))
    ;; Never hand out `ascii' itself.
    (if (eq coding 'ascii)
        'raw-text
      coding)))
|
|
927
|
|
(defun rfc2047-decode-encoded-words (words)
  "Decode successive encoded-words in WORDS and return a decoded string.
Each element of WORDS looks like (CHARSET ENCODING ENCODED-TEXT
ENCODED-WORD).

Decoding is done in two passes.  First the B- or Q-encoding of every
element is undone; an element whose charset maps to no coding system,
whose ENCODING is neither `B' nor `Q', or whose transfer decoding
signals an error is kept as its literal ENCODED-WORD.  When
`rfc2047-allow-incomplete-encoded-text' is non-nil, the text of
successive elements with the same coding system is concatenated.
Second, each piece is decoded by its coding system and the pieces are
joined.  An undecoded ENCODED-WORD is separated from its neighbours
by a space.  Errors are reported with `message' and not signaled.
Return nil if WORDS is empty."
  (let (pieces result)
    ;; Pass 1: collect (CODING-SYSTEM . TEXT) pairs, newest first.  A nil
    ;; CODING-SYSTEM marks an encoded-word that is left undecoded.
    (dolist (word words)
      (let* ((cs (rfc2047-charset-to-coding-system (car word)))
             (text
              (and cs
                   (condition-case err
                       (cond ((char-equal ?B (nth 1 word))
                              (base64-decode-string
                               (rfc2047-pad-base64 (nth 2 word))))
                             ((char-equal ?Q (nth 1 word))
                              (quoted-printable-decode-string
                               (mm-subst-char-in-string
                                ?_ ?  (nth 2 word) t))))
                     (error
                      (message "%s" (error-message-string err))
                      nil)))))
        (cond
         ;; Don't decode encoded-word.
         ((not text)
          (push (cons nil (nth 3 word)) pieces))
         ;; Concatenate text of which the charset is the same.
         ((and rfc2047-allow-incomplete-encoded-text
               (eq cs (caar pieces)))
          (setcdr (car pieces) (concat (cdar pieces) text)))
         (t
          (push (cons cs text) pieces)))))
    ;; Pass 2: PIECES is in reverse order, so each decoded piece is
    ;; prepended to RESULT.
    (while pieces
      (let* ((cs (caar pieces))
             (raw (cdar pieces))
             (decoded (and cs
                           (condition-case err
                               (mm-decode-coding-string raw cs)
                             (error
                              (message "%s" (error-message-string err))
                              nil)))))
        (setq result
              (concat
               (or decoded
                   ;; Keep the raw text, set off by spaces: one before it
                   ;; if earlier pieces remain, one after it unless the
                   ;; text that follows already starts with a space.
                   (concat (when (cdr pieces) " ")
                           raw
                           (when (and result
                                      (not (eq (string-to-char result) ? )))
                             " ")))
               result)))
      (setq pieces (cdr pieces)))
    result))
|
|
971
|
56927
|
972 ;; Fixme: This should decode in place, not cons intermediate strings.
|
|
973 ;; Also check whether it needs to worry about delimiting fields like
|
|
974 ;; encoding.
|
|
975
|
|
976 ;; In fact it's reported that (invalid) encoding of mailboxes in
|
|
977 ;; addr-specs is in use, so delimiting fields might help. Probably
|
|
978 ;; not decoding a word which isn't properly delimited is good enough
|
|
979 ;; and worthwhile (is it more correct or not?), e.g. something like
|
|
980 ;; `=?iso-8859-1?q?foo?=@'.
|
31717
|
981
|
72605
|
(defun rfc2047-decode-region (start end &optional address-mime)
  "Decode MIME-encoded words in region between START and END.
If ADDRESS-MIME is non-nil, strip backslashes which precede characters
other than `\"' and `\\' in quoted strings."
  (interactive "r")
  ;; Local state:
  ;;   B     - start of the text not yet given to `mm-decode-coding-region'.
  ;;   E     - start of the run of encoded-words being replaced.
  ;;   MATCH - non-nil while another adjacent encoded-word was found.
  ;;   WORDS - the encoded-words of the current run, most recent first.
  ;; The arguments START and END are reused as scratch variables below.
  (let ((case-fold-search t)
        ;; An encoded-word (group 1), optionally preceded by newlines,
        ;; tabs and spaces.  The loose or the strict word regexp is
        ;; chosen by `rfc2047-allow-irregular-q-encoded-words'.
        (eword-regexp
         (if rfc2047-allow-irregular-q-encoded-words
             (eval-when-compile
               (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp-loose "\\)"))
           (eval-when-compile
             (concat "[\n\t ]*\\(" rfc2047-encoded-word-regexp "\\)"))))
        b e match words)
    (save-excursion
      (save-restriction
        (narrow-to-region start end)
        (when address-mime
          (rfc2047-strip-backslashes-in-quoted-strings))
        (goto-char (setq b start))
        ;; Look for the encoded-words.
        (while (setq match (re-search-forward eword-regexp nil t))
          ;; E excludes the whitespace matched in front of the first
          ;; encoded-word; END tracks the end of the last word of the run.
          (setq e (match-beginning 1)
                end (match-end 0)
                words nil)
          (while match
            ;; NOTE(review): groups 2 and 3 come from the word regexp,
            ;; which is defined elsewhere; group 3 presumably starts with
            ;; the encoding letter and a `?' -- confirm there.
            (push (list (match-string 2) ;; charset
                        (char-after (match-beginning 3)) ;; encoding
                        (substring (match-string 3) 2) ;; encoded-text
                        (match-string 1)) ;; encoded-word
                  words)
            ;; Look for the subsequent encoded-words.
            (when (setq match (looking-at eword-regexp))
              (goto-char (setq end (match-end 0)))))
          ;; Replace the encoded-words with the decoded one.
          (delete-region e end)
          (insert (rfc2047-decode-encoded-words (nreverse words)))
          ;; Post-process only the text that was just inserted.
          (save-restriction
            (narrow-to-region e (point))
            (goto-char e)
            ;; Remove newlines between decoded words, though such
            ;; things essentially must not be there.
            (while (re-search-forward "[\n\r]+" nil t)
              (replace-match " "))
            ;; Quote decoded words if there are special characters
            ;; which might violate RFC2822.
            (when (and rfc2047-quote-decoded-words-containing-tspecials
                       ;; Only in headers matching the `address-mime'
                       ;; entry of `rfc2047-header-encoding-alist'.
                       (let ((regexp (car (rassq
                                           'address-mime
                                           rfc2047-header-encoding-alist))))
                         (when regexp
                           (save-restriction
                             (widen)
                             (beginning-of-line)
                             ;; Walk back over continuation lines to the
                             ;; line that starts the header.
                             (while (and (memq (char-after) '(? ?\t))
                                         (zerop (forward-line -1))))
                             (looking-at regexp)))))
              (let (quoted)
                ;; START and END are set to delimit the decoded text
                ;; without surrounding whitespace and, if the text is
                ;; already enclosed in double quotes, without those
                ;; quotes.  END becomes a marker so that it follows the
                ;; insertions made below.
                (goto-char e)
                (skip-chars-forward " \t")
                (setq start (point))
                (setq quoted (eq (char-after) ?\"))
                (goto-char (point-max))
                (skip-chars-backward " \t")
                (if (setq quoted (and quoted
                                      (> (point) (1+ start))
                                      (eq (char-before) ?\")))
                    (progn
                      (backward-char)
                      (setq start (1+ start)
                            end (point-marker)))
                  (setq end (point-marker)))
                (goto-char start)
                ;; Escape every double quote that is preceded by an even
                ;; number of backslashes, i.e. is not yet escaped.
                (while (search-forward "\"" end t)
                  (when (prog2
                            (backward-char)
                            (zerop (% (skip-chars-backward "\\\\") 2))
                          (goto-char (match-beginning 0)))
                    (insert "\\"))
                  (forward-char))
                ;; Wrap the text in double quotes if it was not quoted
                ;; and contains a character of `ietf-drums-tspecials'.
                (when (and (not quoted)
                           (progn
                             (goto-char start)
                             (re-search-forward
                              (concat "[" ietf-drums-tspecials "]")
                              end t)))
                  (goto-char start)
                  (insert "\"")
                  (goto-char end)
                  (insert "\""))
                ;; Detach the marker now that it is no longer needed.
                (set-marker end nil)))
            (goto-char (point-max)))
          ;; Decode the plain text that preceded this run of
          ;; encoded-words with `mail-parse-charset'.
          (when (and (mm-multibyte-p)
                     mail-parse-charset
                     (not (eq mail-parse-charset 'us-ascii))
                     (not (eq mail-parse-charset 'gnus-decoded)))
            (mm-decode-coding-region b e mail-parse-charset))
          (setq b (point)))
        ;; Likewise for the plain text after the last encoded-word.
        (when (and (mm-multibyte-p)
                   mail-parse-charset
                   (not (eq mail-parse-charset 'us-ascii))
                   (not (eq mail-parse-charset 'gnus-decoded)))
          (mm-decode-coding-region b (point-max) mail-parse-charset))))))
|
31717
|
1084
|
72605
|
(defun rfc2047-decode-address-region (start end)
  "Decode MIME-encoded words in the address text between START and END.
This calls `rfc2047-decode-region' with its ADDRESS-MIME argument set,
so backslashes which precede characters other than `\"' and `\\' in
quoted strings are stripped."
  (let ((strip-backslashes t))
    (rfc2047-decode-region start end strip-backslashes)))
|
|
1090
|
|
(defun rfc2047-decode-string (string &optional address-mime)
  "Return STRING with its MIME-encoded words decoded.
If ADDRESS-MIME is non-nil, strip backslashes which precede characters
other than `\"' and `\\' in quoted strings.
A STRING without any `=?' is not scanned for encoded-words; it is only
decoded by `mail-parse-charset' or converted to multibyte."
  ;; The multibyteness must be sampled before a temporary buffer
  ;; becomes current.
  (let ((multibyte (mm-multibyte-p)))
    (cond
     ((string-match "=\\?" string)
      (with-temp-buffer
        ;; Fixme: This logic is wrong, but seems to be required by
        ;; Gnus summary buffer generation.  The value of `multibyte'
        ;; depends on the current buffer, not global multibyteness or
        ;; that of the string.  Also the string returned should always
        ;; be multibyte in a multibyte session, i.e. the buffer should
        ;; be multibyte before `buffer-string' is called.
        (when multibyte
          (mm-enable-multibyte))
        (insert string)
        (inline
          (rfc2047-decode-region (point-min) (point-max) address-mime))
        (buffer-string)))
     (t
      (when address-mime
        (setq string
              (with-temp-buffer
                (when (mm-multibyte-string-p string)
                  (mm-enable-multibyte))
                (insert string)
                (rfc2047-strip-backslashes-in-quoted-strings)
                (buffer-string))))
      (cond
       ;; Fixme: As above, `multibyte' here is inappropriate.
       ((not (and multibyte
                  mail-parse-charset
                  (not (memq mail-parse-charset '(us-ascii gnus-decoded)))))
        (mm-string-as-multibyte string))
       ;; `decode-coding-string' in Emacs offers a third optional arg
       ;; NOCOPY to avoid consing a new string if the decoding is
       ;; "trivial".  Unfortunately it currently doesn't consider
       ;; anything else than a nil coding system trivial.
       ;; `rfc2047-decode-string' is called multiple times for each
       ;; article during summary buffer generation, and we really want
       ;; to avoid unnecessary consing.  So `decode-coding-string' is
       ;; bypassed if the string is purely ASCII.
       ((and (fboundp 'detect-coding-string)
             (eq (detect-coding-string string t) 'undecided))
        string)
       (t
        (mm-decode-coding-string string mail-parse-charset)))))))
|
31717
|
1138
|
72605
|
(defun rfc2047-decode-address-string (string)
  "Return the address text STRING with its MIME-encoded words decoded.
This calls `rfc2047-decode-string' with its ADDRESS-MIME argument set,
so backslashes which precede characters other than `\"' and `\\' in
quoted strings are stripped."
  (let ((strip-backslashes t))
    (rfc2047-decode-string string strip-backslashes)))
|
|
1144
|
56927
|
(defun rfc2047-pad-base64 (string)
  "Return STRING padded with `=' to a whole number of quartets.
A STRING whose length is already a multiple of four is returned as it
is.  Otherwise existing trailing `=' characters are dropped and the
padding is recomputed; a remainder of one character cannot be valid
base64, so such a string is returned without padding."
  ;; Be more liberal to accept buggy base64 strings.  If
  ;; base64-decode-string accepts buggy strings, this function could
  ;; be aliased to identity.
  (let ((remainder (mod (length string) 4)))
    (if (zerop remainder)
        string
      (when (string-match "=+$" string)
        (setq string (substring string 0 (match-beginning 0))))
      (setq remainder (mod (length string) 4))
      (cond ((= remainder 2) (concat string "=="))
            ((= remainder 3) (concat string "="))
            ;; 0: nothing to add.  1: error, don't pad it.
            (t string)))))
|
31717
|
1159
|
|
1160 (provide 'rfc2047)
|
|
1161
|
52401
|
1162 ;;; arch-tag: a07fe3d4-22b5-4c4a-bd89-b1f82d5d36f6
|
31717
|
1163 ;;; rfc2047.el ends here
|