Mercurial > emacs
changeset 89077:b39c11cf3b5d
(message-posting-charset): defvar when compiling.
(rfc2047-header-encoding-alist): Add `address-mime' part.
(rfc2047-charset-encoding-alist): Use B for iso-8859-7. Doc fix.
(rfc2047-q-encoding-alist): Augment header list.
(rfc2047-encodable-p): Use mm-find-mime-charset-region.
(rfc2047-special-chars, rfc2047-non-special-chars): New.
(rfc2047-dissect-region, rfc2047-encode-region, rfc2047-encode):
Rewritten to avoid charset stuff and to take account of rfc2822
tokens.
(rfc2047-encode-message-header): Don't include header name field
in encoding. Add `address-mime' case and bind
rfc2047-special-chars for `mime' case.
author | Dave Love <fx@gnu.org> |
---|---|
date | Thu, 05 Sep 2002 17:43:48 +0000 |
parents | 17b36ecfa8a8 |
children | 63d0bc85dea9 |
files | lisp/gnus/rfc2047.el |
diffstat | 1 files changed, 120 insertions(+), 103 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/gnus/rfc2047.el Thu Sep 05 17:38:15 2002 +0000 +++ b/lisp/gnus/rfc2047.el Thu Sep 05 17:43:48 2002 +0000 @@ -1,5 +1,5 @@ ;;; rfc2047.el --- functions for encoding and decoding rfc2047 messages -;; Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. +;; Copyright (C) 1998, 1999, 2000, 2002 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> ;; MORIOKA Tomohiko <morioka@jaist.ac.jp> @@ -27,7 +27,9 @@ ;;; Code: -(eval-when-compile (require 'cl)) +(eval-when-compile + (require 'cl) + (defvar message-posting-charset)) (require 'qp) (require 'mm-util) @@ -41,6 +43,8 @@ (defvar rfc2047-header-encoding-alist '(("Newsgroups" . nil) ("Message-ID" . nil) + ("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\)" . + address-mime) (t . mime)) "*Header/encoding method alist. The list is traversed sequentially. The keys can either be @@ -50,8 +54,10 @@ 1) nil, in which case no encoding is done; 2) `mime', in which case the header will be encoded according to RFC2047; -3) a charset, in which case it will be encoded as that charset; -4) `default', in which case the field will be encoded as the rest +3) `address-mime', like `mime', but takes account of the rules for address + fields (where quoted strings and comments must be treated separately); +4) a charset, in which case it will be encoded as that charset; +5) `default', in which case the field will be encoded as the rest of the article.") (defvar rfc2047-charset-encoding-alist @@ -62,7 +68,7 @@ (iso-8859-4 . Q) (iso-8859-5 . B) (koi8-r . B) - (iso-8859-7 . Q) + (iso-8859-7 . B) (iso-8859-8 . B) (iso-8859-9 . Q) (iso-8859-14 . Q) @@ -88,7 +94,8 @@ "Alist of RFC2047 encodings to encoding functions.") (defvar rfc2047-q-encoding-alist - '(("\\(From\\|Cc\\|To\\|Bcc\||Reply-To\\):" . "-A-Za-z0-9!*+/") + '(("\\(Resent-\\)?\\(From\\|Cc\\|To\\|Bcc\\|Reply-To\\|Sender\\):" + . "-A-Za-z0-9!*+/" ) ;; = (\075), _ (\137), ? (\077) are used in the encoded word. ;; Avoid using 8bit characters. ;; Equivalent to "^\000-\007\011\013\015-\037\200-\377=_?" @@ -142,21 +149,26 @@ (eq (car elem) t)) (setq alist nil method (cdr elem)))) + (goto-char (point-min)) + (re-search-forward "^[^:]+: *" nil t) (cond + ((eq method 'address-mime) + (rfc2047-encode-region (point) (point-max))) ((eq method 'mime) - (rfc2047-encode-region (point-min) (point-max))) + (let (rfc2047-special-chars) + (rfc2047-encode-region (point) (point-max)))) ((eq method 'default) (if (and (featurep 'mule) (if (boundp 'default-enable-multibyte-characters) default-enable-multibyte-characters) mail-parse-charset) - (mm-encode-coding-region (point-min) (point-max) + (mm-encode-coding-region (point) (point-max) mail-parse-charset))) ((mm-coding-system-p method) (if (and (featurep 'mule) (if (boundp 'default-enable-multibyte-characters) default-enable-multibyte-characters)) - (mm-encode-coding-region (point-min) (point-max) method))) + (mm-encode-coding-region (point) (point-max) method))) ;; Hm. (t))) (goto-char (point-max))))))) @@ -173,74 +185,72 @@ (mm-find-mime-charset-region (point-min) (point-max)))) (and charsets (not (equal charsets (list message-posting-charset)))))) +;; ietf-drums-specials-token less \ . @ +(defconst rfc2047-special-chars (append "()<>[]:;,\"" nil) + "List of characters treated as special when rfc207-encoding address fields. +When encoding other sorts of fields, bin it to nil to avoid treating +RFC 2822 quoted words and comments specially.") + +(defconst rfc2047-non-special-chars (concat "^" rfc2047-special-chars)) + (defun rfc2047-dissect-region (b e) - "Dissect the region between B and E into words." - (let ((word-chars "-A-Za-z0-9!*+/") - ;; Not using ietf-drums-specials-token makes life simple. - mail-parse-mule-charset - words point nonascii - result word) - (save-restriction - (narrow-to-region b e) - (goto-char (point-min)) - (skip-chars-forward "\000-\177") - ;; Fixme: This loop used to check charsets when it found - ;; non-ASCII characters. That's removed, since it doesn't make - ;; much sense in Emacs 22 and doesn't seem necessary in Emacs - ;; 21, even. I'm not sure exactly what it should be doing, and - ;; it needs another look, especially for efficiency's sake. -- fx - (while (not (eobp)) - (setq point (point) - nonascii nil) - (skip-chars-backward word-chars b) - (unless (eq b (point)) - (push (cons (buffer-substring b (point)) nil) words)) - (setq b (point) - nonascii t) - (goto-char point) - (forward-char 1) - (skip-chars-forward word-chars) - (while (not (eobp)) - (forward-char 1) - (skip-chars-forward word-chars)) - (unless (eq b (point)) - (push (cons (buffer-substring b (point)) nonascii) words)) - (setq b (point)) - (skip-chars-forward "\000-\177")) - (unless (eq b (point)) - (push (cons (buffer-substring b (point)) nil) words))) - ;; merge adjacent words - (setq word (pop words)) - (while word - (if (and (cdr word) - (caar words) - (not (cdar words)) - (not (string-match "[^ \t]" (caar words)))) - (if (eq (cdr (nth 1 words)) (cdr word)) - (progn - (setq word (cons (concat - (car (nth 1 words)) (caar words) - (car word)) - (cdr word))) - (pop words) - (pop words)) - (push (cons (concat (caar words) (car word)) (cdr word)) - result) - (pop words) - (setq word (pop words))) - (push word result) - (setq word (pop words)))) - result)) + "Dissect the region between B and E into tokens. +The tokens comprise sequences of atoms, quoted strings, special +characters and whitespace." + (save-restriction + (narrow-to-region b e) + (if (null rfc2047-special-chars) + ;; simple `mime' case -- no need to tokenize + (list (buffer-substring b e)) + ;; `address-mime' case -- take care of quoted words, comments + (with-syntax-table ietf-drums-syntax-table + (let ((start (point)) + words) + (goto-char (point-min)) + (condition-case nil ; in case of unbalanced specials + ;; Dissect into: sequences of atoms, quoted strings, + ;; specials, whitespace. (Specials mustn't be encoded.) + (while (not (eobp)) + (setq start (point)) + (unless (= 0 (skip-chars-forward ietf-drums-wsp-token)) + (push (buffer-substring start (point)) words) + (setq start (point))) + (cond + ((memq (char-after) rfc2047-special-chars) + ;; Grab string or special char. + (if (eq ?\" (char-after)) + (progn + (forward-sexp) + (push (buffer-substring start (point)) words)) + (push (string (char-after)) words) + (forward-char))) + ((not (char-after))) ; eob + (t ; normal token/whitespace sequence + (skip-chars-forward rfc2047-non-special-chars) + (skip-chars-backward ietf-drums-wsp-token) + (push (buffer-substring start (point)) words)))) + (error (error "Invalid data for rfc2047 encoding: %s" + (buffer-substring b e)))) + (nreverse words)))))) +;; Fixme: why does this cons a list of words and insert them, rather +;; than encoding in place? (defun rfc2047-encode-region (b e) - "Encode all encodable words in region B to E." + "Encode all encodable words in region B to E. +By default, the region is treated as containing addresses (see +`rfc2047-special-chars')." (let ((words (rfc2047-dissect-region b e)) word) (save-restriction (narrow-to-region b e) (delete-region (point-min) (point-max)) - (while (setq word (pop words)) - (if (not (cdr word)) - (insert (car word)) + (dolist (word words) + ;; Quoted strings can't contain encoded words. Strip the + ;; quotes. + (if rfc2047-special-chars + (if (eq ?\" (aref word 0)) + (setq word (substring word 1 -1)))) + (if (string-match "\\`[\0-\177]*\\'" word) ; including whitespace + (insert word) (rfc2047-fold-region (gnus-point-at-bol) (point)) (goto-char (point-max)) (if (> (- (point) (save-restriction @@ -250,56 +260,63 @@ ;; Insert blank between encoded words (if (eq (char-before) ?=) (insert " ")) (rfc2047-encode (point) - (progn (insert (car word)) (point))))) + (progn (insert word) (point))))) (rfc2047-fold-region (point-min) (point-max))))) (defun rfc2047-encode-string (string) - "Encode words in STRING." + "Encode words in STRING. +By default, the string is treated as containing addresses (see +`rfc2047-special-chars')." (with-temp-buffer (insert string) (rfc2047-encode-region (point-min) (point-max)) (buffer-string))) (defun rfc2047-encode (b e) - "Encode the word in the region B to E." - (let* ((buff (current-buffer)) - (mime-charset (with-temp-buffer - (insert-buffer-substring buff b e) - (mm-find-mime-charset-region 1 (point-max)))) + "Encode the word(s) in the region B to E. +By default, the region is treated as containing addresses (see +`rfc2047-special-chars')." + (let* ((mime-charset (mm-find-mime-charset-region b e)) (cs (if (> (length mime-charset) 1) - (error "Can't encode word: %s" (buffer-substring b e)) + ;; Fixme: instead of this, try to break region into + ;; parts that can be encoded separately. + (error "Can't rfc2047-encode `%s'" + (buffer-substring b e)) (setq mime-charset (car mime-charset)) (mm-charset-to-coding-system mime-charset))) - (encoding (or (cdr (assq mime-charset + (encoding (if (assq mime-charset + rfc2047-charset-encoding-alist) + (cdr (assq mime-charset rfc2047-charset-encoding-alist)) - 'B)) + 'B)) (start (concat "=?" (downcase (symbol-name mime-charset)) "?" (downcase (symbol-name encoding)) "?")) (first t)) - (save-restriction - (narrow-to-region b e) - (when (eq encoding 'B) - ;; break into lines before encoding - (goto-char (point-min)) - (while (not (eobp)) - (goto-char (min (point-max) (+ 15 (point)))) - (unless (eobp) - (insert "\n")))) - (if (and (mm-multibyte-p) - (mm-coding-system-p cs)) - (mm-encode-coding-region (point-min) (point-max) cs)) - (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) - (point-min) (point-max)) - (goto-char (point-min)) - (while (not (eobp)) - (unless first - (insert " ")) - (setq first nil) - (insert start) - (end-of-line) - (insert "?=") - (forward-line 1))))) + (if mime-charset + (save-restriction + (narrow-to-region b e) + (when (eq encoding 'B) + ;; break into lines before encoding + (goto-char (point-min)) + (while (not (eobp)) + (goto-char (min (point-max) (+ 15 (point)))) + (unless (eobp) + (insert "\n")))) + (if (and (mm-multibyte-p) + (mm-coding-system-p cs)) + (mm-encode-coding-region (point-min) (point-max) cs)) + (funcall (cdr (assq encoding rfc2047-encoding-function-alist)) + (point-min) (point-max)) + (goto-char (point-min)) + (while (not (eobp)) + (unless first + (insert " ")) + (setq first nil) + (insert start) + (end-of-line) + (insert "?=") + (forward-line 1)))))) (defun rfc2047-fold-region (b e) "Fold long lines in region B to E."