view lisp/gnus/rfc2231.el @ 82975:590114f9753d gnus-5_10-pre-merge-josefsson

2004-08-31 Katsumi Yamaoka <yamaoka@jpl.org> * gnus-agent.el (gnus-agent-restore-gcc): Use ^ and regexp-quote. * gnus-sum.el (gnus-newsgroup-variables): Doc fix (tiny change). From Helmut Waitzmann <Helmut.Waitzmann@web.de>. * gnus-agent.el (gnus-agent-regenerate-group): Activate the group when the group's active is not available. * gnus-art.el (article-hide-headers): Refer to the values for gnus-ignored-headers and gnus-visible-headers in the summary buffer since a user may have set them as group parameters. (gnus-article-next-page): Fix the way to find a real end-of-buffer (tiny change). From YAGI Tatsuya <ynyaaa@ybb.ne.jp>. (gnus-article-read-summary-keys): Restore new window-start and hscroll to summary window. (gnus-prev-page-map): Remove duplicated one. * gnus-cite.el (gnus-cite-ignore-quoted-from): New user option. (gnus-cite-parse): Ignore quoted envelope From_. Suggested by Karl Chen <quarl@nospam.quarl.org> and Reiner Steib <Reiner.Steib@gmx.de>. * gnus-cus.el (gnus-agent-cat-prepare-category-field): Replace pp-to-string with gnus-pp-to-string. * gnus-eform.el (gnus-edit-form): Replace pp with gnus-pp. * gnus-group.el (gnus-group-make-kiboze-group): Replace pp with gnus-pp. * gnus-msg.el (gnus-setup-message): Ignore an article copy while parsing gnus-posting-styles when the message is not for replying. (gnus-summary-resend-message-edit): Call mime-to-mml. Suggested by Hiroshi Fujishima <pooh@nature.tsukuba.ac.jp>. (gnus-debug): Replace pp with gnus-pp. * gnus-score.el (gnus-score-save): Replace pp with gnus-pp. * gnus-spec.el (gnus-update-format): Replace pp-to-string with gnus-pp-to-string. * gnus-sum.el (gnus-read-header): Don't remove a header for the parent article of a sparse article in the thread hashtb. From Stefan Wiens <s.wi@gmx.net>. * gnus-util.el (gnus-bind-print-variables): New macro. (gnus-prin1): Use it. (gnus-prin1-to-string): Use it. (gnus-pp): New function. (gnus-pp-to-string): New function. * gnus.el: Don't make unnecessary *Group* buffer when loading. * mail-source.el (mail-source-touch-pop): Doc fix. * message.el (message-mode): Don't modify paragraph-separate there. (message-setup-fill-variables): Add mml tags to paragraph-start and paragraph-separate. Suggested by Andrew Korty <ajk@iu.edu>. (message-smtpmail-send-it): Doc fix. (message-exchange-point-and-mark): Don't activate region if it was inactive. Suggested by Hiroshi Fujishima <pooh@nature.tsukuba.ac.jp> and Jesper Harder <harder@ifa.au.dk>. * mm-decode.el (mm-save-part): Bind enable-multibyte-characters to t while entering a file name using the mm-with-multibyte macro. Suggested by Hiroshi Fujishima <pooh@nature.tsukuba.ac.jp>. * mm-encode.el (mm-content-transfer-encoding-defaults): Use qp-or-base64 for the application/* types. (mm-safer-encoding): Consider 7bit is safe. * mm-util.el (mm-with-multibyte-buffer): New macro. (mm-with-multibyte): New macro. * mm-view.el (mm-inline-render-with-function): Use multibyte buffer; decode html source by charset. * nndoc.el (nndoc-type-alist): Improve regexp for article-begin, add generate-head-function and generate-article-function to the rfc822-forward entry. (nndoc-forward-type-p): Recognize envelope From_. (nndoc-rfc822-forward-generate-article): New function. (nndoc-rfc822-forward-generate-head): New function. From David Hedbor <dhedbor@real.com>. * nnmail.el (nnmail-split-lowercase-expanded): New user option. (nnmail-expand-newtext): Lowercase expanded entries if nnmail-split-lowercase-expanded is non-nil. * score-mode.el (gnus-score-pretty-print): Replace pp with gnus-pp. * webmail.el (webmail-debug): Replace pp with gnus-pp. * gnus-art.el (gnus-article-wash-html-with-w3m): Bind w3m-safe-url-regexp as the value for mm-w3m-safe-url-regexp; use w3m-minor-mode-map instead of mm-w3m-local-map-property. (gnus-mime-save-part-and-strip): Use mm-complicated-handles instead of mm-multiple-handles. (gnus-mime-delete-part): Ditto. * mm-decode.el (mm-multiple-handles): Recognize a string as a mime handle, as well as a list. (mm-complicated-handles): Former definition of mm-multiple-handles. * mm-view.el (mm-w3m-mode-map): Remove. (mm-w3m-local-map-property): Remove. (mm-w3m-cid-retrieve-1): Call itself recursively. Suggested by ARISAWA Akihiro <ari@mbf.sphere.ne.jp>. (mm-w3m-cid-retrieve): Simplify. (mm-inline-text-html-render-with-w3m): Decode html source by charset; check META tags only when charsets are not specified in headers; specify charset to w3m-region; use w3m-minor-mode-map instead of mm-w3m-local-map-property.
author Reiner Steib <Reiner.Steib@gmx.de>
date Tue, 31 Aug 2004 14:47:59 +0000
parents 0fde48feb604
children e300f00a427a
line wrap: on
line source

;;; rfc2231.el --- Functions for decoding rfc2231 headers

;; Copyright (C) 1998, 1999, 2000, 2002, 2003 Free Software Foundation, Inc.

;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;;; Code:

(eval-when-compile (require 'cl))
(require 'ietf-drums)
(require 'rfc2047)
(autoload 'mm-encode-body "mm-bodies")
(autoload 'mail-header-remove-whitespace "mail-parse")
(autoload 'mail-header-remove-comments "mail-parse")

(defun rfc2231-get-value (ct attribute)
  "Return the value of ATTRIBUTE from CT."
  (cdr (assq attribute (cdr ct))))

(defun rfc2231-parse-qp-string (string)
  "Parse QP-encoded string using `rfc2231-parse-string'.
N.B.  This is in violation with RFC2047, but it seem to be in common use."
  (rfc2231-parse-string (rfc2047-decode-string string)))

(defun rfc2231-parse-string (string)
  "Parse STRING and return a list.
The list will be on the form
 `(name (attribute . value) (attribute . value)...)"
  (with-temp-buffer
    (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
	  (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
	  (ntoken (ietf-drums-token-to-list "0-9"))
	  (prev-value "")
	  display-name mailbox c display-string parameters
	  attribute value type subtype number encoded
	  prev-attribute)
      (ietf-drums-init (mail-header-remove-whitespace
			(mail-header-remove-comments string)))
      (let ((table (copy-syntax-table ietf-drums-syntax-table)))
	(modify-syntax-entry ?\' "w" table)
	(modify-syntax-entry ?= " " table)
	;; The following isn't valid, but one should be liberal
	;; in what one receives.
	(modify-syntax-entry ?\: "w" table)
	(set-syntax-table table))
      (setq c (char-after))
      (when (and (memq c ttoken)
		 (not (memq c stoken)))
	(setq type (downcase (buffer-substring
			      (point) (progn (forward-sexp 1) (point)))))
	;; Do the params
	(while (not (eobp))
	  (setq c (char-after))
	  (unless (eq c ?\;)
	    (error "Invalid header: %s" string))
	  (forward-char 1)
	  ;; If c in nil, then this is an invalid header, but
	  ;; since elm generates invalid headers on this form,
	  ;; we allow it.
	  (when (setq c (char-after))
	    (if (and (memq c ttoken)
		     (not (memq c stoken)))
		(setq attribute
		      (intern
		       (downcase
			(buffer-substring
			 (point) (progn (forward-sexp 1) (point))))))
	      (error "Invalid header: %s" string))
	    (setq c (char-after))
	    (setq encoded nil)
	    (when (eq c ?*)
	      (forward-char 1)
	      (setq c (char-after))
	      (if (not (memq c ntoken))
		  (setq encoded t
			number nil)
		(setq number
		      (string-to-number
		       (buffer-substring
			(point) (progn (forward-sexp 1) (point)))))
		(setq c (char-after))
		(when (eq c ?*)
		  (setq encoded t)
		  (forward-char 1)
		  (setq c (char-after)))))
	    ;; See if we have any previous continuations.
	    (when (and prev-attribute
		       (not (eq prev-attribute attribute)))
	      (push (cons prev-attribute prev-value) parameters)
	      (setq prev-attribute nil
		    prev-value ""))
	    (unless (eq c ?=)
	      (error "Invalid header: %s" string))
	    (forward-char 1)
	    (setq c (char-after))
	    (cond
	     ((eq c ?\")
	      (setq value
		    (buffer-substring (1+ (point))
				      (progn (forward-sexp 1) (1- (point))))))
	     ((and (or (memq c ttoken)
		       (> c ?\177)) ;; EXTENSION: Support non-ascii chars.
		   (not (memq c stoken)))
	      (setq value (buffer-substring
			   (point) (progn (forward-sexp) (point)))))
	     (t
	      (error "Invalid header: %s" string)))
	    (when encoded
	      (setq value (rfc2231-decode-encoded-string value)))
	    (if number
		(setq prev-attribute attribute
		      prev-value (concat prev-value value))
	      (push (cons attribute value) parameters))))

	;; Take care of any final continuations.
	(when prev-attribute
	  (push (cons prev-attribute prev-value) parameters))

	(when type
	  `(,type ,@(nreverse parameters)))))))

(defun rfc2231-decode-encoded-string (string)
  "Decode an RFC2231-encoded string.
These look like \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\"."
  (with-temp-buffer
    (let ((elems (split-string string "'")))
      ;; The encoded string may contain zero to two single-quote
      ;; marks.  This should give us the encoded word stripped
      ;; of any preceding values.
      (insert (car (last elems)))
      (goto-char (point-min))
      (while (search-forward "%" nil t)
	(insert
	 (prog1
	     (string-to-number (buffer-substring (point) (+ (point) 2)) 16)
	   (delete-region (1- (point)) (+ (point) 2)))))
      ;; Encode using the charset, if any.
      (when (and (mm-multibyte-p)
		 (> (length elems) 1)
		 (not (equal (intern (downcase (car elems))) 'us-ascii)))
	(mm-decode-coding-region (point-min) (point-max)
				 (intern (downcase (car elems)))))
      (buffer-string))))

(defun rfc2231-encode-string (param value)
  "Return and PARAM=VALUE string encoded according to RFC2231."
  (let ((control (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
	(tspecial (ietf-drums-token-to-list ietf-drums-tspecials))
	(special (ietf-drums-token-to-list "*'%\n\t"))
	(ascii (ietf-drums-token-to-list ietf-drums-text-token))
	(num -1)
	spacep encodep charsetp charset broken)
    (with-temp-buffer
      (insert value)
      (goto-char (point-min))
      (while (not (eobp))
	(cond
	 ((or (memq (following-char) control)
	      (memq (following-char) tspecial)
	      (memq (following-char) special))
	  (setq encodep t))
	 ((eq (following-char) ? )
	  (setq spacep t))
	 ((not (memq (following-char) ascii))
	  (setq charsetp t)))
	(forward-char 1))
      (when charsetp
	(setq charset (mm-encode-body)))
      (cond
       ((or encodep charsetp)
	(goto-char (point-min))
	(while (not (eobp))
	  (when (> (current-column) 60)
	    (insert ";\n")
	    (setq broken t))
	  (if (or (not (memq (following-char) ascii))
		  (memq (following-char) control)
		  (memq (following-char) tspecial)
		  (memq (following-char) special)
		  (eq (following-char) ? ))
	      (progn
		(insert "%" (format "%02x" (following-char)))
		(delete-char 1))
	    (forward-char 1)))
	(goto-char (point-min))
	(insert (symbol-name (or charset 'us-ascii)) "''")
	(goto-char (point-min))
	(if (not broken)
	    (insert param "*=")
	  (while (not (eobp))
	    (insert (if (>= num 0) " " "\n ")
		    param "*" (format "%d" (incf num)) "*=")
	    (forward-line 1))))
       (spacep
	(goto-char (point-min))
	(insert param "=\"")
	(goto-char (point-max))
	(insert "\""))
       (t
	(goto-char (point-min))
	(insert param "=")))
      (buffer-string))))

(provide 'rfc2231)

;;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63
;;; rfc2231.el ends here