Mercurial > emacs
annotate lisp/gnus/rfc2231.el @ 88155:d7ddb3e565de
sync with trunk
author | Henrik Enberg <henrik.enberg@telia.com> |
---|---|
date | Mon, 16 Jan 2006 00:03:54 +0000 |
parents | a26d9b55abb6 |
children |
rev | line source |
---|---|
88155 | 1 ;;; rfc2231.el --- Functions for decoding rfc2231 headers |
31717 | 2 |
88155 | 3 ;; Copyright (C) 1998, 1999, 2000, 2002, 2003, 2004, |
4 ;; 2005 Free Software Foundation, Inc. | |
31717 | 5 |
6 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | |
7 ;; This file is part of GNU Emacs. | |
8 | |
9 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
10 ;; it under the terms of the GNU General Public License as published by | |
11 ;; the Free Software Foundation; either version 2, or (at your option) | |
12 ;; any later version. | |
13 | |
14 ;; GNU Emacs is distributed in the hope that it will be useful, | |
15 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 ;; GNU General Public License for more details. | |
18 | |
19 ;; You should have received a copy of the GNU General Public License | |
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
88155 | 21 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, |
22 ;; Boston, MA 02110-1301, USA. | |
31717 | 23 |
24 ;;; Commentary: | |
25 | |
26 ;;; Code: | |
27 | |
33121
228696a7231c
2000-11-01 ShengHuo ZHU <zsh@cs.rochester.edu>
Dave Love <fx@gnu.org>
parents:
31717
diff
changeset
|
28 (eval-when-compile (require 'cl)) |
31717 | 29 (require 'ietf-drums) |
88155 | 30 (require 'rfc2047) |
31 (autoload 'mm-encode-body "mm-bodies") | |
32 (autoload 'mail-header-remove-whitespace "mail-parse") | |
33 (autoload 'mail-header-remove-comments "mail-parse") | |
31717 | 34 |
(defun rfc2231-get-value (ct attribute)
  "Look up ATTRIBUTE in the parsed header CT and return its value.
The first element of CT is skipped; the remaining elements are searched
as an alist with `assq'.  Return nil when ATTRIBUTE is not present."
  (let ((entry (assq attribute (cdr ct))))
    (cdr entry)))
38 | |
(defun rfc2231-parse-qp-string (string)
  "Decode STRING with `rfc2047-decode-string', then parse the result.
The decoded text is handed to `rfc2231-parse-string', whose value is
returned.
N.B.  This is in violation of RFC2047, but it seems to be in common use."
  (let ((decoded (rfc2047-decode-string string)))
    (rfc2231-parse-string decoded)))
43 | |
(defun rfc2231-parse-string (string)
  "Parse STRING and return a list.
The list will be on the form
 `(name (attribute . value) (attribute . value)...)'.

NAME is the downcased leading token of STRING; each ATTRIBUTE is an
interned, downcased symbol.  Comments and whitespace are removed from
STRING before parsing.  Parameters written as `attr*=' or `attr*N*='
are passed through `rfc2231-decode-encoded-string'; numbered
segments (`attr*N=') of the same attribute are concatenated in the
order they appear.

Return nil if STRING does not start with a token.  Signal an error
\"Invalid header\" when a parameter is malformed."
  (with-temp-buffer
    (let ((ttoken (ietf-drums-token-to-list ietf-drums-text-token))
          (stoken (ietf-drums-token-to-list ietf-drums-tspecials))
          (ntoken (ietf-drums-token-to-list "0-9"))
          (prev-value "")
          c parameters attribute value type number encoded
          prev-attribute prev-encoded)
      (ietf-drums-init (mail-header-remove-whitespace
                        (mail-header-remove-comments string)))
      (let ((table (copy-syntax-table ietf-drums-syntax-table)))
        (modify-syntax-entry ?\' "w" table)
        ;; `*', `;' and `=' delimit the pieces of a parameter, so make
        ;; `forward-sexp' stop at them.
        (modify-syntax-entry ?* " " table)
        (modify-syntax-entry ?\; " " table)
        (modify-syntax-entry ?= " " table)
        ;; The following isn't valid, but one should be liberal
        ;; in what one receives.
        (modify-syntax-entry ?\: "w" table)
        (set-syntax-table table))
      (setq c (char-after))
      (when (and (memq c ttoken)
                 (not (memq c stoken)))
        (setq type (downcase (buffer-substring
                              (point) (progn (forward-sexp 1) (point)))))
        ;; Do the params
        (while (not (eobp))
          (setq c (char-after))
          (unless (eq c ?\;)
            (error "Invalid header: %s" string))
          (forward-char 1)
          ;; If c is nil, then this is an invalid header, but
          ;; since elm generates invalid headers on this form,
          ;; we allow it.
          (when (setq c (char-after))
            ;; Each parameter starts with a clean slate; otherwise the
            ;; `*' markers of one parameter would leak into every
            ;; parameter that follows it.
            (setq number nil
                  encoded nil)
            (if (and (memq c ttoken)
                     (not (memq c stoken)))
                (setq attribute
                      (intern
                       (downcase
                        (buffer-substring
                         (point) (progn (forward-sexp 1) (point))))))
              (error "Invalid header: %s" string))
            (setq c (char-after))
            (when (eq c ?*)
              (forward-char 1)
              (setq c (char-after))
              (if (not (memq c ntoken))
                  ;; `attr*=': encoded, not a numbered segment.
                  (setq encoded t
                        number nil)
                ;; `attr*N=' or `attr*N*=': numbered segment.
                (setq number
                      (string-to-number
                       (buffer-substring
                        (point) (progn (forward-sexp 1) (point)))))
                (setq c (char-after))
                (when (eq c ?*)
                  (setq encoded t)
                  (forward-char 1)
                  (setq c (char-after)))))
            ;; See if we have any previous continuations.  A pending
            ;; continuation is finished when a different attribute
            ;; starts, or when the current parameter is not itself a
            ;; numbered segment.
            (when (and prev-attribute
                       (or (null number)
                           (not (eq prev-attribute attribute))))
              (push (cons prev-attribute
                          (if prev-encoded
                              (rfc2231-decode-encoded-string prev-value)
                            prev-value))
                    parameters)
              (setq prev-attribute nil
                    prev-value ""
                    prev-encoded nil))
            (unless (eq c ?=)
              (error "Invalid header: %s" string))
            (forward-char 1)
            (setq c (char-after))
            (cond
             ((eq c ?\")
              ;; Quoted value: take the text between the quotes.
              (setq value
                    (buffer-substring (1+ (point))
                                      (progn (forward-sexp 1) (1- (point))))))
             ;; `c' is nil when the header ends right after `='; guard
             ;; the numeric comparison so that case reaches the
             ;; "Invalid header" error below.
             ((and c
                   (or (memq c ttoken)
                       (> c ?\177)) ;; EXTENSION: Support non-ascii chars.
                   (not (memq c stoken)))
              (setq value (buffer-substring
                           (point)
                           (progn
                             (forward-sexp)
                             ;; We might not have reached the end of
                             ;; the value because of non-ascii chars,
                             ;; so we should jump over them if any.
                             (while (and (not (eobp))
                                         (> (char-after) ?\177))
                               (forward-char 1)
                               (forward-sexp))
                             (point)))))
             (t
              (error "Invalid header: %s" string)))
            (if number
                ;; Numbered segment: accumulate.  Once any segment of
                ;; the attribute is marked encoded, the concatenated
                ;; value is decoded as a whole.
                (setq prev-attribute attribute
                      prev-value (concat prev-value value)
                      prev-encoded (or prev-encoded encoded))
              (push (cons attribute
                          (if encoded
                              (rfc2231-decode-encoded-string value)
                            value))
                    parameters))))

        ;; Take care of any final continuations.
        (when prev-attribute
          (push (cons prev-attribute
                      (if prev-encoded
                          (rfc2231-decode-encoded-string prev-value)
                        prev-value))
                parameters))

        (when type
          `(,type ,@(nreverse parameters)))))))
163 | |
(defun rfc2231-decode-encoded-string (string)
  "Decode an RFC2231-encoded string.
These look like \"us-ascii'en-us'This%20is%20%2A%2A%2Afun%2A%2A%2A\".

STRING is split at single quotes.  The last piece is the value, in
which every `%' followed by two hexadecimal digits is replaced by the
character with that code; any other `%' is left untouched.  When more
than one piece is present, the first piece names the charset; unless
it is empty or \"us-ascii\", the value is then decoded with
`mm-decode-coding-region' using the downcased charset name as the
coding system.  Return the resulting string."
  (with-temp-buffer
    (let* ((elems (split-string string "'"))
           ;; The charset is only present when at least one quote was
           ;; seen; otherwise the whole of STRING is the value.
           (charset (and (> (length elems) 1)
                         (downcase (car elems)))))
      ;; The encoded string may contain zero to two single-quote
      ;; marks.  This should give us the encoded word stripped
      ;; of any preceding values.
      (insert (or (car (last elems)) ""))
      (goto-char (point-min))
      ;; Only well-formed %XX escapes are decoded, so a stray or
      ;; truncated `%' no longer signals `args-out-of-range'.
      ;; NOTE(review): the octet is inserted as a character into this
      ;; (multibyte) temp buffer -- confirm that the decoder below
      ;; treats such characters as raw bytes on the targeted Emacsen.
      (while (re-search-forward "%\\([0-9a-fA-F][0-9a-fA-F]\\)" nil t)
        (let ((octet (string-to-number (match-string 1) 16)))
          (delete-region (match-beginning 0) (match-end 0))
          (insert octet)))
      ;; Decode using the charset, if any.  An empty charset field
      ;; (as in \"''value\") names no coding system, so skip it.
      (when (and charset
                 (not (string= charset ""))
                 (not (string= charset "us-ascii"))
                 (mm-multibyte-p))
        (mm-decode-coding-region (point-min) (point-max)
                                 (intern charset)))
      (buffer-string))))
186 | |
(defun rfc2231-encode-string (param value)
  "Return a PARAM=VALUE string encoded according to RFC2231.
VALUE is scanned once to pick the output form:

- If it contains a control character, a tspecial, or one of
  `*', `'', `%', newline or tab, or a character outside the text
  token, the extended form PARAM*=CHARSET''... is produced, with
  every such character (and every space) written as %XX.  The
  charset comes from `mm-encode-body' when a character outside the
  text token was seen, and is `us-ascii' otherwise.  Output that
  grows past column 60 is split into numbered PARAM*N*= segments.
- Otherwise, if it contains a space, the quoted form PARAM=\"VALUE\"
  is produced.
- Otherwise PARAM=VALUE is returned as is."
  (let ((ctl-chars (ietf-drums-token-to-list ietf-drums-no-ws-ctl-token))
        (tspecial-chars (ietf-drums-token-to-list ietf-drums-tspecials))
        (extra-chars (ietf-drums-token-to-list "*'%\n\t"))
        (text-chars (ietf-drums-token-to-list ietf-drums-text-token))
        (segment -1)
        has-space must-encode non-ascii charset folded ch)
    (with-temp-buffer
      (insert value)
      ;; First pass: classify the characters of VALUE.
      (goto-char (point-min))
      (while (not (eobp))
        (setq ch (char-after))
        (cond
         ((or (memq ch ctl-chars)
              (memq ch tspecial-chars)
              (memq ch extra-chars))
          (setq must-encode t))
         ((eq ch ?\s)
          (setq has-space t))
         ((not (memq ch text-chars))
          (setq non-ascii t)))
        (forward-char 1))
      (when non-ascii
        (setq charset (mm-encode-body)))
      (goto-char (point-min))
      (if (not (or must-encode non-ascii))
          ;; Simple forms: quoted when a space is present, bare otherwise.
          (if (not has-space)
              (insert param "=")
            (insert param "=\"")
            (goto-char (point-max))
            (insert "\""))
        ;; Extended form.  Second pass: percent-encode in place,
        ;; breaking the text whenever it runs past column 60.
        (while (not (eobp))
          (when (> (current-column) 60)
            (insert ";\n")
            (setq folded t))
          (setq ch (char-after))
          (if (and (memq ch text-chars)
                   (not (memq ch ctl-chars))
                   (not (memq ch tspecial-chars))
                   (not (memq ch extra-chars))
                   (not (eq ch ?\s)))
              (forward-char 1)
            (insert "%" (format "%02x" ch))
            (delete-char 1)))
        ;; Prefix the charset and an empty language field.
        (goto-char (point-min))
        (insert (symbol-name (or charset 'us-ascii)) "''")
        (goto-char (point-min))
        (if folded
            ;; One numbered segment header per line; the first one is
            ;; preceded by a newline, the later ones by a space only.
            (while (not (eobp))
              (let ((lead (if (>= segment 0) " " "\n ")))
                (setq segment (1+ segment))
                (insert lead param "*" (format "%d" segment) "*="))
              (forward-line 1))
          (insert param "*=")))
      (buffer-string))))
245 | |
246 (provide 'rfc2231) | |
247 | |
88155 | 248 ;;; arch-tag: c3ab751d-d108-406a-b301-68882ad8cd63 |
31717 | 249 ;;; rfc2231.el ends here |