Mercurial > emacs
comparison lisp/gnus/mm-url.el @ 56927:55fd4f77387a after-merge-gnus-5_10
Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Merge from emacs--gnus--5.10, gnus--rel--5.10
Patches applied:
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--base-0
tag of miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-464
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-1
Import from CVS branch gnus-5_10-branch
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-2
Merge from lorentey@elte.hu--2004/emacs--multi-tty--0, emacs--cvs-trunk--0
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-3
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-4
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-18
Update from CVS
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-19
Remove autoconf-generated files from archive
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-20
Update from CVS
author | Miles Bader <miles@gnu.org> |
---|---|
date | Sat, 04 Sep 2004 13:13:48 +0000 |
parents | |
children | df80d19d7a2e |
comparison
equal
deleted
inserted
replaced
56926:f8e248e9a717 | 56927:55fd4f77387a |
---|---|
1 ;;; mm-url.el --- a wrapper of url functions/commands for Gnus | |
2 ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. | |
3 | |
4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
5 | |
6 ;; This file is part of GNU Emacs. | |
7 | |
8 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
9 ;; it under the terms of the GNU General Public License as published | |
10 ;; by the Free Software Foundation; either version 2, or (at your | |
11 ;; option) any later version. | |
12 | |
13 ;; GNU Emacs is distributed in the hope that it will be useful, but | |
14 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 ;; General Public License for more details. | |
17 | |
18 ;; You should have received a copy of the GNU General Public License | |
19 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
20 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
21 ;; Boston, MA 02111-1307, USA. | |
22 | |
23 ;;; Commentary: | |
24 | |
25 ;; Some codes are stolen from w3 and url packages. Some are moved from | |
26 ;; nnweb. | |
27 | |
28 ;; TODO: Support POST, cookie. | |
29 | |
30 ;;; Code: | |
31 | |
32 (eval-when-compile (require 'cl)) | |
33 | |
34 (require 'mm-util) | |
35 (require 'gnus) | |
36 | |
37 (eval-and-compile | |
38 (autoload 'executable-find "executable")) | |
39 | |
40 (eval-when-compile | |
41 (require 'timer)) | |
42 | |
43 (defgroup mm-url nil | |
44 "A wrapper of url package and external url command for Gnus." | |
45 :group 'gnus) | |
46 | |
47 (defcustom mm-url-use-external (not | |
48 (condition-case nil | |
49 (require 'url) | |
50 (error nil))) | |
51 "*If non-nil, use external grab program `mm-url-program'." | |
52 :type 'boolean | |
53 :group 'mm-url) | |
54 | |
55 (defvar mm-url-predefined-programs | |
56 '((wget "wget" "--user-agent=mm-url" "-q" "-O" "-") | |
57 (w3m "w3m" "-dump_source") | |
58 (lynx "lynx" "-source") | |
59 (curl "curl"))) | |
60 | |
61 (defcustom mm-url-program | |
62 (cond | |
63 ((executable-find "wget") 'wget) | |
64 ((executable-find "w3m") 'w3m) | |
65 ((executable-find "lynx") 'lynx) | |
66 ((executable-find "curl") 'curl) | |
67 (t "GET")) | |
68 "The url grab program. | |
69 Likely values are `wget', `w3m', `lynx' and `curl'." | |
70 :type '(choice | |
71 (symbol :tag "wget" wget) | |
72 (symbol :tag "w3m" w3m) | |
73 (symbol :tag "lynx" lynx) | |
74 (symbol :tag "curl" curl) | |
75 (string :tag "other")) | |
76 :group 'mm-url) | |
77 | |
78 (defcustom mm-url-arguments nil | |
79 "The arguments for `mm-url-program'." | |
80 :type '(repeat string) | |
81 :group 'mm-url) | |
82 | |
83 | |
84 ;;; Internal variables | |
85 | |
86 (defvar mm-url-package-name | |
87 (gnus-replace-in-string | |
88 (gnus-replace-in-string gnus-version " v.*$" "") | |
89 " " "-")) | |
90 | |
91 (defvar mm-url-package-version gnus-version-number) | |
92 | |
93 ;; Stolen from w3. | |
94 (defvar mm-url-html-entities | |
95 '( | |
96 ;;(excl . 33) | |
97 (quot . 34) | |
98 ;;(num . 35) | |
99 ;;(dollar . 36) | |
100 ;;(percent . 37) | |
101 (amp . 38) | |
102 (rsquo . 39) ; should be U+8217 | |
103 ;;(apos . 39) | |
104 ;;(lpar . 40) | |
105 ;;(rpar . 41) | |
106 ;;(ast . 42) | |
107 ;;(plus . 43) | |
108 ;;(comma . 44) | |
109 ;;(period . 46) | |
110 ;;(colon . 58) | |
111 ;;(semi . 59) | |
112 (lt . 60) | |
113 ;;(equals . 61) | |
114 (gt . 62) | |
115 ;;(quest . 63) | |
116 ;;(commat . 64) | |
117 ;;(lsqb . 91) | |
118 ;;(rsqb . 93) | |
119 (uarr . 94) ; should be U+8593 | |
120 ;;(lowbar . 95) | |
121 (lsquo . 96) ; should be U+8216 | |
122 (lcub . 123) | |
123 ;;(verbar . 124) | |
124 (rcub . 125) | |
125 (tilde . 126) | |
126 (nbsp . 160) | |
127 (iexcl . 161) | |
128 (cent . 162) | |
129 (pound . 163) | |
130 (curren . 164) | |
131 (yen . 165) | |
132 (brvbar . 166) | |
133 (sect . 167) | |
134 (uml . 168) | |
135 (copy . 169) | |
136 (ordf . 170) | |
137 (laquo . 171) | |
138 (not . 172) | |
139 (shy . 173) | |
140 (reg . 174) | |
141 (macr . 175) | |
142 (deg . 176) | |
143 (plusmn . 177) | |
144 (sup2 . 178) | |
145 (sup3 . 179) | |
146 (acute . 180) | |
147 (micro . 181) | |
148 (para . 182) | |
149 (middot . 183) | |
150 (cedil . 184) | |
151 (sup1 . 185) | |
152 (ordm . 186) | |
153 (raquo . 187) | |
154 (frac14 . 188) | |
155 (frac12 . 189) | |
156 (frac34 . 190) | |
157 (iquest . 191) | |
158 (Agrave . 192) | |
159 (Aacute . 193) | |
160 (Acirc . 194) | |
161 (Atilde . 195) | |
162 (Auml . 196) | |
163 (Aring . 197) | |
164 (AElig . 198) | |
165 (Ccedil . 199) | |
166 (Egrave . 200) | |
167 (Eacute . 201) | |
168 (Ecirc . 202) | |
169 (Euml . 203) | |
170 (Igrave . 204) | |
171 (Iacute . 205) | |
172 (Icirc . 206) | |
173 (Iuml . 207) | |
174 (ETH . 208) | |
175 (Ntilde . 209) | |
176 (Ograve . 210) | |
177 (Oacute . 211) | |
178 (Ocirc . 212) | |
179 (Otilde . 213) | |
180 (Ouml . 214) | |
181 (times . 215) | |
182 (Oslash . 216) | |
183 (Ugrave . 217) | |
184 (Uacute . 218) | |
185 (Ucirc . 219) | |
186 (Uuml . 220) | |
187 (Yacute . 221) | |
188 (THORN . 222) | |
189 (szlig . 223) | |
190 (agrave . 224) | |
191 (aacute . 225) | |
192 (acirc . 226) | |
193 (atilde . 227) | |
194 (auml . 228) | |
195 (aring . 229) | |
196 (aelig . 230) | |
197 (ccedil . 231) | |
198 (egrave . 232) | |
199 (eacute . 233) | |
200 (ecirc . 234) | |
201 (euml . 235) | |
202 (igrave . 236) | |
203 (iacute . 237) | |
204 (icirc . 238) | |
205 (iuml . 239) | |
206 (eth . 240) | |
207 (ntilde . 241) | |
208 (ograve . 242) | |
209 (oacute . 243) | |
210 (ocirc . 244) | |
211 (otilde . 245) | |
212 (ouml . 246) | |
213 (divide . 247) | |
214 (oslash . 248) | |
215 (ugrave . 249) | |
216 (uacute . 250) | |
217 (ucirc . 251) | |
218 (uuml . 252) | |
219 (yacute . 253) | |
220 (thorn . 254) | |
221 (yuml . 255) | |
222 | |
223 ;; Special handling of these | |
224 (frac56 . "5/6") | |
225 (frac16 . "1/6") | |
226 (frac45 . "4/5") | |
227 (frac35 . "3/5") | |
228 (frac25 . "2/5") | |
229 (frac15 . "1/5") | |
230 (frac23 . "2/3") | |
231 (frac13 . "1/3") | |
232 (frac78 . "7/8") | |
233 (frac58 . "5/8") | |
234 (frac38 . "3/8") | |
235 (frac18 . "1/8") | |
236 | |
237 ;; The following 5 entities are not mentioned in the HTML 2.0 | |
238 ;; standard, nor in any other HTML proposed standard of which I | |
239 ;; am aware. I am not even sure they are ISO entity names. *** | |
240 ;; Hence, some arrangement should be made to give a bad HTML | |
241 ;; message when they are seen. | |
242 (ndash . 45) | |
243 (mdash . 45) | |
244 (emsp . 32) | |
245 (ensp . 32) | |
246 (sim . 126) | |
247 (le . "<=") | |
248 (agr . "alpha") | |
249 (rdquo . "''") | |
250 (ldquo . "``") | |
251 (trade . "(TM)") | |
252 ;; To be done | |
253 ;; (shy . ????) ; soft hyphen | |
254 ) | |
255 "*An assoc list of entity names and how to actually display them.") | |
256 | |
257 (defconst mm-url-unreserved-chars | |
258 '( | |
259 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z | |
260 ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z | |
261 ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9 | |
262 ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\)) | |
263 "A list of characters that are _NOT_ reserved in the URL spec. | |
264 This is taken from RFC 2396.") | |
265 | |
266 (defun mm-url-load-url () | |
267 "Load `url-insert-file-contents'." | |
268 (unless (condition-case () | |
269 (require 'url-handlers) | |
270 (error nil)) | |
271 ;; w3-4.0pre0.46 or earlier version. | |
272 (require 'w3-vars) | |
273 (require 'url))) | |
274 | |
275 ;;;###autoload | |
276 (defun mm-url-insert-file-contents (url) | |
277 "Insert file contents of URL. | |
278 If `mm-url-use-external' is non-nil, use `mm-url-program'." | |
279 (if mm-url-use-external | |
280 (progn | |
281 (if (string-match "^file:/+" url) | |
282 (insert-file-contents (substring url (1- (match-end 0)))) | |
283 (mm-url-insert-file-contents-external url)) | |
284 (goto-char (point-min)) | |
285 (if (fboundp 'url-generic-parse-url) | |
286 (setq url-current-object | |
287 (url-generic-parse-url url))) | |
288 (list url (buffer-size))) | |
289 (mm-url-load-url) | |
290 (let ((name buffer-file-name) | |
291 (url-request-extra-headers (list (cons "Connection" "Close"))) | |
292 (url-package-name (or mm-url-package-name | |
293 url-package-name)) | |
294 (url-package-version (or mm-url-package-version | |
295 url-package-version)) | |
296 result) | |
297 (setq result (url-insert-file-contents url)) | |
298 (save-excursion | |
299 (goto-char (point-min)) | |
300 (while (re-search-forward "\r 1000\r ?" nil t) | |
301 (replace-match ""))) | |
302 (setq buffer-file-name name) | |
303 (if (and (fboundp 'url-generic-parse-url) | |
304 (listp result)) | |
305 (setq url-current-object (url-generic-parse-url | |
306 (car result)))) | |
307 result))) | |
308 | |
309 ;;;###autoload | |
310 (defun mm-url-insert-file-contents-external (url) | |
311 "Insert file contents of URL using `mm-url-program'." | |
312 (let (program args) | |
313 (if (symbolp mm-url-program) | |
314 (let ((item (cdr (assq mm-url-program mm-url-predefined-programs)))) | |
315 (setq program (car item) | |
316 args (append (cdr item) (list url)))) | |
317 (setq program mm-url-program | |
318 args (append mm-url-arguments (list url)))) | |
319 (unless (eq 0 (apply 'call-process program nil t nil args)) | |
320 (error "Couldn't fetch %s" url)))) | |
321 | |
322 (defvar mm-url-timeout 30 | |
323 "The number of seconds before timing out an URL fetch.") | |
324 | |
325 (defvar mm-url-retries 10 | |
326 "The number of retries after timing out when fetching an URL.") | |
327 | |
328 (defun mm-url-insert (url &optional follow-refresh) | |
329 "Insert the contents from an URL in the current buffer. | |
330 If FOLLOW-REFRESH is non-nil, redirect refresh url in META." | |
331 (let ((times mm-url-retries) | |
332 (done nil) | |
333 (first t) | |
334 result) | |
335 (while (and (not (zerop (decf times))) | |
336 (not done)) | |
337 (with-timeout (mm-url-timeout) | |
338 (unless first | |
339 (message "Trying again (%s)..." (- mm-url-retries times))) | |
340 (setq first nil) | |
341 (if follow-refresh | |
342 (save-restriction | |
343 (narrow-to-region (point) (point)) | |
344 (mm-url-insert-file-contents url) | |
345 (goto-char (point-min)) | |
346 (when (re-search-forward | |
347 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) | |
348 (let ((url (match-string 1))) | |
349 (delete-region (point-min) (point-max)) | |
350 (setq result (mm-url-insert url t))))) | |
351 (setq result (mm-url-insert-file-contents url))) | |
352 (setq done t))) | |
353 result)) | |
354 | |
355 (defun mm-url-decode-entities () | |
356 "Decode all HTML entities." | |
357 (goto-char (point-min)) | |
358 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) | |
359 (let ((elem (if (eq (aref (match-string 1) 0) ?\#) | |
360 (let ((c | |
361 (string-to-number (substring | |
362 (match-string 1) 1)))) | |
363 (if (mm-char-or-char-int-p c) c 32)) | |
364 (or (cdr (assq (intern (match-string 1)) | |
365 mm-url-html-entities)) | |
366 ?#)))) | |
367 (unless (stringp elem) | |
368 (setq elem (char-to-string elem))) | |
369 (replace-match elem t t)))) | |
370 | |
371 (defun mm-url-decode-entities-nbsp () | |
372 "Decode all HTML entities and to a space." | |
373 (let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities))) | |
374 (mm-url-decode-entities))) | |
375 | |
376 (defun mm-url-decode-entities-string (string) | |
377 (with-temp-buffer | |
378 (insert string) | |
379 (mm-url-decode-entities) | |
380 (buffer-string))) | |
381 | |
382 (defun mm-url-form-encode-xwfu (chunk) | |
383 "Escape characters in a string for application/x-www-form-urlencoded. | |
384 Blasphemous crap because someone didn't think %20 was good enough for encoding | |
385 spaces. Die Die Die." | |
386 ;; This will get rid of the 'attributes' specified by the file type, | |
387 ;; which are useless for an application/x-www-form-urlencoded form. | |
388 (if (consp chunk) | |
389 (setq chunk (cdr chunk))) | |
390 | |
391 (mapconcat | |
392 (lambda (char) | |
393 (cond | |
394 ((= char ? ) "+") | |
395 ((memq char mm-url-unreserved-chars) (char-to-string char)) | |
396 (t (upcase (format "%%%02x" char))))) | |
397 ;; Fixme: Should this actually be accepting multibyte? Is there a | |
398 ;; better way in XEmacs? | |
399 (if (featurep 'mule) | |
400 (encode-coding-string chunk | |
401 (if (fboundp 'find-coding-systems-string) | |
402 (car (find-coding-systems-string chunk)) | |
403 buffer-file-coding-system)) | |
404 chunk) | |
405 "")) | |
406 | |
407 (defun mm-url-encode-www-form-urlencoded (pairs) | |
408 "Return PAIRS encoded for forms." | |
409 (mapconcat | |
410 (lambda (data) | |
411 (concat (mm-url-form-encode-xwfu (car data)) "=" | |
412 (mm-url-form-encode-xwfu (cdr data)))) | |
413 pairs "&")) | |
414 | |
415 (defun mm-url-fetch-form (url pairs) | |
416 "Fetch a form from URL with PAIRS as the data using the POST method." | |
417 (mm-url-load-url) | |
418 (let ((url-request-data (mm-url-encode-www-form-urlencoded pairs)) | |
419 (url-request-method "POST") | |
420 (url-request-extra-headers | |
421 '(("Content-type" . "application/x-www-form-urlencoded")))) | |
422 (url-insert-file-contents url) | |
423 (setq buffer-file-name nil)) | |
424 t) | |
425 | |
426 (defun mm-url-fetch-simple (url content) | |
427 (mm-url-load-url) | |
428 (let ((url-request-data content) | |
429 (url-request-method "POST") | |
430 (url-request-extra-headers | |
431 '(("Content-type" . "application/x-www-form-urlencoded")))) | |
432 (url-insert-file-contents url) | |
433 (setq buffer-file-name nil)) | |
434 t) | |
435 | |
436 (defun mm-url-remove-markup () | |
437 "Remove all HTML markup, leaving just plain text." | |
438 (goto-char (point-min)) | |
439 (while (search-forward "<!--" nil t) | |
440 (delete-region (match-beginning 0) | |
441 (or (search-forward "-->" nil t) | |
442 (point-max)))) | |
443 (goto-char (point-min)) | |
444 (while (re-search-forward "<[^>]+>" nil t) | |
445 (replace-match "" t t))) | |
446 | |
447 (provide 'mm-url) | |
448 | |
449 ;;; arch-tag: 0594f9b3-417c-48b0-adc2-5082e1e7917f | |
450 ;;; mm-url.el ends here |