88155
|
1 ;;; mm-url.el --- a wrapper of url functions/commands for Gnus
|
|
2
|
|
3 ;; Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
|
|
4
|
|
5 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu>
|
|
6
|
|
7 ;; This file is part of GNU Emacs.
|
|
8
|
|
9 ;; GNU Emacs is free software; you can redistribute it and/or modify
|
|
10 ;; it under the terms of the GNU General Public License as published
|
|
11 ;; by the Free Software Foundation; either version 2, or (at your
|
|
12 ;; option) any later version.
|
|
13
|
|
14 ;; GNU Emacs is distributed in the hope that it will be useful, but
|
|
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
17 ;; General Public License for more details.
|
|
18
|
|
19 ;; You should have received a copy of the GNU General Public License
|
|
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the
|
|
21 ;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
22 ;; Boston, MA 02110-1301, USA.
|
|
23
|
|
24 ;;; Commentary:
|
|
25
|
|
26 ;; Some codes are stolen from w3 and url packages. Some are moved from
|
|
27 ;; nnweb.
|
|
28
|
|
29 ;; TODO: Support POST, cookie.
|
|
30
|
|
31 ;;; Code:
|
|
32
|
|
33 (eval-when-compile (require 'cl))
|
|
34
|
|
35 (require 'mm-util)
|
|
36 (require 'gnus)
|
|
37
|
|
38 (eval-and-compile
|
|
39 (autoload 'executable-find "executable"))
|
|
40
|
|
41 (eval-when-compile
|
|
42 (if (featurep 'xemacs)
|
|
43 (require 'timer-funcs)
|
|
44 (require 'timer)))
|
|
45
|
|
46 (defvar url-current-object)
|
|
47 (defvar url-package-name)
|
|
48 (defvar url-package-version)
|
|
49
|
|
50 (defgroup mm-url nil
|
|
51 "A wrapper of url package and external url command for Gnus."
|
|
52 :group 'gnus)
|
|
53
|
|
54 (defcustom mm-url-use-external (not
|
|
55 (condition-case nil
|
|
56 (require 'url)
|
|
57 (error nil)))
|
|
58 "*If non-nil, use external grab program `mm-url-program'."
|
|
59 :version "22.1"
|
|
60 :type 'boolean
|
|
61 :group 'mm-url)
|
|
62
|
|
63 (defvar mm-url-predefined-programs
|
|
64 '((wget "wget" "--user-agent=mm-url" "-q" "-O" "-")
|
|
65 (w3m "w3m" "-dump_source")
|
|
66 (lynx "lynx" "-source")
|
|
67 (curl "curl" "--silent" "--user-agent mm-url" "--location")))
|
|
68
|
|
69 (defcustom mm-url-program
|
|
70 (cond
|
|
71 ((executable-find "wget") 'wget)
|
|
72 ((executable-find "w3m") 'w3m)
|
|
73 ((executable-find "lynx") 'lynx)
|
|
74 ((executable-find "curl") 'curl)
|
|
75 (t "GET"))
|
|
76 "The url grab program.
|
|
77 Likely values are `wget', `w3m', `lynx' and `curl'."
|
|
78 :version "22.1"
|
|
79 :type '(choice
|
|
80 (symbol :tag "wget" wget)
|
|
81 (symbol :tag "w3m" w3m)
|
|
82 (symbol :tag "lynx" lynx)
|
|
83 (symbol :tag "curl" curl)
|
|
84 (string :tag "other"))
|
|
85 :group 'mm-url)
|
|
86
|
|
87 (defcustom mm-url-arguments nil
|
|
88 "The arguments for `mm-url-program'."
|
|
89 :version "22.1"
|
|
90 :type '(repeat string)
|
|
91 :group 'mm-url)
|
|
92
|
|
93
|
|
94 ;;; Internal variables
|
|
95
|
|
96 (defvar mm-url-package-name
|
|
97 (gnus-replace-in-string
|
|
98 (gnus-replace-in-string gnus-version " v.*$" "")
|
|
99 " " "-"))
|
|
100
|
|
101 (defvar mm-url-package-version gnus-version-number)
|
|
102
|
|
103 ;; Stolen from w3.
|
|
104 (defvar mm-url-html-entities
|
|
105 '(
|
|
106 ;;(excl . 33)
|
|
107 (quot . 34)
|
|
108 ;;(num . 35)
|
|
109 ;;(dollar . 36)
|
|
110 ;;(percent . 37)
|
|
111 (amp . 38)
|
|
112 (rsquo . 39) ; should be U+8217
|
|
113 ;;(apos . 39)
|
|
114 ;;(lpar . 40)
|
|
115 ;;(rpar . 41)
|
|
116 ;;(ast . 42)
|
|
117 ;;(plus . 43)
|
|
118 ;;(comma . 44)
|
|
119 ;;(period . 46)
|
|
120 ;;(colon . 58)
|
|
121 ;;(semi . 59)
|
|
122 (lt . 60)
|
|
123 ;;(equals . 61)
|
|
124 (gt . 62)
|
|
125 ;;(quest . 63)
|
|
126 ;;(commat . 64)
|
|
127 ;;(lsqb . 91)
|
|
128 ;;(rsqb . 93)
|
|
129 (uarr . 94) ; should be U+8593
|
|
130 ;;(lowbar . 95)
|
|
131 (lsquo . 96) ; should be U+8216
|
|
132 (lcub . 123)
|
|
133 ;;(verbar . 124)
|
|
134 (rcub . 125)
|
|
135 (tilde . 126)
|
|
136 (nbsp . 160)
|
|
137 (iexcl . 161)
|
|
138 (cent . 162)
|
|
139 (pound . 163)
|
|
140 (curren . 164)
|
|
141 (yen . 165)
|
|
142 (brvbar . 166)
|
|
143 (sect . 167)
|
|
144 (uml . 168)
|
|
145 (copy . 169)
|
|
146 (ordf . 170)
|
|
147 (laquo . 171)
|
|
148 (not . 172)
|
|
149 (shy . 173)
|
|
150 (reg . 174)
|
|
151 (macr . 175)
|
|
152 (deg . 176)
|
|
153 (plusmn . 177)
|
|
154 (sup2 . 178)
|
|
155 (sup3 . 179)
|
|
156 (acute . 180)
|
|
157 (micro . 181)
|
|
158 (para . 182)
|
|
159 (middot . 183)
|
|
160 (cedil . 184)
|
|
161 (sup1 . 185)
|
|
162 (ordm . 186)
|
|
163 (raquo . 187)
|
|
164 (frac14 . 188)
|
|
165 (frac12 . 189)
|
|
166 (frac34 . 190)
|
|
167 (iquest . 191)
|
|
168 (Agrave . 192)
|
|
169 (Aacute . 193)
|
|
170 (Acirc . 194)
|
|
171 (Atilde . 195)
|
|
172 (Auml . 196)
|
|
173 (Aring . 197)
|
|
174 (AElig . 198)
|
|
175 (Ccedil . 199)
|
|
176 (Egrave . 200)
|
|
177 (Eacute . 201)
|
|
178 (Ecirc . 202)
|
|
179 (Euml . 203)
|
|
180 (Igrave . 204)
|
|
181 (Iacute . 205)
|
|
182 (Icirc . 206)
|
|
183 (Iuml . 207)
|
|
184 (ETH . 208)
|
|
185 (Ntilde . 209)
|
|
186 (Ograve . 210)
|
|
187 (Oacute . 211)
|
|
188 (Ocirc . 212)
|
|
189 (Otilde . 213)
|
|
190 (Ouml . 214)
|
|
191 (times . 215)
|
|
192 (Oslash . 216)
|
|
193 (Ugrave . 217)
|
|
194 (Uacute . 218)
|
|
195 (Ucirc . 219)
|
|
196 (Uuml . 220)
|
|
197 (Yacute . 221)
|
|
198 (THORN . 222)
|
|
199 (szlig . 223)
|
|
200 (agrave . 224)
|
|
201 (aacute . 225)
|
|
202 (acirc . 226)
|
|
203 (atilde . 227)
|
|
204 (auml . 228)
|
|
205 (aring . 229)
|
|
206 (aelig . 230)
|
|
207 (ccedil . 231)
|
|
208 (egrave . 232)
|
|
209 (eacute . 233)
|
|
210 (ecirc . 234)
|
|
211 (euml . 235)
|
|
212 (igrave . 236)
|
|
213 (iacute . 237)
|
|
214 (icirc . 238)
|
|
215 (iuml . 239)
|
|
216 (eth . 240)
|
|
217 (ntilde . 241)
|
|
218 (ograve . 242)
|
|
219 (oacute . 243)
|
|
220 (ocirc . 244)
|
|
221 (otilde . 245)
|
|
222 (ouml . 246)
|
|
223 (divide . 247)
|
|
224 (oslash . 248)
|
|
225 (ugrave . 249)
|
|
226 (uacute . 250)
|
|
227 (ucirc . 251)
|
|
228 (uuml . 252)
|
|
229 (yacute . 253)
|
|
230 (thorn . 254)
|
|
231 (yuml . 255)
|
|
232
|
|
233 ;; Special handling of these
|
|
234 (frac56 . "5/6")
|
|
235 (frac16 . "1/6")
|
|
236 (frac45 . "4/5")
|
|
237 (frac35 . "3/5")
|
|
238 (frac25 . "2/5")
|
|
239 (frac15 . "1/5")
|
|
240 (frac23 . "2/3")
|
|
241 (frac13 . "1/3")
|
|
242 (frac78 . "7/8")
|
|
243 (frac58 . "5/8")
|
|
244 (frac38 . "3/8")
|
|
245 (frac18 . "1/8")
|
|
246
|
|
247 ;; The following 5 entities are not mentioned in the HTML 2.0
|
|
248 ;; standard, nor in any other HTML proposed standard of which I
|
|
249 ;; am aware. I am not even sure they are ISO entity names. ***
|
|
250 ;; Hence, some arrangement should be made to give a bad HTML
|
|
251 ;; message when they are seen.
|
|
252 (ndash . 45)
|
|
253 (mdash . 45)
|
|
254 (emsp . 32)
|
|
255 (ensp . 32)
|
|
256 (sim . 126)
|
|
257 (le . "<=")
|
|
258 (agr . "alpha")
|
|
259 (rdquo . "''")
|
|
260 (ldquo . "``")
|
|
261 (trade . "(TM)")
|
|
262 ;; To be done
|
|
263 ;; (shy . ????) ; soft hyphen
|
|
264 )
|
|
265 "*An assoc list of entity names and how to actually display them.")
|
|
266
|
|
267 (defconst mm-url-unreserved-chars
|
|
268 '(
|
|
269 ?a ?b ?c ?d ?e ?f ?g ?h ?i ?j ?k ?l ?m ?n ?o ?p ?q ?r ?s ?t ?u ?v ?w ?x ?y ?z
|
|
270 ?A ?B ?C ?D ?E ?F ?G ?H ?I ?J ?K ?L ?M ?N ?O ?P ?Q ?R ?S ?T ?U ?V ?W ?X ?Y ?Z
|
|
271 ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9
|
|
272 ?- ?_ ?. ?! ?~ ?* ?' ?\( ?\))
|
|
273 "A list of characters that are _NOT_ reserved in the URL spec.
|
|
274 This is taken from RFC 2396.")
|
|
275
|
|
276 (defun mm-url-load-url ()
|
|
277 "Load `url-insert-file-contents'."
|
|
278 (unless (condition-case ()
|
|
279 (progn
|
|
280 (require 'url-handlers)
|
|
281 (require 'url-parse)
|
|
282 (require 'url-vars))
|
|
283 (error nil))
|
|
284 ;; w3-4.0pre0.46 or earlier version.
|
|
285 (require 'w3-vars)
|
|
286 (require 'url)))
|
|
287
|
|
288 ;;;###autoload
|
|
289 (defun mm-url-insert-file-contents (url)
|
|
290 "Insert file contents of URL.
|
|
291 If `mm-url-use-external' is non-nil, use `mm-url-program'."
|
|
292 (if mm-url-use-external
|
|
293 (progn
|
|
294 (if (string-match "^file:/+" url)
|
|
295 (insert-file-contents (substring url (1- (match-end 0))))
|
|
296 (mm-url-insert-file-contents-external url))
|
|
297 (goto-char (point-min))
|
|
298 (if (fboundp 'url-generic-parse-url)
|
|
299 (setq url-current-object
|
|
300 (url-generic-parse-url url)))
|
|
301 (list url (buffer-size)))
|
|
302 (mm-url-load-url)
|
|
303 (let ((name buffer-file-name)
|
|
304 (url-request-extra-headers (list (cons "Connection" "Close")))
|
|
305 (url-package-name (or mm-url-package-name
|
|
306 url-package-name))
|
|
307 (url-package-version (or mm-url-package-version
|
|
308 url-package-version))
|
|
309 result)
|
|
310 (setq result (url-insert-file-contents url))
|
|
311 (save-excursion
|
|
312 (goto-char (point-min))
|
|
313 (while (re-search-forward "\r 1000\r ?" nil t)
|
|
314 (replace-match "")))
|
|
315 (setq buffer-file-name name)
|
|
316 (if (and (fboundp 'url-generic-parse-url)
|
|
317 (listp result))
|
|
318 (setq url-current-object (url-generic-parse-url
|
|
319 (car result))))
|
|
320 result)))
|
|
321
|
|
322 ;;;###autoload
|
|
323 (defun mm-url-insert-file-contents-external (url)
|
|
324 "Insert file contents of URL using `mm-url-program'."
|
|
325 (let (program args)
|
|
326 (if (symbolp mm-url-program)
|
|
327 (let ((item (cdr (assq mm-url-program mm-url-predefined-programs))))
|
|
328 (setq program (car item)
|
|
329 args (append (cdr item) (list url))))
|
|
330 (setq program mm-url-program
|
|
331 args (append mm-url-arguments (list url))))
|
|
332 (unless (eq 0 (apply 'call-process program nil t nil args))
|
|
333 (error "Couldn't fetch %s" url))))
|
|
334
|
|
335 (defvar mm-url-timeout 30
|
|
336 "The number of seconds before timing out an URL fetch.")
|
|
337
|
|
338 (defvar mm-url-retries 10
|
|
339 "The number of retries after timing out when fetching an URL.")
|
|
340
|
|
341 (defun mm-url-insert (url &optional follow-refresh)
|
|
342 "Insert the contents from an URL in the current buffer.
|
|
343 If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
|
|
344 (let ((times mm-url-retries)
|
|
345 (done nil)
|
|
346 (first t)
|
|
347 result)
|
|
348 (while (and (not (zerop (decf times)))
|
|
349 (not done))
|
|
350 (with-timeout (mm-url-timeout)
|
|
351 (unless first
|
|
352 (message "Trying again (%s)..." (- mm-url-retries times)))
|
|
353 (setq first nil)
|
|
354 (if follow-refresh
|
|
355 (save-restriction
|
|
356 (narrow-to-region (point) (point))
|
|
357 (mm-url-insert-file-contents url)
|
|
358 (goto-char (point-min))
|
|
359 (when (re-search-forward
|
|
360 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
|
|
361 (let ((url (match-string 1)))
|
|
362 (delete-region (point-min) (point-max))
|
|
363 (setq result (mm-url-insert url t)))))
|
|
364 (setq result (mm-url-insert-file-contents url)))
|
|
365 (setq done t)))
|
|
366 result))
|
|
367
|
|
368 (defun mm-url-decode-entities ()
|
|
369 "Decode all HTML entities."
|
|
370 (goto-char (point-min))
|
|
371 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+[0-9]*\\);" nil t)
|
|
372 (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
|
|
373 (let ((c
|
|
374 (string-to-number (substring
|
|
375 (match-string 1) 1))))
|
|
376 (if (mm-char-or-char-int-p c) c 32))
|
|
377 (or (cdr (assq (intern (match-string 1))
|
|
378 mm-url-html-entities))
|
|
379 ?#))))
|
|
380 (unless (stringp elem)
|
|
381 (setq elem (char-to-string elem)))
|
|
382 (replace-match elem t t))))
|
|
383
|
|
384 (defun mm-url-decode-entities-nbsp ()
|
|
385 "Decode all HTML entities and to a space."
|
|
386 (let ((mm-url-html-entities (cons '(nbsp . 32) mm-url-html-entities)))
|
|
387 (mm-url-decode-entities)))
|
|
388
|
|
389 (defun mm-url-decode-entities-string (string)
|
|
390 (with-temp-buffer
|
|
391 (insert string)
|
|
392 (mm-url-decode-entities)
|
|
393 (buffer-string)))
|
|
394
|
|
395 (defun mm-url-form-encode-xwfu (chunk)
|
|
396 "Escape characters in a string for application/x-www-form-urlencoded.
|
|
397 Blasphemous crap because someone didn't think %20 was good enough for encoding
|
|
398 spaces. Die Die Die."
|
|
399 ;; This will get rid of the 'attributes' specified by the file type,
|
|
400 ;; which are useless for an application/x-www-form-urlencoded form.
|
|
401 (if (consp chunk)
|
|
402 (setq chunk (cdr chunk)))
|
|
403
|
|
404 (mapconcat
|
|
405 (lambda (char)
|
|
406 (cond
|
|
407 ((= char ? ) "+")
|
|
408 ((memq char mm-url-unreserved-chars) (char-to-string char))
|
|
409 (t (upcase (format "%%%02x" char)))))
|
|
410 ;; Fixme: Should this actually be accepting multibyte? Is there a
|
|
411 ;; better way in XEmacs?
|
|
412 (if (featurep 'mule)
|
|
413 (encode-coding-string chunk
|
|
414 (if (fboundp 'find-coding-systems-string)
|
|
415 (car (find-coding-systems-string chunk))
|
|
416 buffer-file-coding-system))
|
|
417 chunk)
|
|
418 ""))
|
|
419
|
|
420 (defun mm-url-encode-www-form-urlencoded (pairs)
|
|
421 "Return PAIRS encoded for forms."
|
|
422 (mapconcat
|
|
423 (lambda (data)
|
|
424 (concat (mm-url-form-encode-xwfu (car data)) "="
|
|
425 (mm-url-form-encode-xwfu (cdr data))))
|
|
426 pairs "&"))
|
|
427
|
|
428 (defun mm-url-fetch-form (url pairs)
|
|
429 "Fetch a form from URL with PAIRS as the data using the POST method."
|
|
430 (mm-url-load-url)
|
|
431 (let ((url-request-data (mm-url-encode-www-form-urlencoded pairs))
|
|
432 (url-request-method "POST")
|
|
433 (url-request-extra-headers
|
|
434 '(("Content-type" . "application/x-www-form-urlencoded"))))
|
|
435 (url-insert-file-contents url)
|
|
436 (setq buffer-file-name nil))
|
|
437 t)
|
|
438
|
|
439 (defun mm-url-fetch-simple (url content)
|
|
440 (mm-url-load-url)
|
|
441 (let ((url-request-data content)
|
|
442 (url-request-method "POST")
|
|
443 (url-request-extra-headers
|
|
444 '(("Content-type" . "application/x-www-form-urlencoded"))))
|
|
445 (url-insert-file-contents url)
|
|
446 (setq buffer-file-name nil))
|
|
447 t)
|
|
448
|
|
449 (defun mm-url-remove-markup ()
|
|
450 "Remove all HTML markup, leaving just plain text."
|
|
451 (goto-char (point-min))
|
|
452 (while (search-forward "<!--" nil t)
|
|
453 (delete-region (match-beginning 0)
|
|
454 (or (search-forward "-->" nil t)
|
|
455 (point-max))))
|
|
456 (goto-char (point-min))
|
|
457 (while (re-search-forward "<[^>]+>" nil t)
|
|
458 (replace-match "" t t)))
|
|
459
|
|
460 (provide 'mm-url)
|
|
461
|
|
462 ;;; arch-tag: 0594f9b3-417c-48b0-adc2-5082e1e7917f
|
|
463 ;;; mm-url.el ends here
|