Mercurial > emacs
comparison lisp/gnus/nnweb.el @ 89971:cce1c0ee76ee
Revision: miles@gnu.org--gnu-2004/emacs--unicode--0--patch-36
Merge from emacs--cvs-trunk--0, emacs--gnus--5.10, gnus--rel--5.10
Patches applied:
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Merge from emacs--gnus--5.10, gnus--rel--5.10
* miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-524
- miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-534
Update from CVS
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--base-0
tag of miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-464
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-1
Import from CVS branch gnus-5_10-branch
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-2
Merge from lorentey@elte.hu--2004/emacs--multi-tty--0, emacs--cvs-trunk--0
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-3
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-4
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-18
Update from CVS
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-19
Remove autoconf-generated files from archive
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-20
Update from CVS
author | Miles Bader <miles@gnu.org> |
---|---|
date | Thu, 09 Sep 2004 09:36:36 +0000 |
parents | 561b856c5b1f 55fd4f77387a |
children | 29e773288013 |
comparison
equal
deleted
inserted
replaced
89970:a849e5779b8c | 89971:cce1c0ee76ee |
---|---|
1 ;;; nnweb.el --- retrieving articles via web search engines | 1 ;;; nnweb.el --- retrieving articles via web search engines |
2 ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 | 2 ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003 |
3 ;; Free Software Foundation, Inc. | 3 ;; Free Software Foundation, Inc. |
4 | 4 |
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> | 5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> |
6 ;; Keywords: news | 6 ;; Keywords: news |
7 | 7 |
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | 22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, |
23 ;; Boston, MA 02111-1307, USA. | 23 ;; Boston, MA 02111-1307, USA. |
24 | 24 |
25 ;;; Commentary: | 25 ;;; Commentary: |
26 | 26 |
27 ;; Note: You need to have `url' and `w3' installed for this | 27 ;; Note: You need to have `w3' installed for some functions to work. |
28 ;; backend to work. | |
29 | 28 |
30 ;;; Code: | 29 ;;; Code: |
31 | 30 |
32 (eval-when-compile (require 'cl)) | 31 (eval-when-compile (require 'cl)) |
33 | 32 |
35 (require 'message) | 34 (require 'message) |
36 (require 'gnus-util) | 35 (require 'gnus-util) |
37 (require 'gnus) | 36 (require 'gnus) |
38 (require 'nnmail) | 37 (require 'nnmail) |
39 (require 'mm-util) | 38 (require 'mm-util) |
40 (eval-when-compile | 39 (require 'mm-url) |
40 (eval-and-compile | |
41 (ignore-errors | 41 (ignore-errors |
42 (require 'w3) | 42 (require 'url))) |
43 (require 'url) | 43 (autoload 'w3-parse-buffer "w3-parse") |
44 (require 'w3-forms))) | |
45 | |
46 ;; Report failure to find w3 at load time if appropriate. | |
47 (unless noninteractive | |
48 (eval '(progn | |
49 (require 'w3) | |
50 (require 'url) | |
51 (require 'w3-forms)))) | |
52 | 44 |
53 (nnoo-declare nnweb) | 45 (nnoo-declare nnweb) |
54 | 46 |
55 (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") | 47 (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") |
56 "Where nnweb will save its files.") | 48 "Where nnweb will save its files.") |
57 | 49 |
58 (defvoo nnweb-type 'google | 50 (defvoo nnweb-type 'google |
59 "What search engine type is being used. | 51 "What search engine type is being used. |
60 Valid types include `google', `dejanews', `dejanewsold', `reference', | 52 Valid types include `google', `dejanews', and `gmane'.") |
61 and `altavista'.") | |
62 | 53 |
63 (defvar nnweb-type-definition | 54 (defvar nnweb-type-definition |
64 '( | 55 '((google |
65 (google | |
66 ;;(article . nnweb-google-wash-article) | |
67 ;;(id . "http://groups.google.com/groups?as_umsgid=%s") | |
68 (article . ignore) | 56 (article . ignore) |
69 (id . "http://groups.google.com/groups?selm=%s&output=gplain") | 57 (id . "http://groups.google.com/groups?selm=%s&output=gplain") |
70 ;;(reference . nnweb-google-reference) | |
71 (reference . identity) | 58 (reference . identity) |
72 (map . nnweb-google-create-mapping) | 59 (map . nnweb-google-create-mapping) |
73 (search . nnweb-google-search) | 60 (search . nnweb-google-search) |
74 (address . "http://groups.google.com/groups") | 61 (address . "http://groups.google.com/groups") |
75 (identifier . nnweb-google-identity)) | 62 (identifier . nnweb-google-identity)) |
76 (dejanews ;; alias of google | 63 (dejanews ;; alias of google |
77 ;;(article . nnweb-google-wash-article) | |
78 ;;(id . "http://groups.google.com/groups?as_umsgid=%s") | |
79 (article . ignore) | 64 (article . ignore) |
80 (id . "http://groups.google.com/groups?selm=%s&output=gplain") | 65 (id . "http://groups.google.com/groups?selm=%s&output=gplain") |
81 ;;(reference . nnweb-google-reference) | |
82 (reference . identity) | 66 (reference . identity) |
83 (map . nnweb-google-create-mapping) | 67 (map . nnweb-google-create-mapping) |
84 (search . nnweb-google-search) | 68 (search . nnweb-google-search) |
85 (address . "http://groups.google.com/groups") | 69 (address . "http://groups.google.com/groups") |
86 (identifier . nnweb-google-identity)) | 70 (identifier . nnweb-google-identity)) |
87 ;;; (dejanews | 71 (gmane |
88 ;;; (article . ignore) | 72 (article . nnweb-gmane-wash-article) |
89 ;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") | 73 (id . "http://gmane.org/view.php?group=%s") |
90 ;;; (map . nnweb-dejanews-create-mapping) | 74 (reference . identity) |
91 ;;; (search . nnweb-dejanews-search) | 75 (map . nnweb-gmane-create-mapping) |
92 ;;; (address . "http://www.deja.com/=dnc/qs.xp") | 76 (search . nnweb-gmane-search) |
93 ;;; (identifier . nnweb-dejanews-identity)) | 77 (address . "http://gmane.org/") |
94 ;;; (dejanewsold | 78 (identifier . nnweb-gmane-identity))) |
95 ;;; (article . ignore) | |
96 ;;; (map . nnweb-dejanews-create-mapping) | |
97 ;;; (search . nnweb-dejanewsold-search) | |
98 ;;; (address . "http://www.deja.com/dnquery.xp") | |
99 ;;; (identifier . nnweb-dejanews-identity)) | |
100 (reference | |
101 (article . nnweb-reference-wash-article) | |
102 (map . nnweb-reference-create-mapping) | |
103 (search . nnweb-reference-search) | |
104 (address . "http://www.reference.com/cgi-bin/pn/go") | |
105 (identifier . identity)) | |
106 (altavista | |
107 (article . nnweb-altavista-wash-article) | |
108 (map . nnweb-altavista-create-mapping) | |
109 (search . nnweb-altavista-search) | |
110 (address . "http://www.altavista.digital.com/cgi-bin/query") | |
111 (id . "/cgi-bin/news?id@%s") | |
112 (identifier . identity))) | |
113 "Type-definition alist.") | 79 "Type-definition alist.") |
114 | 80 |
115 (defvoo nnweb-search nil | 81 (defvoo nnweb-search nil |
116 "Search string to feed to DejaNews.") | 82 "Search string to feed to Google.") |
117 | 83 |
118 (defvoo nnweb-max-hits 999 | 84 (defvoo nnweb-max-hits 999 |
119 "Maximum number of hits to display.") | 85 "Maximum number of hits to display.") |
120 | 86 |
121 (defvoo nnweb-ephemeral-p nil | 87 (defvoo nnweb-ephemeral-p nil |
195 (set-buffer (or buffer nntp-server-buffer)) | 161 (set-buffer (or buffer nntp-server-buffer)) |
196 (let* ((header (cadr (assq article nnweb-articles))) | 162 (let* ((header (cadr (assq article nnweb-articles))) |
197 (url (and header (mail-header-xref header)))) | 163 (url (and header (mail-header-xref header)))) |
198 (when (or (and url | 164 (when (or (and url |
199 (mm-with-unibyte-current-buffer | 165 (mm-with-unibyte-current-buffer |
200 (nnweb-fetch-url url))) | 166 (mm-url-insert url))) |
201 (and (stringp article) | 167 (and (stringp article) |
202 (nnweb-definition 'id t) | 168 (nnweb-definition 'id t) |
203 (let ((fetch (nnweb-definition 'id)) | 169 (let ((fetch (nnweb-definition 'id)) |
204 art active) | 170 art active) |
205 (when (string-match "^<\\(.*\\)>$" article) | 171 (when (string-match "^<\\(.*\\)>$" article) |
206 (setq art (match-string 1 article))) | 172 (setq art (match-string 1 article))) |
207 (when (and fetch art) | 173 (when (and fetch art) |
208 (setq url (format fetch art)) | 174 (setq url (format fetch art)) |
209 (mm-with-unibyte-current-buffer | 175 (mm-with-unibyte-current-buffer |
210 (nnweb-fetch-url url)) | 176 (mm-url-insert url)) |
211 (if (nnweb-definition 'reference t) | 177 (if (nnweb-definition 'reference t) |
212 (setq article | 178 (setq article |
213 (funcall (nnweb-definition | 179 (funcall (nnweb-definition |
214 'reference) article))))))) | 180 'reference) article))))))) |
215 (unless nnheader-callback-function | 181 (unless nnheader-callback-function |
235 | 201 |
236 (deffoo nnweb-request-update-info (group info &optional server) | 202 (deffoo nnweb-request-update-info (group info &optional server) |
237 (nnweb-possibly-change-server group server)) | 203 (nnweb-possibly-change-server group server)) |
238 | 204 |
239 (deffoo nnweb-asynchronous-p () | 205 (deffoo nnweb-asynchronous-p () |
240 t) | 206 nil) |
241 | 207 |
242 (deffoo nnweb-request-create-group (group &optional server args) | 208 (deffoo nnweb-request-create-group (group &optional server args) |
243 (nnweb-possibly-change-server nil server) | 209 (nnweb-possibly-change-server nil server) |
244 (nnweb-request-delete-group group) | 210 (nnweb-request-delete-group group) |
245 (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) | 211 (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) |
334 (nnheader-set-temp-buffer | 300 (nnheader-set-temp-buffer |
335 (format " *nnweb %s %s %s*" | 301 (format " *nnweb %s %s %s*" |
336 nnweb-type nnweb-search server)) | 302 nnweb-type nnweb-search server)) |
337 (current-buffer)))))) | 303 (current-buffer)))))) |
338 | 304 |
339 (defun nnweb-fetch-url (url) | |
340 (let (buf) | |
341 (save-excursion | |
342 (if (not nnheader-callback-function) | |
343 (progn | |
344 (with-temp-buffer | |
345 (mm-enable-multibyte) | |
346 (let ((coding-system-for-read 'binary) | |
347 (coding-system-for-write 'binary) | |
348 (default-process-coding-system 'binary)) | |
349 (nnweb-insert url)) | |
350 (setq buf (buffer-string))) | |
351 (erase-buffer) | |
352 (insert buf) | |
353 t) | |
354 (nnweb-url-retrieve-asynch | |
355 url 'nnweb-callback (current-buffer) nnheader-callback-function) | |
356 t)))) | |
357 | |
358 (defun nnweb-callback (buffer callback) | |
359 (when (gnus-buffer-live-p url-working-buffer) | |
360 (save-excursion | |
361 (set-buffer url-working-buffer) | |
362 (funcall (nnweb-definition 'article)) | |
363 (nnweb-decode-entities) | |
364 (set-buffer buffer) | |
365 (goto-char (point-max)) | |
366 (insert-buffer-substring url-working-buffer)) | |
367 (funcall callback t) | |
368 (gnus-kill-buffer url-working-buffer))) | |
369 | |
370 (defun nnweb-url-retrieve-asynch (url callback &rest data) | |
371 (let ((url-request-method "GET") | |
372 (old-asynch url-be-asynchronous) | |
373 (url-request-data nil) | |
374 (url-request-extra-headers nil) | |
375 (url-working-buffer (generate-new-buffer-name " *nnweb*"))) | |
376 (setq-default url-be-asynchronous t) | |
377 (save-excursion | |
378 (set-buffer (get-buffer-create url-working-buffer)) | |
379 (setq url-current-callback-data data | |
380 url-be-asynchronous t | |
381 url-current-callback-func callback) | |
382 (url-retrieve url nil)) | |
383 (setq-default url-be-asynchronous old-asynch))) | |
384 | |
385 (if (fboundp 'url-retrieve-synchronously) | |
386 (defun nnweb-url-retrieve-asynch (url callback &rest data) | |
387 (url-retrieve url callback data))) | |
388 | |
389 ;;; | |
390 ;;; DejaNews functions. | |
391 ;;; | |
392 | |
393 (defun nnweb-dejanews-create-mapping () | |
394 "Perform the search and create a number-to-url alist." | |
395 (save-excursion | |
396 (set-buffer nnweb-buffer) | |
397 (erase-buffer) | |
398 (when (funcall (nnweb-definition 'search) nnweb-search) | |
399 (let ((i 0) | |
400 (more t) | |
401 (case-fold-search t) | |
402 (active (or (cadr (assoc nnweb-group nnweb-group-alist)) | |
403 (cons 1 0))) | |
404 subject date from | |
405 map url parse a table group text) | |
406 (while more | |
407 ;; Go through all the article hits on this page. | |
408 (goto-char (point-min)) | |
409 (setq parse (w3-parse-buffer (current-buffer)) | |
410 table (nth 1 (nnweb-parse-find-all 'table parse))) | |
411 (dolist (row (nth 2 (car (nth 2 table)))) | |
412 (setq a (nnweb-parse-find 'a row) | |
413 url (cdr (assq 'href (nth 1 a))) | |
414 text (nreverse (nnweb-text row))) | |
415 (when a | |
416 (setq subject (nth 4 text) | |
417 group (nth 2 text) | |
418 date (nth 1 text) | |
419 from (nth 0 text)) | |
420 (if (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date) | |
421 (setq date (format "%s %s 00:00:00 %s" | |
422 (car (rassq (string-to-number | |
423 (match-string 2 date)) | |
424 parse-time-months)) | |
425 (match-string 3 date) | |
426 (match-string 1 date))) | |
427 (setq date "Jan 1 00:00:00 0000")) | |
428 (incf i) | |
429 (setq url (concat url "&fmt=text")) | |
430 (when (string-match "&context=[^&]+" url) | |
431 (setq url (replace-match "" t t url))) | |
432 (unless (nnweb-get-hashtb url) | |
433 (push | |
434 (list | |
435 (incf (cdr active)) | |
436 (make-full-mail-header | |
437 (cdr active) (concat subject " (" group ")") from date | |
438 (concat "<" (nnweb-identifier url) "@dejanews>") | |
439 nil 0 0 url)) | |
440 map) | |
441 (nnweb-set-hashtb (cadar map) (car map))))) | |
442 ;; See whether there is a "Get next 20 hits" button here. | |
443 (goto-char (point-min)) | |
444 (if (or (not (re-search-forward | |
445 "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t)) | |
446 (>= i nnweb-max-hits)) | |
447 (setq more nil) | |
448 ;; Yup -- fetch it. | |
449 (setq more (match-string 1)) | |
450 (erase-buffer) | |
451 (url-insert-file-contents more))) | |
452 ;; Return the articles in the right order. | |
453 (setq nnweb-articles | |
454 (sort (nconc nnweb-articles map) 'car-less-than-car)))))) | |
455 | |
456 (defun nnweb-dejanews-search (search) | |
457 (nnweb-insert | |
458 (concat | |
459 (nnweb-definition 'address) | |
460 "?" | |
461 (nnweb-encode-www-form-urlencoded | |
462 `(("ST" . "PS") | |
463 ("svcclass" . "dnyr") | |
464 ("QRY" . ,search) | |
465 ("defaultOp" . "AND") | |
466 ("DBS" . "1") | |
467 ("OP" . "dnquery.xp") | |
468 ("LNG" . "ALL") | |
469 ("maxhits" . "100") | |
470 ("threaded" . "0") | |
471 ("format" . "verbose2") | |
472 ("showsort" . "date") | |
473 ("agesign" . "1") | |
474 ("ageweight" . "1"))))) | |
475 t) | |
476 | |
477 (defun nnweb-dejanewsold-search (search) | |
478 (nnweb-fetch-form | |
479 (nnweb-definition 'address) | |
480 `(("query" . ,search) | |
481 ("defaultOp" . "AND") | |
482 ("svcclass" . "dnold") | |
483 ("maxhits" . "100") | |
484 ("format" . "verbose2") | |
485 ("threaded" . "0") | |
486 ("showsort" . "date") | |
487 ("agesign" . "1") | |
488 ("ageweight" . "1"))) | |
489 t) | |
490 | |
491 (defun nnweb-dejanews-identity (url) | |
492 "Return an unique identifier based on URL." | |
493 (if (string-match "AN=\\([0-9]+\\)" url) | |
494 (match-string 1 url) | |
495 url)) | |
496 | |
497 ;;; | |
498 ;;; InReference | |
499 ;;; | |
500 | |
501 (defun nnweb-reference-create-mapping () | |
502 "Perform the search and create a number-to-url alist." | |
503 (save-excursion | |
504 (set-buffer nnweb-buffer) | |
505 (erase-buffer) | |
506 (when (funcall (nnweb-definition 'search) nnweb-search) | |
507 (let ((i 0) | |
508 (more t) | |
509 (case-fold-search t) | |
510 (active (or (cadr (assoc nnweb-group nnweb-group-alist)) | |
511 (cons 1 0))) | |
512 Subject Score Date Newsgroups From Message-ID | |
513 map url) | |
514 (while more | |
515 ;; Go through all the article hits on this page. | |
516 (goto-char (point-min)) | |
517 (search-forward "</pre><hr>" nil t) | |
518 (delete-region (point-min) (point)) | |
519 (goto-char (point-min)) | |
520 (while (re-search-forward "^ +[0-9]+\\." nil t) | |
521 (narrow-to-region | |
522 (point) | |
523 (if (re-search-forward "^$" nil t) | |
524 (match-beginning 0) | |
525 (point-max))) | |
526 (goto-char (point-min)) | |
527 (when (looking-at ".*href=\"\\([^\"]+\\)\"") | |
528 (setq url (match-string 1))) | |
529 (nnweb-remove-markup) | |
530 (goto-char (point-min)) | |
531 (while (search-forward "\t" nil t) | |
532 (replace-match " ")) | |
533 (goto-char (point-min)) | |
534 (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t) | |
535 (set (intern (match-string 1)) (match-string 2))) | |
536 (widen) | |
537 (search-forward "</pre>" nil t) | |
538 (incf i) | |
539 (unless (nnweb-get-hashtb url) | |
540 (push | |
541 (list | |
542 (incf (cdr active)) | |
543 (make-full-mail-header | |
544 (cdr active) (concat "(" Newsgroups ") " Subject) From Date | |
545 Message-ID | |
546 nil 0 (string-to-int Score) url)) | |
547 map) | |
548 (nnweb-set-hashtb (cadar map) (car map)))) | |
549 (setq more nil)) | |
550 ;; Return the articles in the right order. | |
551 (setq nnweb-articles | |
552 (sort (nconc nnweb-articles map) 'car-less-than-car)))))) | |
553 | |
554 (defun nnweb-reference-wash-article () | |
555 (let ((case-fold-search t)) | |
556 (goto-char (point-min)) | |
557 (re-search-forward "^</center><hr>" nil t) | |
558 (delete-region (point-min) (point)) | |
559 (search-forward "<pre>" nil t) | |
560 (forward-line -1) | |
561 (let ((body (point-marker))) | |
562 (search-forward "</pre>" nil t) | |
563 (delete-region (point) (point-max)) | |
564 (nnweb-remove-markup) | |
565 (goto-char (point-min)) | |
566 (while (looking-at " *$") | |
567 (gnus-delete-line)) | |
568 (narrow-to-region (point-min) body) | |
569 (while (and (re-search-forward "^$" nil t) | |
570 (not (eobp))) | |
571 (gnus-delete-line)) | |
572 (goto-char (point-min)) | |
573 (while (looking-at "\\(^[^ ]+:\\) *") | |
574 (replace-match "\\1 " t) | |
575 (forward-line 1)) | |
576 (goto-char (point-min)) | |
577 (when (re-search-forward "^References:" nil t) | |
578 (narrow-to-region | |
579 (point) (if (re-search-forward "^$\\|^[^:]+:" nil t) | |
580 (match-beginning 0) | |
581 (point-max))) | |
582 (goto-char (point-min)) | |
583 (while (not (eobp)) | |
584 (unless (looking-at "References") | |
585 (insert "\t") | |
586 (forward-line 1))) | |
587 (goto-char (point-min)) | |
588 (while (search-forward "," nil t) | |
589 (replace-match " " t t))) | |
590 (widen) | |
591 (nnweb-decode-entities) | |
592 (set-marker body nil)))) | |
593 | |
594 (defun nnweb-reference-search (search) | |
595 (url-insert-file-contents | |
596 (concat | |
597 (nnweb-definition 'address) | |
598 "?" | |
599 (nnweb-encode-www-form-urlencoded | |
600 `(("search" . "advanced") | |
601 ("querytext" . ,search) | |
602 ("subj" . "") | |
603 ("name" . "") | |
604 ("login" . "") | |
605 ("host" . "") | |
606 ("organization" . "") | |
607 ("groups" . "") | |
608 ("keywords" . "") | |
609 ("choice" . "Search") | |
610 ("startmonth" . "Jul") | |
611 ("startday" . "25") | |
612 ("startyear" . "1996") | |
613 ("endmonth" . "Aug") | |
614 ("endday" . "24") | |
615 ("endyear" . "1996") | |
616 ("mode" . "Quick") | |
617 ("verbosity" . "Verbose") | |
618 ("ranking" . "Relevance") | |
619 ("first" . "1") | |
620 ("last" . "25") | |
621 ("score" . "50"))))) | |
622 (setq buffer-file-name nil) | |
623 t) | |
624 | |
625 ;;; | |
626 ;;; Alta Vista | |
627 ;;; | |
628 | |
629 (defun nnweb-altavista-create-mapping () | |
630 "Perform the search and create a number-to-url alist." | |
631 (save-excursion | |
632 (set-buffer nnweb-buffer) | |
633 (erase-buffer) | |
634 (let ((part 0)) | |
635 (when (funcall (nnweb-definition 'search) nnweb-search part) | |
636 (let ((i 0) | |
637 (more t) | |
638 (case-fold-search t) | |
639 (active (or (cadr (assoc nnweb-group nnweb-group-alist)) | |
640 (cons 1 0))) | |
641 subject date from id group | |
642 map url) | |
643 (while more | |
644 ;; Go through all the article hits on this page. | |
645 (goto-char (point-min)) | |
646 (search-forward "<dt>" nil t) | |
647 (delete-region (point-min) (match-beginning 0)) | |
648 (goto-char (point-min)) | |
649 (while (search-forward "<dt>" nil t) | |
650 (replace-match "\n<blubb>")) | |
651 (nnweb-decode-entities) | |
652 (goto-char (point-min)) | |
653 (while (re-search-forward "<blubb>.*href=\"\\([^\"]+\\)\"><strong>\\([^>]*\\)</strong></a><dd>\\([^-]+\\)- <b>\\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)</a><P>" | |
654 nil t) | |
655 (setq url (match-string 1) | |
656 subject (match-string 2) | |
657 date (match-string 3) | |
658 group (match-string 4) | |
659 id (concat "<" (match-string 5) ">") | |
660 from (match-string 6)) | |
661 (incf i) | |
662 (unless (nnweb-get-hashtb url) | |
663 (push | |
664 (list | |
665 (incf (cdr active)) | |
666 (make-full-mail-header | |
667 (cdr active) (concat "(" group ") " subject) from date | |
668 id nil 0 0 url)) | |
669 map) | |
670 (nnweb-set-hashtb (cadar map) (car map)))) | |
671 ;; See if we want more. | |
672 (when (or (not nnweb-articles) | |
673 (>= i nnweb-max-hits) | |
674 (not (funcall (nnweb-definition 'search) | |
675 nnweb-search (incf part)))) | |
676 (setq more nil))) | |
677 ;; Return the articles in the right order. | |
678 (setq nnweb-articles | |
679 (sort (nconc nnweb-articles map) 'car-less-than-car))))))) | |
680 | |
681 (defun nnweb-altavista-wash-article () | |
682 (goto-char (point-min)) | |
683 (let ((case-fold-search t)) | |
684 (when (re-search-forward "^<strong>" nil t) | |
685 (delete-region (point-min) (match-beginning 0))) | |
686 (goto-char (point-min)) | |
687 (while (looking-at "<strong>\\([^ ]+\\) +</strong> +\\(.*\\)$") | |
688 (replace-match "\\1: \\2" t) | |
689 (forward-line 1)) | |
690 (when (re-search-backward "^References:" nil t) | |
691 (narrow-to-region (point) (progn (forward-line 1) (point))) | |
692 (goto-char (point-min)) | |
693 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t) | |
694 (replace-match "<\\1> " t))) | |
695 (widen) | |
696 (nnweb-remove-markup) | |
697 (nnweb-decode-entities))) | |
698 | |
699 (defun nnweb-altavista-search (search &optional part) | |
700 (url-insert-file-contents | |
701 (concat | |
702 (nnweb-definition 'address) | |
703 "?" | |
704 (nnweb-encode-www-form-urlencoded | |
705 `(("pg" . "aq") | |
706 ("what" . "news") | |
707 ,@(when part `(("stq" . ,(int-to-string (* part 30))))) | |
708 ("fmt" . "d") | |
709 ("q" . ,search) | |
710 ("r" . "") | |
711 ("d0" . "") | |
712 ("d1" . ""))))) | |
713 (setq buffer-file-name nil) | |
714 t) | |
715 | |
716 ;;; | 305 ;;; |
717 ;;; Deja bought by google.com | 306 ;;; Deja bought by google.com |
718 ;;; | 307 ;;; |
719 | 308 |
720 (defun nnweb-google-wash-article () | 309 (defun nnweb-google-wash-article () |
729 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t) | 318 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t) |
730 (replace-match "")) | 319 (replace-match "")) |
731 (goto-char (point-min)) | 320 (goto-char (point-min)) |
732 (while (search-forward "<br>" nil t) | 321 (while (search-forward "<br>" nil t) |
733 (replace-match "\n")) | 322 (replace-match "\n")) |
734 (nnweb-remove-markup) | 323 (mm-url-remove-markup) |
735 (goto-char (point-min)) | 324 (goto-char (point-min)) |
736 (while (re-search-forward "^[ \t]*\n" nil t) | 325 (while (re-search-forward "^[ \t]*\n" nil t) |
737 (replace-match "")) | 326 (replace-match "")) |
738 (goto-char (point-max)) | 327 (goto-char (point-max)) |
739 (insert "\n") | 328 (insert "\n") |
740 (widen) | 329 (widen) |
741 (narrow-to-region (point) (point-max)) | 330 (narrow-to-region (point) (point-max)) |
742 (search-forward "</pre>" nil t) | 331 (search-forward "</pre>" nil t) |
743 (delete-region (point) (point-max)) | 332 (delete-region (point) (point-max)) |
744 (nnweb-remove-markup) | 333 (mm-url-remove-markup) |
745 (widen))) | 334 (widen))) |
746 | 335 |
747 (defun nnweb-google-parse-1 (&optional Message-ID) | 336 (defun nnweb-google-parse-1 (&optional Message-ID) |
748 (let ((i 0) | 337 (let ((i 0) |
749 (case-fold-search t) | 338 (case-fold-search t) |
761 (setq mid (match-string 2) | 350 (setq mid (match-string 2) |
762 url (format | 351 url (format |
763 "http://groups.google.com/groups?selm=%s&output=gplain" mid)) | 352 "http://groups.google.com/groups?selm=%s&output=gplain" mid)) |
764 (narrow-to-region (search-forward ">" nil t) | 353 (narrow-to-region (search-forward ">" nil t) |
765 (search-forward "</a>" nil t)) | 354 (search-forward "</a>" nil t)) |
766 (nnweb-remove-markup) | 355 (mm-url-remove-markup) |
767 (nnweb-decode-entities) | 356 (mm-url-decode-entities) |
768 (setq Subject (buffer-string)) | 357 (setq Subject (buffer-string)) |
769 (goto-char (point-max)) | 358 (goto-char (point-max)) |
770 (widen) | 359 (widen) |
771 (forward-line 1) | 360 (forward-line 2) |
772 (when (looking-at "<br><font[^>]+>") | 361 (when (looking-at "<br><font[^>]+>") |
773 (goto-char (match-end 0))) | 362 (goto-char (match-end 0))) |
774 (if (not (looking-at "<a[^>]+>")) | 363 (if (not (looking-at "<a[^>]+>")) |
775 (skip-chars-forward " \t") | 364 (skip-chars-forward " \t") |
776 (narrow-to-region (point) | 365 (narrow-to-region (point) |
777 (search-forward "</a>" nil t)) | 366 (search-forward "</a>" nil t)) |
778 (nnweb-remove-markup) | 367 (mm-url-remove-markup) |
779 (nnweb-decode-entities) | 368 (mm-url-decode-entities) |
780 (setq Newsgroups (buffer-string)) | 369 (setq Newsgroups (buffer-string)) |
781 (goto-char (point-max)) | 370 (goto-char (point-max)) |
782 (widen) | 371 (widen) |
783 (skip-chars-forward "- \t")) | 372 (skip-chars-forward "- \t")) |
784 (when (looking-at | 373 (when (looking-at |
785 "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a") | 374 "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a") |
786 (setq From (match-string 2) | 375 (setq From (match-string 4) |
787 Date (match-string 1))) | 376 Date (format "%s %s 00:00:00 %s" |
377 (match-string 2) (match-string 1) | |
378 (match-string 3)))) | |
788 (forward-line 1) | 379 (forward-line 1) |
789 (incf i) | 380 (incf i) |
790 (unless (nnweb-get-hashtb url) | 381 (unless (nnweb-get-hashtb url) |
791 (push | 382 (push |
792 (list | 383 (list |
805 (let ((map (nnweb-google-parse-1 id)) header) | 396 (let ((map (nnweb-google-parse-1 id)) header) |
806 (setq nnweb-articles | 397 (setq nnweb-articles |
807 (nconc nnweb-articles map)) | 398 (nconc nnweb-articles map)) |
808 (when (setq header (cadar map)) | 399 (when (setq header (cadar map)) |
809 (mm-with-unibyte-current-buffer | 400 (mm-with-unibyte-current-buffer |
810 (nnweb-fetch-url (mail-header-xref header))) | 401 (mm-url-insert (mail-header-xref header))) |
811 (caar map)))) | 402 (caar map)))) |
812 | 403 |
813 (defun nnweb-google-create-mapping () | 404 (defun nnweb-google-create-mapping () |
814 "Perform the search and create a number-to-url alist." | 405 "Perform the search and create a number-to-url alist." |
815 (save-excursion | 406 (save-excursion |
816 (set-buffer nnweb-buffer) | 407 (set-buffer nnweb-buffer) |
817 (erase-buffer) | 408 (erase-buffer) |
818 (when (funcall (nnweb-definition 'search) nnweb-search) | 409 (when (funcall (nnweb-definition 'search) nnweb-search) |
819 (let ((more t)) | 410 (let ((more t) |
411 (i 0)) | |
820 (while more | 412 (while more |
821 (setq nnweb-articles | 413 (setq nnweb-articles |
822 (nconc nnweb-articles (nnweb-google-parse-1))) | 414 (nconc nnweb-articles (nnweb-google-parse-1))) |
823 ;; FIXME: There is more. | 415 ;; Check if there are more articles to fetch |
824 (setq more nil)) | 416 (goto-char (point-min)) |
417 (incf i 100) | |
418 (if (or (not (re-search-forward | |
419 "<td nowrap><a href=\\([^>]+\\).*<span class=b>Next</span>" nil t)) | |
420 (>= i nnweb-max-hits)) | |
421 (setq more nil) | |
422 ;; Yup, there are more articles | |
423 (setq more (concat "http://groups.google.com" (match-string 1))) | |
424 (when more | |
425 (erase-buffer) | |
426 (mm-url-insert more)))) | |
825 ;; Return the articles in the right order. | 427 ;; Return the articles in the right order. |
826 (setq nnweb-articles | 428 (setq nnweb-articles |
827 (sort nnweb-articles 'car-less-than-car)))))) | 429 (sort nnweb-articles 'car-less-than-car)))))) |
828 | 430 |
829 (defun nnweb-google-search (search) | 431 (defun nnweb-google-search (search) |
830 (nnweb-insert | 432 (mm-url-insert |
831 (concat | 433 (concat |
832 (nnweb-definition 'address) | 434 (nnweb-definition 'address) |
833 "?" | 435 "?" |
834 (nnweb-encode-www-form-urlencoded | 436 (mm-url-encode-www-form-urlencoded |
835 `(("q" . ,search) | 437 `(("q" . ,search) |
836 ("num". "100") | 438 ("num". "100") |
837 ("hq" . "") | 439 ("hq" . "") |
838 ("hl" . "") | 440 ("hl" . "") |
839 ("lr" . "") | 441 ("lr" . "") |
842 t) | 444 t) |
843 | 445 |
844 (defun nnweb-google-identity (url) | 446 (defun nnweb-google-identity (url) |
845 "Return an unique identifier based on URL." | 447 "Return an unique identifier based on URL." |
846 (if (string-match "selm=\\([^ &>]+\\)" url) | 448 (if (string-match "selm=\\([^ &>]+\\)" url) |
449 (match-string 1 url) | |
450 url)) | |
451 | |
452 ;;; | |
453 ;;; gmane.org | |
454 ;;; | |
455 (defun nnweb-gmane-create-mapping () | |
456 "Perform the search and create a number-to-url alist." | |
457 (save-excursion | |
458 (set-buffer nnweb-buffer) | |
459 (erase-buffer) | |
460 (when (funcall (nnweb-definition 'search) nnweb-search) | |
461 (let ((more t) | |
462 (case-fold-search t) | |
463 (active (or (cadr (assoc nnweb-group nnweb-group-alist)) | |
464 (cons 1 0))) | |
465 subject group url | |
466 map) | |
467 ;; Remove stuff from the beginning of results | |
468 (goto-char (point-min)) | |
469 (search-forward "Search Results</h1><ul>" nil t) | |
470 (delete-region (point-min) (point)) | |
471 (goto-char (point-min)) | |
472 ;; Iterate over the actual hits | |
473 (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t) | |
474 (setq url (concat "http://gmane.org/" (match-string 1))) | |
475 (setq subject (match-string 2)) | |
476 (unless (nnweb-get-hashtb url) | |
477 (push | |
478 (list | |
479 (incf (cdr active)) | |
480 (make-full-mail-header | |
481 (cdr active) (concat "(" group ") " subject) nil nil | |
482 nil nil 0 0 url)) | |
483 map) | |
484 (nnweb-set-hashtb (cadar map) (car map)))) | |
485 ;; Return the articles in the right order. | |
486 (setq nnweb-articles | |
487 (sort (nconc nnweb-articles map) 'car-less-than-car)))))) | |
488 | |
489 (defun nnweb-gmane-wash-article () | |
490 (let ((case-fold-search t)) | |
491 (goto-char (point-min)) | |
492 (re-search-forward "<!--X-Head-of-Message-->" nil t) | |
493 (delete-region (point-min) (point)) | |
494 (goto-char (point-min)) | |
495 (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>") | |
496 (replace-match "\\1\\2" t) | |
497 (forward-line 1)) | |
498 (mm-url-remove-markup))) | |
499 | |
500 (defun nnweb-gmane-search (search) | |
501 (mm-url-insert | |
502 (concat | |
503 (nnweb-definition 'address) | |
504 "?" | |
505 (mm-url-encode-www-form-urlencoded | |
506 `(("query" . ,search))))) | |
507 (setq buffer-file-name nil) | |
508 t) | |
509 | |
510 | |
511 (defun nnweb-gmane-identity (url) | |
512 "Return a unique identifier based on URL." | |
513 (if (string-match "group=\\(.+\\)" url) | |
847 (match-string 1 url) | 514 (match-string 1 url) |
848 url)) | 515 url)) |
849 | 516 |
850 ;;; | 517 ;;; |
851 ;;; General web/w3 interface utility functions | 518 ;;; General web/w3 interface utility functions |
867 " ")) | 534 " ")) |
868 (insert ">\n") | 535 (insert ">\n") |
869 (mapcar 'nnweb-insert-html (nth 2 parse)) | 536 (mapcar 'nnweb-insert-html (nth 2 parse)) |
870 (insert "</" (symbol-name (car parse)) ">\n"))) | 537 (insert "</" (symbol-name (car parse)) ">\n"))) |
871 | 538 |
872 (defun nnweb-encode-www-form-urlencoded (pairs) | |
873 "Return PAIRS encoded for forms." | |
874 (mapconcat | |
875 (function | |
876 (lambda (data) | |
877 (concat (w3-form-encode-xwfu (car data)) "=" | |
878 (w3-form-encode-xwfu (cdr data))))) | |
879 pairs "&")) | |
880 | |
881 (defun nnweb-fetch-form (url pairs) | |
882 "Fetch a form from URL with PAIRS as the data using the POST method." | |
883 (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs)) | |
884 (url-request-method "POST") | |
885 (url-request-extra-headers | |
886 '(("Content-type" . "application/x-www-form-urlencoded")))) | |
887 (url-insert-file-contents url) | |
888 (setq buffer-file-name nil)) | |
889 t) | |
890 | |
891 (defun nnweb-decode-entities () | |
892 "Decode all HTML entities." | |
893 (goto-char (point-min)) | |
894 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) | |
895 (let ((elem (if (eq (aref (match-string 1) 0) ?\#) | |
896 (let ((c | |
897 (string-to-number (substring | |
898 (match-string 1) 1)))) | |
899 (if (mm-char-or-char-int-p c) c 32)) | |
900 (or (cdr (assq (intern (match-string 1)) | |
901 w3-html-entities)) | |
902 ?#)))) | |
903 (unless (stringp elem) | |
904 (setq elem (char-to-string elem))) | |
905 (replace-match elem t t)))) | |
906 | |
907 (defun nnweb-decode-entities-string (string) | |
908 (with-temp-buffer | |
909 (insert string) | |
910 (nnweb-decode-entities) | |
911 (buffer-substring (point-min) (point-max)))) | |
912 | |
913 (defun nnweb-remove-markup () | |
914 "Remove all HTML markup, leaving just plain text." | |
915 (goto-char (point-min)) | |
916 (while (search-forward "<!--" nil t) | |
917 (delete-region (match-beginning 0) | |
918 (or (search-forward "-->" nil t) | |
919 (point-max)))) | |
920 (goto-char (point-min)) | |
921 (while (re-search-forward "<[^>]+>" nil t) | |
922 (replace-match "" t t))) | |
923 | |
924 (defun nnweb-insert (url &optional follow-refresh) | |
925 "Insert the contents from an URL in the current buffer. | |
926 If FOLLOW-REFRESH is non-nil, redirect refresh url in META." | |
927 (let ((name buffer-file-name)) | |
928 (if follow-refresh | |
929 (save-restriction | |
930 (narrow-to-region (point) (point)) | |
931 (url-insert-file-contents url) | |
932 (goto-char (point-min)) | |
933 (when (re-search-forward | |
934 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) | |
935 (let ((url (match-string 1))) | |
936 (delete-region (point-min) (point-max)) | |
937 (nnweb-insert url t)))) | |
938 (url-insert-file-contents url)) | |
939 (setq buffer-file-name name))) | |
940 | |
941 (defun nnweb-parse-find (type parse &optional maxdepth) | 539 (defun nnweb-parse-find (type parse &optional maxdepth) |
942 "Find the element of TYPE in PARSE." | 540 "Find the element of TYPE in PARSE." |
943 (catch 'found | 541 (catch 'found |
944 (nnweb-parse-find-1 type parse maxdepth))) | 542 (nnweb-parse-find-1 type parse maxdepth))) |
945 | 543 |
985 (push element nnweb-text) | 583 (push element nnweb-text) |
986 (when (and (consp element) | 584 (when (and (consp element) |
987 (listp (cdr element))) | 585 (listp (cdr element))) |
988 (nnweb-text-1 element))))) | 586 (nnweb-text-1 element))))) |
989 | 587 |
990 (defun nnweb-replace-in-string (string match newtext) | |
991 (while (string-match match string) | |
992 (setq string (replace-match newtext t t string))) | |
993 string) | |
994 | |
995 (provide 'nnweb) | 588 (provide 'nnweb) |
996 | 589 |
997 ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697 | 590 ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697 |
998 ;;; nnweb.el ends here | 591 ;;; nnweb.el ends here |