comparison lisp/gnus/nnweb.el @ 56927:55fd4f77387a after-merge-gnus-5_10

Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523 Merge from emacs--gnus--5.10, gnus--rel--5.10 Patches applied: * miles@gnu.org--gnu-2004/emacs--gnus--5.10--base-0 tag of miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-464 * miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-1 Import from CVS branch gnus-5_10-branch * miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-2 Merge from lorentey@elte.hu--2004/emacs--multi-tty--0, emacs--cvs-trunk--0 * miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-3 Merge from gnus--rel--5.10 * miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-4 Merge from gnus--rel--5.10 * miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-18 Update from CVS * miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-19 Remove autoconf-generated files from archive * miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-20 Update from CVS
author Miles Bader <miles@gnu.org>
date Sat, 04 Sep 2004 13:13:48 +0000
parents 695cf19ef79e
children 3f394ef46f57 cce1c0ee76ee
comparison
equal deleted inserted replaced
56926:f8e248e9a717 56927:55fd4f77387a
1 ;;; nnweb.el --- retrieving articles via web search engines 1 ;;; nnweb.el --- retrieving articles via web search engines
2 ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 2 ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003
3 ;; Free Software Foundation, Inc. 3 ;; Free Software Foundation, Inc.
4 4
5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> 5 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
6 ;; Keywords: news 6 ;; Keywords: news
7 7
22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, 22 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
23 ;; Boston, MA 02111-1307, USA. 23 ;; Boston, MA 02111-1307, USA.
24 24
25 ;;; Commentary: 25 ;;; Commentary:
26 26
27 ;; Note: You need to have `url' and `w3' installed for this 27 ;; Note: You need to have `w3' installed for some functions to work.
28 ;; backend to work.
29 28
30 ;;; Code: 29 ;;; Code:
31 30
32 (eval-when-compile (require 'cl)) 31 (eval-when-compile (require 'cl))
33 32
35 (require 'message) 34 (require 'message)
36 (require 'gnus-util) 35 (require 'gnus-util)
37 (require 'gnus) 36 (require 'gnus)
38 (require 'nnmail) 37 (require 'nnmail)
39 (require 'mm-util) 38 (require 'mm-util)
40 (eval-when-compile 39 (require 'mm-url)
40 (eval-and-compile
41 (ignore-errors 41 (ignore-errors
42 (require 'w3) 42 (require 'url)))
43 (require 'url) 43 (autoload 'w3-parse-buffer "w3-parse")
44 (require 'w3-forms)))
45
46 ;; Report failure to find w3 at load time if appropriate.
47 (unless noninteractive
48 (eval '(progn
49 (require 'w3)
50 (require 'url)
51 (require 'w3-forms))))
52 44
53 (nnoo-declare nnweb) 45 (nnoo-declare nnweb)
54 46
55 (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") 47 (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
56 "Where nnweb will save its files.") 48 "Where nnweb will save its files.")
57 49
58 (defvoo nnweb-type 'google 50 (defvoo nnweb-type 'google
59 "What search engine type is being used. 51 "What search engine type is being used.
60 Valid types include `google', `dejanews', `dejanewsold', `reference', 52 Valid types include `google', `dejanews', and `gmane'.")
61 and `altavista'.")
62 53
63 (defvar nnweb-type-definition 54 (defvar nnweb-type-definition
64 '( 55 '((google
65 (google
66 ;;(article . nnweb-google-wash-article)
67 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
68 (article . ignore) 56 (article . ignore)
69 (id . "http://groups.google.com/groups?selm=%s&output=gplain") 57 (id . "http://groups.google.com/groups?selm=%s&output=gplain")
70 ;;(reference . nnweb-google-reference)
71 (reference . identity) 58 (reference . identity)
72 (map . nnweb-google-create-mapping) 59 (map . nnweb-google-create-mapping)
73 (search . nnweb-google-search) 60 (search . nnweb-google-search)
74 (address . "http://groups.google.com/groups") 61 (address . "http://groups.google.com/groups")
75 (identifier . nnweb-google-identity)) 62 (identifier . nnweb-google-identity))
76 (dejanews ;; alias of google 63 (dejanews ;; alias of google
77 ;;(article . nnweb-google-wash-article)
78 ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
79 (article . ignore) 64 (article . ignore)
80 (id . "http://groups.google.com/groups?selm=%s&output=gplain") 65 (id . "http://groups.google.com/groups?selm=%s&output=gplain")
81 ;;(reference . nnweb-google-reference)
82 (reference . identity) 66 (reference . identity)
83 (map . nnweb-google-create-mapping) 67 (map . nnweb-google-create-mapping)
84 (search . nnweb-google-search) 68 (search . nnweb-google-search)
85 (address . "http://groups.google.com/groups") 69 (address . "http://groups.google.com/groups")
86 (identifier . nnweb-google-identity)) 70 (identifier . nnweb-google-identity))
87 ;;; (dejanews 71 (gmane
88 ;;; (article . ignore) 72 (article . nnweb-gmane-wash-article)
89 ;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") 73 (id . "http://gmane.org/view.php?group=%s")
90 ;;; (map . nnweb-dejanews-create-mapping) 74 (reference . identity)
91 ;;; (search . nnweb-dejanews-search) 75 (map . nnweb-gmane-create-mapping)
92 ;;; (address . "http://www.deja.com/=dnc/qs.xp") 76 (search . nnweb-gmane-search)
93 ;;; (identifier . nnweb-dejanews-identity)) 77 (address . "http://gmane.org/")
94 ;;; (dejanewsold 78 (identifier . nnweb-gmane-identity)))
95 ;;; (article . ignore)
96 ;;; (map . nnweb-dejanews-create-mapping)
97 ;;; (search . nnweb-dejanewsold-search)
98 ;;; (address . "http://www.deja.com/dnquery.xp")
99 ;;; (identifier . nnweb-dejanews-identity))
100 (reference
101 (article . nnweb-reference-wash-article)
102 (map . nnweb-reference-create-mapping)
103 (search . nnweb-reference-search)
104 (address . "http://www.reference.com/cgi-bin/pn/go")
105 (identifier . identity))
106 (altavista
107 (article . nnweb-altavista-wash-article)
108 (map . nnweb-altavista-create-mapping)
109 (search . nnweb-altavista-search)
110 (address . "http://www.altavista.digital.com/cgi-bin/query")
111 (id . "/cgi-bin/news?id@%s")
112 (identifier . identity)))
113 "Type-definition alist.") 79 "Type-definition alist.")
114 80
115 (defvoo nnweb-search nil 81 (defvoo nnweb-search nil
116 "Search string to feed to DejaNews.") 82 "Search string to feed to Google.")
117 83
118 (defvoo nnweb-max-hits 999 84 (defvoo nnweb-max-hits 999
119 "Maximum number of hits to display.") 85 "Maximum number of hits to display.")
120 86
121 (defvoo nnweb-ephemeral-p nil 87 (defvoo nnweb-ephemeral-p nil
195 (set-buffer (or buffer nntp-server-buffer)) 161 (set-buffer (or buffer nntp-server-buffer))
196 (let* ((header (cadr (assq article nnweb-articles))) 162 (let* ((header (cadr (assq article nnweb-articles)))
197 (url (and header (mail-header-xref header)))) 163 (url (and header (mail-header-xref header))))
198 (when (or (and url 164 (when (or (and url
199 (mm-with-unibyte-current-buffer 165 (mm-with-unibyte-current-buffer
200 (nnweb-fetch-url url))) 166 (mm-url-insert url)))
201 (and (stringp article) 167 (and (stringp article)
202 (nnweb-definition 'id t) 168 (nnweb-definition 'id t)
203 (let ((fetch (nnweb-definition 'id)) 169 (let ((fetch (nnweb-definition 'id))
204 art active) 170 art active)
205 (when (string-match "^<\\(.*\\)>$" article) 171 (when (string-match "^<\\(.*\\)>$" article)
206 (setq art (match-string 1 article))) 172 (setq art (match-string 1 article)))
207 (when (and fetch art) 173 (when (and fetch art)
208 (setq url (format fetch art)) 174 (setq url (format fetch art))
209 (mm-with-unibyte-current-buffer 175 (mm-with-unibyte-current-buffer
210 (nnweb-fetch-url url)) 176 (mm-url-insert url))
211 (if (nnweb-definition 'reference t) 177 (if (nnweb-definition 'reference t)
212 (setq article 178 (setq article
213 (funcall (nnweb-definition 179 (funcall (nnweb-definition
214 'reference) article))))))) 180 'reference) article)))))))
215 (unless nnheader-callback-function 181 (unless nnheader-callback-function
235 201
236 (deffoo nnweb-request-update-info (group info &optional server) 202 (deffoo nnweb-request-update-info (group info &optional server)
237 (nnweb-possibly-change-server group server)) 203 (nnweb-possibly-change-server group server))
238 204
239 (deffoo nnweb-asynchronous-p () 205 (deffoo nnweb-asynchronous-p ()
240 t) 206 nil)
241 207
242 (deffoo nnweb-request-create-group (group &optional server args) 208 (deffoo nnweb-request-create-group (group &optional server args)
243 (nnweb-possibly-change-server nil server) 209 (nnweb-possibly-change-server nil server)
244 (nnweb-request-delete-group group) 210 (nnweb-request-delete-group group)
245 (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist) 211 (push `(,group ,(cons 1 0) ,@args) nnweb-group-alist)
334 (nnheader-set-temp-buffer 300 (nnheader-set-temp-buffer
335 (format " *nnweb %s %s %s*" 301 (format " *nnweb %s %s %s*"
336 nnweb-type nnweb-search server)) 302 nnweb-type nnweb-search server))
337 (current-buffer)))))) 303 (current-buffer))))))
338 304
339 (defun nnweb-fetch-url (url)
340 (let (buf)
341 (save-excursion
342 (if (not nnheader-callback-function)
343 (progn
344 (with-temp-buffer
345 (mm-enable-multibyte)
346 (let ((coding-system-for-read 'binary)
347 (coding-system-for-write 'binary)
348 (default-process-coding-system 'binary))
349 (nnweb-insert url))
350 (setq buf (buffer-string)))
351 (erase-buffer)
352 (insert buf)
353 t)
354 (nnweb-url-retrieve-asynch
355 url 'nnweb-callback (current-buffer) nnheader-callback-function)
356 t))))
357
358 (defun nnweb-callback (buffer callback)
359 (when (gnus-buffer-live-p url-working-buffer)
360 (save-excursion
361 (set-buffer url-working-buffer)
362 (funcall (nnweb-definition 'article))
363 (nnweb-decode-entities)
364 (set-buffer buffer)
365 (goto-char (point-max))
366 (insert-buffer-substring url-working-buffer))
367 (funcall callback t)
368 (gnus-kill-buffer url-working-buffer)))
369
370 (defun nnweb-url-retrieve-asynch (url callback &rest data)
371 (let ((url-request-method "GET")
372 (old-asynch url-be-asynchronous)
373 (url-request-data nil)
374 (url-request-extra-headers nil)
375 (url-working-buffer (generate-new-buffer-name " *nnweb*")))
376 (setq-default url-be-asynchronous t)
377 (save-excursion
378 (set-buffer (get-buffer-create url-working-buffer))
379 (setq url-current-callback-data data
380 url-be-asynchronous t
381 url-current-callback-func callback)
382 (url-retrieve url nil))
383 (setq-default url-be-asynchronous old-asynch)))
384
385 (if (fboundp 'url-retrieve-synchronously)
386 (defun nnweb-url-retrieve-asynch (url callback &rest data)
387 (url-retrieve url callback data)))
388
389 ;;;
390 ;;; DejaNews functions.
391 ;;;
392
393 (defun nnweb-dejanews-create-mapping ()
394 "Perform the search and create a number-to-url alist."
395 (save-excursion
396 (set-buffer nnweb-buffer)
397 (erase-buffer)
398 (when (funcall (nnweb-definition 'search) nnweb-search)
399 (let ((i 0)
400 (more t)
401 (case-fold-search t)
402 (active (or (cadr (assoc nnweb-group nnweb-group-alist))
403 (cons 1 0)))
404 subject date from
405 map url parse a table group text)
406 (while more
407 ;; Go through all the article hits on this page.
408 (goto-char (point-min))
409 (setq parse (w3-parse-buffer (current-buffer))
410 table (nth 1 (nnweb-parse-find-all 'table parse)))
411 (dolist (row (nth 2 (car (nth 2 table))))
412 (setq a (nnweb-parse-find 'a row)
413 url (cdr (assq 'href (nth 1 a)))
414 text (nreverse (nnweb-text row)))
415 (when a
416 (setq subject (nth 4 text)
417 group (nth 2 text)
418 date (nth 1 text)
419 from (nth 0 text))
420 (if (string-match "\\([0-9]+\\)/\\([0-9]+\\)/\\([0-9]+\\)" date)
421 (setq date (format "%s %s 00:00:00 %s"
422 (car (rassq (string-to-number
423 (match-string 2 date))
424 parse-time-months))
425 (match-string 3 date)
426 (match-string 1 date)))
427 (setq date "Jan 1 00:00:00 0000"))
428 (incf i)
429 (setq url (concat url "&fmt=text"))
430 (when (string-match "&context=[^&]+" url)
431 (setq url (replace-match "" t t url)))
432 (unless (nnweb-get-hashtb url)
433 (push
434 (list
435 (incf (cdr active))
436 (make-full-mail-header
437 (cdr active) (concat subject " (" group ")") from date
438 (concat "<" (nnweb-identifier url) "@dejanews>")
439 nil 0 0 url))
440 map)
441 (nnweb-set-hashtb (cadar map) (car map)))))
442 ;; See whether there is a "Get next 20 hits" button here.
443 (goto-char (point-min))
444 (if (or (not (re-search-forward
445 "HREF=\"\\([^\"]+\\)\"[<>b]+Next result" nil t))
446 (>= i nnweb-max-hits))
447 (setq more nil)
448 ;; Yup -- fetch it.
449 (setq more (match-string 1))
450 (erase-buffer)
451 (url-insert-file-contents more)))
452 ;; Return the articles in the right order.
453 (setq nnweb-articles
454 (sort (nconc nnweb-articles map) 'car-less-than-car))))))
455
456 (defun nnweb-dejanews-search (search)
457 (nnweb-insert
458 (concat
459 (nnweb-definition 'address)
460 "?"
461 (nnweb-encode-www-form-urlencoded
462 `(("ST" . "PS")
463 ("svcclass" . "dnyr")
464 ("QRY" . ,search)
465 ("defaultOp" . "AND")
466 ("DBS" . "1")
467 ("OP" . "dnquery.xp")
468 ("LNG" . "ALL")
469 ("maxhits" . "100")
470 ("threaded" . "0")
471 ("format" . "verbose2")
472 ("showsort" . "date")
473 ("agesign" . "1")
474 ("ageweight" . "1")))))
475 t)
476
477 (defun nnweb-dejanewsold-search (search)
478 (nnweb-fetch-form
479 (nnweb-definition 'address)
480 `(("query" . ,search)
481 ("defaultOp" . "AND")
482 ("svcclass" . "dnold")
483 ("maxhits" . "100")
484 ("format" . "verbose2")
485 ("threaded" . "0")
486 ("showsort" . "date")
487 ("agesign" . "1")
488 ("ageweight" . "1")))
489 t)
490
491 (defun nnweb-dejanews-identity (url)
492 "Return an unique identifier based on URL."
493 (if (string-match "AN=\\([0-9]+\\)" url)
494 (match-string 1 url)
495 url))
496
497 ;;;
498 ;;; InReference
499 ;;;
500
501 (defun nnweb-reference-create-mapping ()
502 "Perform the search and create a number-to-url alist."
503 (save-excursion
504 (set-buffer nnweb-buffer)
505 (erase-buffer)
506 (when (funcall (nnweb-definition 'search) nnweb-search)
507 (let ((i 0)
508 (more t)
509 (case-fold-search t)
510 (active (or (cadr (assoc nnweb-group nnweb-group-alist))
511 (cons 1 0)))
512 Subject Score Date Newsgroups From Message-ID
513 map url)
514 (while more
515 ;; Go through all the article hits on this page.
516 (goto-char (point-min))
517 (search-forward "</pre><hr>" nil t)
518 (delete-region (point-min) (point))
519 (goto-char (point-min))
520 (while (re-search-forward "^ +[0-9]+\\." nil t)
521 (narrow-to-region
522 (point)
523 (if (re-search-forward "^$" nil t)
524 (match-beginning 0)
525 (point-max)))
526 (goto-char (point-min))
527 (when (looking-at ".*href=\"\\([^\"]+\\)\"")
528 (setq url (match-string 1)))
529 (nnweb-remove-markup)
530 (goto-char (point-min))
531 (while (search-forward "\t" nil t)
532 (replace-match " "))
533 (goto-char (point-min))
534 (while (re-search-forward "^\\([^:]+\\): \\(.*\\)$" nil t)
535 (set (intern (match-string 1)) (match-string 2)))
536 (widen)
537 (search-forward "</pre>" nil t)
538 (incf i)
539 (unless (nnweb-get-hashtb url)
540 (push
541 (list
542 (incf (cdr active))
543 (make-full-mail-header
544 (cdr active) (concat "(" Newsgroups ") " Subject) From Date
545 Message-ID
546 nil 0 (string-to-int Score) url))
547 map)
548 (nnweb-set-hashtb (cadar map) (car map))))
549 (setq more nil))
550 ;; Return the articles in the right order.
551 (setq nnweb-articles
552 (sort (nconc nnweb-articles map) 'car-less-than-car))))))
553
554 (defun nnweb-reference-wash-article ()
555 (let ((case-fold-search t))
556 (goto-char (point-min))
557 (re-search-forward "^</center><hr>" nil t)
558 (delete-region (point-min) (point))
559 (search-forward "<pre>" nil t)
560 (forward-line -1)
561 (let ((body (point-marker)))
562 (search-forward "</pre>" nil t)
563 (delete-region (point) (point-max))
564 (nnweb-remove-markup)
565 (goto-char (point-min))
566 (while (looking-at " *$")
567 (gnus-delete-line))
568 (narrow-to-region (point-min) body)
569 (while (and (re-search-forward "^$" nil t)
570 (not (eobp)))
571 (gnus-delete-line))
572 (goto-char (point-min))
573 (while (looking-at "\\(^[^ ]+:\\) *")
574 (replace-match "\\1 " t)
575 (forward-line 1))
576 (goto-char (point-min))
577 (when (re-search-forward "^References:" nil t)
578 (narrow-to-region
579 (point) (if (re-search-forward "^$\\|^[^:]+:" nil t)
580 (match-beginning 0)
581 (point-max)))
582 (goto-char (point-min))
583 (while (not (eobp))
584 (unless (looking-at "References")
585 (insert "\t")
586 (forward-line 1)))
587 (goto-char (point-min))
588 (while (search-forward "," nil t)
589 (replace-match " " t t)))
590 (widen)
591 (nnweb-decode-entities)
592 (set-marker body nil))))
593
594 (defun nnweb-reference-search (search)
595 (url-insert-file-contents
596 (concat
597 (nnweb-definition 'address)
598 "?"
599 (nnweb-encode-www-form-urlencoded
600 `(("search" . "advanced")
601 ("querytext" . ,search)
602 ("subj" . "")
603 ("name" . "")
604 ("login" . "")
605 ("host" . "")
606 ("organization" . "")
607 ("groups" . "")
608 ("keywords" . "")
609 ("choice" . "Search")
610 ("startmonth" . "Jul")
611 ("startday" . "25")
612 ("startyear" . "1996")
613 ("endmonth" . "Aug")
614 ("endday" . "24")
615 ("endyear" . "1996")
616 ("mode" . "Quick")
617 ("verbosity" . "Verbose")
618 ("ranking" . "Relevance")
619 ("first" . "1")
620 ("last" . "25")
621 ("score" . "50")))))
622 (setq buffer-file-name nil)
623 t)
624
625 ;;;
626 ;;; Alta Vista
627 ;;;
628
629 (defun nnweb-altavista-create-mapping ()
630 "Perform the search and create a number-to-url alist."
631 (save-excursion
632 (set-buffer nnweb-buffer)
633 (erase-buffer)
634 (let ((part 0))
635 (when (funcall (nnweb-definition 'search) nnweb-search part)
636 (let ((i 0)
637 (more t)
638 (case-fold-search t)
639 (active (or (cadr (assoc nnweb-group nnweb-group-alist))
640 (cons 1 0)))
641 subject date from id group
642 map url)
643 (while more
644 ;; Go through all the article hits on this page.
645 (goto-char (point-min))
646 (search-forward "<dt>" nil t)
647 (delete-region (point-min) (match-beginning 0))
648 (goto-char (point-min))
649 (while (search-forward "<dt>" nil t)
650 (replace-match "\n<blubb>"))
651 (nnweb-decode-entities)
652 (goto-char (point-min))
653 (while (re-search-forward "<blubb>.*href=\"\\([^\"]+\\)\"><strong>\\([^>]*\\)</strong></a><dd>\\([^-]+\\)- <b>\\([^<]+\\)<.*href=\"news:\\([^\"]+\\)\">.*\">\\(.+\\)</a><P>"
654 nil t)
655 (setq url (match-string 1)
656 subject (match-string 2)
657 date (match-string 3)
658 group (match-string 4)
659 id (concat "<" (match-string 5) ">")
660 from (match-string 6))
661 (incf i)
662 (unless (nnweb-get-hashtb url)
663 (push
664 (list
665 (incf (cdr active))
666 (make-full-mail-header
667 (cdr active) (concat "(" group ") " subject) from date
668 id nil 0 0 url))
669 map)
670 (nnweb-set-hashtb (cadar map) (car map))))
671 ;; See if we want more.
672 (when (or (not nnweb-articles)
673 (>= i nnweb-max-hits)
674 (not (funcall (nnweb-definition 'search)
675 nnweb-search (incf part))))
676 (setq more nil)))
677 ;; Return the articles in the right order.
678 (setq nnweb-articles
679 (sort (nconc nnweb-articles map) 'car-less-than-car)))))))
680
681 (defun nnweb-altavista-wash-article ()
682 (goto-char (point-min))
683 (let ((case-fold-search t))
684 (when (re-search-forward "^<strong>" nil t)
685 (delete-region (point-min) (match-beginning 0)))
686 (goto-char (point-min))
687 (while (looking-at "<strong>\\([^ ]+\\) +</strong> +\\(.*\\)$")
688 (replace-match "\\1: \\2" t)
689 (forward-line 1))
690 (when (re-search-backward "^References:" nil t)
691 (narrow-to-region (point) (progn (forward-line 1) (point)))
692 (goto-char (point-min))
693 (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t)
694 (replace-match "&lt;\\1&gt; " t)))
695 (widen)
696 (nnweb-remove-markup)
697 (nnweb-decode-entities)))
698
699 (defun nnweb-altavista-search (search &optional part)
700 (url-insert-file-contents
701 (concat
702 (nnweb-definition 'address)
703 "?"
704 (nnweb-encode-www-form-urlencoded
705 `(("pg" . "aq")
706 ("what" . "news")
707 ,@(when part `(("stq" . ,(int-to-string (* part 30)))))
708 ("fmt" . "d")
709 ("q" . ,search)
710 ("r" . "")
711 ("d0" . "")
712 ("d1" . "")))))
713 (setq buffer-file-name nil)
714 t)
715
716 ;;; 305 ;;;
717 ;;; Deja bought by google.com 306 ;;; Deja bought by google.com
718 ;;; 307 ;;;
719 308
720 (defun nnweb-google-wash-article () 309 (defun nnweb-google-wash-article ()
729 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t) 318 (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
730 (replace-match "")) 319 (replace-match ""))
731 (goto-char (point-min)) 320 (goto-char (point-min))
732 (while (search-forward "<br>" nil t) 321 (while (search-forward "<br>" nil t)
733 (replace-match "\n")) 322 (replace-match "\n"))
734 (nnweb-remove-markup) 323 (mm-url-remove-markup)
735 (goto-char (point-min)) 324 (goto-char (point-min))
736 (while (re-search-forward "^[ \t]*\n" nil t) 325 (while (re-search-forward "^[ \t]*\n" nil t)
737 (replace-match "")) 326 (replace-match ""))
738 (goto-char (point-max)) 327 (goto-char (point-max))
739 (insert "\n") 328 (insert "\n")
740 (widen) 329 (widen)
741 (narrow-to-region (point) (point-max)) 330 (narrow-to-region (point) (point-max))
742 (search-forward "</pre>" nil t) 331 (search-forward "</pre>" nil t)
743 (delete-region (point) (point-max)) 332 (delete-region (point) (point-max))
744 (nnweb-remove-markup) 333 (mm-url-remove-markup)
745 (widen))) 334 (widen)))
746 335
747 (defun nnweb-google-parse-1 (&optional Message-ID) 336 (defun nnweb-google-parse-1 (&optional Message-ID)
748 (let ((i 0) 337 (let ((i 0)
749 (case-fold-search t) 338 (case-fold-search t)
761 (setq mid (match-string 2) 350 (setq mid (match-string 2)
762 url (format 351 url (format
763 "http://groups.google.com/groups?selm=%s&output=gplain" mid)) 352 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
764 (narrow-to-region (search-forward ">" nil t) 353 (narrow-to-region (search-forward ">" nil t)
765 (search-forward "</a>" nil t)) 354 (search-forward "</a>" nil t))
766 (nnweb-remove-markup) 355 (mm-url-remove-markup)
767 (nnweb-decode-entities) 356 (mm-url-decode-entities)
768 (setq Subject (buffer-string)) 357 (setq Subject (buffer-string))
769 (goto-char (point-max)) 358 (goto-char (point-max))
770 (widen) 359 (widen)
771 (forward-line 1) 360 (forward-line 2)
772 (when (looking-at "<br><font[^>]+>") 361 (when (looking-at "<br><font[^>]+>")
773 (goto-char (match-end 0))) 362 (goto-char (match-end 0)))
774 (if (not (looking-at "<a[^>]+>")) 363 (if (not (looking-at "<a[^>]+>"))
775 (skip-chars-forward " \t") 364 (skip-chars-forward " \t")
776 (narrow-to-region (point) 365 (narrow-to-region (point)
777 (search-forward "</a>" nil t)) 366 (search-forward "</a>" nil t))
778 (nnweb-remove-markup) 367 (mm-url-remove-markup)
779 (nnweb-decode-entities) 368 (mm-url-decode-entities)
780 (setq Newsgroups (buffer-string)) 369 (setq Newsgroups (buffer-string))
781 (goto-char (point-max)) 370 (goto-char (point-max))
782 (widen) 371 (widen)
783 (skip-chars-forward "- \t")) 372 (skip-chars-forward "- \t"))
784 (when (looking-at 373 (when (looking-at
785 "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a") 374 "\\([0-9]+\\)[/ ]\\([A-Za-z]+\\)[/ ]\\([0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
786 (setq From (match-string 2) 375 (setq From (match-string 4)
787 Date (match-string 1))) 376 Date (format "%s %s 00:00:00 %s"
377 (match-string 2) (match-string 1)
378 (match-string 3))))
788 (forward-line 1) 379 (forward-line 1)
789 (incf i) 380 (incf i)
790 (unless (nnweb-get-hashtb url) 381 (unless (nnweb-get-hashtb url)
791 (push 382 (push
792 (list 383 (list
805 (let ((map (nnweb-google-parse-1 id)) header) 396 (let ((map (nnweb-google-parse-1 id)) header)
806 (setq nnweb-articles 397 (setq nnweb-articles
807 (nconc nnweb-articles map)) 398 (nconc nnweb-articles map))
808 (when (setq header (cadar map)) 399 (when (setq header (cadar map))
809 (mm-with-unibyte-current-buffer 400 (mm-with-unibyte-current-buffer
810 (nnweb-fetch-url (mail-header-xref header))) 401 (mm-url-insert (mail-header-xref header)))
811 (caar map)))) 402 (caar map))))
812 403
813 (defun nnweb-google-create-mapping () 404 (defun nnweb-google-create-mapping ()
814 "Perform the search and create a number-to-url alist." 405 "Perform the search and create a number-to-url alist."
815 (save-excursion 406 (save-excursion
816 (set-buffer nnweb-buffer) 407 (set-buffer nnweb-buffer)
817 (erase-buffer) 408 (erase-buffer)
818 (when (funcall (nnweb-definition 'search) nnweb-search) 409 (when (funcall (nnweb-definition 'search) nnweb-search)
819 (let ((more t)) 410 (let ((more t)
411 (i 0))
820 (while more 412 (while more
821 (setq nnweb-articles 413 (setq nnweb-articles
822 (nconc nnweb-articles (nnweb-google-parse-1))) 414 (nconc nnweb-articles (nnweb-google-parse-1)))
823 ;; FIXME: There is more. 415 ;; Check if there are more articles to fetch
824 (setq more nil)) 416 (goto-char (point-min))
417 (incf i 100)
418 (if (or (not (re-search-forward
419 "<td nowrap><a href=\\([^>]+\\).*<span class=b>Next</span>" nil t))
420 (>= i nnweb-max-hits))
421 (setq more nil)
422 ;; Yup, there are more articles
423 (setq more (concat "http://groups.google.com" (match-string 1)))
424 (when more
425 (erase-buffer)
426 (mm-url-insert more))))
825 ;; Return the articles in the right order. 427 ;; Return the articles in the right order.
826 (setq nnweb-articles 428 (setq nnweb-articles
827 (sort nnweb-articles 'car-less-than-car)))))) 429 (sort nnweb-articles 'car-less-than-car))))))
828 430
829 (defun nnweb-google-search (search) 431 (defun nnweb-google-search (search)
830 (nnweb-insert 432 (mm-url-insert
831 (concat 433 (concat
832 (nnweb-definition 'address) 434 (nnweb-definition 'address)
833 "?" 435 "?"
834 (nnweb-encode-www-form-urlencoded 436 (mm-url-encode-www-form-urlencoded
835 `(("q" . ,search) 437 `(("q" . ,search)
836 ("num". "100") 438 ("num". "100")
837 ("hq" . "") 439 ("hq" . "")
838 ("hl" . "") 440 ("hl" . "")
839 ("lr" . "") 441 ("lr" . "")
842 t) 444 t)
843 445
844 (defun nnweb-google-identity (url) 446 (defun nnweb-google-identity (url)
845 "Return an unique identifier based on URL." 447 "Return an unique identifier based on URL."
846 (if (string-match "selm=\\([^ &>]+\\)" url) 448 (if (string-match "selm=\\([^ &>]+\\)" url)
449 (match-string 1 url)
450 url))
451
452 ;;;
453 ;;; gmane.org
454 ;;;
455 (defun nnweb-gmane-create-mapping ()
456 "Perform the search and create a number-to-url alist."
457 (save-excursion
458 (set-buffer nnweb-buffer)
459 (erase-buffer)
460 (when (funcall (nnweb-definition 'search) nnweb-search)
461 (let ((more t)
462 (case-fold-search t)
463 (active (or (cadr (assoc nnweb-group nnweb-group-alist))
464 (cons 1 0)))
465 subject group url
466 map)
467 ;; Remove stuff from the beginning of results
468 (goto-char (point-min))
469 (search-forward "Search Results</h1><ul>" nil t)
470 (delete-region (point-min) (point))
471 (goto-char (point-min))
472 ;; Iterate over the actual hits
473 (while (re-search-forward ".*href=\"\\([^\"]+\\)\">\\(.*\\)" nil t)
474 (setq url (concat "http://gmane.org/" (match-string 1)))
475 (setq subject (match-string 2))
476 (unless (nnweb-get-hashtb url)
477 (push
478 (list
479 (incf (cdr active))
480 (make-full-mail-header
481 (cdr active) (concat "(" group ") " subject) nil nil
482 nil nil 0 0 url))
483 map)
484 (nnweb-set-hashtb (cadar map) (car map))))
485 ;; Return the articles in the right order.
486 (setq nnweb-articles
487 (sort (nconc nnweb-articles map) 'car-less-than-car))))))
488
489 (defun nnweb-gmane-wash-article ()
490 (let ((case-fold-search t))
491 (goto-char (point-min))
492 (re-search-forward "<!--X-Head-of-Message-->" nil t)
493 (delete-region (point-min) (point))
494 (goto-char (point-min))
495 (while (looking-at "^<li><em>\\([^ ]+\\)</em>.*</li>")
496 (replace-match "\\1\\2" t)
497 (forward-line 1))
498 (mm-url-remove-markup)))
499
500 (defun nnweb-gmane-search (search)
501 (mm-url-insert
502 (concat
503 (nnweb-definition 'address)
504 "?"
505 (mm-url-encode-www-form-urlencoded
506 `(("query" . ,search)))))
507 (setq buffer-file-name nil)
508 t)
509
510
511 (defun nnweb-gmane-identity (url)
512 "Return a unique identifier based on URL."
513 (if (string-match "group=\\(.+\\)" url)
847 (match-string 1 url) 514 (match-string 1 url)
848 url)) 515 url))
849 516
850 ;;; 517 ;;;
851 ;;; General web/w3 interface utility functions 518 ;;; General web/w3 interface utility functions
867 " ")) 534 " "))
868 (insert ">\n") 535 (insert ">\n")
869 (mapcar 'nnweb-insert-html (nth 2 parse)) 536 (mapcar 'nnweb-insert-html (nth 2 parse))
870 (insert "</" (symbol-name (car parse)) ">\n"))) 537 (insert "</" (symbol-name (car parse)) ">\n")))
871 538
872 (defun nnweb-encode-www-form-urlencoded (pairs)
873 "Return PAIRS encoded for forms."
874 (mapconcat
875 (function
876 (lambda (data)
877 (concat (w3-form-encode-xwfu (car data)) "="
878 (w3-form-encode-xwfu (cdr data)))))
879 pairs "&"))
880
881 (defun nnweb-fetch-form (url pairs)
882 "Fetch a form from URL with PAIRS as the data using the POST method."
883 (let ((url-request-data (nnweb-encode-www-form-urlencoded pairs))
884 (url-request-method "POST")
885 (url-request-extra-headers
886 '(("Content-type" . "application/x-www-form-urlencoded"))))
887 (url-insert-file-contents url)
888 (setq buffer-file-name nil))
889 t)
890
891 (defun nnweb-decode-entities ()
892 "Decode all HTML entities."
893 (goto-char (point-min))
894 (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
895 (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
896 (let ((c
897 (string-to-number (substring
898 (match-string 1) 1))))
899 (if (mm-char-or-char-int-p c) c 32))
900 (or (cdr (assq (intern (match-string 1))
901 w3-html-entities))
902 ?#))))
903 (unless (stringp elem)
904 (setq elem (char-to-string elem)))
905 (replace-match elem t t))))
906
907 (defun nnweb-decode-entities-string (string)
908 (with-temp-buffer
909 (insert string)
910 (nnweb-decode-entities)
911 (buffer-substring (point-min) (point-max))))
912
913 (defun nnweb-remove-markup ()
914 "Remove all HTML markup, leaving just plain text."
915 (goto-char (point-min))
916 (while (search-forward "<!--" nil t)
917 (delete-region (match-beginning 0)
918 (or (search-forward "-->" nil t)
919 (point-max))))
920 (goto-char (point-min))
921 (while (re-search-forward "<[^>]+>" nil t)
922 (replace-match "" t t)))
923
924 (defun nnweb-insert (url &optional follow-refresh)
925 "Insert the contents from an URL in the current buffer.
926 If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
927 (let ((name buffer-file-name))
928 (if follow-refresh
929 (save-restriction
930 (narrow-to-region (point) (point))
931 (url-insert-file-contents url)
932 (goto-char (point-min))
933 (when (re-search-forward
934 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
935 (let ((url (match-string 1)))
936 (delete-region (point-min) (point-max))
937 (nnweb-insert url t))))
938 (url-insert-file-contents url))
939 (setq buffer-file-name name)))
940
941 (defun nnweb-parse-find (type parse &optional maxdepth) 539 (defun nnweb-parse-find (type parse &optional maxdepth)
942 "Find the element of TYPE in PARSE." 540 "Find the element of TYPE in PARSE."
943 (catch 'found 541 (catch 'found
944 (nnweb-parse-find-1 type parse maxdepth))) 542 (nnweb-parse-find-1 type parse maxdepth)))
945 543
985 (push element nnweb-text) 583 (push element nnweb-text)
986 (when (and (consp element) 584 (when (and (consp element)
987 (listp (cdr element))) 585 (listp (cdr element)))
988 (nnweb-text-1 element))))) 586 (nnweb-text-1 element)))))
989 587
990 (defun nnweb-replace-in-string (string match newtext)
991 (while (string-match match string)
992 (setq string (replace-match newtext t t string)))
993 string)
994
995 (provide 'nnweb) 588 (provide 'nnweb)
996 589
997 ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697 590 ;;; arch-tag: f59307eb-c90f-479f-b7d2-dbd8bf51b697
998 ;;; nnweb.el ends here 591 ;;; nnweb.el ends here