diff lisp/gnus/nnweb.el @ 40542:93f6c74a2f60

* mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with the Gnus CVS. * mm-util.el (mm-mime-mule-charset-alist): Move down and call mm-coding-system-p. Don't correct it only in XEmacs. (mm-charset-to-coding-system): Use mm-coding-system-p and mm-get-coding-system-list. (mm-emacs-mule, mm-mule4-p): New. (mm-enable-multibyte, mm-disable-multibyte, mm-enable-multibyte-mule4, mm-disable-multibyte-mule4, mm-with-unibyte-current-buffer, mm-with-unibyte-current-buffer-mule4): Use them. (mm-find-mime-charset-region): Treat iso-2022-jp. From Dave Love <fx@gnu.org>: * mm-util.el (mm-mime-mule-charset-alist): Make it correct by construction. (mm-charset-synonym-alist): Remove windows-125[02]. Make other entries conditional on not having a coding system defined for them. (mm-mule-charset-to-mime-charset): Use find-coding-systems-for-charsets if defined. (mm-charset-to-coding-system): Don't use mm-get-coding-system-list. Look in mm-charset-synonym-alist later. Add last resort search of coding systems. (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4) (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like Mule 4. (mm-find-mime-charset-region): Re-write. (mm-with-unibyte-current-buffer): Restore buffer as well as multibyteness.
author ShengHuo ZHU <zsh@cs.rochester.edu>
date Wed, 31 Oct 2001 04:16:51 +0000
parents db55e81c9ccf
children 79f5c0e65980
line wrap: on
line diff
--- a/lisp/gnus/nnweb.el	Wed Oct 31 02:54:33 2001 +0000
+++ b/lisp/gnus/nnweb.el	Wed Oct 31 04:16:51 2001 +0000
@@ -1,5 +1,5 @@
 ;;; nnweb.el --- retrieving articles via web search engines
-;; Copyright (C) 1996, 1997, 1998, 1999, 2000
+;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
 ;;        Free Software Foundation, Inc.
 
 ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org>
@@ -55,25 +55,48 @@
 (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
   "Where nnweb will save its files.")
 
-(defvoo nnweb-type 'dejanews
+(defvoo nnweb-type 'google
   "What search engine type is being used.
-Valid types include `dejanews', `dejanewsold', `reference',
+Valid types include `google', `dejanews', `dejanewsold', `reference',
 and `altavista'.")
 
 (defvar nnweb-type-definition
-  '((dejanews
+  '(
+    (google
+     ;;(article . nnweb-google-wash-article)
+     ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
+     (article . ignore)
+     (id . "http://groups.google.com/groups?selm=%s&output=gplain")
+     ;;(reference . nnweb-google-reference)
+     (reference . identity)
+     (map . nnweb-google-create-mapping)
+     (search . nnweb-google-search)
+     (address . "http://groups.google.com/groups")
+     (identifier . nnweb-google-identity))
+    (dejanews ;; alias of google
+     ;;(article . nnweb-google-wash-article)
+     ;;(id . "http://groups.google.com/groups?as_umsgid=%s")
      (article . ignore)
-     (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
-     (map . nnweb-dejanews-create-mapping)
-     (search . nnweb-dejanews-search)
-     (address . "http://www.deja.com/=dnc/qs.xp")
-     (identifier . nnweb-dejanews-identity))
-    (dejanewsold
-     (article . ignore)
-     (map . nnweb-dejanews-create-mapping)
-     (search . nnweb-dejanewsold-search)
-     (address . "http://www.deja.com/dnquery.xp")
-     (identifier . nnweb-dejanews-identity))
+     (id . "http://groups.google.com/groups?selm=%s&output=gplain")
+     ;;(reference . nnweb-google-reference)
+     (reference . identity)
+     (map . nnweb-google-create-mapping)
+     (search . nnweb-google-search)
+     (address . "http://groups.google.com/groups")
+     (identifier . nnweb-google-identity))
+;;;     (dejanews
+;;;      (article . ignore)
+;;;      (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text")
+;;;      (map . nnweb-dejanews-create-mapping)
+;;;      (search . nnweb-dejanews-search)
+;;;      (address . "http://www.deja.com/=dnc/qs.xp")
+;;;      (identifier . nnweb-dejanews-identity))
+;;;     (dejanewsold
+;;;      (article . ignore)
+;;;      (map . nnweb-dejanews-create-mapping)
+;;;      (search . nnweb-dejanewsold-search)
+;;;      (address . "http://www.deja.com/dnquery.xp")
+;;;      (identifier . nnweb-dejanews-identity))
     (reference
      (article . nnweb-reference-wash-article)
      (map . nnweb-reference-create-mapping)
@@ -124,6 +147,8 @@
 
 (deffoo nnweb-request-scan (&optional group server)
   (nnweb-possibly-change-server group server)
+  (if nnweb-ephemeral-p
+      (setq nnweb-hashtb (gnus-make-hashtable 4095)))
   (funcall (nnweb-definition 'map))
   (unless nnweb-ephemeral-p
     (nnweb-write-active)
@@ -134,9 +159,10 @@
   (when (and group
 	     (not (equal group nnweb-group))
 	     (not nnweb-ephemeral-p))
+    (setq nnweb-group group
+	  nnweb-articles nil)
     (let ((info (assoc group nnweb-group-alist)))
       (when info
-	(setq nnweb-group group)
 	(setq nnweb-type (nth 2 info))
 	(setq nnweb-search (nth 3 info))
 	(unless dont-check
@@ -175,17 +201,19 @@
 		(and (stringp article)
 		     (nnweb-definition 'id t)
 		     (let ((fetch (nnweb-definition 'id))
-			   art)
+			   art active)
 		       (when (string-match "^<\\(.*\\)>$" article)
 			 (setq art (match-string 1 article)))
-		       (and fetch
-			    art
-			    (mm-with-unibyte-current-buffer
-			      (nnweb-fetch-url
-			       (format fetch article)))))))
+		       (when (and fetch art)
+			 (setq url (format fetch art))
+			 (mm-with-unibyte-current-buffer
+			   (nnweb-fetch-url url))
+			 (if (nnweb-definition 'reference t)
+			     (setq article
+				   (funcall (nnweb-definition
+					     'reference) article)))))))
 	(unless nnheader-callback-function
-	  (funcall (nnweb-definition 'article))
-	  (nnweb-decode-entities))
+	  (funcall (nnweb-definition 'article)))
 	(nnheader-report 'nnweb "Fetched article %s" article)
 	(cons group (and (numberp article) article))))))
 
@@ -290,10 +318,11 @@
       (nnweb-open-server server)))
   (unless nnweb-group-alist
     (nnweb-read-active))
+  (unless nnweb-hashtb
+    (setq nnweb-hashtb (gnus-make-hashtable 4095)))
   (when group
     (when (and (not nnweb-ephemeral-p)
-	       (not (equal group nnweb-group)))
-      (setq nnweb-hashtb (gnus-make-hashtable 4095))
+	       (equal group nnweb-group))
       (nnweb-request-group group nil t))))
 
 (defun nnweb-init (server)
@@ -393,7 +422,7 @@
 				     (car (rassq (string-to-number
 						  (match-string 2 date))
 						 parse-time-months))
-				     (match-string 3 date) 
+				     (match-string 3 date)
 				     (match-string 1 date)))
 		(setq date "Jan 1 00:00:00 0000"))
 	      (incf i)
@@ -559,6 +588,7 @@
 	(while (search-forward "," nil t)
 	  (replace-match " " t t)))
       (widen)
+      (nnweb-decode-entities)
       (set-marker body nil))))
 
 (defun nnweb-reference-search (search)
@@ -663,7 +693,8 @@
       (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t)
 	(replace-match "&lt;\\1&gt; " t)))
     (widen)
-    (nnweb-remove-markup)))
+    (nnweb-remove-markup)
+    (nnweb-decode-entities)))
 
 (defun nnweb-altavista-search (search &optional part)
   (url-insert-file-contents
@@ -683,13 +714,147 @@
   t)
 
 ;;;
+;;; Deja bought by google.com
+;;;
+
+(defun nnweb-google-wash-article ()
+  (let ((case-fold-search t) url)
+    (goto-char (point-min))
+    (re-search-forward "^<pre>" nil t)
+    (narrow-to-region (point-min) (point))
+    (search-backward "<table " nil t 2)
+    (delete-region (point-min) (point))
+    (if (re-search-forward "Search Result [0-9]+" nil t)
+	(replace-match ""))
+    (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
+	(replace-match ""))
+    (goto-char (point-min))
+    (while (search-forward "<br>" nil t)
+      (replace-match "\n"))
+    (nnweb-remove-markup)
+    (goto-char (point-min))
+    (while (re-search-forward "^[ \t]*\n" nil t)
+      (replace-match ""))
+    (goto-char (point-max))
+    (insert "\n")
+    (widen)
+    (narrow-to-region (point) (point-max))
+    (search-forward "</pre>" nil t)
+    (delete-region (point) (point-max))
+    (nnweb-remove-markup)
+    (widen)))
+
+(defun nnweb-google-parse-1 (&optional Message-ID)
+  (let ((i 0)
+	(case-fold-search t)
+	(active (cadr (assoc nnweb-group nnweb-group-alist)))
+	Subject Score Date Newsgroups From
+	map url mid)
+    (unless active
+      (push (list nnweb-group (setq active (cons 1 0))
+		  nnweb-type nnweb-search)
+	    nnweb-group-alist))
+    ;; Go through all the article hits on this page.
+    (goto-char (point-min))
+    (while (re-search-forward
+	    "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+      (setq mid (match-string 2)
+	    url (format 
+		 "http://groups.google.com/groups?selm=%s&output=gplain" mid))
+      (narrow-to-region (search-forward ">" nil t)
+			(search-forward "</a>" nil t))
+      (nnweb-remove-markup)
+      (nnweb-decode-entities)
+      (setq Subject (buffer-string))
+      (goto-char (point-max))
+      (widen)
+      (forward-line 1)
+      (when (looking-at "<br><font[^>]+>")
+	(goto-char (match-end 0)))
+      (if (not (looking-at "<a[^>]+>"))
+	  (skip-chars-forward " \t")
+	(narrow-to-region (point)
+			  (search-forward "</a>" nil t))
+	(nnweb-remove-markup)
+	(nnweb-decode-entities)
+	(setq Newsgroups (buffer-string))
+	(goto-char (point-max))
+	(widen)
+	(skip-chars-forward "- \t"))
+      (when (looking-at
+	     "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a")
+	(setq From (match-string 2)
+	      Date (match-string 1)))
+      (forward-line 1)
+      (incf i)
+      (unless (nnweb-get-hashtb url)
+	(push
+	 (list
+	  (incf (cdr active))
+	  (make-full-mail-header
+	   (cdr active) (if Newsgroups
+			    (concat  "(" Newsgroups ") " Subject)
+			  Subject)
+	   From Date (or Message-ID mid)
+	   nil 0 0 url))
+	 map)
+	(nnweb-set-hashtb (cadar map) (car map))))
+    map))
+
+(defun nnweb-google-reference (id)
+  (let ((map (nnweb-google-parse-1 id)) header)
+    (setq nnweb-articles
+	  (nconc nnweb-articles map))
+    (when (setq header (cadar map))
+      (mm-with-unibyte-current-buffer
+	(nnweb-fetch-url (mail-header-xref header)))
+      (caar map))))
+
+(defun nnweb-google-create-mapping ()
+  "Perform the search and create an number-to-url alist."
+  (save-excursion
+    (set-buffer nnweb-buffer)
+    (erase-buffer)
+    (when (funcall (nnweb-definition 'search) nnweb-search)
+	(let ((more t))
+	  (while more
+	    (setq nnweb-articles
+		  (nconc nnweb-articles (nnweb-google-parse-1)))
+	    ;; FIXME: There is more.
+	    (setq more nil))
+	  ;; Return the articles in the right order.
+	  (setq nnweb-articles
+		(sort nnweb-articles 'car-less-than-car))))))
+
+(defun nnweb-google-search (search)
+  (nnweb-insert
+   (concat
+    (nnweb-definition 'address)
+    "?"
+    (nnweb-encode-www-form-urlencoded
+     `(("q" . ,search)
+       ("num". "100")
+       ("hq" . "")
+       ("hl" . "")
+       ("lr" . "")
+       ("safe" . "off")
+       ("sites" . "groups")))))
+  t)
+
+(defun nnweb-google-identity (url)
+  "Return an unique identifier based on URL."
+  (if (string-match "selm=\\([^ &>]+\\)" url)
+      (match-string 1 url)
+    url))
+
+;;;
 ;;; General web/w3 interface utility functions
 ;;;
 
 (defun nnweb-insert-html (parse)
   "Insert HTML based on a w3 parse tree."
   (if (stringp parse)
-      (insert parse)
+      (insert (nnheader-string-as-multibyte parse))
     (insert "<" (symbol-name (car parse)) " ")
     (insert (mapconcat
 	     (lambda (param)
@@ -729,7 +894,7 @@
   (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
     (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
 			(let ((c
-			       (string-to-number (substring 
+			       (string-to-number (substring
 						  (match-string 1) 1))))
 			  (if (mm-char-or-char-int-p c) c 32))
 		      (or (cdr (assq (intern (match-string 1))
@@ -739,9 +904,9 @@
 	(setq elem (char-to-string elem)))
       (replace-match elem t t))))
 
-(defun nnweb-decode-entities-string (str)
+(defun nnweb-decode-entities-string (string)
   (with-temp-buffer
-    (insert str)
+    (insert string)
     (nnweb-decode-entities)
     (buffer-substring (point-min) (point-max))))
 
@@ -760,12 +925,12 @@
   "Insert the contents from an URL in the current buffer.
 If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
   (let ((name buffer-file-name))
-    (if follow-refresh
+    (if follow-refresh 
 	(save-restriction
 	  (narrow-to-region (point) (point))
 	  (url-insert-file-contents url)
 	  (goto-char (point-min))
-	  (when (re-search-forward 
+	  (when (re-search-forward
 		 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t)
 	    (let ((url (match-string 1)))
 	      (delete-region (point-min) (point-max))
@@ -822,6 +987,11 @@
 		 (listp (cdr element)))
 	(nnweb-text-1 element)))))
 
+(defun nnweb-replace-in-string (string match newtext)
+  (while (string-match match string)
+    (setq string (replace-match newtext t t string)))
+  string)
+
 (provide 'nnweb)
 
 ;;; nnweb.el ends here