Mercurial > emacs
comparison lisp/gnus/nnrss.el @ 56927:55fd4f77387a after-merge-gnus-5_10
Revision: miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-523
Merge from emacs--gnus--5.10, gnus--rel--5.10
Patches applied:
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--base-0
tag of miles@gnu.org--gnu-2004/emacs--cvs-trunk--0--patch-464
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-1
Import from CVS branch gnus-5_10-branch
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-2
Merge from lorentey@elte.hu--2004/emacs--multi-tty--0, emacs--cvs-trunk--0
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-3
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/emacs--gnus--5.10--patch-4
Merge from gnus--rel--5.10
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-18
Update from CVS
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-19
Remove autoconf-generated files from archive
* miles@gnu.org--gnu-2004/gnus--rel--5.10--patch-20
Update from CVS
author | Miles Bader <miles@gnu.org> |
---|---|
date | Sat, 04 Sep 2004 13:13:48 +0000 |
parents | |
children | 9bdd97960431 |
comparison
equal
deleted
inserted
replaced
56926:f8e248e9a717 | 56927:55fd4f77387a |
---|---|
1 ;;; nnrss.el --- interfacing with RSS | |
2 ;; Copyright (C) 2001, 2002, 2003 Free Software Foundation, Inc. | |
3 | |
4 ;; Author: Shenghuo Zhu <zsh@cs.rochester.edu> | |
5 ;; Keywords: RSS | |
6 | |
7 ;; This file is part of GNU Emacs. | |
8 | |
9 ;; GNU Emacs is free software; you can redistribute it and/or modify | |
10 ;; it under the terms of the GNU General Public License as published | |
11 ;; by the Free Software Foundation; either version 2, or (at your | |
12 ;; option) any later version. | |
13 | |
14 ;; GNU Emacs is distributed in the hope that it will be useful, but | |
15 ;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 ;; General Public License for more details. | |
18 | |
19 ;; You should have received a copy of the GNU General Public License | |
20 ;; along with GNU Emacs; see the file COPYING. If not, write to the | |
21 ;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
22 ;; Boston, MA 02111-1307, USA. | |
23 | |
24 ;;; Commentary: | |
25 | |
26 ;;; Code: | |
27 | |
28 (eval-when-compile (require 'cl)) | |
29 | |
30 (require 'gnus) | |
31 (require 'nnoo) | |
32 (require 'nnmail) | |
33 (require 'message) | |
34 (require 'mm-util) | |
35 (require 'gnus-util) | |
36 (require 'time-date) | |
37 (require 'rfc2231) | |
38 (require 'mm-url) | |
39 (eval-when-compile | |
40 (ignore-errors | |
41 (require 'xml))) | |
42 (eval '(require 'xml)) | |
43 | |
44 (nnoo-declare nnrss) | |
45 | |
46 (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") | |
47 "Where nnrss will save its files.") | |
48 | |
49 ;; (group max rss-url) | |
50 (defvoo nnrss-server-data nil) | |
51 | |
52 ;; (num timestamp url subject author date extra) | |
53 (defvoo nnrss-group-data nil) | |
54 (defvoo nnrss-group-max 0) | |
55 (defvoo nnrss-group-min 1) | |
56 (defvoo nnrss-group nil) | |
57 (defvoo nnrss-group-hashtb nil) | |
58 (defvoo nnrss-status-string "") | |
59 | |
60 (defconst nnrss-version "nnrss 1.0") | |
61 | |
62 (defvar nnrss-group-alist '() | |
63 "List of RSS addresses.") | |
64 | |
65 (defvar nnrss-use-local nil) | |
66 | |
67 (defvar nnrss-description-field 'X-Gnus-Description | |
68 "Field name used for DESCRIPTION. | |
69 To use the description in headers, put this name into `nnmail-extra-headers'.") | |
70 | |
71 (defvar nnrss-url-field 'X-Gnus-Url | |
72 "Field name used for URL. | |
73 To use the description in headers, put this name into `nnmail-extra-headers'.") | |
74 | |
75 (defvar nnrss-content-function nil | |
76 "A function which is called in `nnrss-request-article'. | |
77 The arguments are (ENTRY GROUP ARTICLE). | |
78 ENTRY is the record of the current headline. GROUP is the group name. | |
79 ARTICLE is the article number of the current headline.") | |
80 | |
81 (nnoo-define-basics nnrss) | |
82 | |
83 ;;; Interface functions | |
84 | |
85 (deffoo nnrss-retrieve-headers (articles &optional group server fetch-old) | |
86 (nnrss-possibly-change-group group server) | |
87 (let (e) | |
88 (save-excursion | |
89 (set-buffer nntp-server-buffer) | |
90 (erase-buffer) | |
91 (dolist (article articles) | |
92 (if (setq e (assq article nnrss-group-data)) | |
93 (insert (number-to-string (car e)) "\t" ;; number | |
94 (if (nth 3 e) | |
95 (nnrss-format-string (nth 3 e)) "") | |
96 "\t" ;; subject | |
97 (if (nth 4 e) | |
98 (nnrss-format-string (nth 4 e)) | |
99 "(nobody)") | |
100 "\t" ;;from | |
101 (or (nth 5 e) "") | |
102 "\t" ;; date | |
103 (format "<%d@%s.nnrss>" (car e) group) | |
104 "\t" ;; id | |
105 "\t" ;; refs | |
106 "-1" "\t" ;; chars | |
107 "-1" "\t" ;; lines | |
108 "" "\t" ;; Xref | |
109 (if (and (nth 6 e) | |
110 (memq nnrss-description-field | |
111 nnmail-extra-headers)) | |
112 (concat (symbol-name nnrss-description-field) | |
113 ": " | |
114 (nnrss-format-string (nth 6 e)) | |
115 "\t") | |
116 "") | |
117 (if (and (nth 2 e) | |
118 (memq nnrss-url-field | |
119 nnmail-extra-headers)) | |
120 (concat (symbol-name nnrss-url-field) | |
121 ": " | |
122 (nnrss-format-string (nth 2 e)) | |
123 "\t") | |
124 "") | |
125 "\n"))))) | |
126 'nov) | |
127 | |
128 (deffoo nnrss-request-group (group &optional server dont-check) | |
129 (nnrss-possibly-change-group group server) | |
130 (if dont-check | |
131 t | |
132 (nnrss-check-group group server) | |
133 (nnheader-report 'nnrss "Opened group %s" group) | |
134 (nnheader-insert | |
135 "211 %d %d %d %s\n" nnrss-group-max nnrss-group-min nnrss-group-max | |
136 (prin1-to-string group) | |
137 t))) | |
138 | |
139 (deffoo nnrss-close-group (group &optional server) | |
140 t) | |
141 | |
142 (deffoo nnrss-request-article (article &optional group server buffer) | |
143 (nnrss-possibly-change-group group server) | |
144 (let ((e (assq article nnrss-group-data)) | |
145 (boundary "=-=-=-=-=-=-=-=-=-") | |
146 (nntp-server-buffer (or buffer nntp-server-buffer)) | |
147 post err) | |
148 (when e | |
149 (catch 'error | |
150 (with-current-buffer nntp-server-buffer | |
151 (erase-buffer) | |
152 (goto-char (point-min)) | |
153 (insert "Mime-Version: 1.0\nContent-Type: multipart/alternative; boundary=\"" boundary "\"\n") | |
154 (if group | |
155 (insert "Newsgroups: " group "\n")) | |
156 (if (nth 3 e) | |
157 (insert "Subject: " (nnrss-format-string (nth 3 e)) "\n")) | |
158 (if (nth 4 e) | |
159 (insert "From: " (nnrss-format-string (nth 4 e)) "\n")) | |
160 (if (nth 5 e) | |
161 (insert "Date: " (nnrss-format-string (nth 5 e)) "\n")) | |
162 (insert "Message-ID: " (format "<%d@%s.nnrss>" (car e) group) "\n") | |
163 (insert "\n") | |
164 (let ((text (if (nth 6 e) | |
165 (nnrss-string-as-multibyte (nth 6 e)))) | |
166 (link (if (nth 2 e) | |
167 (nth 2 e)))) | |
168 (insert "\n\n--" boundary "\nContent-Type: text/plain\n\n") | |
169 (let ((point (point))) | |
170 (if text | |
171 (progn (insert text) | |
172 (goto-char point) | |
173 (while (re-search-forward "\n" nil t) | |
174 (replace-match " ")) | |
175 (goto-char (point-max)) | |
176 (insert "\n\n"))) | |
177 (if link | |
178 (insert link))) | |
179 (insert "\n\n--" boundary "\nContent-Type: text/html\n\n") | |
180 (let ((point (point))) | |
181 (if text | |
182 (progn (insert "<html><head></head><body>\n" text "\n</body></html>") | |
183 (goto-char point) | |
184 (while (re-search-forward "\n" nil t) | |
185 (replace-match " ")) | |
186 (goto-char (point-max)) | |
187 (insert "\n\n"))) | |
188 (if link | |
189 (insert "<p><a href=\"" link "\">link</a></p>\n")))) | |
190 (if nnrss-content-function | |
191 (funcall nnrss-content-function e group article))))) | |
192 (cond | |
193 (err | |
194 (nnheader-report 'nnrss err)) | |
195 ((not e) | |
196 (nnheader-report 'nnrss "no such id: %d" article)) | |
197 (t | |
198 (nnheader-report 'nnrss "article %s retrieved" (car e)) | |
199 ;; we return the article number. | |
200 (cons nnrss-group (car e)))))) | |
201 | |
202 (deffoo nnrss-request-list (&optional server) | |
203 (nnrss-possibly-change-group nil server) | |
204 (nnrss-generate-active) | |
205 t) | |
206 | |
207 (deffoo nnrss-open-server (server &optional defs connectionless) | |
208 (nnrss-read-server-data server) | |
209 (nnoo-change-server 'nnrss server defs) | |
210 t) | |
211 | |
212 (deffoo nnrss-request-expire-articles | |
213 (articles group &optional server force) | |
214 (nnrss-possibly-change-group group server) | |
215 (let (e days not-expirable changed) | |
216 (dolist (art articles) | |
217 (if (and (setq e (assq art nnrss-group-data)) | |
218 (nnmail-expired-article-p | |
219 group | |
220 (if (listp (setq days (nth 1 e))) days | |
221 (days-to-time (- days (time-to-days '(0 0))))) | |
222 force)) | |
223 (setq nnrss-group-data (delq e nnrss-group-data) | |
224 changed t) | |
225 (push art not-expirable))) | |
226 (if changed | |
227 (nnrss-save-group-data group server)) | |
228 not-expirable)) | |
229 | |
230 (deffoo nnrss-request-delete-group (group &optional force server) | |
231 (nnrss-possibly-change-group group server) | |
232 (setq nnrss-server-data | |
233 (delq (assoc group nnrss-server-data) nnrss-server-data)) | |
234 (nnrss-save-server-data server) | |
235 (let ((file (expand-file-name | |
236 (nnrss-translate-file-chars | |
237 (concat group (and server | |
238 (not (equal server "")) | |
239 "-") | |
240 server ".el")) nnrss-directory))) | |
241 (ignore-errors | |
242 (delete-file file))) | |
243 t) | |
244 | |
245 (deffoo nnrss-request-list-newsgroups (&optional server) | |
246 (nnrss-possibly-change-group nil server) | |
247 (save-excursion | |
248 (set-buffer nntp-server-buffer) | |
249 (erase-buffer) | |
250 (dolist (elem nnrss-group-alist) | |
251 (if (third elem) | |
252 (insert (car elem) "\t" (third elem) "\n")))) | |
253 t) | |
254 | |
255 (nnoo-define-skeleton nnrss) | |
256 | |
257 ;;; Internal functions | |
258 (eval-when-compile (defun xml-rpc-method-call (&rest args))) | |
259 (defun nnrss-fetch (url &optional local) | |
260 "Fetch the url and put it in a the expected lisp structure." | |
261 (with-temp-buffer | |
262 ;some CVS versions of url.el need this to close the connection quickly | |
263 (let* (xmlform htmlform) | |
264 ;; bit o' work necessary for w3 pre-cvs and post-cvs | |
265 (if local | |
266 (let ((coding-system-for-read 'binary)) | |
267 (insert-file-contents url)) | |
268 (mm-url-insert url)) | |
269 | |
270 ;; Because xml-parse-region can't deal with anything that isn't | |
271 ;; xml and w3-parse-buffer can't deal with some xml, we have to | |
272 ;; parse with xml-parse-region first and, if that fails, parse | |
273 ;; with w3-parse-buffer. Yuck. Eventually, someone should find out | |
274 ;; why w3-parse-buffer fails to parse some well-formed xml and | |
275 ;; fix it. | |
276 | |
277 (condition-case err | |
278 (setq xmlform (xml-parse-region (point-min) (point-max))) | |
279 (error (if (fboundp 'w3-parse-buffer) | |
280 (setq htmlform (caddar (w3-parse-buffer | |
281 (current-buffer)))) | |
282 (message "nnrss: Not valid XML and w3 parse not available (%s)" | |
283 url)))) | |
284 (if htmlform | |
285 htmlform | |
286 xmlform)))) | |
287 | |
288 (defun nnrss-possibly-change-group (&optional group server) | |
289 (when (and server | |
290 (not (nnrss-server-opened server))) | |
291 (nnrss-open-server server)) | |
292 (when (and group (not (equal group nnrss-group))) | |
293 (nnrss-read-group-data group server) | |
294 (setq nnrss-group group))) | |
295 | |
296 (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) | |
297 | |
298 (defun nnrss-generate-active () | |
299 (if (y-or-n-p "fetch extra categories? ") | |
300 (dolist (func nnrss-extra-categories) | |
301 (funcall func))) | |
302 (save-excursion | |
303 (set-buffer nntp-server-buffer) | |
304 (erase-buffer) | |
305 (dolist (elem nnrss-group-alist) | |
306 (insert (prin1-to-string (car elem)) " 0 1 y\n")) | |
307 (dolist (elem nnrss-server-data) | |
308 (unless (assoc (car elem) nnrss-group-alist) | |
309 (insert (prin1-to-string (car elem)) " 0 1 y\n"))))) | |
310 | |
311 ;;; data functions | |
312 | |
313 (defun nnrss-read-server-data (server) | |
314 (setq nnrss-server-data nil) | |
315 (let ((file (expand-file-name | |
316 (nnrss-translate-file-chars | |
317 (concat "nnrss" (and server | |
318 (not (equal server "")) | |
319 "-") | |
320 server | |
321 ".el")) | |
322 nnrss-directory))) | |
323 (when (file-exists-p file) | |
324 (with-temp-buffer | |
325 (let ((coding-system-for-read 'binary) | |
326 emacs-lisp-mode-hook) | |
327 (insert-file-contents file) | |
328 (emacs-lisp-mode) | |
329 (goto-char (point-min)) | |
330 (eval-buffer)))))) | |
331 | |
332 (defun nnrss-save-server-data (server) | |
333 (gnus-make-directory nnrss-directory) | |
334 (let ((file (expand-file-name | |
335 (nnrss-translate-file-chars | |
336 (concat "nnrss" (and server | |
337 (not (equal server "")) | |
338 "-") | |
339 server ".el")) | |
340 nnrss-directory))) | |
341 (let ((coding-system-for-write 'binary) | |
342 print-level print-length) | |
343 (with-temp-file file | |
344 (insert "(setq nnrss-group-alist '" | |
345 (prin1-to-string nnrss-group-alist) | |
346 ")\n") | |
347 (insert "(setq nnrss-server-data '" | |
348 (prin1-to-string nnrss-server-data) | |
349 ")\n"))))) | |
350 | |
351 (defun nnrss-read-group-data (group server) | |
352 (setq nnrss-group-data nil) | |
353 (setq nnrss-group-hashtb (gnus-make-hashtable)) | |
354 (let ((pair (assoc group nnrss-server-data))) | |
355 (setq nnrss-group-max (or (cadr pair) 0)) | |
356 (setq nnrss-group-min (+ nnrss-group-max 1))) | |
357 (let ((file (expand-file-name | |
358 (nnrss-translate-file-chars | |
359 (concat group (and server | |
360 (not (equal server "")) | |
361 "-") | |
362 server ".el")) | |
363 nnrss-directory))) | |
364 (when (file-exists-p file) | |
365 (with-temp-buffer | |
366 (let ((coding-system-for-read 'binary) | |
367 emacs-lisp-mode-hook) | |
368 (insert-file-contents file) | |
369 (emacs-lisp-mode) | |
370 (goto-char (point-min)) | |
371 (eval-buffer))) | |
372 (dolist (e nnrss-group-data) | |
373 (gnus-sethash (nth 2 e) e nnrss-group-hashtb) | |
374 (if (and (car e) (> nnrss-group-min (car e))) | |
375 (setq nnrss-group-min (car e))) | |
376 (if (and (car e) (< nnrss-group-max (car e))) | |
377 (setq nnrss-group-max (car e))))))) | |
378 | |
379 (defun nnrss-save-group-data (group server) | |
380 (gnus-make-directory nnrss-directory) | |
381 (let ((file (expand-file-name | |
382 (nnrss-translate-file-chars | |
383 (concat group (and server | |
384 (not (equal server "")) | |
385 "-") | |
386 server ".el")) | |
387 nnrss-directory))) | |
388 (let ((coding-system-for-write 'binary) | |
389 print-level print-length) | |
390 (with-temp-file file | |
391 (insert "(setq nnrss-group-data '" | |
392 (prin1-to-string nnrss-group-data) | |
393 ")\n"))))) | |
394 | |
395 ;;; URL interface | |
396 | |
397 (defun nnrss-no-cache (url) | |
398 "") | |
399 | |
400 (defun nnrss-insert-w3 (url) | |
401 (mm-with-unibyte-current-buffer | |
402 (mm-url-insert url))) | |
403 | |
404 (defun nnrss-decode-entities-unibyte-string (string) | |
405 (if string | |
406 (mm-with-unibyte-buffer | |
407 (insert string) | |
408 (mm-url-decode-entities-nbsp) | |
409 (buffer-string)))) | |
410 | |
411 (defalias 'nnrss-insert 'nnrss-insert-w3) | |
412 | |
413 (if (featurep 'xemacs) | |
414 (defalias 'nnrss-string-as-multibyte 'identity) | |
415 (defalias 'nnrss-string-as-multibyte 'string-as-multibyte)) | |
416 | |
417 ;;; Snarf functions | |
418 | |
419 (defun nnrss-check-group (group server) | |
420 (let (file xml subject url extra changed author | |
421 date rss-ns rdf-ns content-ns dc-ns) | |
422 (if (and nnrss-use-local | |
423 (file-exists-p (setq file (expand-file-name | |
424 (nnrss-translate-file-chars | |
425 (concat group ".xml")) | |
426 nnrss-directory)))) | |
427 (setq xml (nnrss-fetch file t)) | |
428 (setq url (or (nth 2 (assoc group nnrss-server-data)) | |
429 (second (assoc group nnrss-group-alist)))) | |
430 (unless url | |
431 (setq url | |
432 (cdr | |
433 (assoc 'href | |
434 (nnrss-discover-feed | |
435 (read-string | |
436 (format "URL to search for %s: " group) "http://"))))) | |
437 (let ((pair (assoc group nnrss-server-data))) | |
438 (if pair | |
439 (setcdr (cdr pair) (list url)) | |
440 (push (list group nnrss-group-max url) nnrss-server-data))) | |
441 (setq changed t)) | |
442 (setq xml (nnrss-fetch url))) | |
443 ;; See | |
444 ;; http://feeds.archive.org/validator/docs/howto/declare_namespaces.html | |
445 ;; for more RSS namespaces. | |
446 (setq dc-ns (nnrss-get-namespace-prefix xml "http://purl.org/dc/elements/1.1/") | |
447 rdf-ns (nnrss-get-namespace-prefix xml "http://www.w3.org/1999/02/22-rdf-syntax-ns#") | |
448 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/") | |
449 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/")) | |
450 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) | |
451 (when (and (listp item) | |
452 (eq (intern (concat rss-ns "item")) (car item)) | |
453 (setq url (nnrss-decode-entities-unibyte-string | |
454 (nnrss-node-text rss-ns 'link (cddr item)))) | |
455 (not (gnus-gethash url nnrss-group-hashtb))) | |
456 (setq subject (nnrss-node-text rss-ns 'title item)) | |
457 (setq extra (or (nnrss-node-text content-ns 'encoded item) | |
458 (nnrss-node-text rss-ns 'description item))) | |
459 (setq author (or (nnrss-node-text rss-ns 'author item) | |
460 (nnrss-node-text dc-ns 'creator item) | |
461 (nnrss-node-text dc-ns 'contributor item))) | |
462 (setq date (or (nnrss-node-text dc-ns 'date item) | |
463 (nnrss-node-text rss-ns 'pubDate item) | |
464 (message-make-date))) | |
465 (push | |
466 (list | |
467 (incf nnrss-group-max) | |
468 (current-time) | |
469 url | |
470 (and subject (nnrss-decode-entities-unibyte-string subject)) | |
471 (and author (nnrss-decode-entities-unibyte-string author)) | |
472 date | |
473 (and extra (nnrss-decode-entities-unibyte-string extra))) | |
474 nnrss-group-data) | |
475 (gnus-sethash url (car nnrss-group-data) nnrss-group-hashtb) | |
476 (setq changed t))) | |
477 (when changed | |
478 (nnrss-save-group-data group server) | |
479 (let ((pair (assoc group nnrss-server-data))) | |
480 (if pair | |
481 (setcar (cdr pair) nnrss-group-max) | |
482 (push (list group nnrss-group-max) nnrss-server-data))) | |
483 (nnrss-save-server-data server)))) | |
484 | |
485 (defun nnrss-generate-download-script () | |
486 "Generate a download script in the current buffer. | |
487 It is useful when `(setq nnrss-use-local t)'." | |
488 (interactive) | |
489 (insert "#!/bin/sh\n") | |
490 (insert "WGET=wget\n") | |
491 (insert "RSSDIR='" (expand-file-name nnrss-directory) "'\n") | |
492 (dolist (elem nnrss-server-data) | |
493 (let ((url (or (nth 2 elem) | |
494 (second (assoc (car elem) nnrss-group-alist))))) | |
495 (insert "$WGET -q -O \"$RSSDIR\"/'" | |
496 (nnrss-translate-file-chars (concat (car elem) ".xml")) | |
497 "' '" url "'\n")))) | |
498 | |
499 (defun nnrss-translate-file-chars (name) | |
500 (let ((nnheader-file-name-translation-alist | |
501 (append nnheader-file-name-translation-alist '((?' . ?_))))) | |
502 (nnheader-translate-file-chars name))) | |
503 | |
504 (defvar nnrss-moreover-url | |
505 "http://w.moreover.com/categories/category_list_rss.html" | |
506 "The url of moreover.com categories.") | |
507 | |
508 (defun nnrss-snarf-moreover-categories () | |
509 "Snarf RSS links from moreover.com." | |
510 (interactive) | |
511 (let (category name url changed) | |
512 (with-temp-buffer | |
513 (nnrss-insert nnrss-moreover-url) | |
514 (goto-char (point-min)) | |
515 (while (re-search-forward | |
516 "<a name=\"\\([^\"]+\\)\">\\|<a href=\"\\(http://[^\"]*moreover\\.com[^\"]+page\\?c=\\([^\"&]+\\)&o=rss\\)" nil t) | |
517 (if (match-string 1) | |
518 (setq category (match-string 1)) | |
519 (setq url (match-string 2) | |
520 name (mm-url-decode-entities-string | |
521 (rfc2231-decode-encoded-string | |
522 (match-string 3)))) | |
523 (if category | |
524 (setq name (concat category "." name))) | |
525 (unless (assoc name nnrss-server-data) | |
526 (setq changed t) | |
527 (push (list name 0 url) nnrss-server-data))))) | |
528 (if changed | |
529 (nnrss-save-server-data "")))) | |
530 | |
531 (defun nnrss-format-string (string) | |
532 (gnus-replace-in-string (nnrss-string-as-multibyte string) " *\n *" " ")) | |
533 | |
534 (defun nnrss-node-text (namespace local-name element) | |
535 (let* ((node (assq (intern (concat namespace (symbol-name local-name))) | |
536 element)) | |
537 (text (if (and node (listp node)) | |
538 (nnrss-node-just-text node) | |
539 node)) | |
540 (cleaned-text (if text (gnus-replace-in-string | |
541 text "^[\000-\037\177]+\\|^ +\\| +$" "")))) | |
542 (if (string-equal "" cleaned-text) | |
543 nil | |
544 cleaned-text))) | |
545 | |
546 (defun nnrss-node-just-text (node) | |
547 (if (and node (listp node)) | |
548 (mapconcat 'nnrss-node-just-text (cddr node) " ") | |
549 node)) | |
550 | |
551 (defun nnrss-find-el (tag data &optional found-list) | |
552 "Find the all matching elements in the data. Careful with this on | |
553 large documents!" | |
554 (if (listp data) | |
555 (mapcar (lambda (bit) | |
556 (if (car-safe bit) | |
557 (progn (if (equal tag (car bit)) | |
558 (setq found-list | |
559 (append found-list | |
560 (list bit)))) | |
561 (if (and (listp (car-safe (caddr bit))) | |
562 (not (stringp (caddr bit)))) | |
563 (setq found-list | |
564 (append found-list | |
565 (nnrss-find-el | |
566 tag (caddr bit)))) | |
567 (setq found-list | |
568 (append found-list | |
569 (nnrss-find-el | |
570 tag (cddr bit)))))))) | |
571 data)) | |
572 found-list) | |
573 | |
574 (defun nnrss-rsslink-p (el) | |
575 "Test if the element we are handed is an RSS autodiscovery link." | |
576 (and (eq (car-safe el) 'link) | |
577 (string-equal (cdr (assoc 'rel (cadr el))) "alternate") | |
578 (or (string-equal (cdr (assoc 'type (cadr el))) | |
579 "application/rss+xml") | |
580 (string-equal (cdr (assoc 'type (cadr el))) "text/xml")))) | |
581 | |
582 (defun nnrss-get-rsslinks (data) | |
583 "Extract the <link> elements that are links to RSS from the parsed data." | |
584 (delq nil (mapcar | |
585 (lambda (el) | |
586 (if (nnrss-rsslink-p el) el)) | |
587 (nnrss-find-el 'link data)))) | |
588 | |
589 (defun nnrss-extract-hrefs (data) | |
590 "Recursively extract hrefs from a page's source. DATA should be | |
591 the output of xml-parse-region or w3-parse-buffer." | |
592 (mapcar (lambda (ahref) | |
593 (cdr (assoc 'href (cadr ahref)))) | |
594 (nnrss-find-el 'a data))) | |
595 | |
596 (defmacro nnrss-match-macro (base-uri item | |
597 onsite-list offsite-list) | |
598 `(cond ((or (string-match (concat "^" ,base-uri) ,item) | |
599 (not (string-match "://" ,item))) | |
600 (setq ,onsite-list (append ,onsite-list (list ,item)))) | |
601 (t (setq ,offsite-list (append ,offsite-list (list ,item)))))) | |
602 | |
603 (defun nnrss-order-hrefs (base-uri hrefs) | |
604 "Given a list of hrefs, sort them using the following priorities: | |
605 1. links ending in .rss | |
606 2. links ending in .rdf | |
607 3. links ending in .xml | |
608 4. links containing the above | |
609 5. offsite links | |
610 | |
611 BASE-URI is used to determine the location of the links and | |
612 whether they are `offsite' or `onsite'." | |
613 (let (rss-onsite-end rdf-onsite-end xml-onsite-end | |
614 rss-onsite-in rdf-onsite-in xml-onsite-in | |
615 rss-offsite-end rdf-offsite-end xml-offsite-end | |
616 rss-offsite-in rdf-offsite-in xml-offsite-in) | |
617 (mapcar (lambda (href) | |
618 (if (not (null href)) | |
619 (cond ((string-match "\\.rss$" href) | |
620 (nnrss-match-macro | |
621 base-uri href rss-onsite-end rss-offsite-end)) | |
622 ((string-match "\\.rdf$" href) | |
623 (nnrss-match-macro | |
624 base-uri href rdf-onsite-end rdf-offsite-end)) | |
625 ((string-match "\\.xml$" href) | |
626 (nnrss-match-macro | |
627 base-uri href xml-onsite-end xml-offsite-end)) | |
628 ((string-match "rss" href) | |
629 (nnrss-match-macro | |
630 base-uri href rss-onsite-in rss-offsite-in)) | |
631 ((string-match "rdf" href) | |
632 (nnrss-match-macro | |
633 base-uri href rdf-onsite-in rdf-offsite-in)) | |
634 ((string-match "xml" href) | |
635 (nnrss-match-macro | |
636 base-uri href xml-onsite-in xml-offsite-in))))) | |
637 hrefs) | |
638 (append | |
639 rss-onsite-end rdf-onsite-end xml-onsite-end | |
640 rss-onsite-in rdf-onsite-in xml-onsite-in | |
641 rss-offsite-end rdf-offsite-end xml-offsite-end | |
642 rss-offsite-in rdf-offsite-in xml-offsite-in))) | |
643 | |
644 (defun nnrss-discover-feed (url) | |
645 "Given a page, find an RSS feed using Mark Pilgrim's | |
646 `ultra-liberal rss locator' (http://diveintomark.org/2002/08/15.html)." | |
647 | |
648 (let ((parsed-page (nnrss-fetch url))) | |
649 | |
650 ;; 1. if this url is the rss, use it. | |
651 (if (nnrss-rss-p parsed-page) | |
652 (let ((rss-ns (nnrss-get-namespace-prefix parsed-page "http://purl.org/rss/1.0/"))) | |
653 (nnrss-rss-title-description rss-ns parsed-page url)) | |
654 | |
655 ;; 2. look for the <link rel="alternate" | |
656 ;; type="application/rss+xml" and use that if it is there. | |
657 (let ((links (nnrss-get-rsslinks parsed-page))) | |
658 (if links | |
659 (let* ((xml (nnrss-fetch | |
660 (cdr (assoc 'href (cadar links))))) | |
661 (rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/"))) | |
662 (nnrss-rss-title-description rss-ns xml (cdr (assoc 'href (cadar links))))) | |
663 | |
664 ;; 3. look for links on the site in the following order: | |
665 ;; - onsite links ending in .rss, .rdf, or .xml | |
666 ;; - onsite links containing any of the above | |
667 ;; - offsite links ending in .rss, .rdf, or .xml | |
668 ;; - offsite links containing any of the above | |
669 (let* ((base-uri (progn (string-match ".*://[^/]+/?" url) | |
670 (match-string 0 url))) | |
671 (hrefs (nnrss-order-hrefs | |
672 base-uri (nnrss-extract-hrefs parsed-page))) | |
673 (rss-link nil)) | |
674 (while (and (eq rss-link nil) (not (eq hrefs nil))) | |
675 (let ((href-data (nnrss-fetch (car hrefs)))) | |
676 (if (nnrss-rss-p href-data) | |
677 (let* ((rss-ns (nnrss-get-namespace-prefix href-data "http://purl.org/rss/1.0/"))) | |
678 (setq rss-link (nnrss-rss-title-description | |
679 rss-ns href-data (car hrefs)))) | |
680 (setq hrefs (cdr hrefs))))) | |
681 (if rss-link rss-link | |
682 | |
683 ;; 4. check syndic8 | |
684 (nnrss-find-rss-via-syndic8 url)))))))) | |
685 | |
686 (defun nnrss-find-rss-via-syndic8 (url) | |
687 "query syndic8 for the rss feeds it has for the url." | |
688 (if (not (locate-library "xml-rpc")) | |
689 (progn | |
690 (message "XML-RPC is not available... not checking Syndic8.") | |
691 nil) | |
692 (require 'xml-rpc) | |
693 (let ((feedid (xml-rpc-method-call | |
694 "http://www.syndic8.com/xmlrpc.php" | |
695 'syndic8.FindSites | |
696 url))) | |
697 (when feedid | |
698 (let* ((feedinfo (xml-rpc-method-call | |
699 "http://www.syndic8.com/xmlrpc.php" | |
700 'syndic8.GetFeedInfo | |
701 feedid)) | |
702 (urllist | |
703 (delq nil | |
704 (mapcar | |
705 (lambda (listinfo) | |
706 (if (string-equal | |
707 (cdr (assoc "status" listinfo)) | |
708 "Syndicated") | |
709 (cons | |
710 (cdr (assoc "sitename" listinfo)) | |
711 (list | |
712 (cons 'title | |
713 (cdr (assoc | |
714 "sitename" listinfo))) | |
715 (cons 'href | |
716 (cdr (assoc | |
717 "dataurl" listinfo))))))) | |
718 feedinfo)))) | |
719 (if (not (> (length urllist) 1)) | |
720 (cdar urllist) | |
721 (let ((completion-ignore-case t) | |
722 (selection | |
723 (mapcar (lambda (listinfo) | |
724 (cons (cdr (assoc "sitename" listinfo)) | |
725 (string-to-int | |
726 (cdr (assoc "feedid" listinfo))))) | |
727 feedinfo))) | |
728 (cdr (assoc | |
729 (completing-read | |
730 "Multiple feeds found. Select one: " | |
731 selection nil t) urllist))))))))) | |
732 | |
733 (defun nnrss-rss-p (data) | |
734 "Test if data is an RSS feed. Simply ensures that the first | |
735 element is rss or rdf." | |
736 (or (eq (caar data) 'rss) | |
737 (eq (caar data) 'rdf:RDF))) | |
738 | |
739 (defun nnrss-rss-title-description (rss-namespace data url) | |
740 "Return the title of an RSS feed." | |
741 (if (nnrss-rss-p data) | |
742 (let ((description (intern (concat rss-namespace "description"))) | |
743 (title (intern (concat rss-namespace "title"))) | |
744 (channel (nnrss-find-el (intern (concat rss-namespace "channel")) | |
745 data))) | |
746 (list | |
747 (cons 'description (caddr (nth 0 (nnrss-find-el description channel)))) | |
748 (cons 'title (caddr (nth 0 (nnrss-find-el title channel)))) | |
749 (cons 'href url))))) | |
750 | |
751 (defun nnrss-get-namespace-prefix (el uri) | |
752 "Given EL (containing a parsed element) and URI (containing a string | |
753 that gives the URI for which you want to retrieve the namespace | |
754 prefix), return the prefix." | |
755 (let* ((prefix (car (rassoc uri (cadar el)))) | |
756 (nslist (if prefix | |
757 (split-string (symbol-name prefix) ":"))) | |
758 (ns (cond ((eq (length nslist) 1) ; no prefix given | |
759 "") | |
760 ((eq (length nslist) 2) ; extract prefix | |
761 (cadr nslist))))) | |
762 (if (and ns (not (eq ns ""))) | |
763 (concat ns ":") | |
764 ns))) | |
765 | |
766 (provide 'nnrss) | |
767 | |
768 | |
769 ;;; nnrss.el ends here | |
770 | |
771 ;;; arch-tag: 12910c07-0cdf-44fb-8d2c-416ded64c267 |