comparison lisp/gnus/nnrss.el @ 85712:a3c27999decb

Update Gnus to No Gnus 0.7 from the Gnus CVS trunk Revision: emacs@sv.gnu.org/emacs--devo--0--patch-911
author Miles Bader <miles@gnu.org>
date Sun, 28 Oct 2007 09:18:39 +0000
parents 24202b793a08
children 1cdfc94602cb
comparison
equal deleted inserted replaced
85711:b6f5dc84b2e1 85712:a3c27999decb
48 (nnoo-declare nnrss) 48 (nnoo-declare nnrss)
49 49
50 (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/") 50 (defvoo nnrss-directory (nnheader-concat gnus-directory "rss/")
51 "Where nnrss will save its files.") 51 "Where nnrss will save its files.")
52 52
53 (defvoo nnrss-ignore-article-fields '(slash:comments)
54 "*List of fields that should be ignored when comparing RSS articles.
55 Some RSS feeds update article fields during their lives, e.g. to
56 indicate the number of comments or the number of times the
57 articles have been seen. However, if there is a difference
58 between the local article and the distant one, the latter is
59 considered to be new. To avoid this and discard some fields, set
60 this variable to the list of fields to be ignored.")
61
53 ;; (group max rss-url) 62 ;; (group max rss-url)
54 (defvoo nnrss-server-data nil) 63 (defvoo nnrss-server-data nil)
55 64
56 ;; (num timestamp url subject author date extra) 65 ;; (num timestamp url subject author date extra)
57 (defvoo nnrss-group-data nil) 66 (defvoo nnrss-group-data nil)
58 (defvoo nnrss-group-max 0) 67 (defvoo nnrss-group-max 0)
59 (defvoo nnrss-group-min 1) 68 (defvoo nnrss-group-min 1)
60 (defvoo nnrss-group nil) 69 (defvoo nnrss-group nil)
61 (defvoo nnrss-group-hashtb nil) 70 (defvoo nnrss-group-hashtb (make-hash-table :test 'equal))
62 (defvoo nnrss-status-string "") 71 (defvoo nnrss-status-string "")
63 72
64 (defconst nnrss-version "nnrss 1.0") 73 (defconst nnrss-version "nnrss 1.0")
65 74
66 (defvar nnrss-group-alist '() 75 (defvar nnrss-group-alist '()
81 The arguments are (ENTRY GROUP ARTICLE). 90 The arguments are (ENTRY GROUP ARTICLE).
82 ENTRY is the record of the current headline. GROUP is the group name. 91 ENTRY is the record of the current headline. GROUP is the group name.
83 ARTICLE is the article number of the current headline.") 92 ARTICLE is the article number of the current headline.")
84 93
85 (defvar nnrss-file-coding-system mm-universal-coding-system 94 (defvar nnrss-file-coding-system mm-universal-coding-system
86 "Coding system used when reading and writing files.") 95 "*Coding system used when reading and writing files.
96 If you run Gnus with various versions of Emacsen, the value of this
97 variable should be the coding system that all those Emacsen support.
98 Note that you have to regenerate all the nnrss groups if you change
99 the value. Moreover, you should be patient even if you are made to
100 read the same articles twice, that arises for the difference of the
101 versions of xml.el.")
87 102
88 (defvar nnrss-compatible-encoding-alist 103 (defvar nnrss-compatible-encoding-alist
89 (delq nil (mapcar (lambda (elem) 104 (delq nil (mapcar (lambda (elem)
90 (if (and (mm-coding-system-p (car elem)) 105 (if (and (mm-coding-system-p (car elem))
91 (mm-coding-system-p (cdr elem))) 106 (mm-coding-system-p (cdr elem)))
363 (setq nnrss-group-alist (delq elem nnrss-group-alist)))) 378 (setq nnrss-group-alist (delq elem nnrss-group-alist))))
364 (setq nnrss-server-data 379 (setq nnrss-server-data
365 (delq (assoc group nnrss-server-data) nnrss-server-data)) 380 (delq (assoc group nnrss-server-data) nnrss-server-data))
366 (nnrss-save-server-data server) 381 (nnrss-save-server-data server)
367 (ignore-errors 382 (ignore-errors
368 (delete-file (nnrss-make-filename group server))) 383 (let ((file-name-coding-system nnmail-pathname-coding-system))
384 (delete-file (nnrss-make-filename group server))))
369 t) 385 t)
370 386
371 (deffoo nnrss-request-list-newsgroups (&optional server) 387 (deffoo nnrss-request-list-newsgroups (&optional server)
372 (nnrss-possibly-change-group nil server) 388 (nnrss-possibly-change-group nil server)
373 (save-excursion 389 (save-excursion
389 it is used instead. If the xml contents doesn't specify the encoding, 405 it is used instead. If the xml contents doesn't specify the encoding,
390 return `utf-8' which is the default encoding for xml if it is available, 406 return `utf-8' which is the default encoding for xml if it is available,
391 otherwise return nil." 407 otherwise return nil."
392 (goto-char (point-min)) 408 (goto-char (point-min))
393 (if (re-search-forward 409 (if (re-search-forward
394 "<\\?[^>]*encoding=\\(\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)" 410 "<\\?[^>]*encoding=\\(?:\"\\([^\">]+\\)\"\\|'\\([^'>]+\\)'\\)"
395 nil t) 411 nil t)
396 (let ((encoding (intern (downcase (or (match-string 2) 412 (let ((encoding (intern (downcase (or (match-string 1)
397 (match-string 3)))))) 413 (match-string 2))))))
398 (or 414 (or
399 (mm-coding-system-p (cdr (assq encoding 415 (mm-coding-system-p (cdr (assq encoding
400 nnrss-compatible-encoding-alist))) 416 nnrss-compatible-encoding-alist)))
401 (mm-coding-system-p encoding) 417 (mm-coding-system-p encoding)
402 (mm-coding-system-p (car (rassq encoding 418 (mm-coding-system-p (car (rassq encoding
460 476
461 (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories)) 477 (defvar nnrss-extra-categories '(nnrss-snarf-moreover-categories))
462 478
463 (defun nnrss-generate-active () 479 (defun nnrss-generate-active ()
464 (when (y-or-n-p "Fetch extra categories? ") 480 (when (y-or-n-p "Fetch extra categories? ")
465 (dolist (func nnrss-extra-categories) 481 (mapc 'funcall nnrss-extra-categories))
466 (funcall func)))
467 (save-excursion 482 (save-excursion
468 (set-buffer nntp-server-buffer) 483 (set-buffer nntp-server-buffer)
469 (erase-buffer) 484 (erase-buffer)
470 (dolist (elem nnrss-group-alist) 485 (dolist (elem nnrss-group-alist)
471 (insert (prin1-to-string (car elem)) " 0 1 y\n")) 486 (insert (prin1-to-string (car elem)) " 0 1 y\n"))
498 ((string-match 513 ((string-match
499 (eval-when-compile 514 (eval-when-compile
500 (concat 515 (concat
501 ;; 1. year 516 ;; 1. year
502 "\\(199[0-9]\\|20[0-9][0-9]\\)" 517 "\\(199[0-9]\\|20[0-9][0-9]\\)"
503 "\\(-" 518 "\\(?:-"
504 ;; 3. month 519 ;; 2. month
505 "\\([01][0-9]\\)" 520 "\\([01][0-9]\\)"
506 "\\(-" 521 "\\(?:-"
507 ;; 5. day 522 ;; 3. day
508 "\\([0-3][0-9]\\)" 523 "\\([0-3][0-9]\\)"
509 "\\)?\\)?\\(T" 524 "\\)?\\)?\\(?:T"
510 ;; 7. hh:mm 525 ;; 4. hh:mm
511 "\\([012][0-9]:[0-5][0-9]\\)" 526 "\\([012][0-9]:[0-5][0-9]\\)"
512 "\\(" 527 "\\(?:"
513 ;; 9. :ss 528 ;; 5. :ss
514 "\\(:[0-5][0-9]\\)" 529 "\\(:[0-5][0-9]\\)"
515 "\\(\\.[0-9]+\\)?\\)?\\)?" 530 "\\(?:\\.[0-9]+\\)?\\)?\\)?"
516 ;; 13+14,15,16. zone 531 ;; 6+7,8,9. zone
517 "\\(\\(\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)" 532 "\\(?:\\(?:\\([+-][012][0-9]\\):\\([0-5][0-9]\\)\\)"
518 "\\|\\([+-][012][0-9][0-5][0-9]\\)" 533 "\\|\\([+-][012][0-9][0-5][0-9]\\)"
519 "\\|\\(Z\\)\\)?")) 534 "\\|\\(Z\\)\\)?"))
520 date) 535 date)
521 (setq year (string-to-number (match-string 1 date)) 536 (setq year (string-to-number (match-string 1 date))
522 month (string-to-number (or (match-string 3 date) "1")) 537 month (string-to-number (or (match-string 2 date) "1"))
523 day (string-to-number (or (match-string 5 date) "1")) 538 day (string-to-number (or (match-string 3 date) "1"))
524 time (if (match-beginning 9) 539 time (if (match-beginning 5)
525 (substring date (match-beginning 7) (match-end 9)) 540 (substring date (match-beginning 4) (match-end 5))
526 (concat (or (match-string 7 date) "00:00") ":00")) 541 (concat (or (match-string 4 date) "00:00") ":00"))
527 zone (cond ((match-beginning 13) 542 zone (cond ((match-beginning 6)
528 (concat (match-string 13 date) 543 (concat (match-string 6 date)
529 (match-string 14 date))) 544 (match-string 7 date)))
530 ((match-beginning 16) ;; Z 545 ((match-beginning 9) ;; Z
531 "+0000") 546 "+0000")
532 (t ;; nil if zone is not provided. 547 (t ;; nil if zone is not provided.
533 (match-string 15 date)))))) 548 (match-string 8 date))))))
534 (if month 549 (if month
535 (progn 550 (progn
536 (setq cts (current-time-string (encode-time 0 0 0 day month year))) 551 (setq cts (current-time-string (encode-time 0 0 0 day month year)))
537 (format "%s, %02d %s %04d %s%s" 552 (format "%s, %02d %s %04d %s%s"
538 (substring cts 0 3) day (substring cts 4 7) year time 553 (substring cts 0 3) day (substring cts 4 7) year time
543 558
544 ;;; data functions 559 ;;; data functions
545 560
546 (defun nnrss-read-server-data (server) 561 (defun nnrss-read-server-data (server)
547 (setq nnrss-server-data nil) 562 (setq nnrss-server-data nil)
548 (let ((file (nnrss-make-filename "nnrss" server))) 563 (let ((file (nnrss-make-filename "nnrss" server))
564 (file-name-coding-system nnmail-pathname-coding-system))
549 (when (file-exists-p file) 565 (when (file-exists-p file)
550 ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII 566 ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
551 ;; file names. So, we use `insert-file-contents' instead. 567 ;; file names. So, we use `insert-file-contents' instead.
552 (mm-with-multibyte-buffer 568 (mm-with-multibyte-buffer
553 (let ((coding-system-for-read nnrss-file-coding-system) 569 (let ((coding-system-for-read nnrss-file-coding-system))
554 (file-name-coding-system nnmail-pathname-coding-system))
555 (insert-file-contents file) 570 (insert-file-contents file)
556 (eval-region (point-min) (point-max))))))) 571 (eval-region (point-min) (point-max)))))))
557 572
558 (defun nnrss-save-server-data (server) 573 (defun nnrss-save-server-data (server)
559 (gnus-make-directory nnrss-directory) 574 (gnus-make-directory nnrss-directory)
566 (insert "\n") 581 (insert "\n")
567 (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data))))) 582 (gnus-prin1 `(setq nnrss-server-data ',nnrss-server-data)))))
568 583
569 (defun nnrss-read-group-data (group server) 584 (defun nnrss-read-group-data (group server)
570 (setq nnrss-group-data nil) 585 (setq nnrss-group-data nil)
571 (setq nnrss-group-hashtb (gnus-make-hashtable)) 586 (if (hash-table-p nnrss-group-hashtb)
587 (clrhash nnrss-group-hashtb)
588 (setq nnrss-group-hashtb (make-hash-table :test 'equal)))
572 (let ((pair (assoc group nnrss-server-data))) 589 (let ((pair (assoc group nnrss-server-data)))
573 (setq nnrss-group-max (or (cadr pair) 0)) 590 (setq nnrss-group-max (or (cadr pair) 0))
574 (setq nnrss-group-min (+ nnrss-group-max 1))) 591 (setq nnrss-group-min (+ nnrss-group-max 1)))
575 (let ((file (nnrss-make-filename group server))) 592 (let ((file (nnrss-make-filename group server))
593 (file-name-coding-system nnmail-pathname-coding-system))
576 (when (file-exists-p file) 594 (when (file-exists-p file)
577 ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII 595 ;; In Emacs 21.3 and earlier, `load' doesn't support non-ASCII
578 ;; file names. So, we use `insert-file-contents' instead. 596 ;; file names. So, we use `insert-file-contents' instead.
579 (mm-with-multibyte-buffer 597 (mm-with-multibyte-buffer
580 (let ((coding-system-for-read nnrss-file-coding-system) 598 (let ((coding-system-for-read nnrss-file-coding-system))
581 (file-name-coding-system nnmail-pathname-coding-system))
582 (insert-file-contents file) 599 (insert-file-contents file)
583 (eval-region (point-min) (point-max)))) 600 (eval-region (point-min) (point-max))))
584 (dolist (e nnrss-group-data) 601 (dolist (e nnrss-group-data)
585 (gnus-sethash (or (nth 2 e) (nth 6 e)) t nnrss-group-hashtb) 602 (puthash (nth 9 e) t nnrss-group-hashtb)
586 (when (and (car e) (> nnrss-group-min (car e))) 603 (when (and (car e) (> nnrss-group-min (car e)))
587 (setq nnrss-group-min (car e))) 604 (setq nnrss-group-min (car e)))
588 (when (and (car e) (< nnrss-group-max (car e))) 605 (when (and (car e) (< nnrss-group-max (car e)))
589 (setq nnrss-group-max (car e))))))) 606 (setq nnrss-group-max (car e)))))))
590 607
660 (delete-backward-char 1)) 677 (delete-backward-char 1))
661 (buffer-string))) 678 (buffer-string)))
662 679
663 ;;; Snarf functions 680 ;;; Snarf functions
664 681
682 (defun nnrss-make-hash-index (item)
683 (setq item (gnus-remove-if
684 (lambda (field)
685 (when (listp field)
686 (memq (car field) nnrss-ignore-article-fields)))
687 item))
688 (md5 (gnus-prin1-to-string item)
689 nil nil
690 nnrss-file-coding-system))
691
665 (defun nnrss-check-group (group server) 692 (defun nnrss-check-group (group server)
666 (let (file xml subject url extra changed author date feed-subject 693 (let (file xml subject url extra changed author date feed-subject
667 enclosure comments rss-ns rdf-ns content-ns dc-ns) 694 enclosure comments rss-ns rdf-ns content-ns dc-ns
695 hash-index)
668 (if (and nnrss-use-local 696 (if (and nnrss-use-local
669 (file-exists-p (setq file (expand-file-name 697 (file-exists-p (setq file (expand-file-name
670 (nnrss-translate-file-chars 698 (nnrss-translate-file-chars
671 (concat group ".xml")) 699 (concat group ".xml"))
672 nnrss-directory)))) 700 nnrss-directory))))
694 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/") 722 rss-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/")
695 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/")) 723 content-ns (nnrss-get-namespace-prefix xml "http://purl.org/rss/1.0/modules/content/"))
696 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml))) 724 (dolist (item (nreverse (nnrss-find-el (intern (concat rss-ns "item")) xml)))
697 (when (and (listp item) 725 (when (and (listp item)
698 (string= (concat rss-ns "item") (car item)) 726 (string= (concat rss-ns "item") (car item))
699 (if (setq url (nnrss-decode-entities-string 727 (progn (setq hash-index (nnrss-make-hash-index item))
700 (nnrss-node-text rss-ns 'link (cddr item)))) 728 (not (gethash hash-index nnrss-group-hashtb))))
701 (not (gnus-gethash url nnrss-group-hashtb))
702 (setq extra (or (nnrss-node-text content-ns 'encoded item)
703 (nnrss-node-text rss-ns 'description item)))
704 (not (gnus-gethash extra nnrss-group-hashtb))))
705 (setq subject (nnrss-node-text rss-ns 'title item)) 729 (setq subject (nnrss-node-text rss-ns 'title item))
706 (setq extra (or extra 730 (setq url (nnrss-decode-entities-string
707 (nnrss-node-text content-ns 'encoded item) 731 (nnrss-node-text rss-ns 'link (cddr item))))
732 (setq extra (or (nnrss-node-text content-ns 'encoded item)
708 (nnrss-node-text rss-ns 'description item))) 733 (nnrss-node-text rss-ns 'description item)))
709 (if (setq feed-subject (nnrss-node-text dc-ns 'subject item)) 734 (if (setq feed-subject (nnrss-node-text dc-ns 'subject item))
710 (setq extra (concat feed-subject "<br /><br />" extra))) 735 (setq extra (concat feed-subject "<br /><br />" extra)))
711 (setq author (or (nnrss-node-text rss-ns 'author item) 736 (setq author (or (nnrss-node-text rss-ns 'author item)
712 (nnrss-node-text dc-ns 'creator item) 737 (nnrss-node-text dc-ns 'creator item)
744 (and subject (nnrss-mime-encode-string subject)) 769 (and subject (nnrss-mime-encode-string subject))
745 (and author (nnrss-mime-encode-string author)) 770 (and author (nnrss-mime-encode-string author))
746 date 771 date
747 (and extra (nnrss-decode-entities-string extra)) 772 (and extra (nnrss-decode-entities-string extra))
748 enclosure 773 enclosure
749 comments) 774 comments
775 hash-index)
750 nnrss-group-data) 776 nnrss-group-data)
751 (gnus-sethash (or url extra) t nnrss-group-hashtb) 777 (puthash hash-index t nnrss-group-hashtb)
752 (setq changed t)) 778 (setq changed t))
753 (setq extra nil)) 779 (setq extra nil))
754 (when changed 780 (when changed
755 (nnrss-save-group-data group server) 781 (nnrss-save-group-data group server)
756 (let ((pair (assoc group nnrss-server-data))) 782 (let ((pair (assoc group nnrss-server-data)))
945 BASE-URI is used to determine the location of the links and 971 BASE-URI is used to determine the location of the links and
946 whether they are `offsite' or `onsite'." 972 whether they are `offsite' or `onsite'."
947 (let (rss-onsite-end rdf-onsite-end xml-onsite-end 973 (let (rss-onsite-end rdf-onsite-end xml-onsite-end
948 rss-onsite-in rdf-onsite-in xml-onsite-in 974 rss-onsite-in rdf-onsite-in xml-onsite-in
949 rss-offsite-end rdf-offsite-end xml-offsite-end 975 rss-offsite-end rdf-offsite-end xml-offsite-end
950 rss-offsite-in rdf-offsite-in xml-offsite-in) 976 rss-offsite-in rdf-offsite-in xml-offsite-in)
951 (dolist (href hrefs) 977 (dolist (href hrefs)
952 (cond ((null href)) 978 (cond ((null href))
953 ((string-match "\\.rss$" href) 979 ((string-match "\\.rss$" href)
954 (nnrss-match-macro 980 (nnrss-match-macro
955 base-uri href rss-onsite-end rss-offsite-end)) 981 base-uri href rss-onsite-end rss-offsite-end))