Mercurial > emacs
changeset 40542:93f6c74a2f60
* mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with
the Gnus CVS.
* mm-util.el (mm-mime-mule-charset-alist): Move down and call
mm-coding-system-p. Don't correct it only in XEmacs.
(mm-charset-to-coding-system): Use mm-coding-system-p and
mm-get-coding-system-list.
(mm-emacs-mule, mm-mule4-p): New.
(mm-enable-multibyte, mm-disable-multibyte,
mm-enable-multibyte-mule4, mm-disable-multibyte-mule4,
mm-with-unibyte-current-buffer,
mm-with-unibyte-current-buffer-mule4): Use them.
(mm-find-mime-charset-region): Treat iso-2022-jp.
From Dave Love <fx@gnu.org>:
* mm-util.el (mm-mime-mule-charset-alist): Make it correct by
construction.
(mm-charset-synonym-alist): Remove windows-125[02]. Make other
entries conditional on not having a coding system defined for
them.
(mm-mule-charset-to-mime-charset): Use
find-coding-systems-for-charsets if defined.
(mm-charset-to-coding-system): Don't use
mm-get-coding-system-list. Look in mm-charset-synonym-alist
later. Add last resort search of coding systems.
(mm-enable-multibyte-mule4, mm-disable-multibyte-mule4)
(mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like
Mule 4.
(mm-find-mime-charset-region): Re-write.
(mm-with-unibyte-current-buffer): Restore buffer as well as
multibyteness.
author | ShengHuo ZHU <zsh@cs.rochester.edu> |
---|---|
date | Wed, 31 Oct 2001 04:16:51 +0000 |
parents | b3ba4328511c |
children | 9461cfa8d18d |
files | lisp/gnus/ChangeLog lisp/gnus/mm-util.el lisp/gnus/nnslashdot.el lisp/gnus/nnultimate.el lisp/gnus/nnweb.el |
diffstat | 5 files changed, 738 insertions(+), 442 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/gnus/ChangeLog Wed Oct 31 02:54:33 2001 +0000 +++ b/lisp/gnus/ChangeLog Wed Oct 31 04:16:51 2001 +0000 @@ -1,3 +1,38 @@ +2001-10-30 ShengHuo ZHU <zsh@cs.rochester.edu> + + * mm-util.el, nnultimate.el, nnweb.el, nnslashdot.el: Sync with + the Gnus CVS. + + * mm-util.el (mm-mime-mule-charset-alist): Move down and call + mm-coding-system-p. Don't correct it only in XEmacs. + (mm-charset-to-coding-system): Use mm-coding-system-p and + mm-get-coding-system-list. + (mm-emacs-mule, mm-mule4-p): New. + (mm-enable-multibyte, mm-disable-multibyte, + mm-enable-multibyte-mule4, mm-disable-multibyte-mule4, + mm-with-unibyte-current-buffer, + mm-with-unibyte-current-buffer-mule4): Use them. + (mm-find-mime-charset-region): Treat iso-2022-jp. + + From Dave Love <fx@gnu.org>: + + * mm-util.el (mm-mime-mule-charset-alist): Make it correct by + construction. + (mm-charset-synonym-alist): Remove windows-125[02]. Make other + entries conditional on not having a coding system defined for + them. + (mm-mule-charset-to-mime-charset): Use + find-coding-systems-for-charsets if defined. + (mm-charset-to-coding-system): Don't use + mm-get-coding-system-list. Look in mm-charset-synonym-alist + later. Add last resort search of coding systems. + (mm-enable-multibyte-mule4, mm-disable-multibyte-mule4) + (mm-with-unibyte-current-buffer-mule4): Just treat Mule 5 like + Mule 4. + (mm-find-mime-charset-region): Re-write. + (mm-with-unibyte-current-buffer): Restore buffer as well as + multibyteness. + 2001-10-30 Simon Josefsson <jas@extundo.com> * nnimap.el (nnimap-date-days-ago): Defeat locale.
--- a/lisp/gnus/mm-util.el Wed Oct 31 02:54:33 2001 +0000 +++ b/lisp/gnus/mm-util.el Wed Oct 31 04:16:51 2001 +0000 @@ -1,4 +1,4 @@ -;;; mm-util.el --- utility functions for MIME things +;;; mm-util.el --- Utility functions for Mule and low level things ;; Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> @@ -27,11 +27,145 @@ (eval-when-compile (require 'cl)) (require 'mail-prsvr) +(eval-and-compile + (mapcar + (lambda (elem) + (let ((nfunc (intern (format "mm-%s" (car elem))))) + (if (fboundp (car elem)) + (defalias nfunc (car elem)) + (defalias nfunc (cdr elem))))) + '((decode-coding-string . (lambda (s a) s)) + (encode-coding-string . (lambda (s a) s)) + (encode-coding-region . ignore) + (coding-system-list . ignore) + (decode-coding-region . ignore) + (char-int . identity) + (device-type . ignore) + (coding-system-equal . equal) + (annotationp . ignore) + (set-buffer-file-coding-system . ignore) + (make-char + . (lambda (charset int) + (int-to-char int))) + (read-charset + . (lambda (prompt) + "Return a charset." + (intern + (completing-read + prompt + (mapcar (lambda (e) (list (symbol-name (car e)))) + mm-mime-mule-charset-alist) + nil t)))) + (subst-char-in-string + . (lambda (from to string) ;; stolen (and renamed) from nnheader.el + "Replace characters in STRING from FROM to TO." + (let ((string (substring string 0)) ;Copy string. + (len (length string)) + (idx 0)) + ;; Replace all occurrences of FROM with TO. + (while (< idx len) + (when (= (aref string idx) from) + (aset string idx to)) + (setq idx (1+ idx))) + string))) + (string-as-unibyte . identity) + (string-as-multibyte . identity) + (multibyte-string-p . ignore)))) + +(eval-and-compile + (defalias 'mm-char-or-char-int-p + (cond + ((fboundp 'char-or-char-int-p) 'char-or-char-int-p) + ((fboundp 'char-valid-p) 'char-valid-p) + (t 'identity)))) + +(eval-and-compile + (defalias 'mm-read-coding-system + (cond + ((fboundp 'read-coding-system) + (if (and (featurep 'xemacs) + (<= (string-to-number emacs-version) 21.1)) + (lambda (prompt &optional default-coding-system) + (read-coding-system prompt)) + 'read-coding-system)) + (t (lambda (prompt &optional default-coding-system) + "Prompt the user for a coding system." + (completing-read + prompt (mapcar (lambda (s) (list (symbol-name (car s)))) + mm-mime-mule-charset-alist))))))) + +(defvar mm-coding-system-list nil) +(defun mm-get-coding-system-list () + "Get the coding system list." + (or mm-coding-system-list + (setq mm-coding-system-list (mm-coding-system-list)))) + (defun mm-coding-system-p (sym) "Return non-nil if SYM is a coding system." (or (and (fboundp 'coding-system-p) (coding-system-p sym)) (memq sym (mm-get-coding-system-list)))) +(defvar mm-charset-synonym-alist + `( + ;; Perfectly fine? A valid MIME name, anyhow. + ,(unless (mm-coding-system-p 'big5) + '(big5 . cn-big5)) + ;; Not in XEmacs, but it's not a proper MIME charset anyhow. + ,(unless (mm-coding-system-p 'x-ctext) + '(x-ctext . ctext)) + ;; Apparently not defined in Emacs 20, but is a valid MIME name. + ,(unless (mm-coding-system-p 'gb2312) + '(gb2312 . cn-gb-2312)) + ;; Windows-1252 is actually a superset of Latin-1. See also + ;; `gnus-article-dumbquotes-map'. + ;;,(unless (mm-coding-system-p 'windows-1252) + ; should be defined eventually + ;; '(windows-1252 . iso-8859-1)) + ;; ISO-8859-15 is very similar to ISO-8859-1. + ;;,(unless (mm-coding-system-p 'iso-8859-15) ; Emacs 21 defines it. + ;; '(iso-8859-15 . iso-8859-1)) + ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft + ;; Outlook users in Czech republic. Use this to allow reading of their + ;; e-mails. cp1250 should be defined by M-x codepage-setup. + ;;,(unless (mm-coding-system-p 'windows-1250) + ; should be defined eventually + ;; '(windows-1250 . cp1250)) + ) + "A mapping from invalid charset names to the real charset names.") + +(defvar mm-binary-coding-system + (cond + ((mm-coding-system-p 'binary) 'binary) + ((mm-coding-system-p 'no-conversion) 'no-conversion) + (t nil)) + "100% binary coding system.") + +(defvar mm-text-coding-system + (or (if (memq system-type '(windows-nt ms-dos ms-windows)) + (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos) + (and (mm-coding-system-p 'raw-text) 'raw-text)) + mm-binary-coding-system) + "Text-safe coding system (For removing ^M).") + +(defvar mm-text-coding-system-for-write nil + "Text coding system for write.") + +(defvar mm-auto-save-coding-system + (cond + ((mm-coding-system-p 'emacs-mule) + (if (memq system-type '(windows-nt ms-dos ms-windows)) + (if (mm-coding-system-p 'emacs-mule-dos) + 'emacs-mule-dos mm-binary-coding-system) + 'emacs-mule)) + ((mm-coding-system-p 'escape-quoted) 'escape-quoted) + (t mm-binary-coding-system)) + "Coding system of auto save file.") + +(defvar mm-universal-coding-system mm-auto-save-coding-system + "The universal Coding system.") + +;; Fixme: some of the cars here aren't valid MIME charsets. That +;; should only matter with XEmacs, though. (defvar mm-mime-mule-charset-alist `((us-ascii ascii) (iso-8859-1 latin-iso8859-1) @@ -40,7 +174,7 @@ (iso-8859-4 latin-iso8859-4) (iso-8859-5 cyrillic-iso8859-5) ;; Non-mule (X)Emacs uses the last mule-charset for 8bit characters. - ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default + ;; The fake mule-charset, gnus-koi8-r, tells Gnus that the default ;; charset is koi8-r, not iso-8859-5. (koi8-r cyrillic-iso8859-5 gnus-koi8-r) (iso-8859-6 arabic-iso8859-6) @@ -76,108 +210,32 @@ chinese-cns11643-3 chinese-cns11643-4 chinese-cns11643-5 chinese-cns11643-6 chinese-cns11643-7) - ;; utf-8 comes either from Mule-UCS or Mule 5+. - ,@(if (mm-coding-system-p 'utf-8) - (list (cons 'utf-8 (delete 'ascii - (coding-system-get - 'mule-utf-8 - 'safe-charsets)))))) + ,(if (or (not (fboundp 'charsetp)) ;; non-Mule case + (charsetp 'unicode-a) + (not (mm-coding-system-p 'mule-utf-8))) + '(utf-8 unicode-a unicode-b unicode-c unicode-d unicode-e) + ;; If we have utf-8 we're in Mule 5+. + (append '(utf-8) + (delete 'ascii + (coding-system-get 'mule-utf-8 'safe-charsets))))) "Alist of MIME-charset/MULE-charsets.") -(eval-and-compile - (mapcar - (lambda (elem) - (let ((nfunc (intern (format "mm-%s" (car elem))))) - (if (fboundp (car elem)) - (defalias nfunc (car elem)) - (defalias nfunc (cdr elem))))) - '((decode-coding-string . (lambda (s a) s)) - (encode-coding-string . (lambda (s a) s)) - (encode-coding-region . ignore) - (coding-system-list . ignore) - (decode-coding-region . ignore) - (char-int . identity) - (device-type . ignore) - (coding-system-equal . equal) - (annotationp . ignore) - (set-buffer-file-coding-system . ignore) - (make-char - . (lambda (charset int) - (int-to-char int))) - (read-coding-system - . (lambda (prompt) - "Prompt the user for a coding system." - (completing-read - prompt (mapcar (lambda (s) (list (symbol-name (car s)))) - mm-mime-mule-charset-alist)))) - (read-charset - . (lambda (prompt) - "Return a charset." - (intern - (completing-read - prompt - (mapcar (lambda (e) (list (symbol-name (car e)))) - mm-mime-mule-charset-alist) - nil t)))) - (string-as-unibyte . identity) - (multibyte-string-p . ignore) - ))) - -(eval-and-compile - (defalias 'mm-char-or-char-int-p - (cond - ((fboundp 'char-or-char-int-p) 'char-or-char-int-p) - ((fboundp 'char-valid-p) 'char-valid-p) - (t 'identity)))) - -(defvar mm-coding-system-list nil) -(defun mm-get-coding-system-list () - "Get the coding system list." - (or mm-coding-system-list - (setq mm-coding-system-list (mm-coding-system-list)))) - -(defvar mm-charset-synonym-alist - `((big5 . cn-big5) - (gb2312 . cn-gb-2312) - ;; Windows-1252 is actually a superset of Latin-1. See also - ;; `gnus-article-dumbquotes-map'. - ,(unless (mm-coding-system-p 'windows-1252) ; should be defined eventually - '(windows-1252 . iso-8859-1)) - ;; Windows-1250 is a variant of Latin-2 heavily used by Microsoft - ;; Outlook users in Czech republic. Use this to allow reading of their - ;; e-mails. cp1250 should be defined by M-x codepage-setup. - ,(unless (mm-coding-system-p 'windows-1250) ; should be defined eventually - '(windows-1250 . cp1250)) - (x-ctext . ctext)) - "A mapping from invalid charset names to the real charset names.") - -(defvar mm-binary-coding-system - (cond - ((mm-coding-system-p 'binary) 'binary) - ((mm-coding-system-p 'no-conversion) 'no-conversion) - (t nil)) - "100% binary coding system.") - -(defvar mm-text-coding-system - (or (if (memq system-type '(windows-nt ms-dos ms-windows)) - (and (mm-coding-system-p 'raw-text-dos) 'raw-text-dos) - (and (mm-coding-system-p 'raw-text) 'raw-text)) - mm-binary-coding-system) - "Text-safe coding system (For removing ^M).") - -(defvar mm-text-coding-system-for-write nil - "Text coding system for write.") - -(defvar mm-auto-save-coding-system - (cond - ((mm-coding-system-p 'emacs-mule) - (if (memq system-type '(windows-nt ms-dos ms-windows)) - (if (mm-coding-system-p 'emacs-mule-dos) - 'emacs-mule-dos mm-binary-coding-system) - 'emacs-mule)) - ((mm-coding-system-p 'escape-quoted) 'escape-quoted) - (t mm-binary-coding-system)) - "Coding system of auto save file.") +;; Correct by construction, but should be unnecessary: +;; XEmacs hates it. +(when (and (not (featurep 'xemacs)) + (fboundp 'coding-system-list) + (fboundp 'sort-coding-systems)) + (setq mm-mime-mule-charset-alist + (apply + 'nconc + (mapcar + (lambda (cs) + (when (and (coding-system-get cs 'mime-charset) + (not (eq t (coding-system-get cs 'safe-charsets)))) + (list (cons (coding-system-get cs 'mime-charset) + (delq 'ascii + (coding-system-get cs 'safe-charsets)))))) + (sort-coding-systems (coding-system-list 'base-only)))))) ;;; Internal variables: @@ -185,14 +243,21 @@ (defun mm-mule-charset-to-mime-charset (charset) "Return the MIME charset corresponding to the given Mule CHARSET." - (let ((alist mm-mime-mule-charset-alist) - out) - (while alist - (when (memq charset (cdar alist)) - (setq out (caar alist) - alist nil)) - (pop alist)) - out)) + (if (fboundp 'find-coding-systems-for-charsets) + (let (mime) + (dolist (cs (find-coding-systems-for-charsets (list charset))) + (unless mime + (when cs + (setq mime (coding-system-get cs 'mime-charset))))) + mime) + (let ((alist mm-mime-mule-charset-alist) + out) + (while alist + (when (memq charset (cdar alist)) + (setq out (caar alist) + alist nil)) + (pop alist)) + out))) (defun mm-charset-to-coding-system (charset &optional lbt) "Return coding-system corresponding to CHARSET. @@ -201,9 +266,6 @@ used as the line break code type of the coding system." (when (stringp charset) (setq charset (intern (downcase charset)))) - (setq charset - (or (cdr (assq charset mm-charset-synonym-alist)) - charset)) (when lbt (setq charset (intern (format "%s-%s" charset lbt)))) (cond @@ -215,58 +277,73 @@ 'ascii) ;; Check to see whether we can handle this charset. (This depends ;; on there being some coding system matching each `mime-charset' - ;; coding sysytem property defined, as there should be.) - ((memq charset (mm-get-coding-system-list)) + ;; property defined, as there should be.) + ((and (mm-coding-system-p charset) +;;; Doing this would potentially weed out incorrect charsets. +;;; charset +;;; (eq charset (coding-system-get charset 'mime-charset)) + ) + charset) + ;; Translate invalid charsets. + ((mm-coding-system-p (setq charset + (cdr (assq charset + mm-charset-synonym-alist)))) charset) - ;; Nope. - (t - nil))) + ;; Last resort: search the coding system list for entries which + ;; have the right mime-charset in case the canonical name isn't + ;; defined (though it should be). + ((let (cs) + ;; mm-get-coding-system-list returns a list of cs without lbt. + ;; Do we need -lbt? + (dolist (c (mm-get-coding-system-list)) + (if (and (null cs) + (eq charset (coding-system-get c 'mime-charset))) + (setq cs c))) + cs)))) -(if (fboundp 'subst-char-in-string) - (defsubst mm-replace-chars-in-string (string from to) - (subst-char-in-string from to string)) - (defun mm-replace-chars-in-string (string from to) - "Replace characters in STRING from FROM to TO." - (let ((string (substring string 0)) ;Copy string. - (len (length string)) - (idx 0)) - ;; Replace all occurrences of FROM with TO. - (while (< idx len) - (when (= (aref string idx) from) - (aset string idx to)) - (setq idx (1+ idx))) - string))) +(defsubst mm-replace-chars-in-string (string from to) + (mm-subst-char-in-string from to string)) -(defsubst mm-enable-multibyte () - "Set the multibyte flag of the current buffer. +(eval-and-compile + (defvar mm-emacs-mule (and (not (featurep 'xemacs)) + (boundp 'default-enable-multibyte-characters) + default-enable-multibyte-characters + (fboundp 'set-buffer-multibyte)) + "Emacs mule.") + + (defvar mm-mule4-p (and mm-emacs-mule + (fboundp 'charsetp) + (not (charsetp 'eight-bit-control))) + "Mule version 4.") + + (if mm-emacs-mule + (defun mm-enable-multibyte () + "Set the multibyte flag of the current buffer. Only do this if the default value of `enable-multibyte-characters' is non-nil. This is a no-op in XEmacs." - (when (and (fboundp 'set-buffer-multibyte) - (boundp 'enable-multibyte-characters) - (default-value 'enable-multibyte-characters)) - (set-buffer-multibyte t))) + (set-buffer-multibyte t)) + (defalias 'mm-enable-multibyte 'ignore)) -(defsubst mm-disable-multibyte () - "Unset the multibyte flag of in the current buffer. + (if mm-emacs-mule + (defun mm-disable-multibyte () + "Unset the multibyte flag of in the current buffer. This is a no-op in XEmacs." - (when (fboundp 'set-buffer-multibyte) - (set-buffer-multibyte nil))) + (set-buffer-multibyte nil)) + (defalias 'mm-disable-multibyte 'ignore)) -(defsubst mm-enable-multibyte-mule4 () - "Enable multibyte in the current buffer. + (if mm-mule4-p + (defun mm-enable-multibyte-mule4 () + "Enable multibyte in the current buffer. Only used in Emacs Mule 4." - (when (and (fboundp 'set-buffer-multibyte) - (boundp 'enable-multibyte-characters) - (default-value 'enable-multibyte-characters) - (not (charsetp 'eight-bit-control))) - (set-buffer-multibyte t))) - -(defsubst mm-disable-multibyte-mule4 () - "Disable multibyte in the current buffer. + (set-buffer-multibyte t)) + (defalias 'mm-enable-multibyte-mule4 'ignore)) + + (if mm-mule4-p + (defun mm-disable-multibyte-mule4 () + "Disable multibyte in the current buffer. Only used in Emacs Mule 4." - (when (and (fboundp 'set-buffer-multibyte) - (not (charsetp 'eight-bit-control))) - (set-buffer-multibyte nil))) + (set-buffer-multibyte nil)) + (defalias 'mm-disable-multibyte-mule4 'ignore))) (defun mm-preferred-coding-system (charset) ;; A typo in some Emacs versions. @@ -294,10 +371,10 @@ (progn (setq mail-parse-mule-charset (and (boundp 'current-language-environment) - (car (last - (assq 'charset - (assoc current-language-environment - language-info-alist)))))) + (car (last + (assq 'charset + (assoc current-language-environment + language-info-alist)))))) (if (or (not mail-parse-mule-charset) (eq mail-parse-mule-charset 'ascii)) (setq mail-parse-mule-charset @@ -309,6 +386,8 @@ (defun mm-mime-charset (charset) "Return the MIME charset corresponding to the given Mule CHARSET." + (if (eq charset 'unknown) + (error "The message contains non-printable characters, please use attachment")) (if (and (fboundp 'coding-system-get) (fboundp 'get-charset-property)) ;; This exists in Emacs 20. (or @@ -317,6 +396,7 @@ (mm-preferred-coding-system charset) 'mime-charset)) (and (eq charset 'ascii) 'us-ascii) + (mm-preferred-coding-system charset) (mm-mule-charset-to-mime-charset charset)) ;; This is for XEmacs. (mm-mule-charset-to-mime-charset charset))) @@ -330,21 +410,8 @@ (setq result (cons head result))) (nreverse result))) -(defun mm-find-mime-charset-region (b e) - "Return the MIME charsets needed to encode the region between B and E." - (let ((charsets (mapcar 'mm-mime-charset - (delq 'ascii - (mm-find-charset-region b e))))) - (when (memq 'iso-2022-jp-2 charsets) - (setq charsets (delq 'iso-2022-jp charsets))) - (setq charsets (mm-delete-duplicates charsets)) - (if (and (> (length charsets) 1) - (fboundp 'find-coding-systems-region) - (let ((cs (find-coding-systems-region b e))) - (or (memq 'utf-8 cs) (memq 'mule-utf-8 cs)))) - '(utf-8) - charsets))) - +;; It's not clear whether this is supposed to mean the global or local +;; setting. I think it's used inconsistently. -- fx (defsubst mm-multibyte-p () "Say whether multibyte is enabled." (if (and (not (featurep 'xemacs)) @@ -352,6 +419,39 @@ enable-multibyte-characters (featurep 'mule))) +(defun mm-find-mime-charset-region (b e) + "Return the MIME charsets needed to encode the region between B and E. +Nil means ASCII, a single-element list represents an appropriate MIME +charset, and a longer list means no appropriate charset." + ;; The return possibilities of this function are a mess... + (or (and + (mm-multibyte-p) + (fboundp 'find-coding-systems-region) + ;; Find the mime-charset of the most preferred coding + ;; system that has one. + (let ((systems (find-coding-systems-region b e)) + result) + ;; Fixme: The `mime-charset' (`x-ctext') of `compound-text' + ;; is not in the IANA list. + (setq systems (delq 'compound-text systems)) + (unless (equal systems '(undecided)) + (while systems + (let ((cs (coding-system-get (pop systems) 'mime-charset))) + (if cs + (setq systems nil + result (list cs)))))) + result)) + ;; Otherwise we're not multibyte, XEmacs or a single coding + ;; system won't cover it. + (let ((charsets + (mm-delete-duplicates + (mapcar 'mm-mime-charset + (delq 'ascii + (mm-find-charset-region b e)))))) + (if (memq 'iso-2022-jp-2 charsets) + (delq 'iso-2022-jp charsets) + charsets)))) + (defmacro mm-with-unibyte-buffer (&rest forms) "Create a temporary buffer, and evaluate FORMS there like `progn'. Use unibyte mode for this." @@ -364,15 +464,18 @@ "Evaluate FORMS with current current buffer temporarily made unibyte. Also bind `default-enable-multibyte-characters' to nil. Equivalent to `progn' in XEmacs" - (let ((multibyte (make-symbol "multibyte"))) - `(if (fboundp 'set-buffer-multibyte) - (let ((,multibyte enable-multibyte-characters)) + (let ((multibyte (make-symbol "multibyte")) + (buffer (make-symbol "buffer"))) + `(if mm-emacs-mule + (let ((,multibyte enable-multibyte-characters) + (,buffer (current-buffer))) (unwind-protect (let (default-enable-multibyte-characters) (set-buffer-multibyte nil) ,@forms) + (set-buffer ,buffer) (set-buffer-multibyte ,multibyte))) - (progn + (let (default-enable-multibyte-characters) ,@forms)))) (put 'mm-with-unibyte-current-buffer 'lisp-indent-function 0) (put 'mm-with-unibyte-current-buffer 'edebug-form-spec '(body)) @@ -380,22 +483,19 @@ (defmacro mm-with-unibyte-current-buffer-mule4 (&rest forms) "Evaluate FORMS there like `progn' in current buffer. Mule4 only." - (let ((multibyte (make-symbol "multibyte"))) - `(if (or (featurep 'xemacs) - (not (fboundp 'set-buffer-multibyte)) - (charsetp 'eight-bit-control)) ;; For Emacs Mule 4 only. - (progn - ,@forms) - (let ((,multibyte (default-value 'enable-multibyte-characters))) - (unwind-protect - (let ((buffer-file-coding-system mm-binary-coding-system) - (coding-system-for-read mm-binary-coding-system) - (coding-system-for-write mm-binary-coding-system)) - (set-buffer-multibyte nil) - (setq-default enable-multibyte-characters nil) - ,@forms) - (setq-default enable-multibyte-characters ,multibyte) - (set-buffer-multibyte ,multibyte)))))) + (let ((multibyte (make-symbol "multibyte")) + (buffer (make-symbol "buffer"))) + `(if mm-mule4-p + (let ((,multibyte enable-multibyte-characters) + (,buffer (current-buffer))) + (unwind-protect + (let (default-enable-multibyte-characters) + (set-buffer-multibyte nil) + ,@forms) + (set-buffer ,buffer) + (set-buffer-multibyte ,multibyte))) + (let (default-enable-multibyte-characters) + ,@forms)))) (put 'mm-with-unibyte-current-buffer-mule4 'lisp-indent-function 0) (put 'mm-with-unibyte-current-buffer-mule4 'edebug-form-spec '(body)) @@ -410,9 +510,14 @@ "Return a list of Emacs charsets in the region B to E." (cond ((and (mm-multibyte-p) - (fboundp 'find-charset-region)) + (fboundp 'find-charset-region)) ;; Remove composition since the base charsets have been included. - (delq 'composition (find-charset-region b e))) + ;; Remove eight-bit-*, treat them as ascii. + (let ((css (find-charset-region b e))) + (mapcar (lambda (cs) (setq css (delq cs css))) + '(composition eight-bit-control eight-bit-graphic + control-1)) + css)) (t ;; We are in a unibyte buffer or XEmacs non-mule, so we futz around a bit. (save-excursion @@ -425,8 +530,8 @@ (let (charset) (setq charset (and (boundp 'current-language-environment) - (car (last (assq 'charset - (assoc current-language-environment + (car (last (assq 'charset + (assoc current-language-environment language-info-alist)))))) (if (eq charset 'ascii) (setq charset nil)) (or charset @@ -476,15 +581,15 @@ (auto-mode-alist (if inhibit nil (mm-auto-mode-alist))) (default-major-mode 'fundamental-mode) (enable-local-variables nil) - (after-insert-file-functions nil) + (after-insert-file-functions nil) (enable-local-eval nil) (find-file-hooks nil) - (inhibit-file-name-operation (if inhibit + (inhibit-file-name-operation (if inhibit 'insert-file-contents inhibit-file-name-operation)) (inhibit-file-name-handlers (if inhibit - (append mm-inhibit-file-name-handlers + (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) (insert-file-contents filename visit beg end replace))) @@ -497,37 +602,47 @@ Optional fourth argument specifies the coding system to use when encoding the file. If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers." - (let ((coding-system-for-write - (or codesys mm-text-coding-system-for-write + (let ((coding-system-for-write + (or codesys mm-text-coding-system-for-write mm-text-coding-system)) - (inhibit-file-name-operation (if inhibit + (inhibit-file-name-operation (if inhibit 'append-to-file inhibit-file-name-operation)) (inhibit-file-name-handlers (if inhibit - (append mm-inhibit-file-name-handlers + (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) (append-to-file start end filename))) -(defun mm-write-region (start end filename &optional append visit lockname +(defun mm-write-region (start end filename &optional append visit lockname coding-system inhibit) "Like `write-region'. If INHIBIT is non-nil, inhibit mm-inhibit-file-name-handlers." - (let ((coding-system-for-write - (or coding-system mm-text-coding-system-for-write + (let ((coding-system-for-write + (or coding-system mm-text-coding-system-for-write mm-text-coding-system)) - (inhibit-file-name-operation (if inhibit + (inhibit-file-name-operation (if inhibit 'write-region inhibit-file-name-operation)) (inhibit-file-name-handlers (if inhibit - (append mm-inhibit-file-name-handlers + (append mm-inhibit-file-name-handlers inhibit-file-name-handlers) inhibit-file-name-handlers))) (write-region start end filename append visit lockname))) +(defun mm-image-load-path (&optional package) + (let (dir result) + (dolist (path load-path (nreverse result)) + (if (file-directory-p + (setq dir (concat (file-name-directory + (directory-file-name path)) + "etc/" (or package "gnus/")))) + (push dir result)) + (push path result)))) + (provide 'mm-util) ;;; mm-util.el ends here
--- a/lisp/gnus/nnslashdot.el Wed Oct 31 02:54:33 2001 +0000 +++ b/lisp/gnus/nnslashdot.el Wed Oct 31 04:16:51 2001 +0000 @@ -1,5 +1,5 @@ ;;; nnslashdot.el --- interfacing with Slashdot -;; Copyright (C) 1999, 2000 Free Software Foundation, Inc. +;; Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> ;; Keywords: news @@ -57,6 +57,9 @@ "http://slashdot.org/article.pl?sid=%s&mode=nocomment" "Where nnslashdot will fetch the article from.") +(defvoo nnslashdot-backslash-url "http://slashdot.org/slashdot.xml" + "Where nnslashdot will fetch the stories from.") + (defvoo nnslashdot-threshold -1 "The article threshold.") @@ -86,19 +89,17 @@ (nnslashdot-possibly-change-server group server) (condition-case why (unless gnus-nov-is-evil - (if nnslashdot-threaded - (nnslashdot-threaded-retrieve-headers articles group) - (nnslashdot-sane-retrieve-headers articles group))) + (nnslashdot-retrieve-headers-1 articles group)) (search-failed (nnslashdot-lose why)))) -(deffoo nnslashdot-threaded-retrieve-headers (articles group) - (let ((last (car (last articles))) - (did nil) - (start 1) - (sid (caddr (assoc group nnslashdot-groups))) - (first-comments t) - (startats '(1)) - headers article subject score from date lines parent point s) +(deffoo nnslashdot-retrieve-headers-1 (articles group) + (let* ((last (car (last articles))) + (start (if nnslashdot-threaded 1 (pop articles))) + (entry (assoc group nnslashdot-groups)) + (sid (nth 2 entry)) + (first-comments t) + headers article subject score from date lines parent point cid + s startats changed) (save-excursion (set-buffer nnslashdot-buffer) (let ((case-fold-search t)) @@ -107,10 +108,10 @@ (nnweb-insert (format nnslashdot-article-url (nnslashdot-sid-strip sid)) t) (goto-char (point-min)) - (search-forward "Posted by ") - (when (looking-at "<a[^>]+>\\([^<]+\\)") - (setq from (nnweb-decode-entities-string (match-string 1)))) - (search-forward " on ") + (re-search-forward "Posted by[ \t\r\n]+") + (when (looking-at "\\(<a[^>]+>\\)?[ \t\r\n]*\\([^<\r\n]+\\)") + (setq from (nnweb-decode-entities-string (match-string 2)))) + (search-forward "on ") (setq date (nnslashdot-date-to-date (buffer-substring (point) (1- (search-forward "<"))))) (setq lines (/ (- (point) @@ -123,16 +124,16 @@ 1 group from date (concat "<" (nnslashdot-sid-strip sid) "%1@slashdot>") "" 0 lines nil nil)) - headers)) - (while (and (setq start (pop startats)) - (< start last)) + headers) + (setq start (if nnslashdot-threaded 2 (pop articles)))) + (while (and start (<= start last)) (setq point (goto-char (point-max))) (nnweb-insert (format nnslashdot-comments-url (nnslashdot-sid-strip sid) - nnslashdot-threshold 0 start) + nnslashdot-threshold 0 (- start 2)) t) - (when first-comments + (when (and nnslashdot-threaded first-comments) (setq first-comments nil) (goto-char (point-max)) (while (re-search-backward "startat=\\([0-9]+\\)" nil t) @@ -140,58 +141,68 @@ (unless (memq s startats) (push s startats))) (setq startats (sort startats '<))) + (setq article (if (and article (< start article)) article start)) (goto-char point) (while (re-search-forward "<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))" nil t) - (setq article (string-to-number (match-string 1)) + (setq cid (match-string 1) subject (match-string 3) score (match-string 5)) + (unless (assq article (nth 4 entry)) + (setcar (nthcdr 4 entry) (cons (cons article cid) (nth 4 entry))) + (setq changed t)) (when (string-match "^Re: *" subject) (setq subject (concat "Re: " (substring subject (match-end 0))))) - (setq subject (nnweb-decode-entities-string subject)) - (forward-line 1) + (setq subject (nnweb-decode-entities-string subject)) + (search-forward "<BR>") (if (looking-at - "by <a[^>]+>\\([^<]+\\)</a>[ \t\n]*.*(\\([^)]+\\))") + "by[ \t\n]+<a[^>]+>\\([^<]+\\)</a>[ \t\n]*(\\(<[^>]+>\\)*\\([^<>)]+\\))") (progn (goto-char (- (match-end 0) 5)) - (setq from (concat + (setq from (concat (nnweb-decode-entities-string (match-string 1)) - " <" (match-string 2) ">"))) + " <" (match-string 3) ">"))) (setq from "") - (when (looking-at "by \\(.+\\) on ") + (when (looking-at "by \\([^<>]*\\) on ") (goto-char (- (match-end 0) 5)) (setq from (nnweb-decode-entities-string (match-string 1))))) (search-forward " on ") (setq date (nnslashdot-date-to-date - (buffer-substring (point) (progn (end-of-line) (point))))) - (setq lines (/ (abs (- (search-forward "<td ") + (buffer-substring (point) (progn (skip-chars-forward "^()<>\n\r") (point))))) + (setq lines (/ (abs (- (search-forward "<td") (search-forward "</td>"))) 70)) - (forward-line 4) - (setq parent - (if (looking-at ".*cid=\\([0-9]+\\)") - (match-string 1) - nil)) - (setq did t) + (if (not + (re-search-forward ".*cid=\\([0-9]+\\)\">Parent</A>" nil t)) + (setq parent nil) + (setq parent (match-string 1)) + (when (string= parent "0") + (setq parent nil))) (push (cons - (1+ article) + article (make-full-mail-header - (1+ article) + article (concat subject " (" score ")") from date - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ article)) - "@slashdot>") + (concat "<" (nnslashdot-sid-strip sid) "%" cid "@slashdot>") (if parent - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ (string-to-number parent))) - "@slashdot>") + (concat "<" (nnslashdot-sid-strip sid) "%" + parent "@slashdot>") "") 0 lines nil nil)) - headers))))) + headers) + (while (and articles (<= (car articles) article)) + (pop articles)) + (setq article (1+ article))) + (if nnslashdot-threaded + (progn + (setq start (pop startats)) + (if start (setq start (+ start 2)))) + (setq start (pop articles)))))) + (if changed (nnslashdot-write-groups)) (setq nnslashdot-headers (sort headers 'car-less-than-car)) (save-excursion (set-buffer nntp-server-buffer) @@ -201,108 +212,6 @@ (nnheader-insert-nov (cdr header))))) 'nov)) -(deffoo nnslashdot-sane-retrieve-headers (articles group) - (let ((last (car (last articles))) - (did nil) - (start (max (1- (car articles)) 1)) - (sid (caddr (assoc group nnslashdot-groups))) - headers article subject score from date lines parent point) - (save-excursion - (set-buffer nnslashdot-buffer) - (erase-buffer) - (when (= start 1) - (nnweb-insert (format nnslashdot-article-url - (nnslashdot-sid-strip sid)) t) - (goto-char (point-min)) - (search-forward "Posted by ") - (when (looking-at "<a[^>]+>\\([^<]+\\)") - (setq from (nnweb-decode-entities-string (match-string 1)))) - (search-forward " on ") - (setq date (nnslashdot-date-to-date - (buffer-substring (point) (1- (search-forward "<"))))) - (forward-line 2) - (setq lines (count-lines (point) - (re-search-forward - "A href=\"\\(http://slashdot.org\\)?/article"))) - (push - (cons - 1 - (make-full-mail-header - 1 group from date (concat "<" (nnslashdot-sid-strip sid) - "%1@slashdot>") - "" 0 lines nil nil)) - headers)) - (while (or (not article) - (and did - (< article last))) - (when article - (setq start (1+ article))) - (setq point (goto-char (point-max))) - (nnweb-insert - (format nnslashdot-comments-url (nnslashdot-sid-strip sid) - nnslashdot-threshold 4 start) - t) - (goto-char point) - (while (re-search-forward - "<a name=\"\\([0-9]+\\)\"><\\(b\\|H4\\)>\\([^<]+\\)</\\(b\\|H4\\)>.*score:\\([^)]+\\))" - nil t) - (setq article (string-to-number (match-string 1)) - subject (match-string 3) - score (match-string 5)) - (when (string-match "^Re: *" subject) - (setq subject (concat "Re: " (substring subject (match-end 0))))) - (setq subject (nnweb-decode-entities-string subject)) - (forward-line 1) - (if (looking-at - "by <a[^>]+>\\([^<]+\\)</a>[ \t\n]*.*(\\([^)]+\\))") - (progn - (goto-char (- (match-end 0) 5)) - (setq from (concat - (nnweb-decode-entities-string (match-string 1)) - " <" (match-string 2) ">"))) - (setq from "") - (when (looking-at "by \\(.+\\) on ") - (goto-char (- (match-end 0) 5)) - (setq from (nnweb-decode-entities-string (match-string 1))))) - (search-forward " on ") - (setq date - (nnslashdot-date-to-date - (buffer-substring (point) (progn (end-of-line) (point))))) - (setq lines (/ (abs (- (search-forward "<td ") - (search-forward "</td>"))) - 70)) - (forward-line 2) - (setq parent - (if (looking-at ".*cid=\\([0-9]+\\)") - (match-string 1) - nil)) - (setq did t) - (push - (cons - (1+ article) - (make-full-mail-header - (1+ article) (concat subject " (" score ")") - from date - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ article)) - "@slashdot>") - (if parent - (concat "<" (nnslashdot-sid-strip sid) "%" - (number-to-string (1+ (string-to-number parent))) - "@slashdot>") - "") - 0 lines nil nil)) - headers)))) - (setq nnslashdot-headers - (sort headers (lambda (s1 s2) (< (car s1) (car s2))))) - (save-excursion - (set-buffer nntp-server-buffer) - (erase-buffer) - (mm-with-unibyte-current-buffer - (dolist (header nnslashdot-headers) - (nnheader-insert-nov (cdr header))))) - 'nov)) - (deffoo nnslashdot-request-group (group &optional server dont-check) (nnslashdot-possibly-change-server nil server) (let ((elem (assoc group nnslashdot-groups))) @@ -325,7 +234,7 @@ (deffoo nnslashdot-request-article (article &optional group server buffer) (nnslashdot-possibly-change-server group server) - (let (contents) + (let (contents cid) (condition-case why (save-excursion (set-buffer nnslashdot-buffer) @@ -333,23 +242,32 @@ (goto-char (point-min)) (when (and (stringp article) (string-match "%\\([0-9]+\\)@" article)) - (setq article (string-to-number (match-string 1 article)))) + (setq cid (match-string 1 article)) + (let ((map (nth 4 (assoc group nnslashdot-groups)))) + (while map + (if (equal (cdar map) cid) + (setq article (caar map) + map nil) + (setq map (cdr map)))))) (when (numberp article) (if (= article 1) (progn - (re-search-forward "Posted by *<[^>]+>[^>]*<[^>]+> *on ") + (re-search-forward + "Posted by") (search-forward "<BR>") (setq contents (buffer-substring (point) (progn (re-search-forward - "<p>.*A href=\"\\(http://slashdot.org\\)?/article") + "< [ \t\r\n]*<A HREF=\"\\(\\(http:\\)?//slashdot\\.org\\)?/article") (match-beginning 0))))) - (search-forward (format "<a name=\"%d\">" (1- article))) + (setq cid (cdr (assq article + (nth 4 (assoc group nnslashdot-groups))))) + (search-forward (format "<a name=\"%s\">" cid)) (setq contents (buffer-substring - (re-search-forward "<td[^>]+>") + (re-search-forward "<td[^>]*>") (search-forward "</td>"))))))) (search-failed (nnslashdot-lose why))) @@ -384,10 +302,10 @@ (let ((number 0) sid elem description articles gname) (condition-case why - ;; First we do the Ultramode to get info on all the latest groups. - (progn + ;; First we do the Ultramode to get info on all the latest groups. + (progn (mm-with-unibyte-buffer - (nnweb-insert "http://slashdot.org/slashdot.xml" t) + (nnweb-insert nnslashdot-backslash-url t) (goto-char (point-min)) (while (search-forward "<story>" nil t) (narrow-to-region (point) (search-forward "</story>")) @@ -404,7 +322,8 @@ (setq gname (concat description " (" sid ")")) (if (setq elem (assoc gname nnslashdot-groups)) (setcar (cdr elem) articles) - (push (list gname articles sid) nnslashdot-groups)) + (push (list gname articles sid (current-time) nil) + nnslashdot-groups)) (goto-char (point-max)) (widen))) ;; Then do the older groups. @@ -425,13 +344,14 @@ (setq gname (concat description " (" sid ")")) (if (setq elem (assoc gname nnslashdot-groups)) (setcar (cdr elem) articles) - (push (list gname articles sid) nnslashdot-groups))))) + (push (list gname articles sid (current-time) nil) + nnslashdot-groups))))) (incf number 30))) (search-failed (nnslashdot-lose why))) (nnslashdot-write-groups) (nnslashdot-generate-active) t)) - + (deffoo nnslashdot-request-newgroups (date &optional server) (nnslashdot-possibly-change-server nil server) (nnslashdot-generate-active) @@ -496,6 +416,24 @@ (setq nnslashdot-headers nil nnslashdot-groups nil)) +(deffoo nnslashdot-request-expire-articles + (articles group &optional server force) + (nnslashdot-possibly-change-server group server) + (let ((item (assoc group nnslashdot-groups))) + (when item + (if (fourth item) + (when (and (>= (length articles) (cadr item)) ;; All are expirable. + (nnmail-expired-article-p + group + (fourth item) + force)) + (setq nnslashdot-groups (delq item nnslashdot-groups)) + (nnslashdot-write-groups) + (setq articles nil)) ;; all expired. + (setcdr (cddr item) (list (current-time))) + (nnslashdot-write-groups)))) + articles) + (nnoo-define-skeleton nnslashdot) ;;; Internal functions @@ -508,18 +446,32 @@ (unless nnslashdot-groups (nnslashdot-read-groups))) +(defun nnslashdot-make-tuple (tuple n) + (prog1 + tuple + (while (> n 1) + (unless (cdr tuple) + (setcdr tuple (list nil))) + (setq tuple (cdr tuple) + n (1- n))))) + (defun nnslashdot-read-groups () (let ((file (expand-file-name "groups" nnslashdot-directory))) (when (file-exists-p file) (mm-with-unibyte-buffer (insert-file-contents file) (goto-char (point-min)) - (setq nnslashdot-groups (read (current-buffer))))))) + (setq nnslashdot-groups (read (current-buffer)))) + (if (and nnslashdot-groups (< (length (car nnslashdot-groups)) 5)) + (let ((groups nnslashdot-groups)) + (while groups + (nnslashdot-make-tuple (car groups) 5) + (setq groups (cdr groups)))))))) (defun nnslashdot-write-groups () (with-temp-file (expand-file-name "groups" nnslashdot-directory) - (prin1 nnslashdot-groups (current-buffer)))) - + (gnus-prin1 nnslashdot-groups))) + (defun nnslashdot-init (server) "Initialize buffers and such." (unless (file-exists-p nnslashdot-directory) @@ -528,7 +480,8 @@ (setq nnslashdot-buffer (save-excursion (nnheader-set-temp-buffer - (format " *nnslashdot %s*" server)))))) + (format " *nnslashdot %s*" server)))) + (push nnslashdot-buffer gnus-buffers))) (defun nnslashdot-date-to-date (sdate) (condition-case err @@ -552,11 +505,6 @@ (defun nnslashdot-lose (why) (error "Slashdot HTML has changed; please get a new version of nnslashdot")) -;(defun nnslashdot-sid-strip (sid) -; (if (string-match "^00/" sid) -; (substring sid (match-end 0)) -; sid)) - (defalias 'nnslashdot-sid-strip 'identity) (provide 'nnslashdot)
--- a/lisp/gnus/nnultimate.el Wed Oct 31 02:54:33 2001 +0000 +++ b/lisp/gnus/nnultimate.el Wed Oct 31 04:16:51 2001 +0000 @@ -56,6 +56,8 @@ (defvoo nnultimate-groups nil) (defvoo nnultimate-headers nil) (defvoo nnultimate-articles nil) +(defvar nnultimate-table-regexp + "postings.*editpost\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio") ;;; Interface functions @@ -74,13 +76,17 @@ (old-total (or (nth 6 entry) 1)) (furl "forumdisplay.cgi?action=topics&number=%d&DaysPrune=1000") (furls (list (concat nnultimate-address (format furl sid)))) + (nnultimate-table-regexp + "postings.*editpost\\|forumdisplay\\|getbio") headers article subject score from date lines parent point contents tinfo fetchers map elem a href garticles topic old-max - inc datel table string current-page total-contents pages + inc datel table current-page total-contents pages farticles forum-contents parse furl-fetched mmap farticle) (setq map mapping) (while (and (setq article (car articles)) map) + ;; Skip past the articles in the map until we reach the + ;; article we're looking for. (while (and map (or (> article (caar map)) (< (cadar map) (caar map)))) @@ -101,7 +107,7 @@ fetchers)) (pop articles) (setq article (car articles))))) - ;; Now we have the mapping from/to Gnus/nnultimate article numbers, + ;; Now we have the mapping from/to Gnus/nnultimate article numbers, ;; so we start fetching the topics that we need to satisfy the ;; request. (if (not fetchers) @@ -128,22 +134,27 @@ (setq contents (ignore-errors (w3-parse-buffer (current-buffer)))) (setq table (nnultimate-find-forum-table contents)) - (setq string (mapconcat 'identity (nnweb-text table) "")) - (when (string-match "topic is \\([0-9]\\) pages" string) - (setq pages (string-to-number (match-string 1 string))) - (setcdr table nil) - (setq table (nnultimate-find-forum-table contents))) + (goto-char (point-min)) + (when (re-search-forward "topic is \\([0-9]+\\) pages" nil t) + (setq pages (string-to-number (match-string 1)))) (setq contents (cdr (nth 2 (car (nth 2 table))))) (setq total-contents (nconc total-contents contents)) (incf current-page)) - ;;(setq total-contents (nreverse total-contents)) - (dolist (art (cdr elem)) - (if (not (nth (1- (cdr art)) total-contents)) - () ;(debug) - (push (list (car art) - (nth (1- (cdr art)) total-contents) - subject) - nnultimate-articles))))) + (when t + (let ((i 0)) + (dolist (co total-contents) + (push (list (or (nnultimate-topic-article-to-article + group (car elem) (incf i)) + 1) + co subject) + nnultimate-articles)))) + (when nil + (dolist (art (cdr elem)) + (when (nth (1- (cdr art)) total-contents) + (push (list (car art) + (nth (1- (cdr art)) total-contents) + subject) + nnultimate-articles)))))) (setq nnultimate-articles (sort nnultimate-articles 'car-less-than-car)) ;; Now we have all the articles, conveniently in an alist @@ -161,17 +172,26 @@ (setq date (substring (car datel) (match-end 0)) datel nil)) (pop datel)) - (setq date (delete "" (split-string date "[- \n\t\r ]"))) - (if (or (member "AM" date) - (member "PM" date)) + (when date + (setq date (delete "" (split-string + date "[-, \n\t\r ]"))) + (if (or (member "AM" date) + (member "PM" date)) + (setq date (format + "%s %s %s %s" + (nth 1 date) + (if (and (>= (length (nth 0 date)) 3) + (assoc (downcase + (substring (nth 0 date) 0 3)) + parse-time-months)) + (substring (nth 0 date) 0 3) + (car (rassq (string-to-number (nth 0 date)) + parse-time-months))) + (nth 2 date) (nth 3 date))) (setq date (format "%s %s %s %s" - (car (rassq (string-to-number (nth 0 date)) + (car (rassq (string-to-number (nth 1 date)) parse-time-months)) - (nth 1 date) (nth 2 date) (nth 3 date))) - (setq date (format "%s %s %s %s" - (car (rassq (string-to-number (nth 1 date)) - parse-time-months)) - (nth 0 date) (nth 2 date) (nth 3 date)))) + (nth 0 date) (nth 2 date) (nth 3 date))))) (push (cons article @@ -180,7 +200,7 @@ from (or date "") (concat "<" (number-to-string sid) "%" (number-to-string article) - "@ultimate>") + "@ultimate." server ">") "" 0 (/ (length (mapconcat 'identity @@ -199,6 +219,16 @@ (nnheader-insert-nov (cdr header)))))) 'nov))) +(defun nnultimate-topic-article-to-article (group topic article) + (catch 'found + (dolist (elem (nth 5 (assoc group nnultimate-groups))) + (when (and (= topic (nth 2 elem)) + (>= article (nth 3 elem)) + (< article (+ (- (nth 1 elem) (nth 0 elem)) 1 + (nth 3 elem)))) + (throw 'found + (+ (nth 0 elem) (- article (nth 3 elem)))))))) + (deffoo nnultimate-request-group (group &optional server dont-check) (nnultimate-possibly-change-server nil server) (when (not nnultimate-groups) @@ -330,7 +360,7 @@ ;; the group is entered, there's 2 new articles in topic one ;; and 1 in topic three. Then Gnus article number 8-9 be 5-6 ;; in topic one and 10 will be the 2 in topic three. - (dolist (row (reverse forum-contents)) + (dolist (row (nreverse forum-contents)) (setq row (nth 2 row)) (when (setq a (nnweb-parse-find 'a row)) (setq subject (car (last (nnweb-text a))) @@ -403,7 +433,7 @@ nnultimate-groups-alist) (with-temp-file (expand-file-name "groups" nnultimate-directory) (prin1 nnultimate-groups-alist (current-buffer)))) - + (defun nnultimate-init (server) "Initialize buffers and such." (unless (file-exists-p nnultimate-directory) @@ -438,9 +468,7 @@ (nth 2 parse)))) (let ((href (cdr (assq 'href (nth 1 (nnweb-parse-find 'a parse 20))))) case-fold-search) - (when (and href (string-match - "postings\\|forumdisplay\\|Forum[0-9]+/HTML\\|getbio" - href)) + (when (and href (string-match nnultimate-table-regexp href)) t)))) (provide 'nnultimate)
--- a/lisp/gnus/nnweb.el Wed Oct 31 02:54:33 2001 +0000 +++ b/lisp/gnus/nnweb.el Wed Oct 31 04:16:51 2001 +0000 @@ -1,5 +1,5 @@ ;;; nnweb.el --- retrieving articles via web search engines -;; Copyright (C) 1996, 1997, 1998, 1999, 2000 +;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001 ;; Free Software Foundation, Inc. ;; Author: Lars Magne Ingebrigtsen <larsi@gnus.org> @@ -55,25 +55,48 @@ (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/") "Where nnweb will save its files.") -(defvoo nnweb-type 'dejanews +(defvoo nnweb-type 'google "What search engine type is being used. -Valid types include `dejanews', `dejanewsold', `reference', +Valid types include `google', `dejanews', `dejanewsold', `reference', and `altavista'.") (defvar nnweb-type-definition - '((dejanews + '( + (google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") + (article . ignore) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) + (dejanews ;; alias of google + ;;(article . nnweb-google-wash-article) + ;;(id . "http://groups.google.com/groups?as_umsgid=%s") (article . ignore) - (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanews-search) - (address . "http://www.deja.com/=dnc/qs.xp") - (identifier . nnweb-dejanews-identity)) - (dejanewsold - (article . ignore) - (map . nnweb-dejanews-create-mapping) - (search . nnweb-dejanewsold-search) - (address . "http://www.deja.com/dnquery.xp") - (identifier . nnweb-dejanews-identity)) + (id . "http://groups.google.com/groups?selm=%s&output=gplain") + ;;(reference . nnweb-google-reference) + (reference . identity) + (map . nnweb-google-create-mapping) + (search . nnweb-google-search) + (address . "http://groups.google.com/groups") + (identifier . nnweb-google-identity)) +;;; (dejanews +;;; (article . ignore) +;;; (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text") +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanews-search) +;;; (address . "http://www.deja.com/=dnc/qs.xp") +;;; (identifier . nnweb-dejanews-identity)) +;;; (dejanewsold +;;; (article . ignore) +;;; (map . nnweb-dejanews-create-mapping) +;;; (search . nnweb-dejanewsold-search) +;;; (address . "http://www.deja.com/dnquery.xp") +;;; (identifier . nnweb-dejanews-identity)) (reference (article . nnweb-reference-wash-article) (map . nnweb-reference-create-mapping) @@ -124,6 +147,8 @@ (deffoo nnweb-request-scan (&optional group server) (nnweb-possibly-change-server group server) + (if nnweb-ephemeral-p + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (funcall (nnweb-definition 'map)) (unless nnweb-ephemeral-p (nnweb-write-active) @@ -134,9 +159,10 @@ (when (and group (not (equal group nnweb-group)) (not nnweb-ephemeral-p)) + (setq nnweb-group group + nnweb-articles nil) (let ((info (assoc group nnweb-group-alist))) (when info - (setq nnweb-group group) (setq nnweb-type (nth 2 info)) (setq nnweb-search (nth 3 info)) (unless dont-check @@ -175,17 +201,19 @@ (and (stringp article) (nnweb-definition 'id t) (let ((fetch (nnweb-definition 'id)) - art) + art active) (when (string-match "^<\\(.*\\)>$" article) (setq art (match-string 1 article))) - (and fetch - art - (mm-with-unibyte-current-buffer - (nnweb-fetch-url - (format fetch article))))))) + (when (and fetch art) + (setq url (format fetch art)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url url)) + (if (nnweb-definition 'reference t) + (setq article + (funcall (nnweb-definition + 'reference) article))))))) (unless nnheader-callback-function - (funcall (nnweb-definition 'article)) - (nnweb-decode-entities)) + (funcall (nnweb-definition 'article))) (nnheader-report 'nnweb "Fetched article %s" article) (cons group (and (numberp article) article)))))) @@ -290,10 +318,11 @@ (nnweb-open-server server))) (unless nnweb-group-alist (nnweb-read-active)) + (unless nnweb-hashtb + (setq nnweb-hashtb (gnus-make-hashtable 4095))) (when group (when (and (not nnweb-ephemeral-p) - (not (equal group nnweb-group))) - (setq nnweb-hashtb (gnus-make-hashtable 4095)) + (equal group nnweb-group)) (nnweb-request-group group nil t)))) (defun nnweb-init (server) @@ -393,7 +422,7 @@ (car (rassq (string-to-number (match-string 2 date)) parse-time-months)) - (match-string 3 date) + (match-string 3 date) (match-string 1 date))) (setq date "Jan 1 00:00:00 0000")) (incf i) @@ -559,6 +588,7 @@ (while (search-forward "," nil t) (replace-match " " t t))) (widen) + (nnweb-decode-entities) (set-marker body nil)))) (defun nnweb-reference-search (search) @@ -663,7 +693,8 @@ (while (re-search-forward "<A.*\\?id@\\([^\"]+\\)\">[0-9]+</A>" nil t) (replace-match "<\\1> " t))) (widen) - (nnweb-remove-markup))) + (nnweb-remove-markup) + (nnweb-decode-entities))) (defun nnweb-altavista-search (search &optional part) (url-insert-file-contents @@ -683,13 +714,147 @@ t) ;;; +;;; Deja bought by google.com +;;; + +(defun nnweb-google-wash-article () + (let ((case-fold-search t) url) + (goto-char (point-min)) + (re-search-forward "^<pre>" nil t) + (narrow-to-region (point-min) (point)) + (search-backward "<table " nil t 2) + (delete-region (point-min) (point)) + (if (re-search-forward "Search Result [0-9]+" nil t) + (replace-match "")) + (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t) + (replace-match "")) + (goto-char (point-min)) + (while (search-forward "<br>" nil t) + (replace-match "\n")) + (nnweb-remove-markup) + (goto-char (point-min)) + (while (re-search-forward "^[ \t]*\n" nil t) + (replace-match "")) + (goto-char (point-max)) + (insert "\n") + (widen) + (narrow-to-region (point) (point-max)) + (search-forward "</pre>" nil t) + (delete-region (point) (point-max)) + (nnweb-remove-markup) + (widen))) + +(defun nnweb-google-parse-1 (&optional Message-ID) + (let ((i 0) + (case-fold-search t) + (active (cadr (assoc nnweb-group nnweb-group-alist))) + Subject Score Date Newsgroups From + map url mid) + (unless active + (push (list nnweb-group (setq active (cons 1 0)) + nnweb-type nnweb-search) + nnweb-group-alist)) + ;; Go through all the article hits on this page. + (goto-char (point-min)) + (while (re-search-forward + "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t) + (setq mid (match-string 2) + url (format + "http://groups.google.com/groups?selm=%s&output=gplain" mid)) + (narrow-to-region (search-forward ">" nil t) + (search-forward "</a>" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Subject (buffer-string)) + (goto-char (point-max)) + (widen) + (forward-line 1) + (when (looking-at "<br><font[^>]+>") + (goto-char (match-end 0))) + (if (not (looking-at "<a[^>]+>")) + (skip-chars-forward " \t") + (narrow-to-region (point) + (search-forward "</a>" nil t)) + (nnweb-remove-markup) + (nnweb-decode-entities) + (setq Newsgroups (buffer-string)) + (goto-char (point-max)) + (widen) + (skip-chars-forward "- \t")) + (when (looking-at + "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - <a") + (setq From (match-string 2) + Date (match-string 1))) + (forward-line 1) + (incf i) + (unless (nnweb-get-hashtb url) + (push + (list + (incf (cdr active)) + (make-full-mail-header + (cdr active) (if Newsgroups + (concat "(" Newsgroups ") " Subject) + Subject) + From Date (or Message-ID mid) + nil 0 0 url)) + map) + (nnweb-set-hashtb (cadar map) (car map)))) + map)) + +(defun nnweb-google-reference (id) + (let ((map (nnweb-google-parse-1 id)) header) + (setq nnweb-articles + (nconc nnweb-articles map)) + (when (setq header (cadar map)) + (mm-with-unibyte-current-buffer + (nnweb-fetch-url (mail-header-xref header))) + (caar map)))) + +(defun nnweb-google-create-mapping () + "Perform the search and create an number-to-url alist." + (save-excursion + (set-buffer nnweb-buffer) + (erase-buffer) + (when (funcall (nnweb-definition 'search) nnweb-search) + (let ((more t)) + (while more + (setq nnweb-articles + (nconc nnweb-articles (nnweb-google-parse-1))) + ;; FIXME: There is more. + (setq more nil)) + ;; Return the articles in the right order. + (setq nnweb-articles + (sort nnweb-articles 'car-less-than-car)))))) + +(defun nnweb-google-search (search) + (nnweb-insert + (concat + (nnweb-definition 'address) + "?" + (nnweb-encode-www-form-urlencoded + `(("q" . ,search) + ("num". "100") + ("hq" . "") + ("hl" . "") + ("lr" . "") + ("safe" . "off") + ("sites" . "groups"))))) + t) + +(defun nnweb-google-identity (url) + "Return an unique identifier based on URL." + (if (string-match "selm=\\([^ &>]+\\)" url) + (match-string 1 url) + url)) + +;;; ;;; General web/w3 interface utility functions ;;; (defun nnweb-insert-html (parse) "Insert HTML based on a w3 parse tree." (if (stringp parse) - (insert parse) + (insert (nnheader-string-as-multibyte parse)) (insert "<" (symbol-name (car parse)) " ") (insert (mapconcat (lambda (param) @@ -729,7 +894,7 @@ (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t) (let ((elem (if (eq (aref (match-string 1) 0) ?\#) (let ((c - (string-to-number (substring + (string-to-number (substring (match-string 1) 1)))) (if (mm-char-or-char-int-p c) c 32)) (or (cdr (assq (intern (match-string 1)) @@ -739,9 +904,9 @@ (setq elem (char-to-string elem))) (replace-match elem t t)))) -(defun nnweb-decode-entities-string (str) +(defun nnweb-decode-entities-string (string) (with-temp-buffer - (insert str) + (insert string) (nnweb-decode-entities) (buffer-substring (point-min) (point-max)))) @@ -760,12 +925,12 @@ "Insert the contents from an URL in the current buffer. If FOLLOW-REFRESH is non-nil, redirect refresh url in META." (let ((name buffer-file-name)) - (if follow-refresh + (if follow-refresh (save-restriction (narrow-to-region (point) (point)) (url-insert-file-contents url) (goto-char (point-min)) - (when (re-search-forward + (when (re-search-forward "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" nil t) (let ((url (match-string 1))) (delete-region (point-min) (point-max)) @@ -822,6 +987,11 @@ (listp (cdr element))) (nnweb-text-1 element))))) +(defun nnweb-replace-in-string (string match newtext) + (while (string-match match string) + (setq string (replace-match newtext t t string))) + string) + (provide 'nnweb) ;;; nnweb.el ends here