Mercurial > emacs
changeset 111842:265c545c8f48
Decode extra numeric entities.
mm-util.el (mm-extra-numeric-entities): New variable.
mm-url.el (mm-url-decode-entities):
mm-decode.el (mm-shr): Use it to decode extra numeric entities.
author | Katsumi Yamaoka <yamaoka@jpl.org> |
---|---|
date | Tue, 07 Dec 2010 05:06:56 +0000 |
parents | 25e3c2636c1f |
children | 94c9743593b9 |
files | lisp/gnus/ChangeLog lisp/gnus/mm-decode.el lisp/gnus/mm-url.el lisp/gnus/mm-util.el |
diffstat | 4 files changed, 53 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
--- a/lisp/gnus/ChangeLog Mon Dec 06 21:01:00 2010 -0500 +++ b/lisp/gnus/ChangeLog Tue Dec 07 05:06:56 2010 +0000 @@ -1,3 +1,10 @@ +2010-12-07 Katsumi Yamaoka <yamaoka@jpl.org> + + * mm-util.el (mm-extra-numeric-entities): New variable. + + * mm-url.el (mm-url-decode-entities): + * mm-decode.el (mm-shr): Use it to decode extra numeric entities. + 2010-12-07 Stefan Monnier <monnier@iro.umontreal.ca> * message.el: Use completion-at-point.
--- a/lisp/gnus/mm-decode.el Mon Dec 06 21:01:00 2010 -0500 +++ b/lisp/gnus/mm-decode.el Tue Dec 07 05:06:56 2010 +0000 @@ -1699,7 +1699,7 @@ (when handle (mm-with-part handle (buffer-string)))))) - shr-inhibit-images shr-blocked-images charset) + shr-inhibit-images shr-blocked-images charset char) (if (and (boundp 'gnus-summary-buffer) (buffer-name gnus-summary-buffer)) (with-current-buffer gnus-summary-buffer @@ -1714,13 +1714,25 @@ (narrow-to-region (point) (point)) (shr-insert-document (mm-with-part handle - (when (and charset - (setq charset (mm-charset-to-coding-system charset)) - (not (eq charset 'ascii))) - (insert (prog1 - (mm-decode-coding-string (buffer-string) charset) - (erase-buffer) - (mm-enable-multibyte)))) + (insert (prog1 + (if (and charset + (setq charset + (mm-charset-to-coding-system charset)) + (not (eq charset 'ascii))) + (mm-decode-coding-string (buffer-string) charset) + (mm-string-as-multibyte (buffer-string))) + (erase-buffer) + (mm-enable-multibyte))) + (goto-char (point-min)) + (setq case-fold-search t) + (while (re-search-forward + "&#\\(?:x\\([89][0-9a-f]\\)\\|\\(1[2-5][0-9]\\)\\);" nil t) + (when (setq char + (cdr (assq (if (match-beginning 1) + (string-to-number (match-string 1) 16) + (string-to-number (match-string 2))) + mm-extra-numeric-entities))) + (replace-match (char-to-string char)))) (libxml-parse-html-region (point-min) (point-max)))) (mm-handle-set-undisplayer handle
--- a/lisp/gnus/mm-url.el Mon Dec 06 21:01:00 2010 -0500 +++ b/lisp/gnus/mm-url.el Tue Dec 07 05:06:56 2010 +0000 @@ -365,16 +365,19 @@ (defun mm-url-decode-entities () "Decode all HTML entities." (goto-char (point-min)) - (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" nil t) + (while (re-search-forward "&\\(#[0-9]+\\|#x[0-9a-f]+\\|[a-z]+[0-9]*\\);" + nil t) (let* ((entity (match-string 1)) (elem (if (eq (aref entity 0) ?\#) - (let ((c (mm-ucs-to-char - ;; Hex number: &#x3212; - (if (eq (aref entity 1) ?x) - (string-to-number (substring entity 2) - 16) - ;; Decimal number: &#12; - (string-to-number (substring entity 1)))))) + (let ((c + ;; Hex number: &#x3212; + (if (eq (aref entity 1) ?x) + (string-to-number (substring entity 2) + 16) + ;; Decimal number: &#12; + (string-to-number (substring entity 1))))) + (setq c (or (cdr (assq c mm-extra-numeric-entities)) + (mm-ucs-to-char c))) (if (mm-char-or-char-int-p c) c ?#)) (or (cdr (assq (intern entity) mm-url-html-entities))
--- a/lisp/gnus/mm-util.el Mon Dec 06 21:01:00 2010 -0500 +++ b/lisp/gnus/mm-util.el Tue Dec 07 05:06:56 2010 +0000 @@ -866,6 +866,21 @@ Setting it to nil is useful on Emacsen supporting Unicode if sending mail with multiple parts is preferred to sending a Unicode one.") +(defvar mm-extra-numeric-entities + (mapcar + (lambda (item) + (cons (car item) (mm-ucs-to-char (cdr item)))) + '((#x80 . #x20AC) (#x82 . #x201A) (#x83 . #x0192) (#x84 . #x201E) + (#x85 . #x2026) (#x86 . #x2020) (#x87 . #x2021) (#x88 . #x02C6) + (#x89 . #x2030) (#x8A . #x0160) (#x8B . #x2039) (#x8C . #x0152) + (#x8E . #x017D) (#x91 . #x2018) (#x92 . #x2019) (#x93 . #x201C) + (#x94 . #x201D) (#x95 . #x2022) (#x96 . #x2013) (#x97 . #x2014) + (#x98 . #x02DC) (#x99 . #x2122) (#x9A . #x0161) (#x9B . #x203A) + (#x9C . #x0153) (#x9E . #x017E) (#x9F . #x0178))) + "*Alist of extra numeric entities and characters other than ISO 10646. +This table is used for decoding extra numeric entities to characters, +like \"&#128;\" to the euro sign, mainly in html messages.") + ;;; Internal variables: ;;; Functions: